Skip to content

Commit

Permalink
easier debugging for integration
Browse files Browse the repository at this point in the history
  • Loading branch information
fs-eire committed Feb 19, 2025
1 parent a4dfd1a commit 81d2223
Show file tree
Hide file tree
Showing 9 changed files with 222 additions and 9 deletions.
3 changes: 2 additions & 1 deletion cmake/onnxruntime_webassembly.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -443,7 +443,8 @@ jsepDownload:_pp_")
"SHELL:-s ASSERTIONS=0"
"SHELL:-s SAFE_HEAP=0"
"SHELL:-s STACK_OVERFLOW_CHECK=0"
--closure 1
## the closure compiler is disabled so that the generated JavaScript is easier to debug
# --closure 1
)
endif()

Expand Down
97 changes: 93 additions & 4 deletions cmake/patches/dawn/dawn.patch
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,74 @@ index efd6491cd6..8ebc5d28b6 100644
emcmake cmake -GNinja -DDAWN_EMSCRIPTEN_TOOLCHAIN="path/to/emscripten" ../..

ninja
diff --git a/third_party/emdawnwebgpu/library_webgpu.js b/third_party/emdawnwebgpu/library_webgpu.js
index 5862ce4045..45df259bb7 100644
--- a/third_party/emdawnwebgpu/library_webgpu.js
+++ b/third_party/emdawnwebgpu/library_webgpu.js
@@ -811,6 +811,61 @@ var LibraryWebGPU = {
{{{ runtimeKeepalivePush() }}}
WebGPU.Internals.futureInsert(futureId, adapter.requestDevice(desc).then((device) => {
{{{ runtimeKeepalivePop() }}}
+
+ if (globalThis["WEBGPU_STAT"]) {
+   // Debug-only statistics; collected only when globalThis.WEBGPU_STAT is truthy at device creation.
+   // `buffers` caches every buffer that was created through the proxy and has not been destroyed yet.
+   const buffers = WebGPU.Internals.buffers ??= new Set();
+   // key is buffer usage, value is total size of live buffers with that usage
+   const buffersTotalSize = WebGPU.Internals.buffersTotalSize ??= new Map();
+
+   WebGPU.Internals.buffersCreated ??= 0;
+   WebGPU.Internals.buffersDestroyed ??= 0;
+   WebGPU.Internals.buffersUploads ??= 0;
+   WebGPU.Internals.buffersExternalUploads ??= 0;
+   WebGPU.Internals.buffersDownloads ??= 0;
+   WebGPU.Internals.buffersExternalDownloads ??= 0;
+
+   // wrap the device in a proxy so that buffer creation and destruction can be monitored
+   device = new Proxy(device, {
+     get: (target, prop, _receiver) => {
+       if (prop === 'createBuffer') {
+         // intercept device.createBuffer() so that the returned buffer is added into `buffers`
+         return (desc) => {
+           const buffer = target.createBuffer(desc);
+           const originalDestroy = buffer.destroy.bind(buffer);
+           buffer.destroy = () => {
+             // destroy() may be called more than once on a buffer; only the first call updates the stats
+             if (buffers.delete(buffer)) {
+               const previousTotal = buffersTotalSize.get(buffer.usage) ?? 0;
+               buffersTotalSize.set(buffer.usage, previousTotal - buffer.size);
+               WebGPU.Internals.buffersDestroyed++;
+             }
+             originalDestroy();
+           };
+
+           if (buffer.usage === (GPUBufferUsage.MAP_WRITE | GPUBufferUsage.COPY_SRC)) {
+             WebGPU.Internals.buffersUploads++;
+           }
+           if (buffer.usage === (GPUBufferUsage.COPY_DST | GPUBufferUsage.MAP_READ)) {
+             WebGPU.Internals.buffersDownloads++;
+           }
+
+           buffers.add(buffer);
+           const previousTotal = buffersTotalSize.get(buffer.usage) ?? 0;
+           buffersTotalSize.set(buffer.usage, previousTotal + buffer.size);
+           WebGPU.Internals.buffersCreated++;
+           return buffer;
+         };
+       }
+       // every other property is forwarded; functions are bound to the real device, not to the proxy
+       const propertyValue = Reflect.get(target, prop);
+       return typeof propertyValue === 'function' ? propertyValue.bind(target) : propertyValue;
+     },
+     set: (target, prop, value, _receiver) => Reflect.set(target, prop, value),
+   });
+ }
+
WebGPU.Internals.jsObjectInsert(queuePtr, device.queue);
WebGPU.Internals.jsObjectInsert(devicePtr, device);

diff --git a/third_party/emdawnwebgpu/webgpu.cpp b/third_party/emdawnwebgpu/webgpu.cpp
index ca52b1237b..b11462fb87 100644
index ca52b1237b..a30ca583c3 100644
--- a/third_party/emdawnwebgpu/webgpu.cpp
+++ b/third_party/emdawnwebgpu/webgpu.cpp
@@ -131,7 +131,6 @@ class RefCounted : NonMovable {
Expand All @@ -62,7 +128,14 @@ index ca52b1237b..b11462fb87 100644

void Destroy();
const void* GetConstMappedRange(size_t offset, size_t size);
@@ -1168,7 +1169,11 @@ WGPUBuffer emwgpuCreateBuffer(const EventSource* source,
@@ -1164,11 +1165,17 @@ WGPUAdapter emwgpuCreateAdapter(const EventSource* source) {

WGPUBuffer emwgpuCreateBuffer(const EventSource* source,
bool mappedAtCreation = false) {
- return new WGPUBufferImpl(source, mappedAtCreation);
+ auto x = new WGPUBufferImpl(source, mappedAtCreation);
+ // printf(" #C++: emwgpuCreateBuffer %p\n", x);
+ return x;
}

WGPUDevice emwgpuCreateDevice(const EventSource* source, WGPUQueue queue) {
Expand All @@ -75,7 +148,7 @@ index ca52b1237b..b11462fb87 100644
}

WGPUQueue emwgpuCreateQueue(const EventSource* source) {
@@ -1284,6 +1289,10 @@ WGPUBufferImpl::WGPUBufferImpl(const EventSource* source, bool mappedAtCreation)
@@ -1284,6 +1291,10 @@ WGPUBufferImpl::WGPUBufferImpl(const EventSource* source, bool mappedAtCreation)
}
}

Expand All @@ -86,11 +159,27 @@ index ca52b1237b..b11462fb87 100644
void WGPUBufferImpl::Destroy() {
emwgpuBufferDestroy(this);
AbortPendingMap("Buffer was destroyed before mapping was resolved.");
@@ -1504,6 +1513,7 @@ WGPUFuture WGPUShaderModuleImpl::GetCompilationInfo(
@@ -1504,6 +1515,7 @@ WGPUFuture WGPUShaderModuleImpl::GetCompilationInfo(
void wgpu##Name##Release(WGPU##Name o) { \
if (o->Release()) { \
delete o; \
+ emwgpuDelete(o); \
} \
}
WGPU_OBJECTS(DEFINE_WGPU_DEFAULT_ADDREF_RELEASE)
@@ -1587,6 +1599,7 @@ WGPUFuture wgpuAdapterRequestDevice(
// ----------------------------------------------------------------------------

void wgpuBufferDestroy(WGPUBuffer buffer) {
+ // printf(" #C++: wgpuBufferDestroy %p\n", buffer);
buffer->Destroy();
}

@@ -1639,6 +1652,7 @@ void wgpuBufferUnmap(WGPUBuffer buffer) {
WGPUBuffer wgpuDeviceCreateBuffer(WGPUDevice device,
const WGPUBufferDescriptor* descriptor) {
WGPUBuffer buffer = new WGPUBufferImpl(device, descriptor->mappedAtCreation);
+ // printf(" #C++: wgpuDeviceCreateBuffer %p\n", buffer);
emwgpuDeviceCreateBuffer(device, descriptor, buffer);
return buffer;
}
72 changes: 72 additions & 0 deletions js/web/lib/wasm/jsep/backend-webgpu.ts
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,16 @@ export class WebGpuBackend {
private queryTimeBase?: bigint;
queryType: TimestampQuery;

// Buffer statistics (debugging aid). Except for `buffersExternalDownloads`, the visible code only updates these
// when `globalThis.WEBGPU_STAT` is truthy at the time the device is created.

// buffers created through the device that have not been destroyed yet
buffers = new Set();
// key is buffer usage, value is total size of the tracked buffers with that usage
buffersTotalSize = new Map();

// number of buffers created through the device
buffersCreated = 0;
// number of tracked buffers whose destroy() was called
buffersDestroyed = 0;
// number of created buffers whose usage is exactly MAP_WRITE | COPY_SRC
buffersUploads = 0;
// NOTE(review): never updated in the visible code — presumably kept for parity with the WebGPU EP counters; confirm
buffersExternalUploads = 0;
// number of created buffers whose usage is exactly COPY_DST | MAP_READ
buffersDownloads = 0;
// incremented by the downloader closure after each downloadGpuData() call resolves
buffersExternalDownloads = 0;

env: Env;
sessionStatus: SessionState = 'default';
/**
Expand Down Expand Up @@ -280,6 +290,67 @@ export class WebGpuBackend {
}

this.device = await adapter.requestDevice(deviceDescriptor);

// @ts-expect-error Element implicitly has an 'any' type because type 'typeof globalThis' has no index signature.ts(7017)
if (globalThis.WEBGPU_STAT) {
  // Debug-only buffer statistics: wrap the device in a proxy so that every buffer created through
  // device.createBuffer() is tracked until it is destroyed.
  const buffers = this.buffers;
  const buffersTotalSize = this.buffersTotalSize;

  const buffersUploadsIncrement = () => {
    this.buffersUploads++;
  };
  const buffersDownloadsIncrement = () => {
    this.buffersDownloads++;
  };
  const buffersCreatedIncrement = () => {
    this.buffersCreated++;
  };
  const buffersDestroyedIncrement = () => {
    this.buffersDestroyed++;
  };

  this.device = new Proxy(this.device, {
    get: (target, prop, _receiver) => {
      if (prop === 'createBuffer') {
        // intercept device.createBuffer() so that the returned buffer is added into `buffers`
        return (desc: GPUBufferDescriptor) => {
          const buffer = target.createBuffer(desc);
          const originalDestroy = buffer.destroy.bind(buffer);
          buffer.destroy = () => {
            // destroy() may be called more than once on the same buffer; only the first call updates
            // the statistics, otherwise the size would be subtracted and the counter bumped twice.
            if (buffers.delete(buffer)) {
              const previousTotal = buffersTotalSize.get(buffer.usage) ?? 0;
              buffersTotalSize.set(buffer.usage, previousTotal - buffer.size);
              buffersDestroyedIncrement();
            }
            originalDestroy();
          };

          // a buffer with exactly this usage is counted as an upload buffer
          // eslint-disable-next-line no-bitwise
          if (buffer.usage === (GPUBufferUsage.MAP_WRITE | GPUBufferUsage.COPY_SRC)) {
            buffersUploadsIncrement();
          }
          // a buffer with exactly this usage is counted as a download buffer
          // eslint-disable-next-line no-bitwise
          if (buffer.usage === (GPUBufferUsage.COPY_DST | GPUBufferUsage.MAP_READ)) {
            buffersDownloadsIncrement();
          }

          buffers.add(buffer);
          const previousTotal = buffersTotalSize.get(buffer.usage) ?? 0;
          buffersTotalSize.set(buffer.usage, previousTotal + buffer.size);
          buffersCreatedIncrement();
          return buffer;
        };
      }
      // every other property is forwarded; functions are bound to the real device, not to the proxy
      const propertyValue = Reflect.get(target, prop);
      return typeof propertyValue === 'function' ? propertyValue.bind(target) : propertyValue;
    },
    set: (target, prop, value, _receiver) => Reflect.set(target, prop, value),
  });
}

this.deviceInfo = new DeviceInfoImpl(this.device);
this.adapterInfo = new AdapterInfoImpl(adapter.info || (await adapter.requestAdapterInfo()));
this.gpuDataManager = createGpuDataManager(this);
Expand Down Expand Up @@ -844,6 +915,7 @@ export class WebGpuBackend {
): () => Promise<Tensor.DataType> {
return async () => {
const data = await downloadGpuData(this, gpuBuffer, size);
this.buffersExternalDownloads++;
return createView(data.buffer, type);
};
}
Expand Down
13 changes: 13 additions & 0 deletions js/web/lib/wasm/wasm-core-impl.ts
Original file line number Diff line number Diff line change
Expand Up @@ -260,6 +260,8 @@ export const createSession = async (
let modelDataOffset: number, modelDataLength: number;
const wasm = getInstance();

wasm.webgpuStat?.('createSession_start');

if (Array.isArray(modelData)) {
// if model data is an array, it must be a 2-elements tuple containing the pointer and size of the model data
[modelDataOffset, modelDataLength] = modelData;
Expand Down Expand Up @@ -327,6 +329,7 @@ export const createSession = async (
}

wasm.jsepOnCreateSession?.();
wasm.webgpuStat?.('createSession_end');

// clear current MLContext after session creation
if (wasm.currentContext) {
Expand Down Expand Up @@ -436,6 +439,7 @@ export const createSession = async (

export const releaseSession = (sessionId: number): void => {
const wasm = getInstance();
wasm.webgpuStat?.('releaseSession_start');
const session = activeSessions.get(sessionId);
if (!session) {
throw new Error(`cannot release session. invalid session id: ${sessionId}`);
Expand All @@ -462,6 +466,8 @@ export const releaseSession = (sessionId: number): void => {
checkLastError("Can't release session.");
}
activeSessions.delete(sessionId);

wasm.webgpuStat?.('releaseSession_end');
};

export const prepareInputOutputTensor = async (
Expand Down Expand Up @@ -633,6 +639,8 @@ export const run = async (
const outputValuesOffset = wasm.stackAlloc(outputCount * ptrSize);
const outputNamesOffset = wasm.stackAlloc(outputCount * ptrSize);

wasm.webgpuStat?.('run_start');

try {
[runOptionsHandle, runOptionsAllocs] = setRunOptions(options);

Expand Down Expand Up @@ -722,6 +730,7 @@ export const run = async (
}

wasm.jsepOnRunStart?.(sessionHandle);
//wasm.webgpuStat?.('run_beforeAPI');

let errorCode: number;
if (!BUILD_DEFS.DISABLE_JSEP && ioBindingState) {
Expand All @@ -745,6 +754,8 @@ export const run = async (
);
}

//wasm.webgpuStat?.('run_afterAPI');

if (errorCode !== 0) {
checkLastError('failed to call OrtRun().');
}
Expand Down Expand Up @@ -926,6 +937,8 @@ export const run = async (
false,
]);
}
wasm.webgpuStat?.('run_end');

return output;
} finally {
wasm.stackRestore(beforeRunStack);
Expand Down
1 change: 1 addition & 0 deletions js/web/lib/wasm/wasm-types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -294,6 +294,7 @@ export declare namespace WebGpu {
webgpuUnregisterBuffer(buffer: GPUBuffer): void;
webgpuGetBuffer(bufferHandle: number): GPUBuffer;
webgpuCreateDownloader(gpuBuffer: GPUBuffer, size: number, sessionHandle: number): () => Promise<ArrayBuffer>;
webgpuStat(label?: string): void;
}
}

Expand Down
2 changes: 1 addition & 1 deletion js/web/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@
"import": "./dist/ort.node.min.mjs",
"require": "./dist/ort.node.min.js"
},
"import": "./dist/ort.bundle.min.mjs",
"import": "./dist/ort.bundle.mjs",
"require": "./dist/ort.min.js"
},
"./all": {
Expand Down
15 changes: 12 additions & 3 deletions js/web/script/build.ts
Original file line number Diff line number Diff line change
Expand Up @@ -46,14 +46,17 @@ const DEBUG = process.env.npm_config_debug || args.debug; // boolean|'verbose'|'

/**
* --webgpu-ep
* --no-webgpu-ep (default)
* --no-webgpu-ep
*
* Enable or disable the use of WebGPU EP. If enabled, the WebGPU EP will be used. If disabled, the WebGPU backend will
* be used with JSEP.
*
* The default value is not set. If not set, onnxruntime-web will determine whether to use WebGPU EP or JSEP based on
* the environment (globalThis.WEBGPU_EP).
*
* (temporary) This flag is used to test the WebGPU EP integration. It will be removed in the future.
*/
const USE_WEBGPU_EP = process.env.npm_config_webgpu_ep ?? args['webgpu-ep'] ?? true;
const USE_WEBGPU_EP = process.env.npm_config_webgpu_ep ?? args['webgpu-ep'];

/**
* Root folder of the source code: `<ORT_ROOT>/js/`
Expand All @@ -69,7 +72,7 @@ const DEFAULT_DEFINE = {
'BUILD_DEFS.DISABLE_WASM': 'false',
'BUILD_DEFS.DISABLE_WASM_PROXY': 'false',
'BUILD_DEFS.ENABLE_BUNDLE_WASM_JS': 'false',
'BUILD_DEFS.USE_WEBGPU_EP': JSON.stringify(!!USE_WEBGPU_EP),
'BUILD_DEFS.USE_WEBGPU_EP': USE_WEBGPU_EP === undefined ? 'globalThis.WEBGPU_EP' : JSON.stringify(!!USE_WEBGPU_EP),

'BUILD_DEFS.IS_ESM': 'false',
'BUILD_DEFS.ESM_IMPORT_META_URL': 'undefined',
Expand Down Expand Up @@ -601,6 +604,12 @@ async function main() {
outputName: 'ort',
define: { ...DEFAULT_DEFINE, 'BUILD_DEFS.DISABLE_WEBGL': 'true' },
});
// ort.bundle.mjs
await buildOrt({
outputName: 'ort.bundle',
format: 'esm',
define: { ...DEFAULT_DEFINE, 'BUILD_DEFS.DISABLE_WEBGL': 'true', 'BUILD_DEFS.ENABLE_BUNDLE_WASM_JS': 'true' },
});
// ort.bundle.min.mjs
await buildOrt({
isProduction: true,
Expand Down
19 changes: 19 additions & 0 deletions onnxruntime/wasm/post-webgpu.js
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,8 @@ Module["webgpuInit"] = (setDefaultDevice) => {

await gpuReadBuffer.mapAsync(GPUMapMode.READ);

WebGPU.Internals.buffersExternalDownloads++;

const arrayBuffer = gpuReadBuffer.getMappedRange();
return arrayBuffer.slice(0, originalSize);
} finally {
Expand Down Expand Up @@ -258,6 +260,23 @@ Module["webgpuInit"] = (setDefaultDevice) => {
size
);
webgpuCurrentDevice.queue.submit([commandEncoder.finish()]);
WebGPU.Internals.buffersExternalUploads++;
gpuBufferForUploading.destroy();
};

// Logs a one-line snapshot of the WebGPU buffer statistics to the console.
//
// label: optional tag printed in front of the snapshot (for example 'run_start'); when it is omitted an
//        empty tag is printed instead of "undefined".
//
// Nothing is printed unless globalThis.WEBGPU_STAT is truthy. Statistics that are null or undefined are
// reported as 0.
Module["webgpuStat"] = (label) => {
  if (!globalThis["WEBGPU_STAT"]) {
    return;
  }

  const stats = WebGPU.Internals;
  const fields = [
    `BufferCount: ${stats.buffers?.size ?? 0}`,
    `Created: ${stats.buffersCreated ?? 0}`,
    `Destroyed: ${stats.buffersDestroyed ?? 0}`,
    `Uploads: ${stats.buffersUploads ?? 0}`,
    `Downloads: ${stats.buffersDownloads ?? 0}`,
    `ExtUploads: ${stats.buffersExternalUploads ?? 0}`,
    `ExtDownloads: ${stats.buffersExternalDownloads ?? 0}`,
  ];

  // every field is separated by ", " (the original output had no separator before "Uploads")
  console.log(`[${label ?? ""}] ${fields.join(", ")}`);
};
};
Loading

0 comments on commit 81d2223

Please sign in to comment.