diff --git a/build_all.bat b/build_all.bat new file mode 100644 index 0000000000000..2b90fd9cda2b7 --- /dev/null +++ b/build_all.bat @@ -0,0 +1,3 @@ +build --config Release --build_wasm --enable_wasm_simd --enable_wasm_threads --use_extensions --cmake_extra_defines onnxruntime_WEBASSEMBLY_DEFAULT_EXTENSION_FLAGS=ON --target onnxruntime_webassembly --skip_tests --enable_wasm_api_exception_catching --disable_rtti --build_dir ./build_wasm_inferencing +build --config Release --build_wasm --enable_wasm_simd --enable_wasm_threads --use_extensions --cmake_extra_defines onnxruntime_WEBASSEMBLY_DEFAULT_EXTENSION_FLAGS=ON --target onnxruntime_webassembly --skip_tests --enable_wasm_api_exception_catching --disable_rtti --build_dir ./build_wasm_inferencing_jsep --use_jsep --use_webnn +build --config Release --build_wasm --enable_wasm_simd --enable_wasm_threads --use_extensions --cmake_extra_defines onnxruntime_WEBASSEMBLY_DEFAULT_EXTENSION_FLAGS=ON --target onnxruntime_webassembly --skip_tests --enable_wasm_api_exception_catching --disable_rtti --build_dir ./build_wasm_inferencing_webgpu --use_webgpu --use_jsep --use_webnn diff --git a/cmake/external/onnxruntime_external_deps.cmake b/cmake/external/onnxruntime_external_deps.cmake index 754821c92b2a0..589f26c3a1d78 100644 --- a/cmake/external/onnxruntime_external_deps.cmake +++ b/cmake/external/onnxruntime_external_deps.cmake @@ -661,6 +661,13 @@ if (onnxruntime_USE_WEBGPU) if (CMAKE_SYSTEM_NAME STREQUAL "Emscripten") set(DAWN_EMSCRIPTEN_TOOLCHAIN "${REPO_ROOT}/cmake/external/emsdk/upstream/emscripten" CACHE STRING "" FORCE) + + # Update a few files in Emscripten + # + # The following files should be updated in Emscripten. We are waiting for the next Emscripten release to include + # these changes. For now, we apply the changes manually. 
+ # - ${DAWN_EMSCRIPTEN_TOOLCHAIN}/src/closure-externs/webgpu-externs.js + execute_process(COMMAND ${CMAKE_COMMAND} -E copy "${PROJECT_SOURCE_DIR}/patches/emscripten/webgpu-externs.js" "${DAWN_EMSCRIPTEN_TOOLCHAIN}/src/closure-externs/webgpu-externs.js") else() if (onnxruntime_BUILD_DAWN_MONOLITHIC_LIBRARY) set(DAWN_BUILD_MONOLITHIC_LIBRARY ON CACHE BOOL "" FORCE) diff --git a/cmake/onnxruntime_webassembly.cmake b/cmake/onnxruntime_webassembly.cmake index 2c4cdebcf450a..59332558bd262 100644 --- a/cmake/onnxruntime_webassembly.cmake +++ b/cmake/onnxruntime_webassembly.cmake @@ -209,10 +209,14 @@ else() target_link_libraries(onnxruntime_webassembly PRIVATE tensorboard) endif() + set(onnxruntime_webassembly_script_deps "${ONNXRUNTIME_ROOT}/wasm/pre.js") + + set(EXPORTED_FUNCTIONS "_malloc,_free") if (onnxruntime_USE_JSEP) - set(EXPORTED_FUNCTIONS "_malloc,_free,_JsepOutput,_JsepGetNodeName") - else() - set(EXPORTED_FUNCTIONS "_malloc,_free") + string(APPEND EXPORTED_FUNCTIONS ",_JsepOutput,_JsepGetNodeName") + endif() + if (onnxruntime_USE_WEBGPU) + string(APPEND EXPORTED_FUNCTIONS ",_wgpuBufferRelease,_wgpuCreateInstance") endif() if (onnxruntime_ENABLE_WEBASSEMBLY_MEMORY64) @@ -310,13 +314,15 @@ else() target_compile_options(noexcep_operators PRIVATE ${SMEMORY_FLAG} -Wno-experimental) endif() target_link_options(onnxruntime_webassembly PRIVATE - --post-js "${ONNXRUNTIME_ROOT}/wasm/js_post_js_64.js" + "SHELL:--post-js \"${ONNXRUNTIME_ROOT}/wasm/js_post_js_64.js\"" ) + list(APPEND onnxruntime_webassembly_script_deps "${ONNXRUNTIME_ROOT}/wasm/js_post_js_64.js") else () set(MAXIMUM_MEMORY "4294967296") target_link_options(onnxruntime_webassembly PRIVATE - --post-js "${ONNXRUNTIME_ROOT}/wasm/js_post_js.js" + "SHELL:--post-js \"${ONNXRUNTIME_ROOT}/wasm/js_post_js.js\"" ) + list(APPEND onnxruntime_webassembly_script_deps "${ONNXRUNTIME_ROOT}/wasm/js_post_js.js") endif () target_link_options(onnxruntime_webassembly PRIVATE @@ -370,7 +376,6 @@ jsepDownload:_pp_") "SHELL:-s SIGNATURE_CONVERSIONS='${SIGNATURE_CONVERSIONS}'" ) endif () - set_target_properties(onnxruntime_webassembly PROPERTIES LINK_DEPENDS ${ONNXRUNTIME_ROOT}/wasm/pre.js) if (onnxruntime_USE_JSEP) # NOTE: "-s ASYNCIFY=1" is required for JSEP to work with WebGPU @@ -380,10 +385,8 @@ jsepDownload:_pp_") target_compile_definitions(onnxruntime_webassembly PRIVATE USE_JSEP=1) target_link_options(onnxruntime_webassembly PRIVATE "SHELL:--pre-js \"${ONNXRUNTIME_ROOT}/wasm/pre-jsep.js\"" - "SHELL:-s ASYNCIFY=1" - "SHELL:-s ASYNCIFY_STACK_SIZE=65536" ) - set_target_properties(onnxruntime_webassembly PROPERTIES LINK_DEPENDS ${ONNXRUNTIME_ROOT}/wasm/pre-jsep.js) + list(APPEND onnxruntime_webassembly_script_deps "${ONNXRUNTIME_ROOT}/wasm/pre-jsep.js") if (onnxruntime_ENABLE_WEBASSEMBLY_MEMORY64) target_link_options(onnxruntime_webassembly PRIVATE @@ -395,6 +398,20 @@ jsepDownload:_pp_") if (onnxruntime_USE_WEBGPU) target_compile_definitions(onnxruntime_webassembly PRIVATE USE_WEBGPU=1) + target_link_options(onnxruntime_webassembly PRIVATE + "SHELL:--post-js \"${ONNXRUNTIME_ROOT}/wasm/post-webgpu.js\"" + ) + list(APPEND onnxruntime_webassembly_script_deps "${ONNXRUNTIME_ROOT}/wasm/post-webgpu.js") + endif() + + if (onnxruntime_USE_JSEP OR onnxruntime_USE_WEBGPU OR onnxruntime_USE_WEBNN) + # if any of the above is enabled, we need to use the asyncify library + target_link_options(onnxruntime_webassembly PRIVATE + "SHELL:--pre-js \"${ONNXRUNTIME_ROOT}/wasm/pre-async.js\"" + "SHELL:-s ASYNCIFY=1" + "SHELL:-s ASYNCIFY_STACK_SIZE=65536" + ) + list(APPEND 
onnxruntime_webassembly_script_deps "${ONNXRUNTIME_ROOT}/wasm/pre-async.js") endif() if (onnxruntime_EMSCRIPTEN_SETTINGS) @@ -449,6 +466,8 @@ jsepDownload:_pp_") ) endif() + set_target_properties(onnxruntime_webassembly PROPERTIES LINK_DEPENDS "${onnxruntime_webassembly_script_deps}") + set(target_name_list ort) if (onnxruntime_ENABLE_TRAINING_APIS) diff --git a/cmake/patches/dawn/dawn.patch b/cmake/patches/dawn/dawn.patch index e7fd935cb9cb1..6f5a50530a88a 100644 --- a/cmake/patches/dawn/dawn.patch +++ b/cmake/patches/dawn/dawn.patch @@ -21,3 +21,76 @@ index 6e8ae37593..633af91eef 100644 -q "${EM_BUILD_GEN_DIR}/struct_info_webgpu.json" "-I=${EM_BUILD_GEN_DIR}/include" +diff --git a/src/emdawnwebgpu/README.md b/src/emdawnwebgpu/README.md +index efd6491cd6..8ebc5d28b6 100644 +--- a/src/emdawnwebgpu/README.md ++++ b/src/emdawnwebgpu/README.md +@@ -56,7 +56,7 @@ Set up the build directory using emcmake + mkdir out/cmake-wasm + cd out/cmake-wasm + +-# Make sure the path is to the source checkout of Emscripten, not emsdk's release. ++# If using Emscripten v4.0.2 or lower, make sure the path is to the source checkout of Emscripten, not emsdk's release. + emcmake cmake -GNinja -DDAWN_EMSCRIPTEN_TOOLCHAIN="path/to/emscripten" ../.. + + ninja +diff --git a/third_party/emdawnwebgpu/webgpu.cpp b/third_party/emdawnwebgpu/webgpu.cpp +index ca52b1237b..b11462fb87 100644 +--- a/third_party/emdawnwebgpu/webgpu.cpp ++++ b/third_party/emdawnwebgpu/webgpu.cpp +@@ -131,7 +131,6 @@ class RefCounted : NonMovable { + bool Release() { + if (mRefCount.fetch_sub(1u, std::memory_order_release) == 1u) { + std::atomic_thread_fence(std::memory_order_acquire); +- emwgpuDelete(this); + return true; + } + return false; +@@ -234,6 +233,7 @@ class Ref { + static void Release(T value) { + if (value != nullptr && value->RefCounted::Release()) { + delete value; ++ emwgpuDelete(value); + } + } + +@@ -642,6 +642,7 @@ struct WGPUBufferImpl final : public EventSource, + public RefCountedWithExternalCount { + public: + WGPUBufferImpl(const EventSource* source, bool mappedAtCreation); ++ ~WGPUBufferImpl(); + + void Destroy(); + const void* GetConstMappedRange(size_t offset, size_t size); +@@ -1168,7 +1169,11 @@ WGPUBuffer emwgpuCreateBuffer(const EventSource* source, + } + + WGPUDevice emwgpuCreateDevice(const EventSource* source, WGPUQueue queue) { +- return new WGPUDeviceImpl(source, queue); ++ // This function is only called from JS via `importJsDevice()`, which ++ // needs to increment the external ref count to fix the behavior. 
++ WGPUDeviceImpl* device = new WGPUDeviceImpl(source, queue); ++ device->AddExternalRef(); ++ return device; + } + + WGPUQueue emwgpuCreateQueue(const EventSource* source) { +@@ -1284,6 +1289,10 @@ WGPUBufferImpl::WGPUBufferImpl(const EventSource* source, bool mappedAtCreation) + } + } + ++WGPUBufferImpl::~WGPUBufferImpl() { ++ Destroy(); ++} ++ + void WGPUBufferImpl::Destroy() { + emwgpuBufferDestroy(this); + AbortPendingMap("Buffer was destroyed before mapping was resolved."); +@@ -1504,6 +1513,7 @@ WGPUFuture WGPUShaderModuleImpl::GetCompilationInfo( + void wgpu##Name##Release(WGPU##Name o) { \ + if (o->Release()) { \ + delete o; \ ++ emwgpuDelete(o); \ + } \ + } + WGPU_OBJECTS(DEFINE_WGPU_DEFAULT_ADDREF_RELEASE) diff --git a/cmake/patches/emscripten/webgpu-externs.js b/cmake/patches/emscripten/webgpu-externs.js new file mode 100644 index 0000000000000..9dc1a6943ed51 --- /dev/null +++ b/cmake/patches/emscripten/webgpu-externs.js @@ -0,0 +1,577 @@ +/* + * WebGPU globals + * Generated using https://github.com/kainino0x/webidl-to-closure-externs + * against the spec's WebIDL: https://gpuweb.github.io/gpuweb/webgpu.idl + */ + +/** @type {?GPU} */ +Navigator.prototype.gpu; + +/** @type {?GPU} */ +WorkerNavigator.prototype.gpu; + +const GPUBufferUsage = {}; +/** @type {number} */ +GPUBufferUsage.MAP_READ; +/** @type {number} */ +GPUBufferUsage.MAP_WRITE; +/** @type {number} */ +GPUBufferUsage.COPY_SRC; +/** @type {number} */ +GPUBufferUsage.COPY_DST; +/** @type {number} */ +GPUBufferUsage.INDEX; +/** @type {number} */ +GPUBufferUsage.VERTEX; +/** @type {number} */ +GPUBufferUsage.UNIFORM; +/** @type {number} */ +GPUBufferUsage.STORAGE; +/** @type {number} */ +GPUBufferUsage.INDIRECT; +/** @type {number} */ +GPUBufferUsage.QUERY_RESOLVE; + +const GPUMapMode = {}; +/** @type {number} */ +GPUMapMode.READ; +/** @type {number} */ +GPUMapMode.WRITE; + +const GPUTextureUsage = {}; +/** @type {number} */ +GPUTextureUsage.COPY_SRC; +/** @type {number} */ +GPUTextureUsage.COPY_DST; +/** @type {number} */ +GPUTextureUsage.TEXTURE_BINDING; +/** @type {number} */ +GPUTextureUsage.STORAGE_BINDING; +/** @type {number} */ +GPUTextureUsage.RENDER_ATTACHMENT; + +const GPUShaderStage = {}; +/** @type {number} */ +GPUShaderStage.VERTEX; +/** @type {number} */ +GPUShaderStage.FRAGMENT; +/** @type {number} */ +GPUShaderStage.COMPUTE; + +const GPUColorWrite = {}; +/** @type {number} */ +GPUColorWrite.RED; +/** @type {number} */ +GPUColorWrite.GREEN; +/** @type {number} */ +GPUColorWrite.BLUE; +/** @type {number} */ +GPUColorWrite.ALPHA; +/** @type {number} */ +GPUColorWrite.ALL; + +/** @constructor */ +function GPUSupportedLimits() {} +/** @type {number} */ +GPUSupportedLimits.prototype.maxTextureDimension1D; +/** @type {number} */ +GPUSupportedLimits.prototype.maxTextureDimension2D; +/** @type {number} */ +GPUSupportedLimits.prototype.maxTextureDimension3D; +/** @type {number} */ +GPUSupportedLimits.prototype.maxTextureArrayLayers; +/** @type {number} */ +GPUSupportedLimits.prototype.maxBindGroups; +/** @type {number} */ +GPUSupportedLimits.prototype.maxBindGroupsPlusVertexBuffers; +/** @type {number} */ +GPUSupportedLimits.prototype.maxBindingsPerBindGroup; +/** @type {number} */ +GPUSupportedLimits.prototype.maxDynamicUniformBuffersPerPipelineLayout; +/** @type {number} */ +GPUSupportedLimits.prototype.maxDynamicStorageBuffersPerPipelineLayout; +/** @type {number} */ +GPUSupportedLimits.prototype.maxSampledTexturesPerShaderStage; +/** @type {number} */ +GPUSupportedLimits.prototype.maxSamplersPerShaderStage; 
+/** @type {number} */ +GPUSupportedLimits.prototype.maxStorageBuffersPerShaderStage; +/** @type {number} */ +GPUSupportedLimits.prototype.maxStorageTexturesPerShaderStage; +/** @type {number} */ +GPUSupportedLimits.prototype.maxUniformBuffersPerShaderStage; +/** @type {number} */ +GPUSupportedLimits.prototype.maxUniformBufferBindingSize; +/** @type {number} */ +GPUSupportedLimits.prototype.maxStorageBufferBindingSize; +/** @type {number} */ +GPUSupportedLimits.prototype.minUniformBufferOffsetAlignment; +/** @type {number} */ +GPUSupportedLimits.prototype.minStorageBufferOffsetAlignment; +/** @type {number} */ +GPUSupportedLimits.prototype.maxVertexBuffers; +/** @type {number} */ +GPUSupportedLimits.prototype.maxBufferSize; +/** @type {number} */ +GPUSupportedLimits.prototype.maxVertexAttributes; +/** @type {number} */ +GPUSupportedLimits.prototype.maxVertexBufferArrayStride; +/** @type {number} */ +GPUSupportedLimits.prototype.maxInterStageShaderComponents; +/** @type {number} */ +GPUSupportedLimits.prototype.maxInterStageShaderVariables; +/** @type {number} */ +GPUSupportedLimits.prototype.maxColorAttachments; +/** @type {number} */ +GPUSupportedLimits.prototype.maxColorAttachmentBytesPerSample; +/** @type {number} */ +GPUSupportedLimits.prototype.maxComputeWorkgroupStorageSize; +/** @type {number} */ +GPUSupportedLimits.prototype.maxComputeInvocationsPerWorkgroup; +/** @type {number} */ +GPUSupportedLimits.prototype.maxComputeWorkgroupSizeX; +/** @type {number} */ +GPUSupportedLimits.prototype.maxComputeWorkgroupSizeY; +/** @type {number} */ +GPUSupportedLimits.prototype.maxComputeWorkgroupSizeZ; +/** @type {number} */ +GPUSupportedLimits.prototype.maxComputeWorkgroupsPerDimension; + +/** @constructor */ +function GPUSupportedFeatures() {} +/** @type {number} */ +GPUSupportedFeatures.prototype.size; +/** @return {!Iterable} */ +GPUSupportedFeatures.prototype.entries = function() {}; +/** @return {!Iterable} */ +GPUSupportedFeatures.prototype.keys = function() {}; +/** @return {!Iterable} */ +GPUSupportedFeatures.prototype.values = function() {}; +/** @return {undefined} */ +GPUSupportedFeatures.prototype.forEach = function() {}; +/** @return {boolean} */ +GPUSupportedFeatures.prototype.has = function() {}; + +/** @constructor */ +function WGSLLanguageFeatures() {} +/** @type {number} */ +WGSLLanguageFeatures.prototype.size; +/** @return {!Iterable} */ +WGSLLanguageFeatures.prototype.entries = function() {}; +/** @return {!Iterable} */ +WGSLLanguageFeatures.prototype.keys = function() {}; +/** @return {!Iterable} */ +WGSLLanguageFeatures.prototype.values = function() {}; +/** @return {undefined} */ +WGSLLanguageFeatures.prototype.forEach = function() {}; +/** @return {boolean} */ +WGSLLanguageFeatures.prototype.has = function() {}; + +/** @constructor */ +function GPUAdapterInfo() {} +/** @type {string} */ +GPUAdapterInfo.prototype.vendor; +/** @type {string} */ +GPUAdapterInfo.prototype.architecture; +/** @type {string} */ +GPUAdapterInfo.prototype.device; +/** @type {string} */ +GPUAdapterInfo.prototype.description; + +/** @constructor */ +function GPU() {} +/** @return {!Promise} */ +GPU.prototype.requestAdapter = function() {}; +/** @return {string} */ +GPU.prototype.getPreferredCanvasFormat = function() {}; +/** @type {!WGSLLanguageFeatures} */ +GPU.prototype.wgslLanguageFeatures; + +/** @constructor */ +function GPUAdapter() {} +/** @type {!GPUSupportedFeatures} */ +GPUAdapter.prototype.features; +/** @type {!GPUSupportedLimits} */ +GPUAdapter.prototype.limits; +/** @type {boolean} 
*/ +GPUAdapter.prototype.isFallbackAdapter; +/** @return {!Promise} */ +GPUAdapter.prototype.requestDevice = function() {}; +/** @return {!Promise} */ +GPUAdapter.prototype.requestAdapterInfo = function() {}; +/** @type {!GPUAdapterInfo} */ +GPUAdapter.prototype.info; + +/** @constructor */ +function GPUDevice() {} +/** @type {string} */ +GPUDevice.prototype.label; +/** @type {!GPUSupportedFeatures} */ +GPUDevice.prototype.features; +/** @type {!GPUSupportedLimits} */ +GPUDevice.prototype.limits; +/** @type {!GPUQueue} */ +GPUDevice.prototype.queue; +/** @return {undefined} */ +GPUDevice.prototype.destroy = function() {}; +/** @return {!GPUBuffer} */ +GPUDevice.prototype.createBuffer = function() {}; +/** @return {!GPUTexture} */ +GPUDevice.prototype.createTexture = function() {}; +/** @return {!GPUSampler} */ +GPUDevice.prototype.createSampler = function() {}; +/** @return {!GPUExternalTexture} */ +GPUDevice.prototype.importExternalTexture = function() {}; +/** @return {!GPUBindGroupLayout} */ +GPUDevice.prototype.createBindGroupLayout = function() {}; +/** @return {!GPUPipelineLayout} */ +GPUDevice.prototype.createPipelineLayout = function() {}; +/** @return {!GPUBindGroup} */ +GPUDevice.prototype.createBindGroup = function() {}; +/** @return {!GPUShaderModule} */ +GPUDevice.prototype.createShaderModule = function() {}; +/** @return {!GPUComputePipeline} */ +GPUDevice.prototype.createComputePipeline = function() {}; +/** @return {!GPURenderPipeline} */ +GPUDevice.prototype.createRenderPipeline = function() {}; +/** @return {!Promise} */ +GPUDevice.prototype.createComputePipelineAsync = function() {}; +/** @return {!Promise} */ +GPUDevice.prototype.createRenderPipelineAsync = function() {}; +/** @return {!GPUCommandEncoder} */ +GPUDevice.prototype.createCommandEncoder = function() {}; +/** @return {!GPURenderBundleEncoder} */ +GPUDevice.prototype.createRenderBundleEncoder = function() {}; +/** @return {!GPUQuerySet} */ +GPUDevice.prototype.createQuerySet = function() {}; +/** @type {!Promise} */ +GPUDevice.prototype.lost; +/** @return {undefined} */ +GPUDevice.prototype.pushErrorScope = function() {}; +/** @return {!Promise} */ +GPUDevice.prototype.popErrorScope = function() {}; +/** @type {!Function} */ +GPUDevice.prototype.onuncapturederror; +/** @type {!GPUAdapterInfo} */ +GPUDevice.prototype.adapterInfo; + +/** @constructor */ +function GPUBuffer() {} +/** @type {string} */ +GPUBuffer.prototype.label; +/** @type {number} */ +GPUBuffer.prototype.size; +/** @type {number} */ +GPUBuffer.prototype.usage; +/** @type {string} */ +GPUBuffer.prototype.mapState; +/** @return {!Promise} */ +GPUBuffer.prototype.mapAsync = function() {}; +/** @return {!ArrayBuffer} */ +GPUBuffer.prototype.getMappedRange = function() {}; +/** @return {undefined} */ +GPUBuffer.prototype.unmap = function() {}; +/** @return {undefined} */ +GPUBuffer.prototype.destroy = function() {}; + +/** @constructor */ +function GPUTexture() {} +/** @type {string} */ +GPUTexture.prototype.label; +/** @return {!GPUTextureView} */ +GPUTexture.prototype.createView = function() {}; +/** @return {undefined} */ +GPUTexture.prototype.destroy = function() {}; +/** @type {number} */ +GPUTexture.prototype.width; +/** @type {number} */ +GPUTexture.prototype.height; +/** @type {number} */ +GPUTexture.prototype.depthOrArrayLayers; +/** @type {number} */ +GPUTexture.prototype.mipLevelCount; +/** @type {number} */ +GPUTexture.prototype.sampleCount; +/** @type {string} */ +GPUTexture.prototype.dimension; +/** @type {string} */ 
+GPUTexture.prototype.format; +/** @type {number} */ +GPUTexture.prototype.usage; + +/** @constructor */ +function GPUTextureView() {} +/** @type {string} */ +GPUTextureView.prototype.label; + +/** @constructor */ +function GPUExternalTexture() {} +/** @type {string} */ +GPUExternalTexture.prototype.label; + +/** @constructor */ +function GPUSampler() {} +/** @type {string} */ +GPUSampler.prototype.label; + +/** @constructor */ +function GPUBindGroupLayout() {} +/** @type {string} */ +GPUBindGroupLayout.prototype.label; + +/** @constructor */ +function GPUBindGroup() {} +/** @type {string} */ +GPUBindGroup.prototype.label; + +/** @constructor */ +function GPUPipelineLayout() {} +/** @type {string} */ +GPUPipelineLayout.prototype.label; + +/** @constructor */ +function GPUShaderModule() {} +/** @type {string} */ +GPUShaderModule.prototype.label; +/** @return {!Promise} */ +GPUShaderModule.prototype.getCompilationInfo = function() {}; + +/** @constructor */ +function GPUCompilationMessage() {} +/** @type {string} */ +GPUCompilationMessage.prototype.message; +/** @type {string} */ +GPUCompilationMessage.prototype.type; +/** @type {number} */ +GPUCompilationMessage.prototype.lineNum; +/** @type {number} */ +GPUCompilationMessage.prototype.linePos; +/** @type {number} */ +GPUCompilationMessage.prototype.offset; +/** @type {number} */ +GPUCompilationMessage.prototype.length; + +/** @constructor */ +function GPUCompilationInfo() {} +/** @type {!Array} */ +GPUCompilationInfo.prototype.messages; + +/** @constructor */ +function GPUPipelineError() {} +/** @type {string} */ +GPUPipelineError.prototype.reason; + +/** @constructor */ +function GPUComputePipeline() {} +/** @type {string} */ +GPUComputePipeline.prototype.label; +/** @return {!GPUBindGroupLayout} */ +GPUComputePipeline.prototype.getBindGroupLayout = function() {}; + +/** @constructor */ +function GPURenderPipeline() {} +/** @type {string} */ +GPURenderPipeline.prototype.label; +/** @return {!GPUBindGroupLayout} */ +GPURenderPipeline.prototype.getBindGroupLayout = function() {}; + +/** @constructor */ +function GPUCommandBuffer() {} +/** @type {string} */ +GPUCommandBuffer.prototype.label; + +/** @constructor */ +function GPUCommandEncoder() {} +/** @type {string} */ +GPUCommandEncoder.prototype.label; +/** @return {undefined} */ +GPUCommandEncoder.prototype.pushDebugGroup = function() {}; +/** @return {undefined} */ +GPUCommandEncoder.prototype.popDebugGroup = function() {}; +/** @return {undefined} */ +GPUCommandEncoder.prototype.insertDebugMarker = function() {}; +/** @return {!GPURenderPassEncoder} */ +GPUCommandEncoder.prototype.beginRenderPass = function() {}; +/** @return {!GPUComputePassEncoder} */ +GPUCommandEncoder.prototype.beginComputePass = function() {}; +/** @return {undefined} */ +GPUCommandEncoder.prototype.copyBufferToBuffer = function() {}; +/** @return {undefined} */ +GPUCommandEncoder.prototype.copyBufferToTexture = function() {}; +/** @return {undefined} */ +GPUCommandEncoder.prototype.copyTextureToBuffer = function() {}; +/** @return {undefined} */ +GPUCommandEncoder.prototype.copyTextureToTexture = function() {}; +/** @return {undefined} */ +GPUCommandEncoder.prototype.clearBuffer = function() {}; +/** @return {undefined} */ +GPUCommandEncoder.prototype.resolveQuerySet = function() {}; +/** @return {!GPUCommandBuffer} */ +GPUCommandEncoder.prototype.finish = function() {}; + +/** @constructor */ +function GPUComputePassEncoder() {} +/** @type {string} */ +GPUComputePassEncoder.prototype.label; +/** @return 
{undefined} */ +GPUComputePassEncoder.prototype.pushDebugGroup = function() {}; +/** @return {undefined} */ +GPUComputePassEncoder.prototype.popDebugGroup = function() {}; +/** @return {undefined} */ +GPUComputePassEncoder.prototype.insertDebugMarker = function() {}; +/** @return {undefined} */ +GPUComputePassEncoder.prototype.setBindGroup = function() {}; +/** @return {undefined} */ +GPUComputePassEncoder.prototype.setBindGroup = function() {}; +/** @return {undefined} */ +GPUComputePassEncoder.prototype.setPipeline = function() {}; +/** @return {undefined} */ +GPUComputePassEncoder.prototype.dispatchWorkgroups = function() {}; +/** @return {undefined} */ +GPUComputePassEncoder.prototype.dispatchWorkgroupsIndirect = function() {}; +/** @return {undefined} */ +GPUComputePassEncoder.prototype.end = function() {}; + +/** @constructor */ +function GPURenderPassEncoder() {} +/** @type {string} */ +GPURenderPassEncoder.prototype.label; +/** @return {undefined} */ +GPURenderPassEncoder.prototype.pushDebugGroup = function() {}; +/** @return {undefined} */ +GPURenderPassEncoder.prototype.popDebugGroup = function() {}; +/** @return {undefined} */ +GPURenderPassEncoder.prototype.insertDebugMarker = function() {}; +/** @return {undefined} */ +GPURenderPassEncoder.prototype.setBindGroup = function() {}; +/** @return {undefined} */ +GPURenderPassEncoder.prototype.setBindGroup = function() {}; +/** @return {undefined} */ +GPURenderPassEncoder.prototype.setPipeline = function() {}; +/** @return {undefined} */ +GPURenderPassEncoder.prototype.setIndexBuffer = function() {}; +/** @return {undefined} */ +GPURenderPassEncoder.prototype.setVertexBuffer = function() {}; +/** @return {undefined} */ +GPURenderPassEncoder.prototype.draw = function() {}; +/** @return {undefined} */ +GPURenderPassEncoder.prototype.drawIndexed = function() {}; +/** @return {undefined} */ +GPURenderPassEncoder.prototype.drawIndirect = function() {}; +/** @return {undefined} */ +GPURenderPassEncoder.prototype.drawIndexedIndirect = function() {}; +/** @return {undefined} */ +GPURenderPassEncoder.prototype.setViewport = function() {}; +/** @return {undefined} */ +GPURenderPassEncoder.prototype.setScissorRect = function() {}; +/** @return {undefined} */ +GPURenderPassEncoder.prototype.setBlendConstant = function() {}; +/** @return {undefined} */ +GPURenderPassEncoder.prototype.setStencilReference = function() {}; +/** @return {undefined} */ +GPURenderPassEncoder.prototype.beginOcclusionQuery = function() {}; +/** @return {undefined} */ +GPURenderPassEncoder.prototype.endOcclusionQuery = function() {}; +/** @return {undefined} */ +GPURenderPassEncoder.prototype.executeBundles = function() {}; +/** @return {undefined} */ +GPURenderPassEncoder.prototype.end = function() {}; + +/** @constructor */ +function GPURenderBundle() {} +/** @type {string} */ +GPURenderBundle.prototype.label; + +/** @constructor */ +function GPURenderBundleEncoder() {} +/** @type {string} */ +GPURenderBundleEncoder.prototype.label; +/** @return {undefined} */ +GPURenderBundleEncoder.prototype.pushDebugGroup = function() {}; +/** @return {undefined} */ +GPURenderBundleEncoder.prototype.popDebugGroup = function() {}; +/** @return {undefined} */ +GPURenderBundleEncoder.prototype.insertDebugMarker = function() {}; +/** @return {undefined} */ +GPURenderBundleEncoder.prototype.setBindGroup = function() {}; +/** @return {undefined} */ +GPURenderBundleEncoder.prototype.setBindGroup = function() {}; +/** @return {undefined} */ +GPURenderBundleEncoder.prototype.setPipeline = 
function() {}; +/** @return {undefined} */ +GPURenderBundleEncoder.prototype.setIndexBuffer = function() {}; +/** @return {undefined} */ +GPURenderBundleEncoder.prototype.setVertexBuffer = function() {}; +/** @return {undefined} */ +GPURenderBundleEncoder.prototype.draw = function() {}; +/** @return {undefined} */ +GPURenderBundleEncoder.prototype.drawIndexed = function() {}; +/** @return {undefined} */ +GPURenderBundleEncoder.prototype.drawIndirect = function() {}; +/** @return {undefined} */ +GPURenderBundleEncoder.prototype.drawIndexedIndirect = function() {}; +/** @return {!GPURenderBundle} */ +GPURenderBundleEncoder.prototype.finish = function() {}; + +/** @constructor */ +function GPUQueue() {} +/** @type {string} */ +GPUQueue.prototype.label; +/** @return {undefined} */ +GPUQueue.prototype.submit = function() {}; +/** @return {!Promise} */ +GPUQueue.prototype.onSubmittedWorkDone = function() {}; +/** @return {undefined} */ +GPUQueue.prototype.writeBuffer = function() {}; +/** @return {undefined} */ +GPUQueue.prototype.writeTexture = function() {}; +/** @return {undefined} */ +GPUQueue.prototype.copyExternalImageToTexture = function() {}; + +/** @constructor */ +function GPUQuerySet() {} +/** @type {string} */ +GPUQuerySet.prototype.label; +/** @return {undefined} */ +GPUQuerySet.prototype.destroy = function() {}; +/** @type {string} */ +GPUQuerySet.prototype.type; +/** @type {number} */ +GPUQuerySet.prototype.count; + +/** @constructor */ +function GPUCanvasContext() {} +/** @type {!HTMLCanvasElement|!OffscreenCanvas} */ +GPUCanvasContext.prototype.canvas; +/** @return {undefined} */ +GPUCanvasContext.prototype.configure = function() {}; +/** @return {undefined} */ +GPUCanvasContext.prototype.unconfigure = function() {}; +/** @return {!GPUTexture} */ +GPUCanvasContext.prototype.getCurrentTexture = function() {}; + +/** @constructor */ +function GPUDeviceLostInfo() {} +/** @type {string} */ +GPUDeviceLostInfo.prototype.reason; +/** @type {string} */ +GPUDeviceLostInfo.prototype.message; + +/** @constructor */ +function GPUError() {} +/** @type {string} */ +GPUError.prototype.message; + +/** @constructor */ +function GPUValidationError() {} + +/** @constructor */ +function GPUOutOfMemoryError() {} + +/** @constructor */ +function GPUInternalError() {} + +/** @constructor */ +function GPUUncapturedErrorEvent() {} +/** @type {!GPUError} */ +GPUUncapturedErrorEvent.prototype.error; diff --git a/js/build_webgpu.bat b/js/build_webgpu.bat new file mode 100644 index 0000000000000..95413509e701d --- /dev/null +++ b/js/build_webgpu.bat @@ -0,0 +1,79 @@ +@echo off + +rem build_webgpu.bat --- build onnxruntime-web with WebGPU EP +rem +rem Usage: +rem build_webgpu.bat config [clean] +rem +rem Options: +rem config Build configuration, "d" or "r" +rem clean Perform a clean build, "clean" or empty + +setlocal enabledelayedexpansion + +set ROOT=%~dp0..\ +set BUILD_DIR=%ROOT%build_webgpu + +:arg1 +if ["%~1"]==["d"] ( + set CONFIG=Debug + set CONFIG_EXTRA_FLAG= + @rem --enable_wasm_profiling --wasm_run_tests_in_browser + @rem --cmake_extra_defines onnxruntime_ENABLE_WEBASSEMBLY_OUTPUT_OPTIMIZED_MODEL=1 + @rem --enable_wasm_debug_info + goto :arg2 +) +if ["%~1"]==["r"] ( + set CONFIG=Release + set CONFIG_EXTRA_FLAG= + @rem --enable_wasm_api_exception_catching --disable_rtti + goto :arg2 +) +echo Invalid configuration "%~1", must be "d"(Debug) or "r"(Release) +exit /b 1 + +:arg2 +if ["%~2"]==["clean"] ( + goto :clean +) +if not exist "%ROOT%js\web\dist" ( + goto :npm_ci +) + +goto :build_wasm + +:clean 
+if exist "%BUILD_DIR%" ( + rd /s /q %BUILD_DIR% +) + +pushd %ROOT% +git submodule sync --recursive +git submodule update --init --recursive +popd + +:npm_ci +pushd %ROOT%js +call npm ci +popd +pushd %ROOT%js\common +call npm ci +popd +pushd %ROOT%js\web +call npm ci +call npm run pull:wasm +popd + +:build_wasm + +set PATH=C:\Program Files\Git\usr\bin;%PATH% + +call %ROOT%build.bat --config %CONFIG% %CONFIG_EXTRA_FLAG% --skip_submodule_sync --build_wasm --target onnxruntime_webassembly --skip_tests^ + --enable_wasm_simd --enable_wasm_threads --use_jsep --use_webnn --use_webgpu --build_dir %BUILD_DIR% + +IF NOT "%ERRORLEVEL%" == "0" ( + exit /b %ERRORLEVEL% +) + +copy /Y %BUILD_DIR%\%CONFIG%\ort-wasm-simd-threaded.jsep.wasm %ROOT%js\web\dist\ +copy /Y %BUILD_DIR%\%CONFIG%\ort-wasm-simd-threaded.jsep.mjs %ROOT%js\web\dist\ diff --git a/js/web/lib/build-def.d.ts b/js/web/lib/build-def.d.ts index 59f64a3179605..83a52ebaefe05 100644 --- a/js/web/lib/build-def.d.ts +++ b/js/web/lib/build-def.d.ts @@ -40,6 +40,13 @@ interface BuildDefinitions { */ readonly ENABLE_BUNDLE_WASM_JS: boolean; + /** + * defines whether to use WebGPU EP instead of JSEP for WebGPU backend. + * + * This flag requires the corresponding WebAssembly artifact to be built with `--use_webgpu` flag. + */ + readonly USE_WEBGPU_EP: boolean; + // #endregion // #region Build definitions for ESM diff --git a/js/web/lib/wasm/jsep/init.ts b/js/web/lib/wasm/jsep/init.ts index b4071eae51c8f..fe9576b87ad72 100644 --- a/js/web/lib/wasm/jsep/init.ts +++ b/js/web/lib/wasm/jsep/init.ts @@ -1,17 +1,17 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. -import { Env } from 'onnxruntime-common'; +import type { Env } from 'onnxruntime-common'; import { calculateTensorSizeInBytes, DataType } from '../wasm-common'; import type { OrtWasmModule } from '../wasm-types'; -import { WebGpuBackend } from './backend-webgpu'; +import type { WebGpuBackend } from './backend-webgpu'; import { LOG_DEBUG } from './log'; -import { TensorView } from './tensor-view'; +import type { TensorView } from './tensor-view'; import { ShapeUtil } from './util'; -import { +import type { AdapterInfo, ComputeContext, ComputeContextInputsOutputsMapping, @@ -205,79 +205,83 @@ export const init = async ( } if (name === 'webgpu') { - const backend = new WebGpuBackend(); - await backend.initialize(env, gpuAdapter!); + if (!BUILD_DEFS.USE_WEBGPU_EP) { + // eslint-disable-next-line @typescript-eslint/no-require-imports, @typescript-eslint/no-var-requires + const webGpuBackendImpl = require('./backend-webgpu').WebGpuBackend; + const backend = new webGpuBackendImpl(); + await backend.initialize(env, gpuAdapter!); - jsepInit('webgpu', [ - // backend - backend, + jsepInit('webgpu', [ + // backend + backend, + + // jsepAlloc() + (size: number) => backend.alloc(Number(size)), - // jsepAlloc() - (size: number) => backend.alloc(Number(size)), + // jsepFree() + (ptr: number) => backend.free(ptr), - // jsepFree() - (ptr: number) => backend.free(ptr), + // jsepCopy(src, dst, size, isSourceGpu) + (src: number, dst: number, size: number, isSourceGpu = false) => { + if (isSourceGpu) { + LOG_DEBUG( + 'verbose', + () => `[WebGPU] jsepCopyGpuToGpu: src=${Number(src)}, dst=${Number(dst)}, size=${Number(size)}`, + ); + backend.memcpy(Number(src), Number(dst)); + } else { + LOG_DEBUG( + 'verbose', + () => + `[WebGPU] jsepCopyCpuToGpu: dataOffset=${Number(src)}, gpuDataId=${Number(dst)}, size=${Number(size)}`, + ); + const data = 
module.HEAPU8.subarray(Number(src >>> 0), Number(src >>> 0) + Number(size)); + backend.upload(Number(dst), data); + } + }, - // jsepCopy(src, dst, size, isSourceGpu) - (src: number, dst: number, size: number, isSourceGpu = false) => { - if (isSourceGpu) { + // jsepCopyAsync(src, dst, size) + async (gpuDataId: number, dataOffset: number, size: number): Promise => { LOG_DEBUG( 'verbose', - () => `[WebGPU] jsepCopyGpuToGpu: src=${Number(src)}, dst=${Number(dst)}, size=${Number(size)}`, + () => `[WebGPU] jsepCopyGpuToCpu: gpuDataId=${gpuDataId}, dataOffset=${dataOffset}, size=${size}`, ); - backend.memcpy(Number(src), Number(dst)); - } else { - LOG_DEBUG( - 'verbose', - () => - `[WebGPU] jsepCopyCpuToGpu: dataOffset=${Number(src)}, gpuDataId=${Number(dst)}, size=${Number(size)}`, - ); - const data = module.HEAPU8.subarray(Number(src >>> 0), Number(src >>> 0) + Number(size)); - backend.upload(Number(dst), data); - } - }, - // jsepCopyAsync(src, dst, size) - async (gpuDataId: number, dataOffset: number, size: number): Promise => { - LOG_DEBUG( - 'verbose', - () => `[WebGPU] jsepCopyGpuToCpu: gpuDataId=${gpuDataId}, dataOffset=${dataOffset}, size=${size}`, - ); - - await backend.download(Number(gpuDataId), () => - module.HEAPU8.subarray(Number(dataOffset) >>> 0, Number(dataOffset + size) >>> 0), - ); - }, + await backend.download(Number(gpuDataId), () => + module.HEAPU8.subarray(Number(dataOffset) >>> 0, Number(dataOffset + size) >>> 0), + ); + }, - // jsepCreateKernel - (kernelType: string, kernelId: number, attribute: unknown) => - backend.createKernel( - kernelType, - Number(kernelId), - attribute, - module.UTF8ToString(module._JsepGetNodeName!(Number(kernelId))), - ), + // jsepCreateKernel + (kernelType: string, kernelId: number, attribute: unknown) => + backend.createKernel( + kernelType, + Number(kernelId), + attribute, + module.UTF8ToString(module._JsepGetNodeName!(Number(kernelId))), + ), - // jsepReleaseKernel - (kernel: number) => backend.releaseKernel(kernel), + // jsepReleaseKernel + (kernel: number) => backend.releaseKernel(kernel), - // jsepRun - (kernel: number, contextDataOffset: number, sessionHandle: number, errors: Array>) => { - LOG_DEBUG( - 'verbose', - () => - `[WebGPU] jsepRun: sessionHandle=${sessionHandle}, kernel=${kernel}, contextDataOffset=${contextDataOffset}`, - ); - const context = new ComputeContextImpl(module, backend, Number(contextDataOffset)); - return backend.computeKernel(Number(kernel), context, errors); - }, - // jsepCaptureBegin - () => backend.captureBegin(), - // jsepCaptureEnd - () => backend.captureEnd(), - // jsepReplay - () => backend.replay(), - ]); + // jsepRun + (kernel: number, contextDataOffset: number, sessionHandle: number, errors: Array>) => { + LOG_DEBUG( + 'verbose', + () => + `[WebGPU] jsepRun: sessionHandle=${sessionHandle}, kernel=${kernel}, contextDataOffset=${contextDataOffset}`, + ); + const context = new ComputeContextImpl(module, backend, Number(contextDataOffset)); + return backend.computeKernel(Number(kernel), context, errors); + }, + // jsepCaptureBegin + () => backend.captureBegin(), + // jsepCaptureEnd + () => backend.captureEnd(), + // jsepReplay + () => backend.replay(), + ]); + } } else { const backend = new WebNNBackend(env); jsepInit('webnn', [ diff --git a/js/web/lib/wasm/session-options.ts b/js/web/lib/wasm/session-options.ts index 17e564247863d..89a4484e5a1c4 100644 --- a/js/web/lib/wasm/session-options.ts +++ b/js/web/lib/wasm/session-options.ts @@ -1,7 +1,7 @@ // Copyright (c) Microsoft Corporation. 
All rights reserved. // Licensed under the MIT License. -import { InferenceSession } from 'onnxruntime-common'; +import type { InferenceSession } from 'onnxruntime-common'; import { getInstance } from './wasm-factory'; import { allocWasmString, checkLastError, iterateExtraOptions } from './wasm-utils'; @@ -54,13 +54,28 @@ const appendDefaultOptions = (options: InferenceSession.SessionOptions): void => } }; -const setExecutionProviders = ( +const appendSessionConfig = (sessionOptionsHandle: number, key: string, value: string, allocs: number[]): void => { + const keyDataOffset = allocWasmString(key, allocs); + const valueDataOffset = allocWasmString(value, allocs); + if (getInstance()._OrtAddSessionConfigEntry(sessionOptionsHandle, keyDataOffset, valueDataOffset) !== 0) { + checkLastError(`Can't set a session config entry: ${key} - ${value}.`); + } +}; + +const appendEpOption = (epOptions: Array<[number, number]>, key: string, value: string, allocs: number[]): void => { + const keyDataOffset = allocWasmString(key, allocs); + const valueDataOffset = allocWasmString(value, allocs); + epOptions.push([keyDataOffset, valueDataOffset]); +}; + +const setExecutionProviders = async ( sessionOptionsHandle: number, executionProviders: readonly InferenceSession.ExecutionProviderConfig[], allocs: number[], -): void => { +): Promise => { for (const ep of executionProviders) { let epName = typeof ep === 'string' ? ep : ep.name; + const epOptions: Array<[number, number]> = []; // check EP name switch (epName) { @@ -71,26 +86,44 @@ const setExecutionProviders = ( // const context = (webnnOptions as InferenceSession.WebNNOptionsWithMLContext)?.context; const deviceType = (webnnOptions as InferenceSession.WebNNContextOptions)?.deviceType; if (deviceType) { - const keyDataOffset = allocWasmString('deviceType', allocs); - const valueDataOffset = allocWasmString(deviceType, allocs); - if (getInstance()._OrtAddSessionConfigEntry(sessionOptionsHandle, keyDataOffset, valueDataOffset) !== 0) { - checkLastError(`Can't set a session config entry: 'deviceType' - ${deviceType}.`); - } + appendSessionConfig(sessionOptionsHandle, 'deviceType', deviceType, allocs); } } break; case 'webgpu': - epName = 'JS'; - if (typeof ep !== 'string') { - const webgpuOptions = ep as InferenceSession.WebGpuExecutionProviderOption; - if (webgpuOptions?.preferredLayout) { - if (webgpuOptions.preferredLayout !== 'NCHW' && webgpuOptions.preferredLayout !== 'NHWC') { - throw new Error(`preferredLayout must be either 'NCHW' or 'NHWC': ${webgpuOptions.preferredLayout}`); + if (BUILD_DEFS.USE_WEBGPU_EP) { + epName = 'WebGPU'; + let customDevice: GPUDevice | undefined; + + if (typeof ep !== 'string') { + const customOptions = ep as unknown as { device: GPUDevice }; + if (customOptions.device) { + if (typeof GPUDevice !== 'undefined' && customOptions.device instanceof GPUDevice) { + customDevice = customOptions.device; + } else { + throw new Error('Invalid GPU device set in WebGPU EP options.'); + } } - const keyDataOffset = allocWasmString('preferredLayout', allocs); - const valueDataOffset = allocWasmString(webgpuOptions.preferredLayout, allocs); - if (getInstance()._OrtAddSessionConfigEntry(sessionOptionsHandle, keyDataOffset, valueDataOffset) !== 0) { - checkLastError(`Can't set a session config entry: 'preferredLayout' - ${webgpuOptions.preferredLayout}.`); + + // TODO: handle more options + } + + const info = getInstance().webgpuRegisterDevice!(customDevice); + if (info) { + const [deviceId, instanceHandle, deviceHandle] = info; + 
appendEpOption(epOptions, 'deviceId', deviceId.toString(), allocs); + appendEpOption(epOptions, 'webgpuInstance', instanceHandle.toString(), allocs); + appendEpOption(epOptions, 'webgpuDevice', deviceHandle.toString(), allocs); + } + } else { + epName = 'JS'; + if (typeof ep !== 'string') { + const webgpuOptions = ep as InferenceSession.WebGpuExecutionProviderOption; + if (webgpuOptions?.preferredLayout) { + if (webgpuOptions.preferredLayout !== 'NCHW' && webgpuOptions.preferredLayout !== 'NHWC') { + throw new Error(`preferredLayout must be either 'NCHW' or 'NHWC': ${webgpuOptions.preferredLayout}`); + } + appendSessionConfig(sessionOptionsHandle, 'preferredLayout', webgpuOptions.preferredLayout, allocs); } } } @@ -103,13 +136,34 @@ const setExecutionProviders = ( } const epNameDataOffset = allocWasmString(epName, allocs); - if (getInstance()._OrtAppendExecutionProvider(sessionOptionsHandle, epNameDataOffset) !== 0) { + const epOptionsCount = epOptions.length; + let keysOffset = 0; + let valuesOffset = 0; + if (epOptionsCount > 0) { + keysOffset = getInstance()._malloc(epOptionsCount * getInstance().PTR_SIZE); + allocs.push(keysOffset); + valuesOffset = getInstance()._malloc(epOptionsCount * getInstance().PTR_SIZE); + allocs.push(valuesOffset); + for (let i = 0; i < epOptionsCount; i++) { + getInstance().setValue(keysOffset + i * getInstance().PTR_SIZE, epOptions[i][0], '*'); + getInstance().setValue(valuesOffset + i * getInstance().PTR_SIZE, epOptions[i][1], '*'); + } + } + if ( + (await getInstance()._OrtAppendExecutionProvider( + sessionOptionsHandle, + epNameDataOffset, + keysOffset, + valuesOffset, + epOptionsCount, + )) !== 0 + ) { checkLastError(`Can't append execution provider: ${epName}.`); } } }; -export const setSessionOptions = (options?: InferenceSession.SessionOptions): [number, number[]] => { +export const setSessionOptions = async (options?: InferenceSession.SessionOptions): Promise<[number, number[]]> => { const wasm = getInstance(); let sessionOptionsHandle = 0; const allocs: number[] = []; @@ -155,20 +209,19 @@ export const setSessionOptions = (options?: InferenceSession.SessionOptions): [n } if (sessionOptions.executionProviders) { - setExecutionProviders(sessionOptionsHandle, sessionOptions.executionProviders, allocs); + await setExecutionProviders(sessionOptionsHandle, sessionOptions.executionProviders, allocs); } if (sessionOptions.enableGraphCapture !== undefined) { if (typeof sessionOptions.enableGraphCapture !== 'boolean') { throw new Error(`enableGraphCapture must be a boolean value: ${sessionOptions.enableGraphCapture}`); } - const keyDataOffset = allocWasmString('enableGraphCapture', allocs); - const valueDataOffset = allocWasmString(sessionOptions.enableGraphCapture.toString(), allocs); - if (wasm._OrtAddSessionConfigEntry(sessionOptionsHandle, keyDataOffset, valueDataOffset) !== 0) { - checkLastError( - `Can't set a session config entry: 'enableGraphCapture' - ${sessionOptions.enableGraphCapture}.`, - ); - } + appendSessionConfig( + sessionOptionsHandle, + 'enableGraphCapture', + sessionOptions.enableGraphCapture.toString(), + allocs, + ); } if (sessionOptions.freeDimensionOverrides) { @@ -188,12 +241,7 @@ export const setSessionOptions = (options?: InferenceSession.SessionOptions): [n if (sessionOptions.extra !== undefined) { iterateExtraOptions(sessionOptions.extra, '', new WeakSet>(), (key, value) => { - const keyDataOffset = allocWasmString(key, allocs); - const valueDataOffset = allocWasmString(value, allocs); - - if 
(wasm._OrtAddSessionConfigEntry(sessionOptionsHandle, keyDataOffset, valueDataOffset) !== 0) { - checkLastError(`Can't set a session config entry: ${key} - ${value}.`); - } + appendSessionConfig(sessionOptionsHandle, key, value, allocs); }); } diff --git a/js/web/lib/wasm/wasm-core-impl.ts b/js/web/lib/wasm/wasm-core-impl.ts index 4bccfa76fdda3..f167764f64cac 100644 --- a/js/web/lib/wasm/wasm-core-impl.ts +++ b/js/web/lib/wasm/wasm-core-impl.ts @@ -102,11 +102,20 @@ export const initRuntime = async (env: Env): Promise => { * @param epName */ export const initEp = async (env: Env, epName: string): Promise => { + // initialize ASYNCIFY support + getInstance().asyncInit?.(); + + if (BUILD_DEFS.USE_WEBGPU_EP) { + getInstance().webgpuInit!((device) => { + env.webgpu.device = device; + }); + } + if (!BUILD_DEFS.DISABLE_JSEP) { // eslint-disable-next-line @typescript-eslint/no-require-imports, @typescript-eslint/no-var-requires const initJsep = require('./jsep/init').init; - if (epName === 'webgpu') { + if (epName === 'webgpu' && !BUILD_DEFS.USE_WEBGPU_EP) { // perform WebGPU availability check if (typeof navigator === 'undefined' || !navigator.gpu) { throw new Error('WebGPU is not supported in current environment'); @@ -270,7 +279,7 @@ export const createSession = async ( const outputNamesUTF8Encoded = []; try { - [sessionOptionsHandle, allocs] = setSessionOptions(options); + [sessionOptionsHandle, allocs] = await setSessionOptions(options); if (options?.externalData && wasm.mountExternalData) { const loadingPromises = []; @@ -278,7 +287,7 @@ export const createSession = async ( const path = typeof file === 'string' ? file : file.path; loadingPromises.push( loadFile(typeof file === 'string' ? file : file.data).then((data) => { - wasm.mountExternalData!(path, data); + wasm.mountExternalData(path, data); }), ); } @@ -312,6 +321,7 @@ export const createSession = async ( } sessionHandle = await wasm._OrtCreateSession(modelDataOffset, modelDataLength, sessionOptionsHandle); + wasm.webgpuOnCreateSession?.(sessionHandle); if (sessionHandle === 0) { checkLastError("Can't create a session."); } @@ -444,6 +454,7 @@ export const releaseSession = (sessionId: number): void => { } wasm.jsepOnReleaseSession?.(sessionId); + wasm.webgpuOnReleaseSession?.(sessionId); inputNamesUTF8Encoded.forEach((buf) => wasm._OrtFree(buf)); outputNamesUTF8Encoded.forEach((buf) => wasm._OrtFree(buf)); @@ -491,11 +502,20 @@ export const prepareInputOutputTensor = async ( const gpuBuffer = tensor[2].gpuBuffer; dataByteLength = calculateTensorSizeInBytes(tensorDataTypeStringToEnum(dataType), dims)!; - const registerBuffer = wasm.jsepRegisterBuffer; - if (!registerBuffer) { - throw new Error('Tensor location "gpu-buffer" is not supported without using WebGPU.'); + if (BUILD_DEFS.USE_WEBGPU_EP) { + const registerBuffer = wasm.webgpuRegisterBuffer; + if (!registerBuffer) { + throw new Error('Tensor location "gpu-buffer" is not supported without using WebGPU.'); + } + + rawData = registerBuffer(gpuBuffer, sessionId); + } else { + const registerBuffer = wasm.jsepRegisterBuffer; + if (!registerBuffer) { + throw new Error('Tensor location "gpu-buffer" is not supported without using WebGPU.'); + } + rawData = registerBuffer(sessionId, index, gpuBuffer, dataByteLength); } - rawData = registerBuffer(sessionId, index, gpuBuffer, dataByteLength); } else if (location === 'ml-tensor') { const mlTensor = tensor[2].mlTensor as MLTensor; dataByteLength = calculateTensorSizeInBytes(tensorDataTypeStringToEnum(dataType), dims)!; @@ -791,7 +811,7 @@ 
export const run = async ( // If a certain output's preferred location is GPU but the tensor is empty, we still need to create a CPU // tensor for it. There is no mapping GPU buffer for an empty tensor. if (preferredLocation === 'gpu-buffer' && size > 0) { - const getBuffer = wasm.jsepGetBuffer; + const getBuffer = BUILD_DEFS.USE_WEBGPU_EP ? wasm.webgpuGetBuffer : wasm.jsepGetBuffer; if (!getBuffer) { throw new Error('preferredLocation "gpu-buffer" is not supported without using WebGPU.'); } @@ -804,20 +824,43 @@ export const run = async ( // do not release the tensor right now. it will be released when user calls tensor.dispose(). keepOutputTensor = true; - output.push([ - type, - dims, - { - gpuBuffer, - download: wasm.jsepCreateDownloader!(gpuBuffer, bufferSize, type), - dispose: () => { - if (wasm._OrtReleaseTensor(tensor) !== 0) { - checkLastError("Can't release tensor."); - } + if (BUILD_DEFS.USE_WEBGPU_EP) { + wasm.webgpuRegisterBuffer!(gpuBuffer, sessionId, dataOffset); + const downloadDataFunction = wasm.webgpuCreateDownloader!(gpuBuffer, bufferSize, sessionId); + output.push([ + type, + dims, + { + gpuBuffer, + download: async () => { + const arrayBuffer = await downloadDataFunction(); + const data = new (tensorTypeToTypedArrayConstructor(type!))(arrayBuffer); + return data as Tensor.DataTypeMap[Tensor.GpuBufferDataTypes]; + }, + dispose: () => { + if (wasm._OrtReleaseTensor(tensor) !== 0) { + checkLastError("Can't release tensor."); + } + }, }, - }, - 'gpu-buffer', - ]); + 'gpu-buffer', + ]); + } else { + output.push([ + type, + dims, + { + gpuBuffer, + download: wasm.jsepCreateDownloader!(gpuBuffer, bufferSize, type), + dispose: () => { + if (wasm._OrtReleaseTensor(tensor) !== 0) { + checkLastError("Can't release tensor."); + } + }, + }, + 'gpu-buffer', + ]); + } } else if (preferredLocation === 'ml-tensor' && size > 0) { const ensureTensor = wasm.jsepEnsureTensor; if (!ensureTensor) { @@ -887,6 +930,18 @@ export const run = async ( } finally { wasm.stackRestore(beforeRunStack); + if (BUILD_DEFS.USE_WEBGPU_EP) { + inputTensors.forEach((t) => { + if (t && t[3] === 'gpu-buffer') { + wasm.webgpuUnregisterBuffer!(t[2].gpuBuffer); + } + }); + outputTensors.forEach((t) => { + if (t && t[3] === 'gpu-buffer') { + wasm.webgpuUnregisterBuffer!(t[2].gpuBuffer); + } + }); + } inputTensorHandles.forEach((v) => wasm._OrtReleaseTensor(v)); outputTensorHandles.forEach((v) => wasm._OrtReleaseTensor(v)); inputOutputAllocs.forEach((p) => wasm._free(p)); diff --git a/js/web/lib/wasm/wasm-types.ts b/js/web/lib/wasm/wasm-types.ts index b4871e145f4d7..9b2ec71fd351d 100644 --- a/js/web/lib/wasm/wasm-types.ts +++ b/js/web/lib/wasm/wasm-types.ts @@ -41,18 +41,6 @@ export declare namespace JSEP { type DownloadTensorFunction = (tensorId: number, dstBuffer: ArrayBufferView | ArrayBuffer) => Promise; export interface Module extends WebGpuModule, WebNnModule { - /** - * Mount the external data file to an internal map, which will be used during session initialization. - * - * @param externalDataFilePath - specify the relative path of the external data file. - * @param externalDataFileData - specify the content data. - */ - mountExternalData(externalDataFilePath: string, externalDataFileData: Uint8Array): void; - /** - * Unmount all external data files from the internal map. - */ - unmountExternalData(): void; - /** * This is the entry of JSEP initialization. This function is called once when initializing ONNX Runtime per * backend. This function initializes Asyncify support. 
If name is 'webgpu', also initializes WebGPU backend and @@ -294,6 +282,21 @@ export declare namespace JSEP { } } +export declare namespace WebGpu { + export interface Module { + webgpuInit(setDefaultDevice: (device: GPUDevice) => void): void; + webgpuRegisterDevice( + device?: GPUDevice, + ): undefined | [deviceId: number, instanceHandle: number, deviceHandle: number]; + webgpuOnCreateSession(sessionHandle: number): void; + webgpuOnReleaseSession(sessionHandle: number): void; + webgpuRegisterBuffer(buffer: GPUBuffer, sessionHandle: number, bufferHandle?: number): number; + webgpuUnregisterBuffer(buffer: GPUBuffer): void; + webgpuGetBuffer(bufferHandle: number): GPUBuffer; + webgpuCreateDownloader(gpuBuffer: GPUBuffer, size: number, sessionHandle: number): () => Promise; + } +} + export interface OrtInferenceAPIs { _OrtInit(numThreads: number, loggingLevel: number): number; @@ -358,7 +361,13 @@ export interface OrtInferenceAPIs { logVerbosityLevel: number, optimizedModelFilePath: number, ): number; - _OrtAppendExecutionProvider(sessionOptionsHandle: number, name: number): number; + _OrtAppendExecutionProvider( + sessionOptionsHandle: number, + name: number, + providerOptionsKeys: number, + providerOptionsValues: number, + numKeys: number, + ): Promise; _OrtAddFreeDimensionOverride(sessionOptionsHandle: number, name: number, dim: number): number; _OrtAddSessionConfigEntry(sessionOptionsHandle: number, configKey: number, configValue: number): number; _OrtReleaseSessionOptions(sessionOptionsHandle: number): number; @@ -373,8 +382,11 @@ export interface OrtInferenceAPIs { /** * The interface of the WebAssembly module for ONNX Runtime, compiled from C++ source code by Emscripten. */ -export interface OrtWasmModule extends EmscriptenModule, OrtInferenceAPIs, Partial { - PTR_SIZE: number; +export interface OrtWasmModule + extends EmscriptenModule, + OrtInferenceAPIs, + Partial, + Partial { // #region emscripten functions stackSave(): number; stackRestore(stack: number): void; @@ -387,7 +399,31 @@ export interface OrtWasmModule extends EmscriptenModule, OrtInferenceAPIs, Parti stringToUTF8(str: string, offset: number, maxBytes: number): void; // #endregion + // #region ORT shared + + readonly PTR_SIZE: 4 | 8; + + /** + * Mount the external data file to an internal map, which will be used during session initialization. + * + * @param externalDataFilePath - specify the relative path of the external data file. + * @param externalDataFileData - specify the content data. + */ + mountExternalData(externalDataFilePath: string, externalDataFileData: Uint8Array): void; + /** + * Unmount all external data files from the internal map. + */ + unmountExternalData(): void; + + /** + * This function patches the WebAssembly module to support Asyncify. This function should be called at least once + * before any ORT API is called. + */ + asyncInit?(): void; + + // #endregion + // #region config - numThreads?: number; + readonly numThreads?: number; // #endregion } diff --git a/js/web/script/build.ts b/js/web/script/build.ts index 6006de62b41b6..fd9224a2dcf8b 100644 --- a/js/web/script/build.ts +++ b/js/web/script/build.ts @@ -27,7 +27,8 @@ const args = minimist(process.argv.slice(2)); * --bundle-mode=node * Build a single ort-web bundle for nodejs. 
*/ -const BUNDLE_MODE: 'prod' | 'dev' | 'perf' | 'node' = args['bundle-mode'] || 'prod'; +const BUNDLE_MODE: 'prod' | 'dev' | 'perf' | 'node' = + process.env.npm_config_bundle_mode || args['bundle-mode'] || 'prod'; /** * --debug @@ -41,7 +42,18 @@ const BUNDLE_MODE: 'prod' | 'dev' | 'perf' | 'node' = args['bundle-mode'] || 'pr * Enable debug mode. In this mode, esbuild metafile feature will be enabled. Full bundle analysis will be saved to a * file as JSON. */ -const DEBUG = args.debug; // boolean|'verbose'|'save' +const DEBUG = process.env.npm_config_debug || args.debug; // boolean|'verbose'|'save' + +/** + * --webgpu-ep + * --no-webgpu-ep (default) + * + * Enable or disable the use of WebGPU EP. If enabled, the WebGPU EP will be used. If disabled, the WebGPU backend will + * be used with JSEP. + * + * (temporary) This flag is used to test the WebGPU EP integration. It will be removed in the future. + */ +const USE_WEBGPU_EP = process.env.npm_config_webgpu_ep ?? args['webgpu-ep'] ?? true; /** * Root folder of the source code: `/js/` @@ -57,6 +69,7 @@ const DEFAULT_DEFINE = { 'BUILD_DEFS.DISABLE_WASM': 'false', 'BUILD_DEFS.DISABLE_WASM_PROXY': 'false', 'BUILD_DEFS.ENABLE_BUNDLE_WASM_JS': 'false', + 'BUILD_DEFS.USE_WEBGPU_EP': JSON.stringify(!!USE_WEBGPU_EP), 'BUILD_DEFS.IS_ESM': 'false', 'BUILD_DEFS.ESM_IMPORT_META_URL': 'undefined', diff --git a/onnxruntime/core/framework/external_data_loader.cc b/onnxruntime/core/framework/external_data_loader.cc index fe73a55735631..c577805e69cc4 100644 --- a/onnxruntime/core/framework/external_data_loader.cc +++ b/onnxruntime/core/framework/external_data_loader.cc @@ -60,7 +60,12 @@ common::Status LoadWebAssemblyExternalData(const Env& env, break; case 1: // Load external data to GPU. - Module.jsepUploadExternalBuffer(dataIdOrBuffer, data); + // TODO: use a unified interface for upload external buffer. + if (Module.webgpuUploadExternalBuffer) { + Module.webgpuUploadExternalBuffer(dataIdOrBuffer, data); + } else { + Module.jsepUploadExternalBuffer(dataIdOrBuffer, data); + } break; default: return 4; // Unknown error occurred in memory copy. 
diff --git a/onnxruntime/core/framework/external_data_loader.h b/onnxruntime/core/framework/external_data_loader.h index 117da7d0a4afa..90d48ca800797 100644 --- a/onnxruntime/core/framework/external_data_loader.h +++ b/onnxruntime/core/framework/external_data_loader.h @@ -42,7 +42,7 @@ class IExternalDataLoader { enum class ExternalDataLoadType { CPU = 0, -#if defined(USE_JSEP) +#if defined(USE_JSEP) || defined(USE_WEBGPU) WEBGPU_BUFFER = 1, #endif }; diff --git a/onnxruntime/core/providers/webgpu/program.cc b/onnxruntime/core/providers/webgpu/program.cc index d1d4c242c4697..976b7927ac3dd 100644 --- a/onnxruntime/core/providers/webgpu/program.cc +++ b/onnxruntime/core/providers/webgpu/program.cc @@ -206,6 +206,26 @@ ProgramVariableDataType ToProgramVariableDataType(int32_t element_type, int comp } } +std::ostream& operator<<(std::ostream& os, ValidationMode mode) { + switch (mode) { + case ValidationMode::Disabled: + os << "Disabled"; + break; + case ValidationMode::WGPUOnly: + os << "WGPUOnly"; + break; + case ValidationMode::Basic: + os << "Basic"; + break; + case ValidationMode::Full: + os << "Full"; + break; + default: + os << "Unknown(" << static_cast(mode) << ")"; + } + return os; +} + namespace { TensorShape GetReducedShape(const TensorShape& shape, int component /* > 1 */) { ORT_ENFORCE(shape.NumDimensions() > 0 && shape.GetDims()[shape.NumDimensions() - 1] % component == 0, diff --git a/onnxruntime/core/providers/webgpu/program.h b/onnxruntime/core/providers/webgpu/program.h index 7bfd9e8800099..95fef36144025 100644 --- a/onnxruntime/core/providers/webgpu/program.h +++ b/onnxruntime/core/providers/webgpu/program.h @@ -237,6 +237,7 @@ enum class ValidationMode { Basic, Full }; +std::ostream& operator<<(std::ostream& os, ValidationMode mode); namespace details { class ProgramWrapper; diff --git a/onnxruntime/core/providers/webgpu/webgpu_context.cc b/onnxruntime/core/providers/webgpu/webgpu_context.cc index 50ace96524ddf..bac738462c912 100644 --- a/onnxruntime/core/providers/webgpu/webgpu_context.cc +++ b/onnxruntime/core/providers/webgpu/webgpu_context.cc @@ -134,6 +134,8 @@ void WebGpuContext::Initialize(const WebGpuBufferCacheConfig& buffer_cache_confi ORT_ENFORCE(device_ != nullptr, "Failed to get a WebGPU device."); } + LOGS_DEFAULT(VERBOSE) << "WebGPU EP Context is created for: Instance=" << instance_.Get() << ", Device=" << device_.Get() << "."; + // cache adapter info ORT_ENFORCE(Device().GetAdapterInfo(&adapter_info_)); // cache device limits @@ -707,45 +709,46 @@ WebGpuContext& WebGpuContextFactory::CreateContext(const WebGpuContextConfig& co WGPUInstance instance = config.instance; WGPUDevice device = config.device; - if (context_id == 0) { - // context ID is preserved for the default context. User cannot use context ID 0 as a custom context. 
- ORT_ENFORCE(instance == nullptr && device == nullptr, - "WebGPU EP default context (contextId=0) must not have custom WebGPU instance or device."); - - std::call_once(init_default_flag_, [ + std::call_once(init_default_flag_, [ #if !defined(__wasm__) - dawn_proc_table = config.dawn_proc_table + dawn_proc_table = config.dawn_proc_table #endif - ]() { - // Step.1 - setup dawn proc table (only for non-WASM build) + ]() { + // Step.1 - setup dawn proc table (only for non-WASM build) #if !defined(__wasm__) - const DawnProcTable* dawn_procs = reinterpret_cast(dawn_proc_table); + const DawnProcTable* dawn_procs = reinterpret_cast(dawn_proc_table); #if defined(BUILD_DAWN_MONOLITHIC_LIBRARY) - ORT_ENFORCE(dawn_procs == nullptr, "setting DawnProcTable is not allowed when dynamically linked to webgpu_dawn."); + ORT_ENFORCE(dawn_procs == nullptr, "setting DawnProcTable is not allowed when dynamically linked to webgpu_dawn."); #else #if !defined(USE_EXTERNAL_DAWN) - if (dawn_procs == nullptr) { - dawn_procs = &dawn::native::GetProcs(); - } + if (dawn_procs == nullptr) { + dawn_procs = &dawn::native::GetProcs(); + } #else - ORT_ENFORCE(dawn_procs != nullptr, "DawnProcTable must be provided."); + ORT_ENFORCE(dawn_procs != nullptr, "DawnProcTable must be provided."); #endif - dawnProcSetProcs(dawn_procs); + dawnProcSetProcs(dawn_procs); #endif #endif - // Step.2 - Create wgpu::Instance + // Step.2 - Create wgpu::Instance #if !defined(__wasm__) - wgpu::InstanceDescriptor instance_desc{}; - instance_desc.features.timedWaitAnyEnable = true; - default_instance_ = wgpu::CreateInstance(&instance_desc); + wgpu::InstanceDescriptor instance_desc{}; + instance_desc.features.timedWaitAnyEnable = true; + default_instance_ = wgpu::CreateInstance(&instance_desc); #else - default_instance_ = wgpu::CreateInstance(nullptr); + default_instance_ = wgpu::CreateInstance(nullptr); #endif - ORT_ENFORCE(default_instance_ != nullptr, "Failed to create wgpu::Instance."); - }); + ORT_ENFORCE(default_instance_ != nullptr, "Failed to create wgpu::Instance."); + }); + + if (context_id == 0) { + // context ID is preserved for the default context. User cannot use context ID 0 as a custom context. + ORT_ENFORCE(instance == nullptr && device == nullptr, + "WebGPU EP default context (contextId=0) must not have custom WebGPU instance or device."); + instance = default_instance_.Get(); } else { // for context ID > 0, user must provide custom WebGPU instance and device. @@ -799,5 +802,9 @@ void CleanupWebGpuContexts() { WebGpuContextFactory::Cleanup(); } +WGPUDevice GetDevice(int context_id) { + return WebGpuContextFactory::GetContext(context_id).Device().Get(); +} + } // namespace webgpu } // namespace onnxruntime diff --git a/onnxruntime/core/providers/webgpu/webgpu_provider_factory.cc b/onnxruntime/core/providers/webgpu/webgpu_provider_factory.cc index 60c61b2ca5665..1d779152f91f3 100644 --- a/onnxruntime/core/providers/webgpu/webgpu_provider_factory.cc +++ b/onnxruntime/core/providers/webgpu/webgpu_provider_factory.cc @@ -151,6 +151,12 @@ std::shared_ptr WebGpuProviderFactoryCreator::Create( validation_mode, }; + LOGS_DEFAULT(VERBOSE) << "WebGPU EP Device ID: " << context_id; + LOGS_DEFAULT(VERBOSE) << "WebGPU EP WGPUInstance: " << webgpu_instance; + LOGS_DEFAULT(VERBOSE) << "WebGPU EP WGPUDevice: " << webgpu_device; + LOGS_DEFAULT(VERBOSE) << "WebGPU EP DawnProcTable: " << dawn_proc_table; + LOGS_DEFAULT(VERBOSE) << "WebGPU EP ValidationMode: " << validation_mode; + // // STEP.3 - prepare parameters for WebGPU context initialization. 
// diff --git a/onnxruntime/wasm/api.cc b/onnxruntime/wasm/api.cc index 7adfc6a2b2ccb..1ad35b51bb1c1 100644 --- a/onnxruntime/wasm/api.cc +++ b/onnxruntime/wasm/api.cc @@ -8,6 +8,14 @@ #include "core/session/onnxruntime_cxx_api.h" #include "api.h" +#ifdef USE_WEBGPU +namespace onnxruntime { +namespace webgpu { +WGPUDevice GetDevice(int); +} +} // namespace onnxruntime +#endif + #include #include #include @@ -164,8 +172,12 @@ OrtSessionOptions* OrtCreateSessionOptions(size_t graph_optimization_level, return UNREGISTER_AUTO_RELEASE(session_options); } -int OrtAppendExecutionProvider(ort_session_options_handle_t session_options, const char* name) { - return CHECK_STATUS(SessionOptionsAppendExecutionProvider, session_options, name, nullptr, nullptr, 0); +int OrtAppendExecutionProvider(ort_session_options_handle_t session_options, + const char* name, + const char* const* provider_options_keys, + const char* const* provider_options_values, + size_t num_keys) { + return CHECK_STATUS(SessionOptionsAppendExecutionProvider, session_options, name, provider_options_keys, provider_options_values, num_keys); } int OrtAddFreeDimensionOverride(ort_session_options_handle_t session_options, @@ -507,6 +519,16 @@ char* OrtEndProfiling(ort_session_handle_t session) { : nullptr; } +// WebGPU API Section + +#ifdef USE_WEBGPU + +WGPUDevice OrtGetWebGpuDevice(int device_id) { + return onnxruntime::webgpu::GetDevice(device_id); +} + +#endif + // Training API Section #ifdef ENABLE_TRAINING_APIS diff --git a/onnxruntime/wasm/api.h b/onnxruntime/wasm/api.h index f44c515d98f6b..9ff1eb55ecedc 100644 --- a/onnxruntime/wasm/api.h +++ b/onnxruntime/wasm/api.h @@ -10,6 +10,10 @@ #include +#ifdef USE_WEBGPU +#include +#endif + #include struct OrtSession; @@ -85,7 +89,10 @@ ort_session_options_handle_t EMSCRIPTEN_KEEPALIVE OrtCreateSessionOptions(size_t * @returns ORT error code. If not zero, call OrtGetLastError() to get detailed error message. */ int EMSCRIPTEN_KEEPALIVE OrtAppendExecutionProvider(ort_session_options_handle_t session_options, - const char* name); + const char* name, + const char* const* provider_options_keys, + const char* const* provider_options_values, + size_t num_keys); /** * add a free dimension override for one dimension of a session's input. @@ -294,6 +301,21 @@ int EMSCRIPTEN_KEEPALIVE OrtRun(ort_session_handle_t session, */ char* EMSCRIPTEN_KEEPALIVE OrtEndProfiling(ort_session_handle_t session); +// WebGPU API Section + +#ifdef USE_WEBGPU + +/** + * get the GPU Device by device ID. + * + * This function is only available after the GPU Device is initialized in WebGpuContextFactory. + * + * @returns a WGPUDevice handle. + */ +WGPUDevice EMSCRIPTEN_KEEPALIVE OrtGetWebGpuDevice(int device_id); + +#endif + // Training API Section #ifdef ENABLE_TRAINING_APIS diff --git a/onnxruntime/wasm/js_post_js.js b/onnxruntime/wasm/js_post_js.js index b77d82fbd7d10..be5a4d3c7415a 100644 --- a/onnxruntime/wasm/js_post_js.js +++ b/onnxruntime/wasm/js_post_js.js @@ -2,6 +2,6 @@ // Licensed under the MIT License. -'use strict'; +"use strict"; Module["PTR_SIZE"] = 4; diff --git a/onnxruntime/wasm/js_post_js_64.js b/onnxruntime/wasm/js_post_js_64.js index b140df927ebbd..b16383b746b8a 100644 --- a/onnxruntime/wasm/js_post_js_64.js +++ b/onnxruntime/wasm/js_post_js_64.js @@ -2,6 +2,6 @@ // Licensed under the MIT License. 
-'use strict'; +"use strict"; Module["PTR_SIZE"] = 8; diff --git a/onnxruntime/wasm/post-webgpu.js b/onnxruntime/wasm/post-webgpu.js new file mode 100644 index 0000000000000..2f2eb250c4417 --- /dev/null +++ b/onnxruntime/wasm/post-webgpu.js @@ -0,0 +1,238 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +"use strict"; + +// +// This file contains the post-run code for the ORT WebAssembly module. The code in this file will be injected into the +// final module using Emscripten's `--post-js` option. +// +// This file will only be used in build with flag `--use_webgpu`. + +/** + * a map from GPUDevice to [deviceId, instanceHandle, deviceHandle] + * + * only stores custom devices (ie. devices created by the user, not the default device created by ORT) + * + * key is the GPUDevice object. + * + * value is a tuple of 3 elements: + * - deviceId: a unique ID for the device. Must be positive integer. + * - instanceHandle: the instance handle(pointer) of the device. + * - deviceHandle: the device handle(pointer) of the device. + * + * @type {WeakMap} + */ +const webgpuActiveDevices = new WeakMap(); +/** + * a number that is used to assign a unique ID to the next custom device. + */ +let webgpuNextDeviceId = 1; +/** + * a function to set the default device. + * + * @type {(gpuDevice: GPUDevice) => void} + */ +let webgpuSetDefaultDevice = undefined; +/** + * the current device that is being used to create a WebGPU EP inference session. + * + * the value of this variable is only valid during the creation of a WebGPU EP inference session. + * + * @type {GPUDevice|undefined} + */ +let webgpuCurrentDevice = undefined; +/** + * the current device ID that is being used to create a WebGPU EP inference session. + * + * the value of this variable is only valid during the creation of a WebGPU EP inference session. + * + * @type {number|undefined} + */ +let webgpuCurrentDeviceId = undefined; + +/** + * This function is called only once when initializing the WebGPU backend. + * + * @param {(gpuDevice: GPUDevice) => void} setDefaultDevice A callback function to set the default device. + */ +Module["webgpuInit"] = (setDefaultDevice) => { + webgpuSetDefaultDevice = setDefaultDevice; + + // Setup a callback function for loading external buffers (model weights). 
+ Module.webgpuUploadExternalBuffer = (bufferHandle, data) => { + const srcArrayBuffer = data.buffer; + const srcOffset = data.byteOffset; + const srcLength = data.byteLength; + const size = Math.ceil(Number(srcLength) / 16) * 16; + + const gpuBuffer = WebGPU.getJsObject(bufferHandle); + + // get current device + if (!webgpuCurrentDevice) { + const deviceHandle = _OrtGetWebGpuDevice(webgpuCurrentDeviceId); + webgpuCurrentDevice = WebGPU.getJsObject(deviceHandle); + } + + // create gpu buffer + const gpuBufferForUploading = webgpuCurrentDevice.createBuffer( + { mappedAtCreation: true, size, usage: GPUBufferUsage.MAP_WRITE | GPUBufferUsage.COPY_SRC }, + ); + + // copy (upload) data + const arrayBuffer = gpuBufferForUploading.getMappedRange(); + new Uint8Array(arrayBuffer).set(new Uint8Array(srcArrayBuffer, srcOffset, srcLength)); + gpuBufferForUploading.unmap(); + + // GPU copy + const commandEncoder = webgpuCurrentDevice.createCommandEncoder(); + commandEncoder.copyBufferToBuffer(gpuBufferForUploading, 0, gpuBuffer, 0, size); + webgpuCurrentDevice.queue.submit([commandEncoder.finish()]); + gpuBufferForUploading.destroy(); + }; +}; + +/** + * This function is called only when a custom device is used, during preparation of session options. + * + * @param {GPUDevice} device the user provided device object. + * @returns {undefined|[number, number, number]} a tuple of device id, instance handle, and device handle. + */ +Module["webgpuRegisterDevice"] = (device) => { + if (webgpuCurrentDeviceId !== undefined) { + throw new Error("another WebGPU EP inference session is being created."); + } + + if (device) { + let deviceInfo = webgpuActiveDevices.get(device); + if (!deviceInfo) { + const instanceHandle = _wgpuCreateInstance(0); + const deviceHandle = WebGPU.importJsDevice(device, instanceHandle); + deviceInfo = [webgpuNextDeviceId++, instanceHandle, deviceHandle]; + webgpuActiveDevices.set(device, deviceInfo); + } + + // The current device ID is a temporary storage for the device ID to be used in the session that is being created. + // + // Soon after `webgpuRegisterDevice` (this function) is called, `webgpuOnCreateSession` will be called so that the + // value of `webgpuCurrentDeviceId` is used and reset then. + webgpuCurrentDevice = device; + webgpuCurrentDeviceId = deviceInfo[0]; + return deviceInfo; + } else { + webgpuCurrentDevice = undefined; + webgpuCurrentDeviceId = 0; + return undefined; + } +}; + +const webgpuActiveSessions = new Map(); +Module["webgpuOnCreateSession"] = (sessionHandle) => { + if (webgpuCurrentDeviceId === undefined) { + // do nothing if webgpuCurrentDeviceId is undefined. + // this means no WebGPU EP is being created. + return; + } + + const deviceId = webgpuCurrentDeviceId; + webgpuCurrentDeviceId = undefined; + + if (sessionHandle) { + // when session created successfully + const deviceHandle = _OrtGetWebGpuDevice(deviceId); + webgpuActiveSessions.set(sessionHandle, deviceHandle); + + if (deviceId === 0) { + const device = webgpuCurrentDevice ?? WebGPU.getJsObject(deviceHandle); + webgpuSetDefaultDevice(device); + } + } + webgpuCurrentDevice = undefined; +}; + +Module["webgpuOnReleaseSession"] = (sessionHandle) => { + webgpuActiveSessions.delete(sessionHandle); +}; + +const gpuBufferMetadataSymbol = Symbol("gpuBufferMetadata"); + +Module["webgpuRegisterBuffer"] = (buffer, sessionHandle, bufferHandle) => { + const metadata = buffer[gpuBufferMetadataSymbol]; + if (bufferHandle) { + // This is a buffer that was created by ORT. 
Metadata is [bufferHandle, NaN] + + buffer[gpuBufferMetadataSymbol] = [bufferHandle, NaN]; + return bufferHandle; + } else { + // This is a buffer that was created by the user. Metadata is [bufferHandle, refCount] + + if (metadata) { + metadata[1]++; + return metadata[0]; + } + + const deviceHandle = webgpuActiveSessions.get(sessionHandle); + if (deviceHandle === undefined) { + throw new Error("Invalid session handle passed to webgpuRegisterBuffer"); + } + + const bufferHandle = WebGPU.importJsBuffer(buffer, deviceHandle); + buffer[gpuBufferMetadataSymbol] = [bufferHandle, 1]; + return bufferHandle; + } +}; + +Module["webgpuUnregisterBuffer"] = (buffer) => { + const metadata = buffer[gpuBufferMetadataSymbol]; + if (!metadata) { + throw new Error("Buffer is not registered"); + } + metadata[1]--; + if (metadata[1] === 0) { + // For buffers created by ORT, metadata[1] will always be NaN. This function will not release the buffer. + // Instead, the buffer will be released when user calls `Tensor.dispose()` in JavaScript. + _wgpuBufferRelease(metadata[0]); + delete buffer[gpuBufferMetadataSymbol]; + } +}; + +Module["webgpuGetBuffer"] = (bufferHandle) => { + return WebGPU.getJsObject(bufferHandle); +}; + +Module["webgpuCreateDownloader"] = (gpuBuffer, bufferSize, sessionHandle) => { + const deviceHandle = webgpuActiveSessions.get(sessionHandle); + if (deviceHandle === undefined) { + throw new Error("Invalid session handle passed to webgpuRegisterBuffer"); + } + + const buffer = gpuBuffer; + const device = WebGPU.getJsObject(deviceHandle); + const originalSize = bufferSize; + const size = Math.ceil(Number(originalSize) / 16) * 16; + + return async () => { + const gpuReadBuffer = device.createBuffer({ + size, + usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.MAP_READ, + }); + try { + const commandEncoder = device.createCommandEncoder(); + commandEncoder.copyBufferToBuffer( + buffer /* source buffer */, + 0 /* source offset */, + gpuReadBuffer /* destination buffer */, + 0 /* destination offset */, + size /* size */ + ); + device.queue.submit([commandEncoder.finish()]); + + await gpuReadBuffer.mapAsync(GPUMapMode.READ); + + const arrayBuffer = gpuReadBuffer.getMappedRange(); + return arrayBuffer.slice(0, originalSize); + } finally { + gpuReadBuffer.destroy(); + } + }; +}; diff --git a/onnxruntime/wasm/pre-async.js b/onnxruntime/wasm/pre-async.js new file mode 100644 index 0000000000000..a1e66d854d296 --- /dev/null +++ b/onnxruntime/wasm/pre-async.js @@ -0,0 +1,142 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +"use strict"; + +// +// This file contains the pre-run code for the ORT WebAssembly module. The code in this file will be injected into the +// final module using Emscripten's `--pre-js` option. +// +// This file will only be used in build with flag `-s ASYNCIFY=1`. + +/** + * initialize for asyncify support. + */ +let initAsyncImpl = () => { + // This is a simplified version of cwrap() with options.async === true (-sASYNCIFY=1) + // It removes some overhead in cwarp() and ccall() that we don't need. + // + // Currently in ASYNCIFY build, we only use this for the following functions: + // - OrtCreateSession() + // - OrtRun() + // - OrtRunWithBinding() + // - OrtBindInput() + // + // Note: about parameters "getFunc" and "setFunc": + // - Emscripten has different behaviors for Debug and Release builds for generating exported function wrapper. + // + // - In Debug build, it will generate a wrapper function for each exported function. 
For example, it generates a + // wrapper for OrtRun() like this (minified): + // ``` + // var _OrtRun = Module["_OrtRun"] = createExportWrapper("OrtRun"); + // ``` + // + // - In Release build, it will generate a lazy loading wrapper for each exported function. For example, it generates + // a wrapper for OrtRun() like this (minified): + // ``` + // d._OrtRun = (a, b, c, e, f, h, l, q) => (d._OrtRun = J.ka)(a, b, c, e, f, h, l, q); + // ``` + // + // The behavior of these two wrappers are different. The debug build will assign `Module["_OrtRun"]` only once + // because `createExportWrapper()` does not reset `Module["_OrtRun"]` inside. The release build, however, will + // reset d._OrtRun to J.ka when the first time it is called. + // + // The difference is important because we need to design the async wrapper in a way that it can handle both cases. + // + // Now, let's look at how the async wrapper is designed to work for both cases: + // + // - Debug build: + // 1. When Web assembly is being loaded, `Module["_OrtRun"]` is assigned to `createExportWrapper("OrtRun")`. + // 2. When the first time `Module["initAsync"]` is called, `Module["_OrtRun"]` is re-assigned to a new async + // wrapper function. + // Value of `Module["_OrtRun"]` will not be changed again. + // + // - Release build: + // 1. When Web assembly is being loaded, `Module["_OrtRun"]` is assigned to a lazy loading wrapper function. + // 2. When the first time `Module["initAsync"]` is called, `Module["_OrtRun"]` is re-assigned to a new async + // wrapper function. + // 3. When the first time `Module["_OrtRun"]` is called, the async wrapper will be called. It will call into this + // function: + // ``` + // (a, b, c, e, f, h, l, q) => (d._OrtRun = J.ka)(a, b, c, e, f, h, l, q); + // ``` + // This function will assign d._OrtRun (ie. the minimized `Module["_OrtRun"]`) to the real function (J.ka). + // 4. Since d._OrtRun is re-assigned, we need to update the async wrapper to re-assign its stored + // function to the updated value (J.ka), and re-assign the value of `d._OrtRun` back to the async wrapper. + // Value of `Module["_OrtRun"]` will not be changed again. + // + // The value of `Module["_OrtRun"]` will need to be assigned for 2 times for debug build and 4 times for release + // build. + // + // This is why we need this `getFunc` and `setFunc` parameters. They are used to get the current value of an + // exported function and set the new value of an exported function. + // + const wrapAsync = (func, getFunc, setFunc) => { + return (...args) => { + // cache the async data before calling the function. + const previousAsync = Asyncify.currData; + + const previousFunc = getFunc?.(); + const ret = func(...args); + const newFunc = getFunc?.(); + if (previousFunc !== newFunc) { + // The exported function has been updated. + // Set the sync function reference to the new function. + func = newFunc; + // Set the exported function back to the async wrapper. + setFunc(previousFunc); + // Remove getFunc and setFunc. They are no longer needed. + setFunc = null; + getFunc = null; + } + + // If the async data has been changed, it means that the function started an async operation. + if (Asyncify.currData != previousAsync) { + // returns the promise + return Asyncify.whenDone(); + } + // the function is synchronous. returns the result. 
+ return ret; + }; + }; + + // replace the original functions with asyncified versions + Module["_OrtAppendExecutionProvider"] = wrapAsync( + Module["_OrtAppendExecutionProvider"], + () => Module["_OrtAppendExecutionProvider"], + (v) => (Module["_OrtAppendExecutionProvider"] = v) + ); + Module["_OrtCreateSession"] = wrapAsync( + Module["_OrtCreateSession"], + () => Module["_OrtCreateSession"], + (v) => (Module["_OrtCreateSession"] = v) + ); + Module["_OrtRun"] = wrapAsync( + Module["_OrtRun"], + () => Module["_OrtRun"], + (v) => (Module["_OrtRun"] = v) + ); + Module["_OrtRunWithBinding"] = wrapAsync( + Module["_OrtRunWithBinding"], + () => Module["_OrtRunWithBinding"], + (v) => (Module["_OrtRunWithBinding"] = v) + ); + Module["_OrtBindInput"] = wrapAsync( + Module["_OrtBindInput"], + () => Module["_OrtBindInput"], + (v) => (Module["_OrtBindInput"] = v) + ); + + // If JSEP is enabled, wrap OrtRun() and OrtRunWithBinding() with asyncify. + if (typeof jsepRunAsync !== "undefined") { + Module["_OrtRun"] = jsepRunAsync(Module["_OrtRun"]); + Module["_OrtRunWithBinding"] = jsepRunAsync(Module["_OrtRunWithBinding"]); + } + + // remove this function to make sure it is called only once. + initAsyncImpl = undefined; +}; + +Module["asyncInit"] = () => { + initAsyncImpl?.(); +}; diff --git a/onnxruntime/wasm/pre-jsep.js b/onnxruntime/wasm/pre-jsep.js index 0c83e71a921cb..a35ab129280c4 100644 --- a/onnxruntime/wasm/pre-jsep.js +++ b/onnxruntime/wasm/pre-jsep.js @@ -1,7 +1,7 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. -'use strict'; +"use strict"; // // This file contains the pre-run code for the ORT WebAssembly module. The code in this file will be injected into the @@ -9,247 +9,151 @@ // // This file will only be used in build with flag `--use_jsep`. - -/** - * initialize JSEP for asyncify support. - */ -let jsepInitAsync = () => { - // This is a simplified version of cwrap() with options.async === true (-sASYNCIFY=1) - // It removes some overhead in cwarp() and ccall() that we don't need. - // - // Currently in JSEP build, we only use this for the following functions: - // - OrtRun() - // - OrtRunWithBinding() - // - OrtBindInput() - // - // Note: about parameters "getFunc" and "setFunc": - // - Emscripten has different behaviors for Debug and Release builds for generating exported function wrapper. - // - // - In Debug build, it will generate a wrapper function for each exported function. For example, it generates a - // wrapper for OrtRun() like this (minified): - // ``` - // var _OrtRun = Module["_OrtRun"] = createExportWrapper("OrtRun"); - // ``` - // - // - In Release build, it will generate a lazy loading wrapper for each exported function. For example, it generates - // a wrapper for OrtRun() like this (minified): - // ``` - // d._OrtRun = (a, b, c, e, f, h, l, q) => (d._OrtRun = J.ka)(a, b, c, e, f, h, l, q); - // ``` - // - // The behavior of these two wrappers are different. The debug build will assign `Module["_OrtRun"]` only once - // because `createExportWrapper()` does not reset `Module["_OrtRun"]` inside. The release build, however, will - // reset d._OrtRun to J.ka when the first time it is called. - // - // The difference is important because we need to design the async wrapper in a way that it can handle both cases. - // - // Now, let's look at how the async wrapper is designed to work for both cases: - // - // - Debug build: - // 1. 
When Web assembly is being loaded, `Module["_OrtRun"]` is assigned to `createExportWrapper("OrtRun")`. - // 2. When the first time `Module["jsepInit"]` is called, `Module["_OrtRun"]` is re-assigned to a new async - // wrapper function. - // Value of `Module["_OrtRun"]` will not be changed again. - // - // - Release build: - // 1. When Web assembly is being loaded, `Module["_OrtRun"]` is assigned to a lazy loading wrapper function. - // 2. When the first time `Module["jsepInit"]` is called, `Module["_OrtRun"]` is re-assigned to a new async - // wrapper function. - // 3. When the first time `Module["_OrtRun"]` is called, the async wrapper will be called. It will call into this - // function: - // ``` - // (a, b, c, e, f, h, l, q) => (d._OrtRun = J.ka)(a, b, c, e, f, h, l, q); - // ``` - // This function will assign d._OrtRun (ie. the minimized `Module["_OrtRun"]`) to the real function (J.ka). - // 4. Since d._OrtRun is re-assigned, we need to update the async wrapper to re-assign its stored - // function to the updated value (J.ka), and re-assign the value of `d._OrtRun` back to the async wrapper. - // Value of `Module["_OrtRun"]` will not be changed again. - // - // The value of `Module["_OrtRun"]` will need to be assigned for 2 times for debug build and 4 times for release - // build. - // - // This is why we need this `getFunc` and `setFunc` parameters. They are used to get the current value of an - // exported function and set the new value of an exported function. - // - const jsepWrapAsync = (func, getFunc, setFunc) => { - return (...args) => { - // cache the async data before calling the function. - const previousAsync = Asyncify.currData; - - const previousFunc = getFunc?.(); - const ret = func(...args); - const newFunc = getFunc?.(); - if (previousFunc !== newFunc) { - // The exported function has been updated. - // Set the sync function reference to the new function. - func = newFunc; - // Set the exported function back to the async wrapper. - setFunc(previousFunc); - // Remove getFunc and setFunc. They are no longer needed. - setFunc = null; - getFunc = null; +// This is a wrapper for OrtRun() and OrtRunWithBinding() to ensure that Promises are handled correctly. +const jsepRunAsync = (runAsyncFunc) => { + return async (...args) => { + try { + // Module.jsepSessionState should be null, unless we are in the middle of a session. + // If it is not null, it means that the previous session has not finished yet. + if (Module.jsepSessionState) { + throw new Error("Session already started"); } + const state = (Module.jsepSessionState = { + sessionHandle: args[0], + errors: [], + }); - // If the async data has been changed, it means that the function started an async operation. - if (Asyncify.currData != previousAsync) { - // returns the promise - return Asyncify.whenDone(); - } - // the function is synchronous. returns the result. - return ret; - }; - }; - - // This is a wrapper for OrtRun() and OrtRunWithBinding() to ensure that Promises are handled correctly. - const runAsync = (runAsyncFunc) => { - return async (...args) => { - try { - // Module.jsepSessionState should be null, unless we are in the middle of a session. - // If it is not null, it means that the previous session has not finished yet. 
- if (Module.jsepSessionState) { - throw new Error('Session already started'); - } - const state = Module.jsepSessionState = {sessionHandle: args[0], errors: []}; + // Run the acyncified function: OrtRun() or OrtRunWithBinding() + const ret = await runAsyncFunc(...args); - // Run the acyncified function: OrtRun() or OrtRunWithBinding() - const ret = await runAsyncFunc(...args); - - // Check if the session is still valid. this object should be the same as the one we set above. - if (Module.jsepSessionState !== state) { - throw new Error('Session mismatch'); - } + // Check if the session is still valid. this object should be the same as the one we set above. + if (Module.jsepSessionState !== state) { + throw new Error("Session mismatch"); + } - // Flush the backend. This will submit all pending commands to the GPU. - Module.jsepBackend?.['flush'](); + // Flush the backend. This will submit all pending commands to the GPU. + Module.jsepBackend?.["flush"](); - // Await all pending promises. This includes GPU validation promises for diagnostic purposes. - const errorPromises = state.errors; - if (errorPromises.length > 0) { - let errors = await Promise.all(errorPromises); - errors = errors.filter(e => e); - if (errors.length > 0) { - throw new Error(errors.join('\n')); - } + // Await all pending promises. This includes GPU validation promises for diagnostic purposes. + const errorPromises = state.errors; + if (errorPromises.length > 0) { + let errors = await Promise.all(errorPromises); + errors = errors.filter((e) => e); + if (errors.length > 0) { + throw new Error(errors.join("\n")); } - - return ret; - } finally { - Module.jsepSessionState = null; } - }; - }; - // replace the original functions with asyncified versions - Module['_OrtCreateSession'] = jsepWrapAsync( - Module['_OrtCreateSession'], - () => Module['_OrtCreateSession'], - v => Module['_OrtCreateSession'] = v); - Module['_OrtRun'] = runAsync(jsepWrapAsync( - Module['_OrtRun'], - () => Module['_OrtRun'], - v => Module['_OrtRun'] = v)); - Module['_OrtRunWithBinding'] = runAsync(jsepWrapAsync( - Module['_OrtRunWithBinding'], - () => Module['_OrtRunWithBinding'], - v => Module['_OrtRunWithBinding'] = v)); - Module['_OrtBindInput'] = jsepWrapAsync( - Module['_OrtBindInput'], - () => Module['_OrtBindInput'], - v => Module['_OrtBindInput'] = v); - - // remove this function to make sure it is called only once. - jsepInitAsync = undefined; + return ret; + } finally { + Module.jsepSessionState = null; + } + }; }; - /** - * initialize JSEP for WebGPU. + * initialize JSEP for WebGPU and WebNN. 
*/ -Module['jsepInit'] = (name, params) => { - jsepInitAsync?.(); - - if (name === 'webgpu') { - [Module.jsepBackend, - Module.jsepAlloc, - Module.jsepFree, - Module.jsepCopy, - Module.jsepCopyAsync, - Module.jsepCreateKernel, - Module.jsepReleaseKernel, - Module.jsepRunKernel, - Module.jsepCaptureBegin, - Module.jsepCaptureEnd, - Module.jsepReplay] = params; +Module["jsepInit"] = (name, params) => { + if (name === "webgpu") { + [ + Module.jsepBackend, + Module.jsepAlloc, + Module.jsepFree, + Module.jsepCopy, + Module.jsepCopyAsync, + Module.jsepCreateKernel, + Module.jsepReleaseKernel, + Module.jsepRunKernel, + Module.jsepCaptureBegin, + Module.jsepCaptureEnd, + Module.jsepReplay, + ] = params; // expose webgpu backend functions const backend = Module.jsepBackend; - Module['jsepRegisterBuffer'] = (sessionId, index, buffer, size) => { - return backend['registerBuffer'](sessionId, index, buffer, size); + Module["jsepRegisterBuffer"] = (sessionId, index, buffer, size) => { + return backend["registerBuffer"](sessionId, index, buffer, size); }; - Module['jsepGetBuffer'] = (dataId) => { - return backend['getBuffer'](dataId); + Module["jsepGetBuffer"] = (dataId) => { + return backend["getBuffer"](dataId); }; - Module['jsepCreateDownloader'] = (gpuBuffer, size, type) => { - return backend['createDownloader'](gpuBuffer, size, type); + Module["jsepCreateDownloader"] = (gpuBuffer, size, type) => { + return backend["createDownloader"](gpuBuffer, size, type); }; - Module['jsepOnCreateSession'] = sessionId => { - backend['onCreateSession'](sessionId); + Module["jsepOnCreateSession"] = (sessionId) => { + backend["onCreateSession"](sessionId); }; - Module['jsepOnReleaseSession'] = sessionId => { - backend['onReleaseSession'](sessionId); + Module["jsepOnReleaseSession"] = (sessionId) => { + backend["onReleaseSession"](sessionId); }; - Module['jsepOnRunStart'] = sessionId => { - return backend['onRunStart'](sessionId); + Module["jsepOnRunStart"] = (sessionId) => { + return backend["onRunStart"](sessionId); }; Module.jsepUploadExternalBuffer = (dataId, buffer) => { - backend['upload'](dataId, buffer); + backend["upload"](dataId, buffer); }; - } else if (name === 'webnn') { + } else if (name === "webnn") { // Functions called from EM_ASM need to be assigned in a way that can be minified. // Functions called via emscripten::val::module_property need to be assigned by name so that the minifier doesn't // change the name. - [Module.jsepBackend, - Module.jsepReserveTensorId, - Module.jsepReleaseTensorId, - Module['jsepEnsureTensor'], - Module.jsepUploadTensor, - Module['jsepDownloadTensor'], + [ + Module.jsepBackend, + Module.jsepReserveTensorId, + Module.jsepReleaseTensorId, + Module["jsepEnsureTensor"], + Module.jsepUploadTensor, + Module["jsepDownloadTensor"], ] = params; // This function is called from both JS and an EM_ASM block, it needs both a minifiable name and an explicit name. - Module['jsepReleaseTensorId'] = Module.jsepReleaseTensorId; - Module['jsepUploadTensor'] = Module.jsepUploadTensor; + Module["jsepReleaseTensorId"] = Module.jsepReleaseTensorId; + Module["jsepUploadTensor"] = Module.jsepUploadTensor; // Functions called from JS also need to have explicit names. 
const backend = Module.jsepBackend; - Module['jsepOnRunStart'] = sessionId => { - return backend['onRunStart'](sessionId); + Module["jsepOnRunStart"] = (sessionId) => { + return backend["onRunStart"](sessionId); }; - Module['jsepOnRunEnd'] = backend['onRunEnd'].bind(backend); - Module['jsepRegisterMLContext'] = (sessionId, mlContext) => { - backend['registerMLContext'](sessionId, mlContext); + Module["jsepOnRunEnd"] = backend["onRunEnd"].bind(backend); + Module["jsepRegisterMLContext"] = (sessionId, mlContext) => { + backend["registerMLContext"](sessionId, mlContext); }; - Module['jsepOnReleaseSession'] = sessionId => { - backend['onReleaseSession'](sessionId); + Module["jsepOnReleaseSession"] = (sessionId) => { + backend["onReleaseSession"](sessionId); }; - Module['jsepCreateMLTensorDownloader'] = (tensorId, type) => { - return backend['createMLTensorDownloader'](tensorId, type); - } - Module['jsepRegisterMLTensor'] = (sessionId, tensor, dataType, shape) => { - return backend['registerMLTensor'](sessionId, tensor, dataType, shape); + Module["jsepCreateMLTensorDownloader"] = (tensorId, type) => { + return backend["createMLTensorDownloader"](tensorId, type); + }; + Module["jsepRegisterMLTensor"] = (sessionId, tensor, dataType, shape) => { + return backend["registerMLTensor"](sessionId, tensor, dataType, shape); }; - Module['jsepCreateMLContext'] = (optionsOrGpuDevice) => { - return backend['createMLContext'](optionsOrGpuDevice); + Module["jsepCreateMLContext"] = (optionsOrGpuDevice) => { + return backend["createMLContext"](optionsOrGpuDevice); }; - Module['jsepRegisterMLConstant'] = (externalFilePath, dataOffset, dataLength, builder, desc) => { - return backend['registerMLConstant']( - externalFilePath, dataOffset, dataLength, builder, desc, Module.MountedFiles); + Module["jsepRegisterMLConstant"] = ( + externalFilePath, + dataOffset, + dataLength, + builder, + desc + ) => { + return backend["registerMLConstant"]( + externalFilePath, + dataOffset, + dataLength, + builder, + desc, + Module.MountedFiles + ); }; - Module['jsepRegisterGraphInput'] = backend['registerGraphInput'].bind(backend); - Module['jsepIsGraphInput'] = backend['isGraphInput'].bind(backend); + Module["jsepRegisterGraphInput"] = + backend["registerGraphInput"].bind(backend); + Module["jsepIsGraphInput"] = backend["isGraphInput"].bind(backend); - Module['jsepCreateTemporaryTensor'] = backend['createTemporaryTensor'].bind(backend); + Module["jsepCreateTemporaryTensor"] = + backend["createTemporaryTensor"].bind(backend); } }; diff --git a/onnxruntime/wasm/pre.js b/onnxruntime/wasm/pre.js index 9b5f3ce545b78..6da28fc355899 100644 --- a/onnxruntime/wasm/pre.js +++ b/onnxruntime/wasm/pre.js @@ -1,21 +1,20 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. -'use strict'; +"use strict"; // // This file contains the pre-run code for the ORT WebAssembly module. The code in this file will be injected into the // final module using Emscripten's `--pre-js` option. - /** * Mount external data files of a model to an internal map, which will be used during session initialization. 
* * @param {string} externalDataFilesPath * @param {Uint8Array} externalDataFilesData */ -Module['mountExternalData'] = (externalDataFilePath, externalDataFileData) => { - if (externalDataFilePath.startsWith('./')) { +Module["mountExternalData"] = (externalDataFilePath, externalDataFileData) => { + if (externalDataFilePath.startsWith("./")) { externalDataFilePath = externalDataFilePath.substring(2); } const files = Module.MountedFiles || (Module.MountedFiles = new Map()); @@ -25,7 +24,7 @@ Module['mountExternalData'] = (externalDataFilePath, externalDataFileData) => { /** * Unmount external data files of a model. */ -Module['unmountExternalData'] = () => { +Module["unmountExternalData"] = () => { delete Module.MountedFiles; }; @@ -48,5 +47,7 @@ Module['unmountExternalData'] = () => { * * @suppress {checkVars} */ -var SharedArrayBuffer = globalThis.SharedArrayBuffer ?? - new WebAssembly.Memory({'initial': 0, 'maximum': 0, 'shared': true}).buffer.constructor; +var SharedArrayBuffer = + globalThis.SharedArrayBuffer ?? + new WebAssembly.Memory({ initial: 0, maximum: 0, shared: true }).buffer + .constructor; diff --git a/tools/ci_build/build.py b/tools/ci_build/build.py index c234a69a73ed8..bd2ed871957e9 100644 --- a/tools/ci_build/build.py +++ b/tools/ci_build/build.py @@ -1453,8 +1453,11 @@ def generate_build_tree( raise BuildError("WebNN is only available for WebAssembly build.") cmake_args += ["-Donnxruntime_USE_WEBNN=ON"] - if args.use_jsep and args.use_webgpu: - raise BuildError("JSEP (--use_jsep) and WebGPU (--use_webgpu) cannot be enabled at the same time.") + # TODO: currently we allow building with both --use_jsep and --use_webgpu in this working branch. + # This situation is temporary. Eventually, those two flags will be mutually exclusive. + # + # if args.use_jsep and args.use_webgpu: + # raise BuildError("JSEP (--use_jsep) and WebGPU (--use_webgpu) cannot be enabled at the same time.") if args.use_external_dawn and not args.use_webgpu: raise BuildError("External Dawn (--use_external_dawn) must be enabled with WebGPU (--use_webgpu).")
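To tie the JS surface of this change together, the sketch below strings the new Module["webgpu*"] helpers from onnxruntime/wasm/post-webgpu.js into their intended call order. It is illustrative only: the code that actually drives these helpers lives in the js/web bindings and is not part of this diff, and `createOrtSession`, `outputGpuBuffer`, and `outputByteLength` are hypothetical placeholders.

// Illustrative sketch (not part of this diff), assuming an initialized ORT WebAssembly
// Module built with --use_webgpu. `createOrtSession` stands in for the exported C API
// calls (OrtCreateSessionOptions / OrtAppendExecutionProvider / OrtCreateSession).
async function createSessionOnDevice(device, createOrtSession) {
  // 1. Register the user-provided GPUDevice first; for custom devices this returns
  //    [deviceId, instanceHandle, deviceHandle] and remembers the pending device.
  //    Calling it with no device selects the default device (deviceId 0).
  Module["webgpuRegisterDevice"](device);
  // 2. Create the session through the C API exports.
  const sessionHandle = await createOrtSession();
  // 3. Report the result so the pending device state is consumed and the
  //    session-to-device map is updated (a falsy handle just clears the pending state).
  Module["webgpuOnCreateSession"](sessionHandle);
  return sessionHandle;
}

async function readGpuOutput(sessionHandle, outputGpuBuffer, outputByteLength) {
  // Import a user GPUBuffer for this session, read it back, then drop the reference.
  Module["webgpuRegisterBuffer"](outputGpuBuffer, sessionHandle);
  const download = Module["webgpuCreateDownloader"](outputGpuBuffer, outputByteLength, sessionHandle);
  const data = await download();
  Module["webgpuUnregisterBuffer"](outputGpuBuffer);
  return data;
}
// When the session is released, Module["webgpuOnReleaseSession"](sessionHandle) drops
// the session-to-device mapping.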