From f362c814dcd8c97b858408ce4e231c6bdb857436 Mon Sep 17 00:00:00 2001 From: abretonc7s Date: Fri, 4 Apr 2025 23:22:52 +0800 Subject: [PATCH 1/9] feat: web wasm with single file --- .gitignore | 7 +- wasm/CMakeLists.txt | 4 + wasm/combined/.gitignore | 7 + wasm/combined/CMakeLists.txt | 164 ++++ wasm/combined/README.md | 114 +++ wasm/combined/assets/.gitignore | 18 + wasm/combined/assets/setup-assets.sh | 129 +++ wasm/combined/copy_with_confirm.cmake | 74 ++ wasm/combined/demos/asr.html | 250 ++++++ wasm/combined/demos/assets | 1 + wasm/combined/demos/common.css | 171 ++++ wasm/combined/demos/common.js | 79 ++ wasm/combined/demos/index.html | 41 + wasm/combined/demos/kws.html | 231 +++++ wasm/combined/demos/tts.html | 229 +++++ wasm/combined/demos/vad.html | 280 ++++++ wasm/combined/index.html | 928 ++++++++++++++++++++ wasm/combined/sherpa-onnx-asr.js | 930 ++++++++++++++++++++ wasm/combined/sherpa-onnx-combined.js | 246 ++++++ wasm/combined/sherpa-onnx-core.js | 962 +++++++++++++++++++++ wasm/combined/sherpa-onnx-enhancement.js | 96 ++ wasm/combined/sherpa-onnx-kws.js | 658 ++++++++++++++ wasm/combined/sherpa-onnx-speaker.js | 110 +++ wasm/combined/sherpa-onnx-tts.js | 844 ++++++++++++++++++ wasm/combined/sherpa-onnx-vad.js | 297 +++++++ wasm/combined/sherpa-onnx-wasm-combined.cc | 292 +++++++ 26 files changed, 7160 insertions(+), 2 deletions(-) create mode 100644 wasm/combined/.gitignore create mode 100644 wasm/combined/CMakeLists.txt create mode 100644 wasm/combined/README.md create mode 100644 wasm/combined/assets/.gitignore create mode 100755 wasm/combined/assets/setup-assets.sh create mode 100644 wasm/combined/copy_with_confirm.cmake create mode 100644 wasm/combined/demos/asr.html create mode 120000 wasm/combined/demos/assets create mode 100644 wasm/combined/demos/common.css create mode 100644 wasm/combined/demos/common.js create mode 100644 wasm/combined/demos/index.html create mode 100644 wasm/combined/demos/kws.html create mode 100644 wasm/combined/demos/tts.html create mode 100644 wasm/combined/demos/vad.html create mode 100644 wasm/combined/index.html create mode 100644 wasm/combined/sherpa-onnx-asr.js create mode 100644 wasm/combined/sherpa-onnx-combined.js create mode 100644 wasm/combined/sherpa-onnx-core.js create mode 100644 wasm/combined/sherpa-onnx-enhancement.js create mode 100644 wasm/combined/sherpa-onnx-kws.js create mode 100644 wasm/combined/sherpa-onnx-speaker.js create mode 100644 wasm/combined/sherpa-onnx-tts.js create mode 100644 wasm/combined/sherpa-onnx-vad.js create mode 100644 wasm/combined/sherpa-onnx-wasm-combined.cc diff --git a/.gitignore b/.gitignore index e642ec5351..f84da661b2 100644 --- a/.gitignore +++ b/.gitignore @@ -136,8 +136,11 @@ kokoro-multi-lang-v1_0 sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16 cmake-build-debug README-DEV.txt -*.rknn -*.jit + ##clion .idea +scripts/dotnet/examples/obj/Debug/net8.0/Common.AssemblyInfo.cs +scripts/dotnet/examples/obj/Debug/net8.0/Common.GeneratedMSBuildEditorConfig.editorconfig +scripts/dotnet/examples/obj/Debug/net8.0/Common.AssemblyInfoInputs.cache + sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02 diff --git a/wasm/CMakeLists.txt b/wasm/CMakeLists.txt index 0f18d3130b..2ea46af935 100644 --- a/wasm/CMakeLists.txt +++ b/wasm/CMakeLists.txt @@ -29,3 +29,7 @@ endif() if(SHERPA_ONNX_ENABLE_WASM_NODEJS) add_subdirectory(nodejs) endif() + +if(SHERPA_ONNX_ENABLE_WASM_COMBINED) + add_subdirectory(combined) +endif() diff --git a/wasm/combined/.gitignore b/wasm/combined/.gitignore new file mode 100644 index 
0000000000..4b5b215ffd --- /dev/null +++ b/wasm/combined/.gitignore @@ -0,0 +1,7 @@ +# Generated WASM files +*.wasm +sherpa-onnx-wasm-combined.js + +# Local model files +*.onnx +*tokens.txt \ No newline at end of file diff --git a/wasm/combined/CMakeLists.txt b/wasm/combined/CMakeLists.txt new file mode 100644 index 0000000000..b553dfd3a9 --- /dev/null +++ b/wasm/combined/CMakeLists.txt @@ -0,0 +1,164 @@ +if(NOT $ENV{SHERPA_ONNX_IS_USING_BUILD_WASM_SH}) + message(FATAL_ERROR "Please use ./build-wasm-combined.sh to build") +endif() + +# Collect all exported functions from all modules +set(exported_functions + # Core utilities + CopyHeap + MyPrintOnlineASR + MyPrintVAD + MyPrintTTS + MyPrintSpeakerDiarization + MyPrintSpeechEnhancement + MyPrintKeywordSpotting + SherpaOnnxFileExists + + # Online ASR + SherpaOnnxCreateOnlineRecognizer + SherpaOnnxCreateOnlineStream + SherpaOnnxDecodeOnlineStream + SherpaOnnxDestroyOfflineStreamResultJson + SherpaOnnxDestroyOnlineRecognizer + SherpaOnnxDestroyOnlineRecognizerResult + SherpaOnnxDestroyOnlineStream + SherpaOnnxDestroyOnlineStreamResultJson + SherpaOnnxGetOfflineStreamResultAsJson + SherpaOnnxGetOnlineStreamResult + SherpaOnnxGetOnlineStreamResultAsJson + SherpaOnnxIsOnlineStreamReady + SherpaOnnxOnlineStreamAcceptWaveform + SherpaOnnxOnlineStreamInputFinished + SherpaOnnxOnlineStreamIsEndpoint + SherpaOnnxOnlineStreamReset + + # Offline ASR + SherpaOnnxCreateOfflineRecognizer + SherpaOnnxCreateOfflineStream + SherpaOnnxDecodeOfflineStream + SherpaOnnxDecodeMultipleOfflineStreams + SherpaOnnxDestroyOfflineRecognizer + SherpaOnnxDestroyOfflineRecognizerResult + SherpaOnnxDestroyOfflineStream + SherpaOnnxAcceptWaveformOffline + SherpaOnnxGetOfflineStreamResult + + # TTS + SherpaOnnxCreateOfflineTts + SherpaOnnxDestroyOfflineTts + SherpaOnnxDestroyOfflineTtsGeneratedAudio + SherpaOnnxOfflineTtsGenerate + SherpaOnnxOfflineTtsGenerateWithCallback + SherpaOnnxOfflineTtsSampleRate + SherpaOnnxOfflineTtsNumSpeakers + SherpaOnnxWriteWave + + # VAD + SherpaOnnxCreateCircularBuffer + SherpaOnnxDestroyCircularBuffer + SherpaOnnxCircularBufferPush + SherpaOnnxCircularBufferGet + SherpaOnnxCircularBufferFree + SherpaOnnxCircularBufferPop + SherpaOnnxCircularBufferSize + SherpaOnnxCircularBufferHead + SherpaOnnxCircularBufferReset + SherpaOnnxCreateVoiceActivityDetector + SherpaOnnxDestroyVoiceActivityDetector + SherpaOnnxVoiceActivityDetectorAcceptWaveform + SherpaOnnxVoiceActivityDetectorEmpty + SherpaOnnxVoiceActivityDetectorDetected + SherpaOnnxVoiceActivityDetectorPop + SherpaOnnxVoiceActivityDetectorClear + SherpaOnnxVoiceActivityDetectorFront + SherpaOnnxDestroySpeechSegment + SherpaOnnxVoiceActivityDetectorReset + SherpaOnnxVoiceActivityDetectorFlush + + # KWS + SherpaOnnxCreateKeywordSpotter + SherpaOnnxDestroyKeywordSpotter + SherpaOnnxCreateKeywordStream + SherpaOnnxIsKeywordStreamReady + SherpaOnnxDecodeKeywordStream + SherpaOnnxResetKeywordStream + SherpaOnnxGetKeywordResult + SherpaOnnxDestroyKeywordResult +) + +set(mangled_exported_functions) +foreach(x IN LISTS exported_functions) + list(APPEND mangled_exported_functions "_${x}") +endforeach() +list(JOIN mangled_exported_functions "," all_exported_functions) + +include_directories(${CMAKE_SOURCE_DIR}) +set(MY_FLAGS " -s FORCE_FILESYSTEM=1 -s INITIAL_MEMORY=512MB -s ALLOW_MEMORY_GROWTH=1") +string(APPEND MY_FLAGS " -sSTACK_SIZE=10485760 ") # 10MB +string(APPEND MY_FLAGS " -sASYNCIFY=1 -sFETCH=1 ") # For async loading +string(APPEND MY_FLAGS " 
-sEXPORTED_FUNCTIONS=[_malloc,_free,${all_exported_functions}] ") +# No preloaded assets - all models will be loaded dynamically +string(APPEND MY_FLAGS " -sEXPORTED_RUNTIME_METHODS=['ccall','stringToUTF8','setValue','getValue','lengthBytesUTF8','UTF8ToString','FS'] ") + +message(STATUS "MY_FLAGS: ${MY_FLAGS}") + +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${MY_FLAGS}") +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${MY_FLAGS}") +set(CMAKE_EXECUTBLE_LINKER_FLAGS "${CMAKE_EXECUTBLE_LINKER_FLAGS} ${MY_FLAGS}") + +add_executable(sherpa-onnx-wasm-combined sherpa-onnx-wasm-combined.cc) +target_link_libraries(sherpa-onnx-wasm-combined sherpa-onnx-c-api) +install(TARGETS sherpa-onnx-wasm-combined DESTINATION bin/wasm/combined) + +install( + FILES + "$/sherpa-onnx-wasm-combined.js" + "index.html" + "sherpa-onnx-combined.js" + "$/sherpa-onnx-wasm-combined.wasm" + DESTINATION + bin/wasm/combined +) + +# Add option to install to original repo +option(SHERPA_ONNX_INSTALL_TO_REPO "Install compiled WASM files to original repo directory" OFF) +set(SHERPA_ONNX_REPO_PATH "${CMAKE_SOURCE_DIR}/wasm/combined" CACHE PATH "Path to original repo wasm directory") + +if(SHERPA_ONNX_INSTALL_TO_REPO) + # Add a custom target that will run after the installation + add_custom_target(install_to_repo ALL + COMMAND ${CMAKE_COMMAND} -E echo "Installing to original repo at ${SHERPA_ONNX_REPO_PATH}..." + COMMAND ${CMAKE_COMMAND} -E make_directory ${SHERPA_ONNX_REPO_PATH} + + # Copy the JS file + COMMAND ${CMAKE_COMMAND} + -DSRC_DIR=${CMAKE_BINARY_DIR}/bin + -DDEST_DIR=${SHERPA_ONNX_REPO_PATH} + -DCOPY_FILES="sherpa-onnx-wasm-combined.js" + -P ${CMAKE_CURRENT_SOURCE_DIR}/copy_with_confirm.cmake + + # Copy the WASM file + COMMAND ${CMAKE_COMMAND} + -DSRC_DIR=${CMAKE_BINARY_DIR}/bin + -DDEST_DIR=${SHERPA_ONNX_REPO_PATH} + -DCOPY_FILES="sherpa-onnx-wasm-combined.wasm" + -P ${CMAKE_CURRENT_SOURCE_DIR}/copy_with_confirm.cmake + + # Copy the index.html file + COMMAND ${CMAKE_COMMAND} + -DSRC_DIR=${CMAKE_CURRENT_SOURCE_DIR} + -DDEST_DIR=${SHERPA_ONNX_REPO_PATH} + -DCOPY_FILES="index.html" + -P ${CMAKE_CURRENT_SOURCE_DIR}/copy_with_confirm.cmake + + # Copy the JS library file + COMMAND ${CMAKE_COMMAND} + -DSRC_DIR=${CMAKE_CURRENT_SOURCE_DIR} + -DDEST_DIR=${SHERPA_ONNX_REPO_PATH} + -DCOPY_FILES="sherpa-onnx-combined.js" + -P ${CMAKE_CURRENT_SOURCE_DIR}/copy_with_confirm.cmake + + DEPENDS sherpa-onnx-wasm-combined + COMMENT "Checking and installing WASM files to original repo" + ) +endif() \ No newline at end of file diff --git a/wasm/combined/README.md b/wasm/combined/README.md new file mode 100644 index 0000000000..b4f5ee54d7 --- /dev/null +++ b/wasm/combined/README.md @@ -0,0 +1,114 @@ +# Sherpa-ONNX Combined WebAssembly Module + +This directory contains a combined WebAssembly module for the Sherpa-ONNX project, which integrates multiple features: + +- Automatic Speech Recognition (ASR) +- Voice Activity Detection (VAD) +- Text-to-Speech Synthesis (TTS) +- Speech Enhancement +- Speaker Diarization +- Keyword Spotting + +## How to Use + +### Loading the Module + +You can use the combined module in two ways: + +#### Option 1: Load Individual Modules (Recommended) + +This approach loads only the components you need: + +```html + + + + + + + + + + + +``` + +#### Option 2: Load All Modules via the Combined Loader + +This approach loads all available modules: + +```html + + + + + + + +``` + +### Module Structure + +The codebase has been organized into modular files: + +- `sherpa-onnx-core.js`: Core functionality, utilities, and file system operations 
+- `sherpa-onnx-vad.js`: Voice Activity Detection functionality +- `sherpa-onnx-combined.js`: Loader that loads all individual modules + +Additional modules will be added in the future: +- `sherpa-onnx-asr.js`: Automatic Speech Recognition functionality +- `sherpa-onnx-tts.js`: Text-to-Speech functionality +- And more... + +## Demo Application + +The included `index.html` demonstrates how to use the combined module. It shows: + +1. How to load models from URLs +2. How to initialize each component (ASR, VAD, TTS) +3. How to stream audio from the microphone +4. How to get results from each component + +## Building the Module + +The WebAssembly module is built using Emscripten. To rebuild it: + +```bash +cd /path/to/sherpa-onnx +mkdir -p build-wasm-combined +cd build-wasm-combined +emcmake cmake -DCMAKE_BUILD_TYPE=Release -DSHERPA_ONNX_ENABLE_WASM=ON -DSHERPA_ONNX_ENABLE_CHECK=OFF .. +make -j$(nproc) +``` + +The built files will be located in `build-wasm-combined/wasm/combined/`. + +## Setting Up Models + +Before using the demo, you need to set up model files: + +```bash +cd /path/to/sherpa-onnx/wasm/combined +./setup-assets.sh +``` + +This script will download necessary model files to the `assets/` directory. + +## Troubleshooting + +- **Module load errors**: Ensure the WASM module is loaded before any other scripts +- **Model load errors**: Check the browser console for specific error messages +- **Audio capture issues**: Make sure your browser has permission to access the microphone +- **Performance issues**: Try reducing buffer sizes or using smaller models \ No newline at end of file diff --git a/wasm/combined/assets/.gitignore b/wasm/combined/assets/.gitignore new file mode 100644 index 0000000000..a181d8d850 --- /dev/null +++ b/wasm/combined/assets/.gitignore @@ -0,0 +1,18 @@ +# Ignore all ONNX model files and subdirectories +*.onnx +asr/ +vad/ +tts/ +speakers/ +enhancement/ +kws/ + +# Ignore tokens files +*tokens.txt + +# Ignore temporary files +tmp/ + +# But keep the README.md and setup script +!README.md +!setup-assets.sh \ No newline at end of file diff --git a/wasm/combined/assets/setup-assets.sh b/wasm/combined/assets/setup-assets.sh new file mode 100755 index 0000000000..0fc25a9aab --- /dev/null +++ b/wasm/combined/assets/setup-assets.sh @@ -0,0 +1,129 @@ +#!/bin/bash +# +# Script to download and setup all required models for Sherpa-ONNX Combined WASM demo +# + +set -e + +# Create a tmp directory for downloads +mkdir -p tmp +cd tmp + +echo "===== Setting up assets for Sherpa-ONNX Combined WASM Demo =====" +echo "" + +# Function to check if a file exists and download only if needed +download_if_missing() { + local target_file="../$1" + local download_url="$2" + local is_archive="$3" + local extract_dir="$4" + + if [ -f "$target_file" ]; then + echo "File $target_file already exists. Skipping download." + return 0 + fi + + echo "Downloading $download_url..." + + if [ "$is_archive" = "yes" ]; then + wget -q "$download_url" + local file=$(basename "$download_url") + + echo "Extracting $file..." + tar xvf "$file" + rm "$file" + + if [ ! -z "$extract_dir" ]; then + mv "$extract_dir" "../$(dirname "$target_file")" + fi + else + wget -q -O "$target_file" "$download_url" + fi + + echo "Downloaded and setup $target_file" +} + +# Create subdirectories for each model type +mkdir -p ../asr ../vad ../tts ../speakers ../enhancement ../kws + +echo "1. Setting up ASR Models (Speech Recognition)..." 
+ +# Download ASR models +wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2 +tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2 +rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2 + +# Rename for compatibility +mv sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx ../asr/encoder.onnx +mv sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx ../asr/decoder.onnx +mv sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.int8.onnx ../asr/joiner.onnx +mv sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt ../asr/tokens.txt +rm -rf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/ + +echo "2. Setting up VAD Models (Voice Activity Detection)..." + +# Download VAD model +wget -q -O ../vad/silero_vad.onnx https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx + +echo "3. Setting up TTS Models (Text-to-Speech)..." + +# Download TTS models +wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2 +tar xvf vits-piper-en_US-amy-low.tar.bz2 +rm vits-piper-en_US-amy-low.tar.bz2 + +mv vits-piper-en_US-amy-low/en_US-amy-low.onnx ../tts/model.onnx +mv vits-piper-en_US-amy-low/tokens.txt ../tts/tokens.txt +# Create a zip archive of the espeak-ng-data directory for efficient loading in WASM +mv vits-piper-en_US-amy-low/espeak-ng-data ../tts/ + +# Create zip archive of espeak-ng-data +echo "Creating zip archive of espeak-ng-data..." +cd ../tts +zip -r espeak-ng-data.zip espeak-ng-data/ +cd ../../tmp + +rm -rf vits-piper-en_US-amy-low/ + +echo "4. Setting up Speaker Diarization Models..." + +# Download speaker diarization models +wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2 +tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2 +rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2 +mv sherpa-onnx-pyannote-segmentation-3-0/model.onnx ../speakers/segmentation.onnx +rm -rf sherpa-onnx-pyannote-segmentation-3-0 + +wget -q -O ../speakers/embedding.onnx https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx + +echo "5. Setting up Speech Enhancement Models..." + +# Download speech enhancement model +wget -q -O ../enhancement/gtcrn.onnx https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/gtcrn_simple.onnx + +echo "6. Setting up Keyword Spotting Models..." 
+ +# Download keyword spotting models +wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/kws-models/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2 +tar xvf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2 +rm sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2 + +mv sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/encoder-epoch-12-avg-2-chunk-16-left-64.onnx ../kws/encoder.onnx +mv sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/decoder-epoch-12-avg-2-chunk-16-left-64.onnx ../kws/decoder.onnx +mv sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/joiner-epoch-12-avg-2-chunk-16-left-64.onnx ../kws/joiner.onnx +mv sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/tokens.txt ../kws/tokens.txt +rm -rf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01 + +# Clean up tmp directory +cd .. +rm -rf tmp + +echo "" +echo "===== All assets have been downloaded and set up successfully! =====" +echo "" +echo "To run the demo:" +echo "1. Build the WASM module: ./build-wasm-combined.sh" +echo "2. Start a local server: cd ../.. && python3 -m http.server 8080" +echo "3. Open your browser and go to: http://localhost:8080/wasm/combined/" +echo "" \ No newline at end of file diff --git a/wasm/combined/copy_with_confirm.cmake b/wasm/combined/copy_with_confirm.cmake new file mode 100644 index 0000000000..1d87e87150 --- /dev/null +++ b/wasm/combined/copy_with_confirm.cmake @@ -0,0 +1,74 @@ +# This script copies files with confirmation for overwriting +# It is specifically used for the WASM combined build process in wasm/combined +# and should be kept in the wasm/combined directory. + +# Expected variables: +# SRC_DIR - source directory +# DEST_DIR - destination directory +# COPY_FILES - semicolon-separated list of files to copy + +# Print debug information +message(STATUS "Source directory: ${SRC_DIR}") +message(STATUS "Destination directory: ${DEST_DIR}") +message(STATUS "Files to copy: ${COPY_FILES}") + +# Verify source directory exists +if(NOT EXISTS "${SRC_DIR}") + message(FATAL_ERROR "Source directory does not exist: ${SRC_DIR}") +endif() + +# Verify destination directory exists or create it +if(NOT EXISTS "${DEST_DIR}") + message(STATUS "Creating destination directory: ${DEST_DIR}") + file(MAKE_DIRECTORY "${DEST_DIR}") +endif() + +# List source directory contents for debugging +message(STATUS "Contents of source directory:") +file(GLOB source_files "${SRC_DIR}/*") +foreach(file ${source_files}) + message(STATUS " ${file}") +endforeach() + +# Process each file (just one file in each call now) +foreach(file_name ${COPY_FILES}) + # Remove quotes if present + string(REGEX REPLACE "^\"(.*)\"$" "\\1" file_name "${file_name}") + + set(src_file "${SRC_DIR}/${file_name}") + set(dest_file "${DEST_DIR}/${file_name}") + + message(STATUS "Processing file: ${file_name}") + message(STATUS "Source file path: ${src_file}") + message(STATUS "Destination file path: ${dest_file}") + + # Verify source file exists + if(NOT EXISTS "${src_file}") + message(FATAL_ERROR "Source file does not exist: ${src_file}") + endif() + + # Check if the destination file exists + if(EXISTS "${dest_file}") + message(STATUS "File ${file_name} already exists in ${DEST_DIR}") + # Prompt for confirmation (this will be shown in terminal) + message(STATUS "Do you want to overwrite? 
[y/N]") + + # Read user input (works in interactive mode) + execute_process( + COMMAND ${CMAKE_COMMAND} -E echo_append "" + COMMAND /bin/bash -c "read -n 1 answer && echo $answer" + OUTPUT_VARIABLE answer + OUTPUT_STRIP_TRAILING_WHITESPACE + ) + + if("${answer}" STREQUAL "y" OR "${answer}" STREQUAL "Y") + message(STATUS "Overwriting ${dest_file}") + file(COPY "${src_file}" DESTINATION "${DEST_DIR}") + else() + message(STATUS "Skipping ${file_name}") + endif() + else() + message(STATUS "Copying ${file_name} to ${DEST_DIR}") + file(COPY "${src_file}" DESTINATION "${DEST_DIR}") + endif() +endforeach() \ No newline at end of file diff --git a/wasm/combined/demos/asr.html b/wasm/combined/demos/asr.html new file mode 100644 index 0000000000..5b287bc4d3 --- /dev/null +++ b/wasm/combined/demos/asr.html @@ -0,0 +1,250 @@ + + + + + + Sherpa-ONNX ASR Demo + + + + + + + + + + + + +

Sherpa-ONNX ASR Demo

+ + + +
Loading WebAssembly module...
+ +
+

Automatic Speech Recognition (ASR)

+ +
+

Model Configuration

+
+ + +
+
+ + + (The directory where model files will be stored) +
+
+ + +
+
+ + +
+
+ + +
+
+ + +
+
+ + +
+
+ + +
+
+ +
+ + + + +
+
Status: Not active
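Below is a minimal sketch, grounded in the `SherpaOnnx.ASR` wrapper added by this patch (sherpa-onnx-asr.js), of how a page like this can stream microphone audio into the online recognizer. Model URLs, element ids, and the ScriptProcessorNode hookup are illustrative assumptions; `getMicrophoneInput()` and `setupAudioContext()` come from demos/common.js.

```js
// Sketch only: load a streaming transducer model, then decode microphone audio.
async function startAsrDemo() {
  const loaded = await SherpaOnnx.ASR.loadModel({
    type: 'transducer',
    encoder: 'assets/asr/encoder.onnx',   // illustrative URLs
    decoder: 'assets/asr/decoder.onnx',
    joiner: 'assets/asr/joiner.onnx',
    tokens: 'assets/asr/tokens.txt',
    modelDir: 'asr-models',
  });
  const recognizer = SherpaOnnx.ASR.createOnlineRecognizer(loaded, {debug: 0});
  const stream = recognizer.createStream();

  await getMicrophoneInput();                  // from demos/common.js
  const context = setupAudioContext();         // 16 kHz AudioContext
  const processor = context.createScriptProcessor(4096, 1, 1);  // assumption: simple hookup
  mediaStream.connect(processor);
  processor.connect(context.destination);

  processor.onaudioprocess = (e) => {
    stream.acceptWaveform(context.sampleRate, e.inputBuffer.getChannelData(0));
    while (recognizer.isReady(stream)) recognizer.decode(stream);
    const result = recognizer.getResult(stream);   // parsed JSON, includes .text
    document.getElementById('results').textContent = result.text || '';  // hypothetical id
    if (recognizer.isEndpoint(stream)) recognizer.reset(stream);
  };
}
```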
+
+
+ + + + diff --git a/wasm/combined/demos/assets b/wasm/combined/demos/assets new file mode 120000 index 0000000000..ec2e4be2f8 --- /dev/null +++ b/wasm/combined/demos/assets @@ -0,0 +1 @@ +../assets \ No newline at end of file diff --git a/wasm/combined/demos/common.css b/wasm/combined/demos/common.css new file mode 100644 index 0000000000..1d6a8efa33 --- /dev/null +++ b/wasm/combined/demos/common.css @@ -0,0 +1,171 @@ +body { + font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif; + max-width: 900px; + margin: 0 auto; + padding: 20px; + background-color: #f5f5f5; +} +h1, h2, h3 { + color: #333; +} +section { + margin: 20px 0; + padding: 15px; + border: 1px solid #ccc; + border-radius: 8px; + background-color: #fff; + box-shadow: 0 2px 4px rgba(0,0,0,0.1); +} +.controls { + display: flex; + flex-wrap: wrap; + gap: 10px; + margin-bottom: 10px; +} +.controls input { + flex-grow: 1; + padding: 8px; + border: 1px solid #ccc; + border-radius: 4px; +} +button { + padding: 8px 16px; + background-color: #4285f4; + color: white; + border: none; + border-radius: 4px; + cursor: pointer; + transition: background-color 0.3s; +} +button:hover { + background-color: #3367d6; +} +button:disabled { + background-color: #ccc; + cursor: not-allowed; +} +textarea { + width: 100%; + height: 100px; + padding: 8px; + border: 1px solid #ccc; + border-radius: 4px; + resize: vertical; +} +.loading { + display: none; +} +#status { + padding: 10px; + border-radius: 4px; + background-color: #e0f7fa; + margin-bottom: 20px; +} +.model-url { + width: 100%; + margin-bottom: 5px; +} +.module-info { + margin-top: 20px; + padding: 15px; + background-color: #f1f8e9; + border-radius: 4px; +} +.form-group { + margin-bottom: 12px; +} +.form-group label { + display: block; + margin-bottom: 5px; + font-weight: 500; +} +.form-group input[type="text"] { + width: 100%; + padding: 8px; + border: 1px solid #ccc; + border-radius: 4px; + box-sizing: border-box; +} +.form-group small { + display: block; + color: #666; + margin-top: 4px; + font-size: 0.85em; +} +.form-check { + margin-bottom: 8px; +} +.form-check input[type="checkbox"] { + margin-right: 8px; +} +select { + width: 100%; + padding: 8px; + border: 1px solid #ccc; + border-radius: 4px; + box-sizing: border-box; + margin-bottom: 8px; +} +.result-box { + min-height: 100px; + max-height: 200px; + overflow-y: auto; + border: 1px solid #ccc; + border-radius: 4px; + padding: 10px; + margin-top: 10px; + background-color: #f9f9f9; +} +.audio-output { + margin-top: 15px; +} +.audio-item { + display: flex; + align-items: center; + padding: 10px; + border: 1px solid #ddd; + border-radius: 4px; + margin-bottom: 10px; + background-color: #f9f9f9; +} +.audio-item audio { + margin-right: 10px; +} +.audio-item .audio-text { + flex-grow: 1; +} +.audio-item .delete-btn { + background-color: #f44336; + color: white; + border: none; + border-radius: 4px; + padding: 5px 10px; + cursor: pointer; +} +.unload-button { + background-color: #f44336; + color: white; +} +.unload-button:hover { + background-color: #d32f2f; +} +.nav-menu { + display: flex; + gap: 10px; + margin-bottom: 20px; + padding: 10px; + background-color: #333; + border-radius: 8px; +} +.nav-menu a { + color: white; + text-decoration: none; + padding: 5px 10px; + border-radius: 4px; +} +.nav-menu a:hover { + background-color: #555; +} +.nav-menu a.active { + background-color: #4285f4; +} diff --git a/wasm/combined/demos/common.js b/wasm/combined/demos/common.js new file mode 100644 index 0000000000..c22dc35a9e --- /dev/null +++ 
b/wasm/combined/demos/common.js @@ -0,0 +1,79 @@ +// Set up initialization callback +window.onSherpaOnnxReady = function(success, error) { + if (success) { + console.log("All SherpaOnnx modules loaded successfully"); + initializeUI(); // This function would be defined in each individual demo file + } else { + console.error("Some SherpaOnnx modules failed to load:", error); + document.getElementById('status').textContent = + "Error loading some modules. Some features may not work correctly."; + document.getElementById('status').style.backgroundColor = "#ffcccc"; + + // Still try to initialize the UI with available modules + initializeUI(); + } +}; + +// Old-style module initialization for backward compatibility +window.onModuleReady = function() { + console.log("WASM module ready - waiting for all JS modules to load"); +}; + +// Shared audio context and microphone access +let audioContext; +let mediaStream; + +function setupAudioContext() { + if (!audioContext) { + audioContext = new (window.AudioContext || window.webkitAudioContext)({sampleRate: 16000}); + } + return audioContext; +} + +async function getMicrophoneInput() { + try { + const stream = await navigator.mediaDevices.getUserMedia({audio: true}); + const context = setupAudioContext(); + mediaStream = context.createMediaStreamSource(stream); + return stream; + } catch (error) { + console.error('Error accessing microphone:', error); + throw error; + } +} + +// Create unload button +function createUnloadButton(container, modelType, resource, statusElem) { + const button = document.createElement('button'); + button.textContent = `Unload ${modelType} Model`; + button.classList.add('unload-button'); + + button.addEventListener('click', function() { + if (resource) { + // Free the resource + resource.free(); + + // Call the appropriate cleanup method + if (modelType === 'ASR') { + SherpaOnnx.cleanupASR(); + } else if (modelType === 'TTS') { + SherpaOnnx.cleanupTTS(); + } else if (modelType === 'VAD') { + SherpaOnnx.cleanupVAD(); + } else if (modelType === 'KWS') { + SherpaOnnx.cleanupKWS(); + } + + // Update UI + button.disabled = true; + if (statusElem) { + statusElem.textContent = `Status: ${modelType} model unloaded`; + } + + console.log(`${modelType} model unloaded successfully`); + } + }); + + container.appendChild(button); + return button; +} diff --git a/wasm/combined/demos/index.html b/wasm/combined/demos/index.html new file mode 100644 index 0000000000..6bb709f620 --- /dev/null +++ b/wasm/combined/demos/index.html @@ -0,0 +1,41 @@ + + + + + + Sherpa-ONNX Demos + + + + + + + + +

Sherpa-ONNX Demos

+ +
These demos showcase the modular capabilities of the Sherpa-ONNX WebAssembly build.
+ +
+

Available Demos

+ + +

About These Demos

+

Each demo uses the unified Sherpa-ONNX combined library but focuses on a single module for easier testing and validation.

+

The combined approach allows using just the modules you need in your own applications.
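As a sketch of the initialization pattern these pages share (based on demos/common.js and the combined loader added by this patch), each page includes the Emscripten build plus the wrapper scripts it needs and then waits for the ready callback:

```js
// Optional: tell the combined loader where to find the individual modules.
// If unset, sherpa-onnx-combined.js resolves them relative to its own URL.
// window.sherpaOnnxModulePaths = ['js/sherpa-onnx-core.js', 'js/sherpa-onnx-asr.js'];

// Invoked by the loader once all requested modules have loaded (or some failed).
window.onSherpaOnnxReady = function(success, error) {
  if (!success) {
    console.warn('Some SherpaOnnx modules failed to load:', error);
  }
  initializeUI();  // each demo page defines its own initializeUI()
};
```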

+ +

Memory Management

+

Each demo includes an "Unload Model" button to demonstrate proper memory management:

+
    +
  • Explicit unloading frees WASM memory
  • +
  • Resources are tracked and properly cleaned up
  • +
  • Prevents memory leaks in long-running applications
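The unload pattern listed above is a thin wrapper around `createUnloadButton()` from demos/common.js; a rough sketch, assuming the page keeps its recognizer and status element in local variables:

```js
// createUnloadButton() calls resource.free() and then the matching
// SherpaOnnx.cleanupASR/TTS/VAD/KWS hook referenced by demos/common.js.
const statusElem = document.getElementById('status');     // hypothetical id
const controls = document.getElementById('controls');     // hypothetical id
createUnloadButton(controls, 'ASR', recognizer, statusElem);

// Equivalent manual cleanup:
recognizer.free();        // frees tracked streams and the recognizer handle
SherpaOnnx.cleanupASR();  // drops the module's tracked resources
```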
  • +
+
+ + diff --git a/wasm/combined/demos/kws.html b/wasm/combined/demos/kws.html new file mode 100644 index 0000000000..4400c8ce7c --- /dev/null +++ b/wasm/combined/demos/kws.html @@ -0,0 +1,231 @@ + + + + + + Sherpa-ONNX KWS Demo + + + + + + + + + + + + +

Sherpa-ONNX KWS Demo

+ + + +
Loading WebAssembly module...
+ +
+

Keyword Spotting (KWS)

+ +
+

Model Configuration

+
+ + + (The directory where model files will be stored) +
+
+ + +
+
+ + +
+
+ + +
+
+ + +
+
+ + + Format: Phonetic tokens with spaces between letters, followed by @ and the keyword label +
+
+ + +
+
+ + +
+
+ +
+ + + + +
+
Status: Not active
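The keyword-spotting wrapper (sherpa-onnx-kws.js) is added elsewhere in this patch; the sketch below only assumes it mirrors the ASR wrapper and ultimately calls the exported C functions listed in CMakeLists.txt (SherpaOnnxCreateKeywordSpotter, SherpaOnnxCreateKeywordStream, SherpaOnnxDecodeKeywordStream, SherpaOnnxGetKeywordResult). All JavaScript names here are assumptions.

```js
// Hypothetical usage, assuming a KWS API shaped like SherpaOnnx.ASR.
const loaded = await SherpaOnnx.KWS.loadModel({            // assumed name
  encoder: 'assets/kws/encoder.onnx',
  decoder: 'assets/kws/decoder.onnx',
  joiner: 'assets/kws/joiner.onnx',
  tokens: 'assets/kws/tokens.txt',
});
// Keywords use the format shown above, e.g. "x iǎo ài t óng x ué @小爱同学".
const spotter = SherpaOnnx.KWS.createKeywordSpotter(loaded, {   // assumed name
  keywords: 'x iǎo ài t óng x ué @小爱同学',
});
// Then feed 16 kHz Float32 samples to a stream and poll for detections,
// as in the ASR sketch.
```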
+
+
+ + + + diff --git a/wasm/combined/demos/tts.html b/wasm/combined/demos/tts.html new file mode 100644 index 0000000000..60909fd80b --- /dev/null +++ b/wasm/combined/demos/tts.html @@ -0,0 +1,229 @@ + + + + + + Sherpa-ONNX TTS Demo + + + + + + + + + + + + +

Sherpa-ONNX TTS Demo

+ + + +
Loading WebAssembly module...
+ +
+

Text-to-Speech (TTS)

+ +
+

Model Configuration

+
+ + +
+
+ + + (The directory where model files will be stored) +
+
+ + +
+
+ + +
+
+ + + (Required for VITS models) +
+
+ + +
+
+ + +
+
+ +
+ + +
+ +
+ + + +
+
Status: Not active
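sherpa-onnx-tts.js is added elsewhere in this patch; the sketch below assumes a wrapper shaped like the ASR one around the exported C functions (SherpaOnnxCreateOfflineTts, SherpaOnnxOfflineTtsGenerate, SherpaOnnxWriteWave). All JavaScript names are assumptions.

```js
// Hypothetical usage, assuming a TTS API shaped like SherpaOnnx.ASR.
const loaded = await SherpaOnnx.TTS.loadModel({            // assumed name
  model: 'assets/tts/model.onnx',
  tokens: 'assets/tts/tokens.txt',
  espeakData: 'assets/tts/espeak-ng-data.zip',             // produced by setup-assets.sh
});
const tts = SherpaOnnx.TTS.createOfflineTts(loaded);       // assumed name
const audio = tts.generate({text: 'Hello from sherpa-onnx', speakerId: 0, speed: 1.0});
// audio.samples (Float32Array) at audio.sampleRate can be played with the
// Web Audio API or downloaded as a WAV file.
tts.free();
```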
+
+
+ + + + diff --git a/wasm/combined/demos/vad.html b/wasm/combined/demos/vad.html new file mode 100644 index 0000000000..29b82c16fd --- /dev/null +++ b/wasm/combined/demos/vad.html @@ -0,0 +1,280 @@ + + + + + + Sherpa-ONNX VAD Demo + + + + + + + + + + + + +

Sherpa-ONNX VAD Demo

+ + + +
Loading WebAssembly module...
+ +
+

Voice Activity Detection (VAD)

+ +
+

Model Configuration

+
+ + +
+
+ + + (The directory where model files will be stored) +
+
+ + +
+
+ + +
+ +

VAD Parameters

+
+ + + 0.5 +
+
+ + + 0.3 +
+
+ + + 0.1 +
+
+ +
+ + + + +
+
Status: Not active
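sherpa-onnx-vad.js provides the VAD wrapper; the sketch below only assumes it wraps the exported C functions (SherpaOnnxCreateVoiceActivityDetector, SherpaOnnxVoiceActivityDetectorAcceptWaveform, ...Detected, ...Front, ...Pop) and that the parameters above map onto its options. JavaScript names are assumptions.

```js
// Hypothetical usage, assuming a VAD API shaped like SherpaOnnx.ASR.
const loaded = await SherpaOnnx.VAD.loadModel({model: 'assets/vad/silero_vad.onnx'});  // assumed
const vad = SherpaOnnx.VAD.createVAD(loaded, {                                         // assumed
  threshold: 0.5,
  minSilenceDuration: 0.3,
  minSpeechDuration: 0.1,
});
// Feed 16 kHz Float32 chunks from the microphone; when speech is detected,
// pop the finished segment and update the level meter or hand it to ASR.
vad.acceptWaveform(chunk);
if (vad.isDetected()) {
  const segment = vad.front();   // assumed shape: {samples, start}
  vad.pop();
}
```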
+ +
+
+
+
Voice Activity Level
+
+
+
+ + + + diff --git a/wasm/combined/index.html b/wasm/combined/index.html new file mode 100644 index 0000000000..dfb9fb058d --- /dev/null +++ b/wasm/combined/index.html @@ -0,0 +1,928 @@ + + + + + + Sherpa-ONNX Combined Demo + + + + + + + + + + + +

Sherpa-ONNX Combined Demo

+
Loading WebAssembly module...
+ +
+

Modular Design

+

This demo uses a modular architecture. You can load modules individually:

+
+<script src="sherpa-onnx-core.js"></script>
+<script src="sherpa-onnx-vad.js"></script>
+    
+

Or load all modules at once:

+
+<script src="sherpa-onnx-combined.js"></script>
+    
+
+ +
+

Voice Activity Detection (VAD)

+ +
+

Model Configuration

+
+ + +
+
+ + + (The directory where model files will be stored) +
+
+ + +
+
+ + +
+
+ +
+ + + +
+
Status: Not active
+
+ + +
+

Automatic Speech Recognition (ASR)

+ +
+

Model Configuration

+
+ + +
+
+ + + (The directory where model files will be stored) +
+
+ + +
+
+ + +
+
+ + +
+
+ + +
+
+ + +
+
+ + +
+
+ +
+ + + +
+
Status: Not active
+
+
+ + +
+

Text-to-Speech (TTS)

+ +
+

Model Configuration

+
+ + +
+
+ + + (The directory where model files will be stored) +
+
+ + +
+
+ + +
+
+ + + (Required for VITS models) +
+
+ + +
+
+ + +
+
+ +
+ + +
+ +
+ + +
+
Status: Not active
+
+
+ + +
+

Keyword Spotting (KWS)

+ +
+

Model Configuration

+
+ + + (The directory where model files will be stored) +
+
+ + +
+
+ + +
+
+ + +
+
+ + +
+
+ + + Format: Phonetic tokens with spaces between letters, followed by @ and the keyword label +
+
+ + +
+
+ + +
+
+ +
+ + + +
+
Status: Not active
+
+
+ + + + \ No newline at end of file diff --git a/wasm/combined/sherpa-onnx-asr.js b/wasm/combined/sherpa-onnx-asr.js new file mode 100644 index 0000000000..04c0fdafc1 --- /dev/null +++ b/wasm/combined/sherpa-onnx-asr.js @@ -0,0 +1,930 @@ +/** + * sherpa-onnx-asr.js + * + * Automatic Speech Recognition functionality for SherpaOnnx + * Requires sherpa-onnx-core.js to be loaded first + */ + +(function(global) { + // Ensure the namespace exists + if (!global.SherpaOnnx) { + console.error('SherpaOnnx namespace not found. Make sure to load sherpa-onnx-core.js first.'); + return; + } + + // Get a reference to the SherpaOnnx namespace + const SherpaOnnx = global.SherpaOnnx; + + // Create or use existing ASR namespace + SherpaOnnx.ASR = SherpaOnnx.ASR || {}; + + // Define the ASR module functionality + SherpaOnnx.ASR = { + /** + * Load an ASR model from URLs + * @param {Object} modelConfig - Configuration for the model + * @returns {Promise} - Information about the loaded model + */ + loadModel: async function(modelConfig) { + const modelDir = modelConfig.modelDir || 'asr-models'; + + // Create directory if it doesn't exist + try { + global.Module.FS.mkdir(modelDir, 0o777); + } catch(e) { + if (e.code !== 'EEXIST') throw e; + } + + // Collection for actual file paths + const actualPaths = {}; + + // Load model files based on type + if (modelConfig.type === 'transducer') { + const results = await Promise.all([ + SherpaOnnx.FileSystem.safeLoadFile(modelConfig.encoder || 'assets/asr/encoder.onnx', `${modelDir}/encoder.onnx`, modelConfig.debug), + SherpaOnnx.FileSystem.safeLoadFile(modelConfig.decoder || 'assets/asr/decoder.onnx', `${modelDir}/decoder.onnx`, modelConfig.debug), + SherpaOnnx.FileSystem.safeLoadFile(modelConfig.joiner || 'assets/asr/joiner.onnx', `${modelDir}/joiner.onnx`, modelConfig.debug), + SherpaOnnx.FileSystem.safeLoadFile(modelConfig.tokens || 'assets/asr/tokens.txt', `${modelDir}/tokens.txt`, modelConfig.debug) + ]); + + // Collect actual paths + actualPaths.encoder = results[0].path; + actualPaths.decoder = results[1].path; + actualPaths.joiner = results[2].path; + actualPaths.tokens = results[3].path; + + } else if (modelConfig.type === 'paraformer') { + const results = await Promise.all([ + SherpaOnnx.FileSystem.safeLoadFile(modelConfig.encoder || 'assets/asr/encoder.onnx', `${modelDir}/encoder.onnx`, modelConfig.debug), + SherpaOnnx.FileSystem.safeLoadFile(modelConfig.decoder || 'assets/asr/decoder.onnx', `${modelDir}/decoder.onnx`, modelConfig.debug), + SherpaOnnx.FileSystem.safeLoadFile(modelConfig.tokens || 'assets/asr/tokens.txt', `${modelDir}/tokens.txt`, modelConfig.debug) + ]); + + // Collect actual paths + actualPaths.encoder = results[0].path; + actualPaths.decoder = results[1].path; + actualPaths.tokens = results[2].path; + + } else if (modelConfig.type === 'ctc') { + const results = await Promise.all([ + SherpaOnnx.FileSystem.safeLoadFile(modelConfig.model || 'assets/asr/model.onnx', `${modelDir}/model.onnx`, modelConfig.debug), + SherpaOnnx.FileSystem.safeLoadFile(modelConfig.tokens || 'assets/asr/tokens.txt', `${modelDir}/tokens.txt`, modelConfig.debug) + ]); + + // Collect actual paths + actualPaths.model = results[0].path; + actualPaths.tokens = results[1].path; + } + + // Get base directory from the tokens path + let effectiveModelDir = modelDir; + if (actualPaths.tokens) { + const lastSlash = actualPaths.tokens.lastIndexOf('/'); + if (lastSlash > 0) { + effectiveModelDir = actualPaths.tokens.substring(0, lastSlash); + } + } + + return { + modelDir: 
effectiveModelDir, + type: modelConfig.type, + actualPaths: actualPaths + }; + }, + + /** + * Initialize online recognizer configuration in WASM + * @param {Object} config - ASR configuration + * @param {Object} Module - WebAssembly module + * @returns {number} - Pointer to the configuration in WASM + * @private + */ + _initOnlineRecognizerConfig: function(config, Module) { + if (!config) { + console.error('ASR config is null'); + return 0; + } + + try { + // First, allocate all the strings we need + const allocatedStrings = {}; + + // Transducer model config + if (config.modelConfig.transducer) { + allocatedStrings.encoder = SherpaOnnx.Utils.allocateString(config.modelConfig.transducer.encoder, Module); + allocatedStrings.decoder = SherpaOnnx.Utils.allocateString(config.modelConfig.transducer.decoder, Module); + allocatedStrings.joiner = SherpaOnnx.Utils.allocateString(config.modelConfig.transducer.joiner, Module); + } else { + allocatedStrings.encoder = SherpaOnnx.Utils.allocateString('', Module); + allocatedStrings.decoder = SherpaOnnx.Utils.allocateString('', Module); + allocatedStrings.joiner = SherpaOnnx.Utils.allocateString('', Module); + } + + // Paraformer model config + if (config.modelConfig.paraformer) { + allocatedStrings.paraEncoder = SherpaOnnx.Utils.allocateString(config.modelConfig.paraformer.encoder, Module); + allocatedStrings.paraDecoder = SherpaOnnx.Utils.allocateString(config.modelConfig.paraformer.decoder, Module); + } else { + allocatedStrings.paraEncoder = SherpaOnnx.Utils.allocateString('', Module); + allocatedStrings.paraDecoder = SherpaOnnx.Utils.allocateString('', Module); + } + + // Zipformer2 CTC model config + if (config.modelConfig.zipformer2Ctc) { + allocatedStrings.zipformerModel = SherpaOnnx.Utils.allocateString(config.modelConfig.zipformer2Ctc.model, Module); + } else { + allocatedStrings.zipformerModel = SherpaOnnx.Utils.allocateString('', Module); + } + + // Tokens, provider, model_type, modeling_unit, bpe_vocab + allocatedStrings.tokens = SherpaOnnx.Utils.allocateString(config.modelConfig.tokens, Module); + allocatedStrings.provider = SherpaOnnx.Utils.allocateString(config.modelConfig.provider || 'cpu', Module); + allocatedStrings.modelType = SherpaOnnx.Utils.allocateString('', Module); // Not used in JS API + allocatedStrings.modelingUnit = SherpaOnnx.Utils.allocateString('', Module); // Not used in JS API + allocatedStrings.bpeVocab = SherpaOnnx.Utils.allocateString('', Module); // Not used in JS API + + // Token buffer is not used in JS API + allocatedStrings.tokensBuffer = SherpaOnnx.Utils.allocateString('', Module); + + // Decoding method + allocatedStrings.decodingMethod = SherpaOnnx.Utils.allocateString(config.decodingMethod || 'greedy_search', Module); + + // Hotwords + allocatedStrings.hotwordsFile = SherpaOnnx.Utils.allocateString('', Module); // Not used in JS API + allocatedStrings.hotwordsBuffer = SherpaOnnx.Utils.allocateString('', Module); // Not used in JS API + + // Rule FSTs and FARs + allocatedStrings.ruleFsts = SherpaOnnx.Utils.allocateString('', Module); // Not used in JS API + allocatedStrings.ruleFars = SherpaOnnx.Utils.allocateString('', Module); // Not used in JS API + + // Now allocate the main config structure + // Size needs to match the C structure size + const configSize = 200; // Adjust if needed to match C struct + const configPtr = Module._malloc(configSize); + + // Zero out the memory + Module.HEAP8.fill(0, configPtr, configPtr + configSize); + + // Set feat_config fields + let offset = 0; + Module.setValue(configPtr 
+ offset, config.featConfig.sampleRate || 16000, 'i32'); + offset += 4; + Module.setValue(configPtr + offset, config.featConfig.featureDim || 80, 'i32'); + offset += 4; + + // Set model_config fields - transducer + Module.setValue(configPtr + offset, allocatedStrings.encoder.ptr, 'i8*'); + offset += 4; + Module.setValue(configPtr + offset, allocatedStrings.decoder.ptr, 'i8*'); + offset += 4; + Module.setValue(configPtr + offset, allocatedStrings.joiner.ptr, 'i8*'); + offset += 4; + + // Set model_config fields - paraformer + Module.setValue(configPtr + offset, allocatedStrings.paraEncoder.ptr, 'i8*'); + offset += 4; + Module.setValue(configPtr + offset, allocatedStrings.paraDecoder.ptr, 'i8*'); + offset += 4; + + // Set model_config fields - zipformer2_ctc + Module.setValue(configPtr + offset, allocatedStrings.zipformerModel.ptr, 'i8*'); + offset += 4; + + // Set remaining model_config fields + Module.setValue(configPtr + offset, allocatedStrings.tokens.ptr, 'i8*'); + offset += 4; + Module.setValue(configPtr + offset, config.modelConfig.numThreads || 1, 'i32'); + offset += 4; + Module.setValue(configPtr + offset, allocatedStrings.provider.ptr, 'i8*'); + offset += 4; + Module.setValue(configPtr + offset, config.modelConfig.debug || 0, 'i32'); + offset += 4; + Module.setValue(configPtr + offset, allocatedStrings.modelType.ptr, 'i8*'); + offset += 4; + Module.setValue(configPtr + offset, allocatedStrings.modelingUnit.ptr, 'i8*'); + offset += 4; + Module.setValue(configPtr + offset, allocatedStrings.bpeVocab.ptr, 'i8*'); + offset += 4; + Module.setValue(configPtr + offset, allocatedStrings.tokensBuffer.ptr, 'i8*'); + offset += 4; + Module.setValue(configPtr + offset, 0, 'i32'); // tokens_buf_size + offset += 4; + + // Set recognizer config fields + Module.setValue(configPtr + offset, allocatedStrings.decodingMethod.ptr, 'i8*'); + offset += 4; + Module.setValue(configPtr + offset, config.maxActivePaths || 4, 'i32'); + offset += 4; + Module.setValue(configPtr + offset, config.enableEndpoint || 1, 'i32'); + offset += 4; + Module.setValue(configPtr + offset, config.rule1MinTrailingSilence || 2.4, 'float'); + offset += 4; + Module.setValue(configPtr + offset, config.rule2MinTrailingSilence || 1.2, 'float'); + offset += 4; + Module.setValue(configPtr + offset, config.rule3MinUtteranceLength || 300, 'float'); + offset += 4; + + // Set hotwords fields + Module.setValue(configPtr + offset, allocatedStrings.hotwordsFile.ptr, 'i8*'); + offset += 4; + Module.setValue(configPtr + offset, 0.0, 'float'); // hotwords_score + offset += 4; + + // Set CTC FST decoder config - graph and max_active + Module.setValue(configPtr + offset, 0, 'i8*'); // graph + offset += 4; + Module.setValue(configPtr + offset, 0, 'i32'); // max_active + offset += 4; + + // Set rule FSTs and FARs + Module.setValue(configPtr + offset, allocatedStrings.ruleFsts.ptr, 'i8*'); + offset += 4; + Module.setValue(configPtr + offset, allocatedStrings.ruleFars.ptr, 'i8*'); + offset += 4; + + // Set blank penalty + Module.setValue(configPtr + offset, 0.0, 'float'); // blank_penalty + offset += 4; + + // Set hotwords buffer and size + Module.setValue(configPtr + offset, allocatedStrings.hotwordsBuffer.ptr, 'i8*'); + offset += 4; + Module.setValue(configPtr + offset, 0, 'i32'); // hotwords_buf_size + offset += 4; + + // Save the allocated strings for freeing later + Module.SherpaOnnxAllocatedStrings = allocatedStrings; + + return configPtr; + } catch (error) { + console.error('Error initializing ASR config:', error); + return 0; + } + }, + + /** + 
* Free the configuration memory + * @param {number} configPtr - Pointer to the configuration + * @param {Object} Module - WebAssembly module + * @private + */ + _freeConfig: function(configPtr, Module) { + if (!configPtr) return; + + try { + // Free all allocated strings + if (Module.SherpaOnnxAllocatedStrings) { + for (const key in Module.SherpaOnnxAllocatedStrings) { + if (Module.SherpaOnnxAllocatedStrings[key].ptr) { + Module._free(Module.SherpaOnnxAllocatedStrings[key].ptr); + } + } + delete Module.SherpaOnnxAllocatedStrings; + } + + // Free the config structure itself + Module._free(configPtr); + } catch (error) { + console.error('Error freeing ASR config:', error); + } + }, + + /** + * Create an online ASR recognizer with the loaded model + * @param {Object} loadedModel - Model information returned by loadModel + * @param {Object} options - Additional configuration options + * @returns {OnlineRecognizer} - An instance of OnlineRecognizer + */ + createOnlineRecognizer: function(loadedModel, options = {}) { + const config = { + featConfig: { + sampleRate: options.sampleRate || 16000, + featureDim: options.featureDim || 80, + }, + modelConfig: { + tokens: loadedModel.actualPaths.tokens || `${loadedModel.modelDir}/tokens.txt`, + numThreads: options.numThreads || 1, + provider: options.provider || 'cpu', + debug: options.debug !== undefined ? options.debug : 1, // Configurable debug + }, + decodingMethod: options.decodingMethod || 'greedy_search', + enableEndpoint: options.enableEndpoint === undefined ? 1 : options.enableEndpoint, + maxActivePaths: options.maxActivePaths || 4, + rule1MinTrailingSilence: options.rule1MinTrailingSilence || 2.4, + rule2MinTrailingSilence: options.rule2MinTrailingSilence || 1.2, + rule3MinUtteranceLength: options.rule3MinUtteranceLength || 300.0, + }; + + if (loadedModel.type === 'transducer') { + config.modelConfig.transducer = { + encoder: loadedModel.actualPaths.encoder || `${loadedModel.modelDir}/encoder.onnx`, + decoder: loadedModel.actualPaths.decoder || `${loadedModel.modelDir}/decoder.onnx`, + joiner: loadedModel.actualPaths.joiner || `${loadedModel.modelDir}/joiner.onnx`, + }; + } else if (loadedModel.type === 'paraformer') { + config.modelConfig.paraformer = { + encoder: loadedModel.actualPaths.encoder || `${loadedModel.modelDir}/encoder.onnx`, + decoder: loadedModel.actualPaths.decoder || `${loadedModel.modelDir}/decoder.onnx`, + }; + } else if (loadedModel.type === 'ctc') { + config.modelConfig.zipformer2Ctc = { + model: loadedModel.actualPaths.model || `${loadedModel.modelDir}/model.onnx`, + }; + } + + const recognizer = new global.OnlineRecognizer(config, global.Module); + + // Track the resource for cleanup if tracking function is available + if (SherpaOnnx.trackResource) { + SherpaOnnx.trackResource('asr', recognizer); + } + + return recognizer; + }, + + /** + * Create an offline ASR recognizer with the loaded model + * @param {Object} loadedModel - Model information returned by loadModel + * @param {Object} options - Additional configuration options + * @returns {OfflineRecognizer} - An instance of OfflineRecognizer + */ + createOfflineRecognizer: function(loadedModel, options = {}) { + const config = { + featConfig: { + sampleRate: options.sampleRate || 16000, + featureDim: options.featureDim || 80, + }, + modelConfig: { + tokens: loadedModel.actualPaths.tokens || `${loadedModel.modelDir}/tokens.txt`, + numThreads: options.numThreads || 1, + provider: options.provider || 'cpu', + debug: options.debug !== undefined ? 
options.debug : 1, // Configurable debug + }, + lmConfig: { + model: '', // No language model by default + scale: 1.0, + }, + decodingMethod: options.decodingMethod || 'greedy_search', + maxActivePaths: options.maxActivePaths || 4, + }; + + if (loadedModel.type === 'transducer') { + config.modelConfig.transducer = { + encoder: loadedModel.actualPaths.encoder || `${loadedModel.modelDir}/encoder.onnx`, + decoder: loadedModel.actualPaths.decoder || `${loadedModel.modelDir}/decoder.onnx`, + joiner: loadedModel.actualPaths.joiner || `${loadedModel.modelDir}/joiner.onnx`, + }; + } else if (loadedModel.type === 'paraformer') { + config.modelConfig.paraformer = { + model: loadedModel.actualPaths.model || `${loadedModel.modelDir}/model.onnx`, + }; + } else if (loadedModel.type === 'ctc') { + config.modelConfig.nemoCtc = { + model: loadedModel.actualPaths.model || `${loadedModel.modelDir}/model.onnx`, + }; + } + + const recognizer = new global.OfflineRecognizer(config, global.Module); + + // Track the resource for cleanup if tracking function is available + if (SherpaOnnx.trackResource) { + SherpaOnnx.trackResource('asr', recognizer); + } + + return recognizer; + }, + + /** + * Initialize offline recognizer configuration in WASM + * @param {Object} config - ASR configuration + * @param {Object} Module - WebAssembly module + * @returns {number} - Pointer to the configuration in WASM + * @private + */ + _initOfflineRecognizerConfig: function(config, Module) { + if (!config) { + console.error('ASR config is null'); + return 0; + } + + try { + // First, allocate all the strings we need + const allocatedStrings = {}; + + // Transducer model config + if (config.modelConfig.transducer) { + allocatedStrings.encoder = SherpaOnnx.Utils.allocateString(config.modelConfig.transducer.encoder, Module); + allocatedStrings.decoder = SherpaOnnx.Utils.allocateString(config.modelConfig.transducer.decoder, Module); + allocatedStrings.joiner = SherpaOnnx.Utils.allocateString(config.modelConfig.transducer.joiner, Module); + } else { + allocatedStrings.encoder = SherpaOnnx.Utils.allocateString('', Module); + allocatedStrings.decoder = SherpaOnnx.Utils.allocateString('', Module); + allocatedStrings.joiner = SherpaOnnx.Utils.allocateString('', Module); + } + + // Paraformer model config + if (config.modelConfig.paraformer) { + allocatedStrings.paraEncoder = SherpaOnnx.Utils.allocateString(config.modelConfig.paraformer.encoder, Module); + allocatedStrings.paraDecoder = SherpaOnnx.Utils.allocateString(config.modelConfig.paraformer.decoder, Module); + } else { + allocatedStrings.paraEncoder = SherpaOnnx.Utils.allocateString('', Module); + allocatedStrings.paraDecoder = SherpaOnnx.Utils.allocateString('', Module); + } + + // Zipformer2 CTC model config + if (config.modelConfig.zipformer2Ctc) { + allocatedStrings.zipformerModel = SherpaOnnx.Utils.allocateString(config.modelConfig.zipformer2Ctc.model, Module); + } else { + allocatedStrings.zipformerModel = SherpaOnnx.Utils.allocateString('', Module); + } + + // Tokens, provider, model_type, modeling_unit, bpe_vocab + allocatedStrings.tokens = SherpaOnnx.Utils.allocateString(config.modelConfig.tokens, Module); + allocatedStrings.provider = SherpaOnnx.Utils.allocateString(config.modelConfig.provider || 'cpu', Module); + allocatedStrings.modelType = SherpaOnnx.Utils.allocateString('', Module); // Not used in JS API + allocatedStrings.modelingUnit = SherpaOnnx.Utils.allocateString('', Module); // Not used in JS API + allocatedStrings.bpeVocab = SherpaOnnx.Utils.allocateString('', 
Module); // Not used in JS API + + // Token buffer is not used in JS API + allocatedStrings.tokensBuffer = SherpaOnnx.Utils.allocateString('', Module); + + // Decoding method + allocatedStrings.decodingMethod = SherpaOnnx.Utils.allocateString(config.decodingMethod || 'greedy_search', Module); + + // Hotwords + allocatedStrings.hotwordsFile = SherpaOnnx.Utils.allocateString('', Module); // Not used in JS API + allocatedStrings.hotwordsBuffer = SherpaOnnx.Utils.allocateString('', Module); // Not used in JS API + + // Rule FSTs and FARs + allocatedStrings.ruleFsts = SherpaOnnx.Utils.allocateString('', Module); // Not used in JS API + allocatedStrings.ruleFars = SherpaOnnx.Utils.allocateString('', Module); // Not used in JS API + + // Now allocate the main config structure + // Size needs to match the C structure size + const configSize = 200; // Adjust if needed to match C struct + const configPtr = Module._malloc(configSize); + + // Zero out the memory + Module.HEAP8.fill(0, configPtr, configPtr + configSize); + + // Set feat_config fields + let offset = 0; + Module.setValue(configPtr + offset, config.featConfig.sampleRate || 16000, 'i32'); + offset += 4; + Module.setValue(configPtr + offset, config.featConfig.featureDim || 80, 'i32'); + offset += 4; + + // Set model_config fields - transducer + Module.setValue(configPtr + offset, allocatedStrings.encoder.ptr, 'i8*'); + offset += 4; + Module.setValue(configPtr + offset, allocatedStrings.decoder.ptr, 'i8*'); + offset += 4; + Module.setValue(configPtr + offset, allocatedStrings.joiner.ptr, 'i8*'); + offset += 4; + + // Set model_config fields - paraformer + Module.setValue(configPtr + offset, allocatedStrings.paraEncoder.ptr, 'i8*'); + offset += 4; + Module.setValue(configPtr + offset, allocatedStrings.paraDecoder.ptr, 'i8*'); + offset += 4; + + // Set model_config fields - zipformer2_ctc + Module.setValue(configPtr + offset, allocatedStrings.zipformerModel.ptr, 'i8*'); + offset += 4; + + // Set remaining model_config fields + Module.setValue(configPtr + offset, allocatedStrings.tokens.ptr, 'i8*'); + offset += 4; + Module.setValue(configPtr + offset, config.modelConfig.numThreads || 1, 'i32'); + offset += 4; + Module.setValue(configPtr + offset, allocatedStrings.provider.ptr, 'i8*'); + offset += 4; + Module.setValue(configPtr + offset, config.modelConfig.debug || 0, 'i32'); + offset += 4; + Module.setValue(configPtr + offset, allocatedStrings.modelType.ptr, 'i8*'); + offset += 4; + Module.setValue(configPtr + offset, allocatedStrings.modelingUnit.ptr, 'i8*'); + offset += 4; + Module.setValue(configPtr + offset, allocatedStrings.bpeVocab.ptr, 'i8*'); + offset += 4; + Module.setValue(configPtr + offset, allocatedStrings.tokensBuffer.ptr, 'i8*'); + offset += 4; + Module.setValue(configPtr + offset, 0, 'i32'); // tokens_buf_size + offset += 4; + + // Set recognizer config fields + Module.setValue(configPtr + offset, allocatedStrings.decodingMethod.ptr, 'i8*'); + offset += 4; + Module.setValue(configPtr + offset, config.maxActivePaths || 4, 'i32'); + offset += 4; + Module.setValue(configPtr + offset, config.enableEndpoint || 1, 'i32'); + offset += 4; + Module.setValue(configPtr + offset, config.rule1MinTrailingSilence || 2.4, 'float'); + offset += 4; + Module.setValue(configPtr + offset, config.rule2MinTrailingSilence || 1.2, 'float'); + offset += 4; + Module.setValue(configPtr + offset, config.rule3MinUtteranceLength || 300, 'float'); + offset += 4; + + // Set hotwords fields + Module.setValue(configPtr + offset, allocatedStrings.hotwordsFile.ptr, 
'i8*'); + offset += 4; + Module.setValue(configPtr + offset, 0.0, 'float'); // hotwords_score + offset += 4; + + // Set CTC FST decoder config - graph and max_active + Module.setValue(configPtr + offset, 0, 'i8*'); // graph + offset += 4; + Module.setValue(configPtr + offset, 0, 'i32'); // max_active + offset += 4; + + // Set rule FSTs and FARs + Module.setValue(configPtr + offset, allocatedStrings.ruleFsts.ptr, 'i8*'); + offset += 4; + Module.setValue(configPtr + offset, allocatedStrings.ruleFars.ptr, 'i8*'); + offset += 4; + + // Set blank penalty + Module.setValue(configPtr + offset, 0.0, 'float'); // blank_penalty + offset += 4; + + // Set hotwords buffer and size + Module.setValue(configPtr + offset, allocatedStrings.hotwordsBuffer.ptr, 'i8*'); + offset += 4; + Module.setValue(configPtr + offset, 0, 'i32'); // hotwords_buf_size + offset += 4; + + // Save the allocated strings for freeing later + Module.SherpaOnnxAllocatedStrings = allocatedStrings; + + return configPtr; + } catch (error) { + console.error('Error initializing ASR config:', error); + return 0; + } + } + }; + + /** + * OnlineRecognizer class for streaming speech recognition + */ + global.OnlineRecognizer = global.OnlineRecognizer || function(config, Module) { + this.Module = Module; + this.config = config; + this.streams = []; // Track streams created by this recognizer + + // Initialize the configuration in WASM + const configPtr = SherpaOnnx.ASR._initOnlineRecognizerConfig(config, Module); + + // Create the recognizer + this.handle = Module.ccall( + 'SherpaOnnxCreateOnlineRecognizer', + 'number', + ['number'], + [configPtr] + ); + + // Free the configuration memory + SherpaOnnx.ASR._freeConfig(configPtr, Module); + + /** + * Create a stream for audio input + * @returns {OnlineStream} - A new stream for audio input + */ + this.createStream = function() { + const streamHandle = this.Module.ccall( + 'SherpaOnnxCreateOnlineStream', + 'number', + ['number'], + [this.handle] + ); + const stream = new global.OnlineStream(streamHandle, this.Module); + + // Track the stream for cleanup + this.streams.push(stream); + + return stream; + }; + + /** + * Check if the stream is ready for decoding + * @param {OnlineStream} stream - The stream to check + * @returns {boolean} - True if ready, false otherwise + */ + this.isReady = function(stream) { + return this.Module.ccall( + 'SherpaOnnxIsOnlineStreamReady', + 'number', + ['number', 'number'], + [this.handle, stream.handle] + ) === 1; + }; + + /** + * Decode the audio in the stream + * @param {OnlineStream} stream - The stream to decode + */ + this.decode = function(stream) { + this.Module.ccall( + 'SherpaOnnxDecodeOnlineStream', + 'void', + ['number', 'number'], + [this.handle, stream.handle] + ); + }; + + /** + * Check if an endpoint has been detected + * @param {OnlineStream} stream - The stream to check + * @returns {boolean} - True if endpoint detected, false otherwise + */ + this.isEndpoint = function(stream) { + return this.Module.ccall( + 'SherpaOnnxOnlineStreamIsEndpoint', + 'number', + ['number', 'number'], + [this.handle, stream.handle] + ) === 1; + }; + + /** + * Reset the stream + * @param {OnlineStream} stream - The stream to reset + */ + this.reset = function(stream) { + this.Module.ccall( + 'SherpaOnnxOnlineStreamReset', + 'void', + ['number', 'number'], + [this.handle, stream.handle] + ); + }; + + /** + * Get the recognition result + * @param {OnlineStream} stream - The stream to get results from + * @returns {Object} - Recognition result as JSON + */ + this.getResult 
= function(stream) { + const resultPtr = this.Module.ccall( + 'SherpaOnnxGetOnlineStreamResultAsJson', + 'number', + ['number', 'number'], + [this.handle, stream.handle] + ); + + const jsonStr = this.Module.UTF8ToString(resultPtr); + const result = JSON.parse(jsonStr); + + this.Module.ccall( + 'SherpaOnnxDestroyOnlineStreamResultJson', + 'null', + ['number'], + [resultPtr] + ); + + return result; + }; + + /** + * Free the recognizer and all associated streams + */ + this.free = function() { + // Free all streams first + for (let i = this.streams.length - 1; i >= 0; i--) { + if (this.streams[i]) { + this.streams[i].free(); + } + this.streams.splice(i, 1); + } + + // Then free the recognizer + if (this.handle) { + this.Module.ccall( + 'SherpaOnnxDestroyOnlineRecognizer', + 'null', + ['number'], + [this.handle] + ); + this.handle = null; + } + }; + }; + + /** + * OnlineStream class for handling streaming audio input + */ + global.OnlineStream = global.OnlineStream || function(handle, Module) { + this.handle = handle; + this.Module = Module; + this.pointer = null; // buffer + this.n = 0; // buffer size + + /** + * Accept audio waveform data + * @param {number} sampleRate - Sample rate of the audio + * @param {Float32Array} samples - Audio samples in [-1, 1] range + */ + this.acceptWaveform = function(sampleRate, samples) { + if (this.n < samples.length) { + if (this.pointer) { + this.Module._free(this.pointer); + } + this.pointer = this.Module._malloc(samples.length * samples.BYTES_PER_ELEMENT); + this.n = samples.length; + } + + this.Module.HEAPF32.set(samples, this.pointer / samples.BYTES_PER_ELEMENT); + this.Module.ccall( + 'SherpaOnnxOnlineStreamAcceptWaveform', + 'void', + ['number', 'number', 'number', 'number'], + [this.handle, sampleRate, this.pointer, samples.length] + ); + }; + + /** + * Signal that input is finished + */ + this.inputFinished = function() { + this.Module.ccall( + 'SherpaOnnxOnlineStreamInputFinished', + 'void', + ['number'], + [this.handle] + ); + }; + + /** + * Free the stream + */ + this.free = function() { + if (this.handle) { + this.Module.ccall( + 'SherpaOnnxDestroyOnlineStream', + 'null', + ['number'], + [this.handle] + ); + this.handle = null; + + if (this.pointer) { + this.Module._free(this.pointer); + this.pointer = null; + this.n = 0; + } + } + }; + }; + + /** + * OfflineRecognizer class for non-streaming speech recognition + */ + global.OfflineRecognizer = global.OfflineRecognizer || function(config, Module) { + this.Module = Module; + this.config = config; + this.streams = []; // Track streams created by this recognizer + + // Initialize the configuration in WASM + const configPtr = SherpaOnnx.ASR._initOfflineRecognizerConfig(config, Module); + + // Create the recognizer + this.handle = Module.ccall( + 'SherpaOnnxCreateOfflineRecognizer', + 'number', + ['number'], + [configPtr] + ); + + // Free the configuration memory + SherpaOnnx.ASR._freeConfig(configPtr, Module); + + /** + * Create a stream for offline processing + * @returns {OfflineStream} - A new stream for offline processing + */ + this.createStream = function() { + const streamHandle = this.Module.ccall( + 'SherpaOnnxCreateOfflineStream', + 'number', + ['number'], + [this.handle] + ); + const stream = new global.OfflineStream(streamHandle, this.Module); + + // Track the stream for cleanup + this.streams.push(stream); + + return stream; + }; + + /** + * Decode the audio in the stream + * @param {OfflineStream} stream - The stream to decode + */ + this.decode = function(stream) { + 
this.Module.ccall( + 'SherpaOnnxDecodeOfflineStream', + 'void', + ['number', 'number'], + [this.handle, stream.handle] + ); + }; + + /** + * Free the recognizer and all associated streams + */ + this.free = function() { + // Free all streams first + for (let i = this.streams.length - 1; i >= 0; i--) { + if (this.streams[i]) { + this.streams[i].free(); + } + this.streams.splice(i, 1); + } + + // Then free the recognizer + if (this.handle) { + this.Module.ccall( + 'SherpaOnnxDestroyOfflineRecognizer', + 'null', + ['number'], + [this.handle] + ); + this.handle = null; + } + }; + }; + + /** + * OfflineStream class for handling non-streaming audio input + */ + global.OfflineStream = global.OfflineStream || function(handle, Module) { + this.handle = handle; + this.Module = Module; + + /** + * Accept audio waveform data + * @param {number} sampleRate - Sample rate of the audio + * @param {Float32Array} samples - Audio samples in [-1, 1] range + */ + this.acceptWaveform = function(sampleRate, samples) { + const pointer = this.Module._malloc(samples.length * samples.BYTES_PER_ELEMENT); + this.Module.HEAPF32.set(samples, pointer / samples.BYTES_PER_ELEMENT); + + this.Module.ccall( + 'SherpaOnnxAcceptWaveformOffline', + 'void', + ['number', 'number', 'number', 'number'], + [this.handle, sampleRate, pointer, samples.length] + ); + + this.Module._free(pointer); + }; + + /** + * Get the recognition result + * @returns {Object} - Recognition result as JSON + */ + this.getResult = function() { + const resultPtr = this.Module.ccall( + 'SherpaOnnxGetOfflineStreamResultAsJson', + 'number', + ['number'], + [this.handle] + ); + + const jsonStr = this.Module.UTF8ToString(resultPtr); + const result = JSON.parse(jsonStr); + + this.Module.ccall( + 'SherpaOnnxDestroyOfflineStreamResultJson', + 'null', + ['number'], + [resultPtr] + ); + + return result; + }; + + /** + * Free the stream + */ + this.free = function() { + if (this.handle) { + this.Module.ccall( + 'SherpaOnnxDestroyOfflineStream', + 'null', + ['number'], + [this.handle] + ); + this.handle = null; + } + }; + }; + + // For Node.js environments + if (typeof module !== 'undefined' && module.exports) { + module.exports = SherpaOnnx; + } +})(typeof window !== 'undefined' ? 
window : global); \ No newline at end of file diff --git a/wasm/combined/sherpa-onnx-combined.js b/wasm/combined/sherpa-onnx-combined.js new file mode 100644 index 0000000000..ac37fc3a1c --- /dev/null +++ b/wasm/combined/sherpa-onnx-combined.js @@ -0,0 +1,246 @@ +/** + * sherpa-onnx-combined.js + * + * Loader for all Sherpa-ONNX modules + */ + +(function(global) { + // Auto-detect script path to handle loading from different directories + function getScriptPath() { + // For browser environments + if (typeof document !== 'undefined') { + const scripts = document.getElementsByTagName('script'); + for (let i = 0; i < scripts.length; i++) { + const src = scripts[i].src; + if (src.indexOf('sherpa-onnx-combined.js') !== -1) { + // Return the directory path of the script + return src.substring(0, src.lastIndexOf('/') + 1); + } + } + } + // Default path if we can't detect + return ''; + } + + // Get the base path where all JS modules are located + const basePath = getScriptPath(); + console.log("Detected script base path:", basePath); + + // Define module paths relative to the base path + const defaultModules = [ + 'sherpa-onnx-core.js', + 'sherpa-onnx-vad.js', + 'sherpa-onnx-asr.js', + 'sherpa-onnx-tts.js', + 'sherpa-onnx-speaker.js', + 'sherpa-onnx-enhancement.js', + 'sherpa-onnx-kws.js' + ]; + + // Use custom module paths if provided, otherwise use defaults with base path + let modulePaths; + if (typeof window !== 'undefined' && window.sherpaOnnxModulePaths) { + console.log("Using custom module paths from window.sherpaOnnxModulePaths"); + modulePaths = window.sherpaOnnxModulePaths; + } else if (global.sherpaOnnxModulePaths) { + console.log("Using custom module paths from global.sherpaOnnxModulePaths"); + modulePaths = global.sherpaOnnxModulePaths; + } else { + // Apply base path to each module + modulePaths = defaultModules.map(module => basePath + module); + console.log("Using default module paths with detected base path:", modulePaths); + } + + // Keep track of loaded modules + let loadedModules = {}; + let modulesLoading = false; + + // Keep track of active resources to clean up + let activeResources = { + asr: [], + tts: [], + vad: [], + speaker: [], + enhancement: [], + kws: [] + }; + + // Async loader for scripts + const loadScript = function(url) { + return new Promise((resolve, reject) => { + const script = document.createElement('script'); + script.src = url; + script.async = true; + + script.onload = () => { + console.log(`Module ${url} loaded successfully`); + loadedModules[url] = true; + resolve(); + }; + + script.onerror = (e) => { + console.error(`Failed to load script: ${url}`, e); + loadedModules[url] = false; + // Continue loading other modules even if one fails + resolve(); + }; + + document.head.appendChild(script); + }); + }; + + // Check if core module is available + const ensureCoreModule = function() { + if (!global.SherpaOnnx) { + console.error("SherpaOnnx core module not loaded! 
Other modules will not function properly."); + return false; + } + return true; + }; + + // Load modules in sequence to ensure proper initialization + const loadModulesSequentially = async function() { + if (modulesLoading) return; + + modulesLoading = true; + + try { + // Load core module first since other modules depend on it + console.log("Loading SherpaOnnx core module from: " + modulePaths[0]); + await loadScript(modulePaths[0]); // Use the first module from the paths array + + if (!ensureCoreModule()) { + throw new Error("Failed to load core module"); + } + + // Load the rest of the modules sequentially + for (let i = 1; i < modulePaths.length; i++) { + console.log(`Loading module ${i+1}/${modulePaths.length}: ${modulePaths[i]}`); + await loadScript(modulePaths[i]); + } + + // Check if all critical modules are loaded + let allLoaded = true; + let missingModules = []; + + for (const module of modulePaths) { + if (!loadedModules[module]) { + allLoaded = false; + missingModules.push(module); + } + } + + if (!allLoaded) { + console.warn(`Not all modules loaded successfully. Missing: ${missingModules.join(', ')}`); + } else { + console.log("All SherpaOnnx modules loaded successfully"); + } + + // Add resource tracking and cleanup methods after modules are loaded + if (global.SherpaOnnx) { + // Add resource tracking methods + global.SherpaOnnx.trackResource = function(type, resource) { + if (activeResources[type]) { + activeResources[type].push(resource); + } + return resource; + }; + + // Add cleanup methods + global.SherpaOnnx.cleanup = function(type) { + if (!type) { + // Clean up all resource types if no specific type is provided + Object.keys(activeResources).forEach(t => this.cleanup(t)); + return; + } + + if (activeResources[type]) { + const resources = activeResources[type]; + console.log(`Cleaning up ${resources.length} ${type} resources`); + + for (let i = resources.length - 1; i >= 0; i--) { + try { + if (resources[i] && typeof resources[i].free === 'function') { + resources[i].free(); + } + resources.splice(i, 1); + } catch (e) { + console.error(`Error cleaning up ${type} resource:`, e); + } + } + } + }; + + // Add convenience methods for each resource type + global.SherpaOnnx.cleanupASR = function() { this.cleanup('asr'); }; + global.SherpaOnnx.cleanupTTS = function() { this.cleanup('tts'); }; + global.SherpaOnnx.cleanupVAD = function() { this.cleanup('vad'); }; + global.SherpaOnnx.cleanupSpeaker = function() { this.cleanup('speaker'); }; + global.SherpaOnnx.cleanupEnhancement = function() { this.cleanup('enhancement'); }; + global.SherpaOnnx.cleanupKWS = function() { this.cleanup('kws'); }; + } + + // Call ready callback if defined + if (global.onSherpaOnnxReady) { + console.log("Calling onSherpaOnnxReady callback"); + global.onSherpaOnnxReady(allLoaded, missingModules); + } + } catch (error) { + console.error("Error during module loading:", error); + + if (global.onSherpaOnnxReady) { + global.onSherpaOnnxReady(false, error); + } + } finally { + modulesLoading = false; + } + }; + + // Main initialization function + const initialize = function() { + // Browser environment: load scripts + if (typeof window !== 'undefined') { + // Set up a backup timeout to ensure callback is called even if loading fails + const timeoutPromise = new Promise((resolve) => { + setTimeout(() => { + console.warn("Module loading timeout reached - some modules may not have loaded correctly"); + resolve(); + }, 30000); // 30 second timeout + }); + + // Load modules with timeout protection + 
Promise.race([loadModulesSequentially(), timeoutPromise]) + .catch(error => { + console.error("Module loading failed:", error); + + if (global.onSherpaOnnxReady) { + global.onSherpaOnnxReady(false, error); + } + }); + } + }; + + // Check if WASM module is already loaded + if (typeof global.Module !== 'undefined' && typeof global.Module.onRuntimeInitialized !== 'undefined') { + const originalOnRuntimeInitialized = global.Module.onRuntimeInitialized; + + global.Module.onRuntimeInitialized = function() { + console.log("WASM module runtime initialized, now loading JavaScript modules"); + + if (originalOnRuntimeInitialized) { + originalOnRuntimeInitialized(); + } + + initialize(); + }; + } else { + // No WASM module yet, set up a listener + global.onModuleReady = function() { + console.log("WASM module ready, proceeding with module initialization"); + initialize(); + }; + + // Also start loading anyway in case the event was missed + setTimeout(initialize, 1000); + } +})(typeof window !== 'undefined' ? window : global); \ No newline at end of file diff --git a/wasm/combined/sherpa-onnx-core.js b/wasm/combined/sherpa-onnx-core.js new file mode 100644 index 0000000000..13386234d7 --- /dev/null +++ b/wasm/combined/sherpa-onnx-core.js @@ -0,0 +1,962 @@ +/** + * sherpa-onnx-core.js + * + * Core functionality for the SherpaOnnx WASM modules + */ + +(function(global) { + // Create main namespace + const SherpaOnnx = {}; + + // Check if Module already exists and extend it + if (typeof global.Module !== 'undefined') { + const originalOnRuntimeInitialized = global.Module.onRuntimeInitialized; + global.Module.onRuntimeInitialized = function() { + console.log("SherpaOnnx Core module initialized"); + if (originalOnRuntimeInitialized) originalOnRuntimeInitialized(); + if (global.onModuleReady) global.onModuleReady(); + }; + } + + // Common utilities for memory management and shared functionality + SherpaOnnx.Utils = { + /** + * Free configuration memory allocated in WASM + * @param {Object} config - Configuration object with allocated memory + * @param {Object} Module - WebAssembly module + */ + freeConfig: function(config, Module) { + if (!config) return; + + if ('buffer' in config) { + Module._free(config.buffer); + } + + if ('sileroVad' in config) { + this.freeConfig(config.sileroVad, Module); + } + + if (config.ptr) { + Module._free(config.ptr); + } + }, + + /** + * Copy string to WASM heap and return pointer + * @param {string} str - String to allocate + * @param {Object} Module - WebAssembly module + * @returns {Object} Object with pointer and length + */ + allocateString: function(str, Module) { + if (!str) str = ''; + const strLen = Module.lengthBytesUTF8(str) + 1; + const strPtr = Module._malloc(strLen); + Module.stringToUTF8(str, strPtr, strLen); + return { ptr: strPtr, len: strLen }; + } + }; + + // File system utilities for model loading + SherpaOnnx.FileSystem = { + /** + * Safely create a directory in the WASM filesystem + * Handles cases where the path already exists as a file + * @param {string} dirPath - Path of the directory to create + * @param {boolean} debug - Whether to output debug logs + * @returns {boolean|Object} - True if successful or object with alternative path + */ + safeCreateDirectory: function(dirPath, debug = false) { + try { + // Skip empty paths + if (!dirPath || dirPath === '') { + if (debug) console.log("Empty directory path, skipping"); + return true; + } + + if (debug) console.log(`Creating directory: ${dirPath}`); + + // Generate a unique directory path to avoid 
conflicts + const timestamp = Date.now(); + const random = Math.floor(Math.random() * 100000); + const uniquePath = `temp_${timestamp}_${random}`; + + try { + // Create the temporary directory first + if (debug) console.log(`Creating unique temporary directory: ${uniquePath}`); + global.Module.FS.mkdir(uniquePath, 0o777); + + // Then create our target directory inside the unique temp directory + const safePath = `${uniquePath}/${dirPath}`; + if (debug) console.log(`Creating directory in safe location: ${safePath}`); + + // Create all directories in the path + let currentPath = uniquePath; + const parts = dirPath.split('/'); + + for (const part of parts) { + if (!part) continue; + currentPath += '/' + part; + try { + global.Module.FS.mkdir(currentPath, 0o777); + if (debug) console.log(`Created directory component: ${currentPath}`); + } catch (mkErr) { + if (mkErr.errno !== 17) { // Not EEXIST + console.error(`Error creating directory component ${currentPath}:`, mkErr); + throw mkErr; + } + } + } + + if (debug) console.log(`Successfully created nested directory at ${safePath}`); + + // Return the full alternative path + return { + success: true, + altPath: safePath + }; + } catch (nestErr) { + console.error(`Failed to create nested directory structure:`, nestErr); + + // Try a different approach - directly creating a unique directory + const directUniquePath = `${dirPath}_${timestamp}_${random}`; + try { + if (debug) console.log(`Trying direct unique path creation: ${directUniquePath}`); + global.Module.FS.mkdir(directUniquePath, 0o777); + if (debug) console.log(`Created unique directory: ${directUniquePath}`); + + return { + success: true, + altPath: directUniquePath + }; + } catch (directErr) { + console.error(`Failed to create directory with unique name:`, directErr); + + // Last attempt - try creating the original directory + try { + global.Module.FS.mkdir(dirPath, 0o777); + if (debug) console.log(`Successfully created original directory: ${dirPath}`); + return true; + } catch (origErr) { + // If it exists and is a directory, that's fine + if (origErr.errno === 17) { // EEXIST + try { + const stat = global.Module.FS.stat(dirPath); + if (stat.isDir) { + if (debug) console.log(`Directory ${dirPath} already exists`); + return true; + } + } catch (statErr) { + console.error(`Error checking if ${dirPath} is a directory:`, statErr); + } + } + + console.error(`All attempts to create directory failed:`, origErr); + throw origErr; + } + } + } + } catch (error) { + console.error(`Failed to create directory ${dirPath}:`, error); + return false; + } + }, + + /** + * Safely load a file from a URL into the WASM file system + * Takes care of creating parent directories and verifying the file was written + * @param {string} url - URL to fetch the file from + * @param {string} localPath - Path where to save the file in WASM filesystem + * @param {boolean} debug - Whether to output debug logs + * @returns {Promise} - True if successful, false otherwise + */ + safeLoadFile: async function(url, localPath, debug = false) { + try { + if (debug) console.log(`Loading file from ${url} to ${localPath}`); + + // Get the directory + const lastSlash = localPath.lastIndexOf('/'); + let targetPath = localPath; + + if (lastSlash > 0) { + const dirPath = localPath.substring(0, lastSlash); + if (debug) console.log(`Ensuring directory exists: ${dirPath}`); + + const dirResult = this.safeCreateDirectory(dirPath, debug); + + // Check if we need to use an alternate path + if (dirResult && typeof dirResult === 'object' && 
dirResult.altPath) { + // Adjust the target path to use the alternate directory path + targetPath = `${dirResult.altPath}/${localPath.substring(lastSlash + 1)}`; + if (debug) console.log(`Using alternate target path: ${targetPath}`); + } else if (!dirResult) { + throw new Error(`Failed to create directory ${dirPath}`); + } + } + + // Fetch the file + if (debug) console.log(`Fetching ${url}`); + const response = await fetch(url); + + if (!response.ok) { + throw new Error(`Failed to fetch ${url}: ${response.status} ${response.statusText}`); + } + + const buffer = await response.arrayBuffer(); + + if (!buffer || buffer.byteLength === 0) { + throw new Error(`Empty response from ${url}`); + } + + if (debug) console.log(`Downloaded ${url}, size: ${buffer.byteLength} bytes`); + + // Write the file + try { + global.Module.FS.writeFile(targetPath, new Uint8Array(buffer)); + + // Verify the file was written + try { + const stat = global.Module.FS.stat(targetPath); + if (debug) console.log(`File written to ${targetPath}, size: ${stat.size} bytes`); + } catch (statErr) { + throw new Error(`Failed to verify file was written: ${statErr.message}`); + } + + // Return both the success status and the actual path used + return { + success: true, + path: targetPath + }; + } catch (writeErr) { + console.error(`Error writing file to ${targetPath}:`, writeErr); + throw writeErr; + } + } catch (error) { + console.error(`Error loading ${url}:`, error); + return false; + } + }, + + /** + * Check if a file exists in the WASM filesystem + * @param {string} path - Path to check + * @returns {boolean} - True if file exists, false otherwise + */ + fileExists: function(path) { + try { + global.Module.FS.stat(path); + return true; + } catch (e) { + return false; + } + }, + + /** + * Check if path exists and is a directory + * @param {string} path - Path to check + * @returns {boolean} - True if path exists and is a directory, false otherwise + */ + isDirectory: function(path) { + try { + const stat = global.Module.FS.stat(path); + return stat.isDir; + } catch (e) { + return false; + } + }, + + /** + * Remove a file or directory from the WASM filesystem + * @param {string} path - Path to remove + * @param {boolean} debug - Whether to output debug logs + * @returns {boolean} - True if successful, false otherwise + */ + removePath: function(path, debug = false) { + try { + if (!this.fileExists(path)) { + if (debug) console.log(`Path ${path} doesn't exist, nothing to remove`); + return true; + } + + if (this.isDirectory(path)) { + if (debug) console.log(`Removing directory ${path}`); + global.Module.FS.rmdir(path); + } else { + if (debug) console.log(`Removing file ${path}`); + global.Module.FS.unlink(path); + } + + return true; + } catch (error) { + console.error(`Error removing path ${path}:`, error); + return false; + } + }, + + // Backward compatibility aliases - DIRECTLY COPY FUNCTIONALITY to avoid any reference issues + ensureDirectory: function(dirPath) { + console.log(`Using legacy ensureDirectory on path: ${dirPath}`); + + try { + // Skip empty paths + if (!dirPath || dirPath === '') { + console.log("Empty directory path, skipping"); + return true; + } + + console.log(`Creating directory: ${dirPath}`); + + // First check if the path exists + try { + const info = global.Module.FS.analyzePath(dirPath); + if (info.exists) { + const stat = global.Module.FS.stat(dirPath); + if (stat.isDir) { + console.log(`Directory ${dirPath} already exists`); + return true; + } else { + // It exists as a file, remove it first + 
console.log(`Path ${dirPath} exists as a file, removing it`); + global.Module.FS.unlink(dirPath); + // Then create as directory + global.Module.FS.mkdir(dirPath, 0o777); + console.log(`Successfully created directory at ${dirPath}`); + return true; + } + } else { + // Path doesn't exist, create it + global.Module.FS.mkdir(dirPath, 0o777); + console.log(`Created new directory at ${dirPath}`); + return true; + } + } catch (e) { + if (e.errno === 44 || e.errno === 2) { // ENOENT - path doesn't exist + // Create the directory + global.Module.FS.mkdir(dirPath, 0o777); + console.log(`Created directory at ${dirPath}`); + return true; + } else if (e.errno === 17) { // EEXIST - already exists + console.log(`Directory ${dirPath} already exists`); + return true; + } else { + console.error(`Error creating directory ${dirPath}:`, e); + throw e; + } + } + } catch (error) { + console.error(`Failed to create directory ${dirPath}:`, error); + return false; + } + }, + + loadFile: async function(url, localPath) { + console.log(`DEBUG: DIRECT loadFile called with url: ${url}, localPath: ${localPath}`); + + try { + console.log(`Loading file from ${url} to ${localPath}`); + + // Get the directory + const lastSlash = localPath.lastIndexOf('/'); + if (lastSlash > 0) { + const dirPath = localPath.substring(0, lastSlash); + console.log(`Ensuring directory exists: ${dirPath}`); + + // Use the ensureDirectory function directly to avoid any reference issues + this.ensureDirectory(dirPath); + } + + // Fetch the file + console.log(`Fetching ${url}`); + const response = await fetch(url); + + if (!response.ok) { + throw new Error(`Failed to fetch ${url}: ${response.status} ${response.statusText}`); + } + + const buffer = await response.arrayBuffer(); + + if (!buffer || buffer.byteLength === 0) { + throw new Error(`Empty response from ${url}`); + } + + console.log(`Downloaded ${url}, size: ${buffer.byteLength} bytes`); + + // Write the file + try { + global.Module.FS.writeFile(localPath, new Uint8Array(buffer)); + + // Verify the file was written + try { + const stat = global.Module.FS.stat(localPath); + console.log(`File written to ${localPath}, size: ${stat.size} bytes`); + } catch (statErr) { + throw new Error(`Failed to verify file was written: ${statErr.message}`); + } + + return true; + } catch (writeErr) { + console.error(`Error writing file to ${localPath}:`, writeErr); + throw writeErr; + } + } catch (error) { + console.error(`Error loading ${url}:`, error); + return false; + } + }, + + /** + * Create a model directory with a guaranteed unique path to avoid conflicts + * Will create a completely new unique directory for models + * + * @param {string} baseName - Base name for the model directory + * @param {boolean} debug - Whether to enable debug logging + * @returns {Promise} - Object with success status and path information + */ + createModelDirectory: async function(baseName, debug = false) { + try { + if (!baseName || typeof baseName !== 'string') { + baseName = 'model-dir'; + } + + // Generate a unique path with timestamp and random ID + const timestamp = Date.now(); + const randomId = Math.floor(Math.random() * 1000000); + const uniqueDirName = `${baseName}_${timestamp}_${randomId}`; + + if (debug) console.log(`Creating unique model directory: ${uniqueDirName}`); + + try { + // Create the directory + global.Module.FS.mkdir(uniqueDirName, 0o777); + + if (debug) console.log(`Successfully created unique model directory: ${uniqueDirName}`); + + return { + success: true, + baseName: baseName, + uniquePath: 
uniqueDirName, + timestamp: timestamp, + randomId: randomId + }; + } catch (error) { + console.error(`Failed to create unique model directory: ${uniqueDirName}`, error); + + // Try a different random ID + const newRandomId = Math.floor(Math.random() * 1000000); + const backupDirName = `backup_${baseName}_${timestamp}_${newRandomId}`; + + if (debug) console.log(`Trying backup directory name: ${backupDirName}`); + + try { + global.Module.FS.mkdir(backupDirName, 0o777); + + if (debug) console.log(`Successfully created backup model directory: ${backupDirName}`); + + return { + success: true, + baseName: baseName, + uniquePath: backupDirName, + timestamp: timestamp, + randomId: newRandomId, + isBackup: true + }; + } catch (backupError) { + console.error(`Failed to create backup model directory: ${backupDirName}`, backupError); + return { + success: false, + error: backupError + }; + } + } + } catch (error) { + console.error(`Error in createModelDirectory:`, error); + return { + success: false, + error: error + }; + } + }, + + /** + * Extract a zip file to the WASM filesystem + * @param {ArrayBuffer} zipData - The zip file data + * @param {string} targetPath - Target extraction path + * @param {boolean} debug - Enable debug logging + * @returns {Promise} - Result of extraction + */ + extractZip: async function(zipData, targetPath, debug = false) { + if (debug) console.log(`Extracting zip to ${targetPath}`); + + try { + // Force clean the target path if it exists as a file + try { + const stat = global.Module.FS.stat(targetPath); + const isFile = (stat.mode & 61440) === 32768; + if (isFile) { + if (debug) console.log(`Target path ${targetPath} exists as a FILE - removing it`); + global.Module.FS.unlink(targetPath); + } + } catch (err) { + // Path doesn't exist, which is fine + } + + // Make sure the base directory exists + try { + this.mkdirp(targetPath); + if (debug) console.log(`Created base directory ${targetPath}`); + } catch (dirErr) { + console.error(`Failed to create base directory ${targetPath}: ${dirErr.message}`); + return { + success: false, + error: `Failed to create target directory: ${dirErr.message}` + }; + } + + // Load JSZip from CDN if needed + if (typeof JSZip === 'undefined') { + if (debug) console.log("Loading JSZip library from CDN"); + await new Promise((resolve, reject) => { + const script = document.createElement('script'); + script.src = 'https://cdnjs.cloudflare.com/ajax/libs/jszip/3.10.1/jszip.min.js'; + script.onload = resolve; + script.onerror = reject; + document.head.appendChild(script); + }); + + if (typeof JSZip === 'undefined') { + throw new Error("Failed to load JSZip library"); + } + } + + // Process the zip file + const zip = await JSZip.loadAsync(zipData); + const extractedFiles = []; + + // First, create all directories + const directories = new Set(); + for (const path in zip.files) { + const file = zip.files[path]; + + if (file.dir) { + // Add directory path + directories.add(`${targetPath}/${path}`); + } else { + // Add parent directory path for files + const dirPath = path.substring(0, path.lastIndexOf('/')); + if (dirPath) { + directories.add(`${targetPath}/${dirPath}`); + } + } + } + + // Create directories in sorted order to ensure parents are created first + if (debug) console.log(`Creating ${directories.size} directories`); + const sortedDirs = [...directories].sort((a, b) => a.split('/').length - b.split('/').length); + for (const dir of sortedDirs) { + try { + this.mkdirp(dir); + } catch (e) { + console.warn(`Error creating directory ${dir}: 
${e.message}`); + } + } + + // Now extract all files + for (const path in zip.files) { + const file = zip.files[path]; + if (file.dir) continue; // Skip directories, already created + + try { + // Create the full path + const fullPath = `${targetPath}/${path}`; + + // Extract and write the file + const content = await file.async('arraybuffer'); + FS.writeFile(fullPath, new Uint8Array(content)); + extractedFiles.push(fullPath); + + if (debug && extractedFiles.length % 50 === 0) { + console.log(`Extracted ${extractedFiles.length} files so far...`); + } + } catch (fileErr) { + console.error(`Error extracting file ${path}: ${fileErr.message}`); + } + } + + if (debug) console.log(`Successfully extracted ${extractedFiles.length} files`); + return { success: true, files: extractedFiles }; + } catch (error) { + console.error(`Error extracting zip: ${error.message}`); + return { success: false, error: error.message }; + } + }, + + /** + * Create directory and parents if needed + * @param {string} dirPath - Directory path + */ + mkdirp: function(dirPath) { + if (!dirPath || dirPath === '/') return; + + const parts = dirPath.split('/').filter(p => p); + let current = ''; + + for (const part of parts) { + current += '/' + part; + try { + const stat = global.Module.FS.stat(current); + // Only continue if it's a directory + if ((stat.mode & 61440) !== 16384) { // Not a directory (S_IFDIR = 16384) + console.error(`Path ${current} exists but is not a directory`); + + // Try to delete it if it's a file + if ((stat.mode & 61440) === 32768) { // Is a file (S_IFREG = 32768) + console.log(`Removing file at ${current} to create directory`); + global.Module.FS.unlink(current); + global.Module.FS.mkdir(current); + } else { + throw new Error(`Path exists but is not a directory: ${current}`); + } + } + } catch (e) { + // ENOENT error means directory doesn't exist, so create it + if (e.errno === 44 || e.errno === 2 || e.message.includes('No such file or directory')) { + try { + global.Module.FS.mkdir(current); + } catch (mkdirErr) { + console.error(`Failed to create directory ${current}:`, mkdirErr); + throw mkdirErr; + } + } else { + console.error(`Error processing path ${current}:`, e); + throw e; // Rethrow other errors + } + } + } + + // Verify the directory was created + try { + const stat = global.Module.FS.stat(dirPath); + if ((stat.mode & 61440) !== 16384) { // Not a directory + throw new Error(`Path ${dirPath} was created but is not a directory`); + } + } catch (verifyErr) { + console.error(`Failed to verify directory ${dirPath}:`, verifyErr); + throw verifyErr; + } + }, + + /** + * Prepare a model directory and load files + * @param {Array} files - List of files to prepare + * @param {string} baseDir - Base directory for the model + * @param {boolean} debug - Enable debug logging + * @returns {Promise} - Result of the preparation + */ + prepareModelDirectory: async function(files, baseDir = 'models', debug = false) { + if (debug) console.log(`Preparing model directory with base: ${baseDir}`); + + try { + // Create a unique directory name with random suffix to avoid conflicts + const uniqueSuffix = Math.random().toString(36).substring(2, 10); + const uniqueDir = `${baseDir}-${uniqueSuffix}`; + + if (debug) console.log(`Creating model directory: ${uniqueDir}`); + + // Force clean any problematic paths before creating new directories + this.forceCleanPaths(baseDir, uniqueDir, debug); + + // Track results for each file + const fileResults = []; + + // Process each file in the file list + const archiveFiles = 
files.filter(f => f.isZip); + const regularFiles = files.filter(f => !f.isZip); + + // First process all regular files to ensure the model directory is created + for (const file of regularFiles) { + try { + if (file.content) { + // Write string content directly to file + const filename = this.joinPaths(uniqueDir, file.filename); + const directoryPath = filename.substring(0, filename.lastIndexOf('/')); + + if (debug) console.log(`Writing content to ${filename}`); + + // Ensure the directory exists + this.mkdirp(directoryPath); + + // Write the file + FS.writeFile(filename, file.content); + + fileResults.push({ + success: true, + path: filename, + original: file + }); + } else if (file.url) { + // Load file from URL + if (debug) console.log(`Fetching file from ${file.url}`); + const response = await fetch(file.url); + + if (!response.ok) { + console.error(`Failed to fetch ${file.url}: ${response.status} ${response.statusText}`); + fileResults.push({ + success: false, + error: `HTTP error: ${response.status}`, + original: file + }); + continue; + } + + // Write the downloaded file + const filename = this.joinPaths(uniqueDir, file.filename); + const directoryPath = filename.substring(0, filename.lastIndexOf('/')); + + if (debug) console.log(`Writing downloaded file to ${filename}`); + + // Ensure the directory exists + this.mkdirp(directoryPath); + + // Get binary data and write to file + const arrayBuffer = await response.arrayBuffer(); + FS.writeFile(filename, new Uint8Array(arrayBuffer), { encoding: 'binary' }); + + fileResults.push({ + success: true, + path: filename, + original: file + }); + } else { + console.error('Invalid file specification: no content or URL'); + fileResults.push({ + success: false, + error: 'Invalid file specification', + original: file + }); + } + } catch (error) { + console.error(`Error processing file: ${error.message}`); + fileResults.push({ + success: false, + error: error.message, + original: file + }); + } + } + + // Now process archives with the correct model directory path + for (const file of archiveFiles) { + try { + if (debug) console.log(`Fetching archive from ${file.url}`); + const response = await fetch(file.url); + + if (!response.ok) { + console.error(`Failed to fetch ${file.url}: ${response.status} ${response.statusText}`); + fileResults.push({ + success: false, + error: `HTTP error: ${response.status}`, + original: file + }); + continue; + } + + if (debug) console.log(`Processing archive ${file.url}`); + const zipData = await response.arrayBuffer(); + + // Set the extract path to the created model directory if not specified + const extractPath = file.extractToPath || uniqueDir; + + // Clean existing files if requested + if (file.cleanBeforeExtract) { + if (debug) console.log(`Cleaning before extraction at ${extractPath}`); + try { + // Create the directory if it doesn't exist + this.mkdirp(extractPath); + + // Remove any existing espeak-ng-data directory + const espeakDir = `${extractPath}/espeak-ng-data`; + try { + FS.stat(espeakDir); + if (debug) console.log(`Removing existing directory: ${espeakDir}`); + this.removePath(espeakDir); + } catch (e) { + // Directory doesn't exist, which is fine + } + } catch (cleanErr) { + console.warn(`Could not clean extraction path: ${cleanErr.message}`); + } + } + + const extractResult = await this.extractZip(zipData, extractPath, debug); + + if (extractResult.success) { + fileResults.push({ + success: true, + path: extractPath, + original: file, + extractedFiles: extractResult.files + }); + } else { + 
fileResults.push({ + success: false, + error: extractResult.error, + original: file + }); + } + } catch (error) { + console.error(`Error processing archive file: ${error.message}`); + fileResults.push({ + success: false, + error: error.message, + original: file + }); + } + } + + // Check if any files failed to load + const success = fileResults.some(result => result.success); + + if (debug) { + console.log(`Model preparation ${success ? 'successful' : 'partially successful'}`); + console.log(`Loaded ${fileResults.filter(r => r.success).length} of ${fileResults.length} files`); + } + + return { + modelDir: uniqueDir, + success, + files: fileResults + }; + } catch (error) { + console.error(`Error in prepareModelDirectory:`, error); + return { + success: false, + error: error.message + }; + } + }, + + /** + * Join path segments properly + * @param {...string} paths - Path segments to join + * @returns {string} - Joined path + */ + joinPaths: function(...paths) { + return paths.join('/').replace(/\/+/g, '/'); + }, + + /** + * Ensure a directory exists, creating it if necessary + * @param {string} dirPath - Directory path to ensure + */ + ensureDirectory: function(dirPath) { + if (!dirPath) return; + + // Skip if it's the root directory + if (dirPath === '/') return; + + try { + // Check if directory exists + const stat = FS.stat(dirPath); + if (stat.isDir) return; // Already exists - using isDir property, not a function + throw new Error(`Path exists but is not a directory: ${dirPath}`); + } catch (error) { + // If error is that the path doesn't exist, create it + if (error.errno === 44 || error.message.includes('No such file or directory')) { + // Ensure parent directory exists first + const parentDir = dirPath.split('/').slice(0, -1).join('/'); + if (parentDir) this.ensureDirectory(parentDir); + + // Create this directory + FS.mkdir(dirPath); + return; + } + + // For other errors, rethrow + throw error; + } + }, + + /** + * Debug the filesystem by listing key directories + * @param {boolean} debug - Enable debug output + */ + debugFilesystem: function(debug = true) { + if (!debug) return; + + try { + console.log("--- FILESYSTEM DEBUG ---"); + + // List root directory + const rootEntries = global.Module.FS.readdir('/'); + console.log("Root directory contents:", rootEntries); + + // Check relevant TTS model directories + for (const entry of rootEntries) { + // Only check TTS-related directories + if (entry === 'tts-models' || entry.startsWith('tts-models-')) { + try { + const stat = global.Module.FS.stat('/' + entry); + const isDir = (stat.mode & 61440) === 16384; // S_IFDIR + + if (isDir) { + const subEntries = global.Module.FS.readdir('/' + entry); + console.log(`Contents of /${entry}:`, subEntries); + } else { + console.log(`/${entry}: Not a directory`); + } + } catch (err) { + console.log(`Error checking /${entry}:`, err.message); + } + } + } + + console.log("--- END FILESYSTEM DEBUG ---"); + } catch (err) { + console.log("Filesystem debug error:", err.message); + } + }, + + /** + * Clean up paths that may cause conflicts + * @param {string} modelDir - The model directory + * @param {string} uniqueDir - The unique model directory + * @param {boolean} debug - Enable debug output + */ + forceCleanPaths: function(modelDir, uniqueDir, debug = false) { + try { + if (debug) console.log(`Cleaning paths: ${modelDir}, ${uniqueDir}`); + + // Clean up common problematic paths + const pathsToClean = [ + '/espeak-ng-data', + `/${modelDir}`, + `/${uniqueDir}` + ]; + + for (const path of pathsToClean) { 
+ try { + // Check if path exists + const stat = global.Module.FS.stat(path); + const type = stat.mode & 61440; + const isFile = type === 32768; + const isDir = type === 16384; + + if (isFile) { + if (debug) console.log(`Removing file at ${path}`); + global.Module.FS.unlink(path); + } else if (isDir) { + if (debug) console.log(`Removing directory at ${path}`); + this.removePath(path, debug); + } + } catch (err) { + // Path doesn't exist, which is fine + } + } + } catch (err) { + console.error("Error cleaning paths:", err.message); + } + } + }; + + // Expose SherpaOnnx to the global object + global.SherpaOnnx = SherpaOnnx; +})(typeof window !== 'undefined' ? window : global); \ No newline at end of file diff --git a/wasm/combined/sherpa-onnx-enhancement.js b/wasm/combined/sherpa-onnx-enhancement.js new file mode 100644 index 0000000000..0b833c8548 --- /dev/null +++ b/wasm/combined/sherpa-onnx-enhancement.js @@ -0,0 +1,96 @@ +/** + * sherpa-onnx-enhancement.js + * + * Speech Enhancement functionality for SherpaOnnx + * Requires sherpa-onnx-core.js to be loaded first + */ + +(function(global) { + // Ensure the namespace exists + if (!global.SherpaOnnx) { + console.error('SherpaOnnx namespace not found. Make sure to load sherpa-onnx-core.js first.'); + return; + } + + // Get a reference to the SherpaOnnx namespace + const SherpaOnnx = global.SherpaOnnx; + + // Create or use existing SpeechEnhancement namespace + SherpaOnnx.SpeechEnhancement = SherpaOnnx.SpeechEnhancement || {}; + + // Define the SpeechEnhancement module functionality + SherpaOnnx.SpeechEnhancement = { + /** + * Load a Speech Enhancement model from URL + * @param {Object} modelConfig - Configuration for the model + * @returns {Promise} - Information about the loaded model + */ + loadModel: async function(modelConfig) { + const modelDir = modelConfig.modelDir || 'speech-enhancement-models'; + + try { + global.Module.FS.mkdir(modelDir, 0o777); + } catch(e) { + if (e.code !== 'EEXIST') throw e; + } + + // Load the model + await SherpaOnnx.FileSystem.loadFile(modelConfig.model || 'assets/enhancement/gtcrn.onnx', `${modelDir}/model.onnx`); + + return { + modelDir: modelDir + }; + }, + + /** + * Create a Speech Enhancement instance with the loaded model + * @param {Object} loadedModel - Model information returned by loadModel + * @param {Object} options - Additional configuration options + * @returns {SpeechEnhancer} - A Speech Enhancement instance + */ + createSpeechEnhancer: function(loadedModel, options = {}) { + // This is a placeholder for actual implementation + // In a real implementation, you would create the configuration + // and pass it to the WASM module + + const config = { + model: { + gtcrn: { + model: `${loadedModel.modelDir}/model.onnx` + }, + numThreads: options.numThreads || 1, + debug: options.debug !== undefined ? 
options.debug : 1, + provider: options.provider || 'cpu' + } + }; + + // In a real implementation, you would create and return an instance + // of a SpeechEnhancer class + + console.warn('Speech Enhancement implementation is not fully functional yet'); + + // Placeholder for the actual implementation + return { + config: config, + + // Placeholder methods that would normally interact with the WASM module + process: function(audioSamples, sampleRate) { + console.warn('SpeechEnhancement.process is a placeholder'); + return { + enhancedSamples: audioSamples, // Just return the original samples for now + sampleRate: sampleRate + }; + }, + + free: function() { + console.warn('SpeechEnhancement.free is a placeholder'); + } + }; + } + }; + + // For Node.js environments + if (typeof module !== 'undefined' && module.exports) { + module.exports = SherpaOnnx; + } +})(typeof window !== 'undefined' ? window : global); \ No newline at end of file diff --git a/wasm/combined/sherpa-onnx-kws.js b/wasm/combined/sherpa-onnx-kws.js new file mode 100644 index 0000000000..5f93a19625 --- /dev/null +++ b/wasm/combined/sherpa-onnx-kws.js @@ -0,0 +1,658 @@ +/** + * sherpa-onnx-kws.js + * + * Keyword Spotting functionality for SherpaOnnx + * Requires sherpa-onnx-core.js to be loaded first + */ + +(function(global) { + // Ensure the namespace exists + if (!global.SherpaOnnx) { + console.error('SherpaOnnx namespace not found. Make sure to load sherpa-onnx-core.js first.'); + return; + } + + // Get a reference to the SherpaOnnx namespace + const SherpaOnnx = global.SherpaOnnx; + + // Create or use existing KWS namespace + SherpaOnnx.KWS = SherpaOnnx.KWS || {}; + + // Define the KWS module functionality + SherpaOnnx.KWS = { + /** + * Load a KWS model from URLs + * @param {Object} modelConfig - Configuration for the model + * @returns {Promise} - Information about the loaded model + */ + loadModel: async function(modelConfig) { + const modelDir = modelConfig.modelDir || 'kws-models'; + const debug = modelConfig.debug || false; + + try { + global.Module.FS.mkdir(modelDir, 0o777); + } catch(e) { + if (e.code !== 'EEXIST') throw e; + } + + if (debug) console.log(`Loading KWS model files to ${modelDir}`); + + // Load model files and store the actual paths + const actualPaths = {}; + + // Load encoder + const encoderResult = await SherpaOnnx.FileSystem.safeLoadFile( + modelConfig.encoder || 'assets/kws/encoder.onnx', + `${modelDir}/encoder.onnx`, + debug + ); + actualPaths.encoder = encoderResult.path || `${modelDir}/encoder.onnx`; + if (debug) console.log(`Loaded encoder to ${actualPaths.encoder}`); + + // Load decoder + const decoderResult = await SherpaOnnx.FileSystem.safeLoadFile( + modelConfig.decoder || 'assets/kws/decoder.onnx', + `${modelDir}/decoder.onnx`, + debug + ); + actualPaths.decoder = decoderResult.path || `${modelDir}/decoder.onnx`; + if (debug) console.log(`Loaded decoder to ${actualPaths.decoder}`); + + // Load joiner + const joinerResult = await SherpaOnnx.FileSystem.safeLoadFile( + modelConfig.joiner || 'assets/kws/joiner.onnx', + `${modelDir}/joiner.onnx`, + debug + ); + actualPaths.joiner = joinerResult.path || `${modelDir}/joiner.onnx`; + if (debug) console.log(`Loaded joiner to ${actualPaths.joiner}`); + + // Load tokens file + const tokensResult = await SherpaOnnx.FileSystem.safeLoadFile( + modelConfig.tokens || 'assets/kws/tokens.txt', + `${modelDir}/tokens.txt`, + debug + ); + actualPaths.tokens = tokensResult.path || `${modelDir}/tokens.txt`; + if (debug) console.log(`Loaded tokens to 
${actualPaths.tokens}`); + + // Load the tokens content for validation + try { + const tokensContent = global.Module.FS.readFile(actualPaths.tokens, { encoding: 'utf8' }); + actualPaths.tokensMap = this.parseTokensFile(tokensContent); + if (debug) console.log(`Parsed ${Object.keys(actualPaths.tokensMap).length} tokens`); + } catch (e) { + console.error(`Failed to read tokens file: ${e.message}`); + actualPaths.tokensMap = null; + } + + // Load keywords file if provided + if (modelConfig.keywordsFile) { + const keywordsResult = await SherpaOnnx.FileSystem.safeLoadFile( + modelConfig.keywordsFile, + `${modelDir}/keywords.txt`, + debug + ); + actualPaths.keywordsFile = keywordsResult.path || `${modelDir}/keywords.txt`; + if (debug) console.log(`Loaded keywords file to ${actualPaths.keywordsFile}`); + } + + return { + modelDir: modelDir, + paths: actualPaths + }; + }, + + /** + * Parse the tokens file to create a map of valid tokens + * @param {string} content - The content of the tokens file + * @returns {Object} - Map of tokens to their IDs + */ + parseTokensFile: function(content) { + const tokensMap = {}; + const lines = content.split('\n'); + + for (const line of lines) { + const parts = line.trim().split(' '); + if (parts.length >= 2) { + const token = parts[0]; + const id = parseInt(parts[1]); + if (!isNaN(id)) { + tokensMap[token] = id; + } + } + } + + return tokensMap; + }, + + /** + * Validate keywords against available tokens + * @param {string} keywords - The keywords to validate + * @param {Object} tokensMap - Map of valid tokens + * @returns {Object} - Validation result with formatted keywords + */ + validateKeywords: function(keywords, tokensMap) { + if (!tokensMap) return { valid: false, message: 'No tokens available for validation' }; + + const lines = keywords.split('\n'); + const validatedLines = []; + const invalidTokens = new Set(); + let isValid = true; + + for (const line of lines) { + // Skip empty lines + if (!line.trim()) continue; + + const parts = line.trim().split('@'); + let phonetic = parts[0].trim(); + const label = parts.length > 1 ? parts[1].trim() : phonetic; + + // Validate each token in the phonetic representation + const tokens = phonetic.split(' ').filter(t => t); + const validTokens = []; + + for (const token of tokens) { + if (token in tokensMap) { + validTokens.push(token); + } else { + invalidTokens.add(token); + isValid = false; + } + } + + const validatedLine = validTokens.join(' ') + ' @' + label; + validatedLines.push(validatedLine); + } + + return { + valid: isValid, + formattedKeywords: validatedLines.join('\n'), + invalidTokens: [...invalidTokens], + message: isValid ? 
'All keywords are valid' : + `Invalid tokens: ${[...invalidTokens].join(', ')}` + }; + }, + + /** + * Create a Keyword Spotter with the loaded model + * @param {Object} loadedModel - Model information returned by loadModel + * @param {Object} options - Additional configuration options + * @returns {KeywordSpotter} - An instance of KeywordSpotter + */ + createKeywordSpotter: function(loadedModel, options = {}) { + const debug = options.debug || false; + + // Create transducer configuration using actual paths + const transducerConfig = { + encoder: loadedModel.paths.encoder, + decoder: loadedModel.paths.decoder, + joiner: loadedModel.paths.joiner, + }; + + // Create model configuration + const modelConfig = { + transducer: transducerConfig, + tokens: loadedModel.paths.tokens, + provider: options.provider || 'cpu', + numThreads: options.numThreads || 1, + debug: options.debug !== undefined ? options.debug : 1, + }; + + // Create feature configuration + const featConfig = { + samplingRate: options.sampleRate || 16000, + featureDim: options.featureDim || 80, + }; + + // First, create a keywords.txt file in the same directory as the tokens file + const tokensPath = loadedModel.paths.tokens; + const tokensDir = tokensPath.substring(0, tokensPath.lastIndexOf('/')); + const keywordsPath = `${tokensDir}/keywords.txt`; + + // Default keywords as individual characters + let defaultKeywords = + "h e l l o @Hello\n" + + "c o m p u t e r @Computer\n" + + "a l e x a @Alexa"; + + // Use provided keywords or default, then validate + let keywordsContent = options.keywords || defaultKeywords; + + // Validate the keywords against the tokens map if available + if (loadedModel.paths.tokensMap) { + const validationResult = this.validateKeywords(keywordsContent, loadedModel.paths.tokensMap); + + if (!validationResult.valid) { + console.warn(`Keyword validation failed: ${validationResult.message}`); + console.warn('Using only valid tokens for keywords'); + } + + keywordsContent = validationResult.formattedKeywords; + + if (debug) { + console.log(`Validation result:`, validationResult); + } + } + + try { + // Make sure file exists with absolute path + global.Module.FS.writeFile(keywordsPath, keywordsContent); + console.log(`Created keywords file at: ${keywordsPath}`); + + if (debug) { + console.log(`Keywords content: ${keywordsContent}`); + } + + // Verify the file is created + try { + const stat = global.Module.FS.stat(keywordsPath); + if (debug) console.log(`Keywords file exists, size: ${stat.size} bytes`); + } catch (e) { + console.error(`Failed to verify keywords file at ${keywordsPath}:`, e); + } + } catch (e) { + console.error('Failed to write keywords file:', e); + } + + // Create the KWS configuration + const configObj = { + featConfig: featConfig, + modelConfig: modelConfig, + maxActivePaths: options.maxActivePaths || 4, + numTrailingBlanks: options.numTrailingBlanks || 1, + keywordsScore: options.keywordsScore || 1.0, + keywordsThreshold: options.keywordsThreshold || 0.25, + keywordsFile: keywordsPath + }; + + if (debug) { + console.log('KWS Configuration:', JSON.stringify(configObj, null, 2)); + } + + // Create the KWS instance using the global createKws helper + if (typeof createKws === 'function') { + return createKws(global.Module, configObj); + } + + // Fall back to our implementation if global function not available + return new global.Kws(configObj, global.Module); + } + }; + + /** + * Wrapper for Stream class + */ + global.Stream = global.Stream || function(handle, Module) { + this.handle = handle; 
+ this.Module = Module; + this.pointer = null; + this.n = 0; + + /** + * Free the stream + */ + this.free = function() { + if (this.handle) { + this.Module._SherpaOnnxDestroyOnlineStream(this.handle); + this.handle = null; + if (this.pointer) { + this.Module._free(this.pointer); + this.pointer = null; + this.n = 0; + } + } + }; + + /** + * Accept audio waveform data + * @param {number} sampleRate - Sample rate of the audio + * @param {Float32Array} samples - Audio samples in [-1, 1] range + */ + this.acceptWaveform = function(sampleRate, samples) { + if (this.n < samples.length) { + if (this.pointer) { + this.Module._free(this.pointer); + } + this.pointer = this.Module._malloc(samples.length * samples.BYTES_PER_ELEMENT); + this.n = samples.length; + } + + this.Module.HEAPF32.set(samples, this.pointer / samples.BYTES_PER_ELEMENT); + this.Module._SherpaOnnxOnlineStreamAcceptWaveform( + this.handle, sampleRate, this.pointer, samples.length); + }; + + /** + * Signal that input is finished + */ + this.inputFinished = function() { + this.Module._SherpaOnnxOnlineStreamInputFinished(this.handle); + }; + }; + + /** + * KeywordSpotter class + */ + global.Kws = global.Kws || function(configObj, Module) { + this.config = configObj; + + // Initialize the configuration + const config = initKwsConfig(configObj, Module); + const handle = Module._SherpaOnnxCreateKeywordSpotter(config.ptr); + + // Free the configuration + freeConfig(config, Module); + + this.handle = handle; + this.Module = Module; + + /** + * Free the keyword spotter + */ + this.free = function() { + this.Module._SherpaOnnxDestroyKeywordSpotter(this.handle); + this.handle = 0; + }; + + /** + * Create a stream for keyword spotting + * @returns {Stream} - A new stream for keyword spotting + */ + this.createStream = function() { + const handle = this.Module._SherpaOnnxCreateKeywordStream(this.handle); + return new global.Stream(handle, this.Module); + }; + + /** + * Check if the stream is ready for decoding + * @param {Stream} stream - The stream to check + * @returns {boolean} - True if ready, false otherwise + */ + this.isReady = function(stream) { + return this.Module._SherpaOnnxIsKeywordStreamReady( + this.handle, stream.handle) === 1; + }; + + /** + * Decode the audio in the stream for keyword spotting + * @param {Stream} stream - The stream to decode + */ + this.decode = function(stream) { + this.Module._SherpaOnnxDecodeKeywordStream(this.handle, stream.handle); + }; + + /** + * Reset the stream after keyword detection + * @param {Stream} stream - The stream to reset + */ + this.reset = function(stream) { + this.Module._SherpaOnnxResetKeywordStream(this.handle, stream.handle); + }; + + /** + * Get the keyword spotting result + * @param {Stream} stream - The stream to get results from + * @returns {Object} - Keyword spotting result as JSON + */ + this.getResult = function(stream) { + const r = this.Module._SherpaOnnxGetKeywordResult(this.handle, stream.handle); + const jsonPtr = this.Module.getValue(r + 24, 'i8*'); + const json = this.Module.UTF8ToString(jsonPtr); + this.Module._SherpaOnnxDestroyKeywordResult(r); + return JSON.parse(json); + }; + }; + + // For Node.js environments + if (typeof module !== 'undefined' && module.exports) { + module.exports = SherpaOnnx; + } +})(typeof window !== 'undefined' ? 
window : global); + +/** + * Initialize feature extractor configuration + */ +function initFeatureExtractorConfig(config, Module) { + const ptr = Module._malloc(4 * 2); + Module.setValue(ptr, config.samplingRate || 16000, 'i32'); + Module.setValue(ptr + 4, config.featureDim || 80, 'i32'); + return { + ptr: ptr, + len: 8, + }; +} + +/** + * Initialize transducer model configuration + */ +function initSherpaOnnxOnlineTransducerModelConfig(config, Module) { + const encoderLen = Module.lengthBytesUTF8(config.encoder) + 1; + const decoderLen = Module.lengthBytesUTF8(config.decoder) + 1; + const joinerLen = Module.lengthBytesUTF8(config.joiner) + 1; + + const n = encoderLen + decoderLen + joinerLen; + const buffer = Module._malloc(n); + + const len = 3 * 4; // 3 pointers + const ptr = Module._malloc(len); + + let offset = 0; + Module.stringToUTF8(config.encoder, buffer + offset, encoderLen); + offset += encoderLen; + + Module.stringToUTF8(config.decoder, buffer + offset, decoderLen); + offset += decoderLen; + + Module.stringToUTF8(config.joiner, buffer + offset, joinerLen); + + offset = 0; + Module.setValue(ptr, buffer + offset, 'i8*'); + offset += encoderLen; + + Module.setValue(ptr + 4, buffer + offset, 'i8*'); + offset += decoderLen; + + Module.setValue(ptr + 8, buffer + offset, 'i8*'); + + return { + buffer: buffer, + ptr: ptr, + len: len, + }; +} + +/** + * Initialize model configuration + */ +function initModelConfig(config, Module) { + if (!('tokensBuf' in config)) { + config.tokensBuf = ''; + } + + if (!('tokensBufSize' in config)) { + config.tokensBufSize = 0; + } + + const transducer = initSherpaOnnxOnlineTransducerModelConfig(config.transducer, Module); + const paraformer_len = 2 * 4; + const ctc_len = 1 * 4; + + const len = transducer.len + paraformer_len + ctc_len + 9 * 4; + const ptr = Module._malloc(len); + Module.HEAPU8.fill(0, ptr, ptr + len); + + let offset = 0; + Module._CopyHeap(transducer.ptr, transducer.len, ptr + offset); + + const tokensLen = Module.lengthBytesUTF8(config.tokens) + 1; + const providerLen = Module.lengthBytesUTF8(config.provider || 'cpu') + 1; + const buffer = Module._malloc(tokensLen + providerLen); + + offset = 0; + Module.stringToUTF8(config.tokens, buffer, tokensLen); + offset += tokensLen; + + Module.stringToUTF8(config.provider || 'cpu', buffer + offset, providerLen); + + offset = transducer.len + paraformer_len + ctc_len; + Module.setValue(ptr + offset, buffer, 'i8*'); + offset += 4; + + Module.setValue(ptr + offset, config.numThreads || 1, 'i32'); + offset += 4; + + Module.setValue(ptr + offset, buffer + tokensLen, 'i8*'); + offset += 4; + + Module.setValue(ptr + offset, config.debug, 'i32'); + + return { + buffer: buffer, + ptr: ptr, + len: len, + transducer: transducer + }; +} + +/** + * Initialize KWS configuration + */ +function initKwsConfig(config, Module) { + if (!('featConfig' in config)) { + config.featConfig = { + samplingRate: 16000, + featureDim: 80, + }; + } + + if (!('keywordsBuf' in config)) { + config.keywordsBuf = ''; + } + + if (!('keywordsBufSize' in config)) { + config.keywordsBufSize = 0; + } + + const featConfig = initFeatureExtractorConfig(config.featConfig, Module); + const modelConfig = initModelConfig(config.modelConfig, Module); + const numBytes = featConfig.len + modelConfig.len + 4 * 7; + + const ptr = Module._malloc(numBytes); + let offset = 0; + Module._CopyHeap(featConfig.ptr, featConfig.len, ptr + offset); + offset += featConfig.len; + + Module._CopyHeap(modelConfig.ptr, modelConfig.len, ptr + offset); + offset += 
modelConfig.len; + + Module.setValue(ptr + offset, config.maxActivePaths || 4, 'i32'); + offset += 4; + + Module.setValue(ptr + offset, config.numTrailingBlanks || 1, 'i32'); + offset += 4; + + Module.setValue(ptr + offset, config.keywordsScore || 1.0, 'float'); + offset += 4; + + Module.setValue(ptr + offset, config.keywordsThreshold || 0.25, 'float'); + offset += 4; + + // Handle keywords file + let keywordsFileBuffer = 0; + if (config.keywordsFile) { + const keywordsFileLen = Module.lengthBytesUTF8(config.keywordsFile) + 1; + keywordsFileBuffer = Module._malloc(keywordsFileLen); + Module.stringToUTF8(config.keywordsFile, keywordsFileBuffer, keywordsFileLen); + } + + // Set keywords_file + Module.setValue(ptr + offset, keywordsFileBuffer, 'i8*'); + offset += 4; + + // Set keywords_buf to 0 - we're using a file instead + Module.setValue(ptr + offset, 0, 'i8*'); + offset += 4; + + // Set keywords_buf_size to 0 + Module.setValue(ptr + offset, 0, 'i32'); + offset += 4; + + return { + ptr: ptr, + len: numBytes, + featConfig: featConfig, + modelConfig: modelConfig, + keywordsFileBuffer: keywordsFileBuffer + }; +} + +/** + * Free configuration memory + */ +function freeConfig(config, Module) { + if ('buffer' in config) { + Module._free(config.buffer); + } + + if ('transducer' in config) { + freeConfig(config.transducer, Module); + } + + if ('featConfig' in config) { + freeConfig(config.featConfig, Module); + } + + if ('modelConfig' in config) { + freeConfig(config.modelConfig, Module); + } + + if ('keywordsFileBuffer' in config && config.keywordsFileBuffer) { + Module._free(config.keywordsFileBuffer); + } + + Module._free(config.ptr); +} + +/** + * Global helper function to create a Kws instance + */ +function createKws(Module, myConfig) { + let transducerConfig = { + encoder: './encoder-epoch-12-avg-2-chunk-16-left-64.onnx', + decoder: './decoder-epoch-12-avg-2-chunk-16-left-64.onnx', + joiner: './joiner-epoch-12-avg-2-chunk-16-left-64.onnx', + }; + + let modelConfig = { + transducer: transducerConfig, + tokens: './tokens.txt', + provider: 'cpu', + modelType: '', + numThreads: 1, + debug: 1, + modelingUnit: 'cjkchar', + bpeVocab: '', + }; + + let featConfig = { + samplingRate: 16000, + featureDim: 80, + }; + + let configObj = { + featConfig: featConfig, + modelConfig: modelConfig, + maxActivePaths: 4, + numTrailingBlanks: 1, + keywordsScore: 1.0, + keywordsThreshold: 0.25, + // Use keywordsFile instead of keywords + keywordsFile: './keywords.txt' + }; + + if (myConfig) { + configObj = myConfig; + } + return new Kws(configObj, Module); +} \ No newline at end of file diff --git a/wasm/combined/sherpa-onnx-speaker.js b/wasm/combined/sherpa-onnx-speaker.js new file mode 100644 index 0000000000..5153983768 --- /dev/null +++ b/wasm/combined/sherpa-onnx-speaker.js @@ -0,0 +1,110 @@ +/** + * sherpa-onnx-speaker.js + * + * Speaker Diarization functionality for SherpaOnnx + * Requires sherpa-onnx-core.js to be loaded first + */ + +(function(global) { + // Ensure the namespace exists + if (!global.SherpaOnnx) { + console.error('SherpaOnnx namespace not found. 
Make sure to load sherpa-onnx-core.js first.'); + return; + } + + // Get a reference to the SherpaOnnx namespace + const SherpaOnnx = global.SherpaOnnx; + + // Create or use existing SpeakerDiarization namespace + SherpaOnnx.SpeakerDiarization = SherpaOnnx.SpeakerDiarization || {}; + + // Define the SpeakerDiarization module functionality + SherpaOnnx.SpeakerDiarization = { + /** + * Load Speaker Diarization models from URLs + * @param {Object} modelConfig - Configuration for the models + * @returns {Promise} - Information about the loaded models + */ + loadModel: async function(modelConfig) { + const modelDir = modelConfig.modelDir || 'speaker-diarization-models'; + + try { + global.Module.FS.mkdir(modelDir, 0o777); + } catch(e) { + if (e.code !== 'EEXIST') throw e; + } + + // Load segmentation and embedding models + await Promise.all([ + SherpaOnnx.FileSystem.loadFile(modelConfig.segmentation || 'assets/speakers/segmentation.onnx', `${modelDir}/segmentation.onnx`), + SherpaOnnx.FileSystem.loadFile(modelConfig.embedding || 'assets/speakers/embedding.onnx', `${modelDir}/embedding.onnx`) + ]); + + return { + modelDir: modelDir + }; + }, + + /** + * Create a Speaker Diarization instance with the loaded models + * @param {Object} loadedModel - Model information returned by loadModel + * @param {Object} options - Additional configuration options + * @returns {SpeakerDiarization} - A Speaker Diarization instance + */ + createSpeakerDiarization: function(loadedModel, options = {}) { + // This is a placeholder for actual implementation + // In a real implementation, you would create the configuration + // and pass it to the WASM module + + const config = { + segmentation: { + pyannote: { + model: `${loadedModel.modelDir}/segmentation.onnx` + }, + numThreads: options.numThreads || 1, + debug: options.debug !== undefined ? options.debug : 1, + provider: options.provider || 'cpu' + }, + embedding: { + model: `${loadedModel.modelDir}/embedding.onnx`, + numThreads: options.numThreads || 1, + debug: options.debug !== undefined ? options.debug : 1, + provider: options.provider || 'cpu' + }, + clustering: { + numClusters: options.numClusters || 0, // 0 means auto-detect + threshold: options.threshold || 0.8 + }, + minDurationOn: options.minDurationOn || 0.5, + minDurationOff: options.minDurationOff || 0.5 + }; + + // In a real implementation, you would create and return an instance + // of a SpeakerDiarization class + + console.warn('Speaker Diarization implementation is not fully functional yet'); + + // Placeholder for the actual implementation + return { + config: config, + + // Placeholder methods that would normally interact with the WASM module + process: function(audioSamples, sampleRate) { + console.warn('SpeakerDiarization.process is a placeholder'); + return { + segments: [] + }; + }, + + free: function() { + console.warn('SpeakerDiarization.free is a placeholder'); + } + }; + } + }; + + // For Node.js environments + if (typeof module !== 'undefined' && module.exports) { + module.exports = SherpaOnnx; + } +})(typeof window !== 'undefined' ? 
window : global); \ No newline at end of file diff --git a/wasm/combined/sherpa-onnx-tts.js b/wasm/combined/sherpa-onnx-tts.js new file mode 100644 index 0000000000..4b22a3a104 --- /dev/null +++ b/wasm/combined/sherpa-onnx-tts.js @@ -0,0 +1,844 @@ +/** + * sherpa-onnx-tts.js + * + * Text-to-Speech functionality for SherpaOnnx + * Requires sherpa-onnx-core.js to be loaded first + */ + +(function(global) { + // Ensure the namespace exists + if (!global.SherpaOnnx) { + console.error('SherpaOnnx namespace not found. Make sure to load sherpa-onnx-core.js first.'); + return; + } + + // Get a reference to the SherpaOnnx namespace + const SherpaOnnx = global.SherpaOnnx; + + // Create or use existing TTS namespace + SherpaOnnx.TTS = SherpaOnnx.TTS || {}; + + // Define the TTS module functionality + SherpaOnnx.TTS = { + /** + * Load a TTS model from URLs + * @param {Object} modelConfig - Configuration for the model + * @returns {Promise} - Information about the loaded model + */ + loadModel: async function(modelConfig) { + const debug = modelConfig.debug || false; + const modelDir = modelConfig.modelDir || 'tts-models'; + + if (debug) console.log(`TTS.loadModel: Starting with base dir ${modelDir}`); + + try { + // Always use clean start to avoid conflicts + if (debug) console.log(`Cleaning model directory to prevent conflicts: ${modelDir}`); + SherpaOnnx.FileSystem.removePath(modelDir, debug); + + // Flag to track if we need espeak data + let needsEspeakData = false; + + // Prepare file list based on model type + const files = []; + + if (modelConfig.type === 'vits') { + // Add model file + files.push({ + url: modelConfig.model || 'assets/tts/model.onnx', + filename: 'model.onnx' + }); + + // Add tokens file + files.push({ + url: modelConfig.tokens || 'assets/tts/tokens.txt', + filename: 'tokens.txt' + }); + + // Add lexicon if provided + if (modelConfig.lexicon) { + files.push({ + url: modelConfig.lexicon, + filename: 'lexicon.txt' + }); + } + + // Flag that we need espeak-ng-data + if (debug) console.log("Will load espeak-ng-data after model directory creation"); + needsEspeakData = true; + } else if (modelConfig.type === 'matcha') { + // Add required files for matcha + files.push({ + url: modelConfig.acousticModel || 'assets/tts/acoustic_model.onnx', + filename: 'acoustic_model.onnx' + }); + + files.push({ + url: modelConfig.vocoder || 'assets/tts/vocoder.onnx', + filename: 'vocoder.onnx' + }); + + files.push({ + url: modelConfig.tokens || 'assets/tts/tokens.txt', + filename: 'tokens.txt' + }); + + if (modelConfig.lexicon) { + files.push({ + url: modelConfig.lexicon, + filename: 'lexicon.txt' + }); + } + } else if (modelConfig.type === 'kokoro') { + // Add required files for kokoro + files.push({ + url: modelConfig.model || 'assets/tts/kokoro/model.onnx', + filename: 'kokoro_model.onnx' + }); + + files.push({ + url: modelConfig.tokens || 'assets/tts/kokoro/tokens.txt', + filename: 'tokens.txt' + }); + + if (modelConfig.voices) { + files.push({ + url: modelConfig.voices, + filename: 'voices.txt' + }); + } + } + + if (debug) console.log(`Prepared ${files.length} files to load for TTS model`); + + // Create unique model directory and load files + const result = await SherpaOnnx.FileSystem.prepareModelDirectory( + files, + modelDir, + debug + ); + + if (!result.success) { + console.error("Failed to load model files:", result); + throw new Error("Failed to load TTS model files"); + } + + // Handle espeak-ng-data for VITS models + if (modelConfig.type === 'vits' && needsEspeakData) { + if (debug) 
console.log(`Loading espeak-ng-data.zip into ${result.modelDir}`); + + try { + // Use configurable URL if provided, otherwise use default + const espeakZipUrl = modelConfig.espeakDataZip || 'assets/tts/espeak-ng-data.zip'; + if (debug) console.log(`Fetching espeak-ng-data from ${espeakZipUrl}`); + + const zipResponse = await fetch(espeakZipUrl); + const zipData = await zipResponse.arrayBuffer(); + + await SherpaOnnx.FileSystem.extractZip( + zipData, + result.modelDir, + debug + ); + } catch (zipError) { + console.error("Error processing espeak-ng-data.zip:", zipError); + } + } + + // Organize files by type + const modelFiles = {}; + const successFiles = result.files.filter(f => f.success); + + if (debug) console.log(`Successfully loaded ${successFiles.length} of ${result.files.length} files`); + + // Map files to their proper keys + successFiles.forEach(file => { + const filename = file.original.filename; + + if (filename === 'model.onnx') modelFiles.model = file.path; + else if (filename === 'acoustic_model.onnx') modelFiles.acousticModel = file.path; + else if (filename === 'vocoder.onnx') modelFiles.vocoder = file.path; + else if (filename === 'tokens.txt') modelFiles.tokens = file.path; + else if (filename === 'lexicon.txt') modelFiles.lexicon = file.path; + else if (filename === 'voices.txt') modelFiles.voices = file.path; + else if (filename === 'kokoro_model.onnx') modelFiles.kokoroModel = file.path; + }); + + // Return the model information + return { + modelDir: result.modelDir, + type: modelConfig.type, + files: modelFiles + }; + } catch(e) { + console.error(`TTS.loadModel: Error loading model:`, e); + throw e; + } + }, + + /** + * Create a TTS engine with the loaded model + * @param {Object} loadedModel - Model information returned by loadModel + * @param {Object} options - Additional configuration options + * @returns {OfflineTts} - An instance of OfflineTts + */ + createOfflineTts: function(loadedModel, options = {}) { + const debug = options.debug !== undefined ? options.debug : false; + + if (debug) { + console.log("Creating TTS engine with loaded model:", loadedModel); + } + + let config = null; + + if (loadedModel.type === 'vits') { + if (!loadedModel.files || !loadedModel.files.model || !loadedModel.files.tokens) { + throw new Error("Missing required files for VITS model configuration"); + } + + const offlineTtsVitsModelConfig = { + model: loadedModel.files.model, + lexicon: loadedModel.files.lexicon || '', + tokens: loadedModel.files.tokens, + dataDir: `${loadedModel.modelDir}/espeak-ng-data`, // Path to espeak-ng-data in model directory + dictDir: '', + noiseScale: options.noiseScale || 0.667, + noiseScaleW: options.noiseScaleW || 0.8, + lengthScale: options.lengthScale || 1.0, + }; + + const offlineTtsMatchaModelConfig = { + acousticModel: '', + vocoder: '', + lexicon: '', + tokens: '', + dataDir: '', + dictDir: '', + noiseScale: 0.667, + lengthScale: 1.0, + }; + + const offlineTtsKokoroModelConfig = { + model: '', + voices: '', + tokens: '', + dataDir: '', + lengthScale: 1.0, + dictDir: '', + lexicon: '', + }; + + const offlineTtsModelConfig = { + offlineTtsVitsModelConfig: offlineTtsVitsModelConfig, + offlineTtsMatchaModelConfig: offlineTtsMatchaModelConfig, + offlineTtsKokoroModelConfig: offlineTtsKokoroModelConfig, + numThreads: options.numThreads || 1, + debug: debug ? 
1 : 0, + provider: 'cpu', + }; + + config = { + offlineTtsModelConfig: offlineTtsModelConfig, + ruleFsts: '', + ruleFars: '', + maxNumSentences: 1, + silenceScale: options.silenceScale || 1.0 + }; + } else if (loadedModel.type === 'matcha') { + // Similar configuration for matcha... + // (Omitted for brevity) + } else if (loadedModel.type === 'kokoro') { + // Similar configuration for kokoro... + // (Omitted for brevity) + } else { + throw new Error(`Unsupported TTS model type: ${loadedModel.type}`); + } + + if (debug) { + console.log("Final TTS configuration:", JSON.stringify(config)); + } + + try { + // Create the offline TTS object + const tts = this.createOfflineTtsInternal(config, global.Module); + + // Track the resource for cleanup if tracking function is available + if (SherpaOnnx.trackResource) { + SherpaOnnx.trackResource('tts', tts); + } + + return tts; + } catch (error) { + console.error("Error creating TTS engine:", error); + throw error; + } + }, + + /** + * Internal function to create an offline TTS engine + * Following the reference implementation pattern + */ + createOfflineTtsInternal: function(config, Module) { + if (!config) { + console.error("TTS configuration is null or undefined"); + return null; + } + + if (typeof createOfflineTts === 'function') { + // Use the global createOfflineTts function if available + return createOfflineTts(Module, config); + } + + // Otherwise use our own implementation + return new global.OfflineTts(config, Module); + } + }; + + /** + * Free configuration memory allocated in WASM + * @param {Object} config - Configuration object with allocated memory + * @param {Object} Module - WebAssembly module + * @private + */ + function freeConfig(config, Module) { + if ('buffer' in config) { + Module._free(config.buffer); + } + + if ('config' in config) { + freeConfig(config.config, Module); + } + + if ('matcha' in config) { + freeConfig(config.matcha, Module); + } + + if ('kokoro' in config) { + freeConfig(config.kokoro, Module); + } + + if (config.ptr) { + Module._free(config.ptr); + } + } + + /** + * Initialize VITS model configuration + * @param {Object} config - VITS configuration + * @param {Object} Module - WebAssembly module + * @returns {Object} - Configuration with pointers + */ + function initSherpaOnnxOfflineTtsVitsModelConfig(config, Module) { + const modelLen = Module.lengthBytesUTF8(config.model || '') + 1; + const lexiconLen = Module.lengthBytesUTF8(config.lexicon || '') + 1; + const tokensLen = Module.lengthBytesUTF8(config.tokens || '') + 1; + const dataDirLen = Module.lengthBytesUTF8(config.dataDir || '') + 1; + const dictDirLen = Module.lengthBytesUTF8(config.dictDir || '') + 1; + + const n = modelLen + lexiconLen + tokensLen + dataDirLen + dictDirLen; + const buffer = Module._malloc(n); + + const len = 8 * 4; + const ptr = Module._malloc(len); + + let offset = 0; + Module.stringToUTF8(config.model || '', buffer + offset, modelLen); + offset += modelLen; + + Module.stringToUTF8(config.lexicon || '', buffer + offset, lexiconLen); + offset += lexiconLen; + + Module.stringToUTF8(config.tokens || '', buffer + offset, tokensLen); + offset += tokensLen; + + Module.stringToUTF8(config.dataDir || '', buffer + offset, dataDirLen); + offset += dataDirLen; + + Module.stringToUTF8(config.dictDir || '', buffer + offset, dictDirLen); + offset += dictDirLen; + + offset = 0; + Module.setValue(ptr, buffer + offset, 'i8*'); + offset += modelLen; + + Module.setValue(ptr + 4, buffer + offset, 'i8*'); + offset += lexiconLen; + + Module.setValue(ptr + 
8, buffer + offset, 'i8*'); + offset += tokensLen; + + Module.setValue(ptr + 12, buffer + offset, 'i8*'); + offset += dataDirLen; + + Module.setValue(ptr + 16, config.noiseScale || 0.667, 'float'); + Module.setValue(ptr + 20, config.noiseScaleW || 0.8, 'float'); + Module.setValue(ptr + 24, config.lengthScale || 1.0, 'float'); + Module.setValue(ptr + 28, buffer + offset, 'i8*'); + offset += dictDirLen; + + return { + buffer: buffer, ptr: ptr, len: len, + }; + } + + /** + * Initialize Matcha model configuration + * @param {Object} config - Matcha configuration + * @param {Object} Module - WebAssembly module + * @returns {Object} - Configuration with pointers + */ + function initSherpaOnnxOfflineTtsMatchaModelConfig(config, Module) { + const acousticModelLen = Module.lengthBytesUTF8(config.acousticModel || '') + 1; + const vocoderLen = Module.lengthBytesUTF8(config.vocoder || '') + 1; + const lexiconLen = Module.lengthBytesUTF8(config.lexicon || '') + 1; + const tokensLen = Module.lengthBytesUTF8(config.tokens || '') + 1; + const dataDirLen = Module.lengthBytesUTF8(config.dataDir || '') + 1; + const dictDirLen = Module.lengthBytesUTF8(config.dictDir || '') + 1; + + const n = acousticModelLen + vocoderLen + lexiconLen + tokensLen + + dataDirLen + dictDirLen; + + const buffer = Module._malloc(n); + const len = 8 * 4; + const ptr = Module._malloc(len); + + let offset = 0; + Module.stringToUTF8( + config.acousticModel || '', buffer + offset, acousticModelLen); + offset += acousticModelLen; + + Module.stringToUTF8(config.vocoder || '', buffer + offset, vocoderLen); + offset += vocoderLen; + + Module.stringToUTF8(config.lexicon || '', buffer + offset, lexiconLen); + offset += lexiconLen; + + Module.stringToUTF8(config.tokens || '', buffer + offset, tokensLen); + offset += tokensLen; + + Module.stringToUTF8(config.dataDir || '', buffer + offset, dataDirLen); + offset += dataDirLen; + + Module.stringToUTF8(config.dictDir || '', buffer + offset, dictDirLen); + offset += dictDirLen; + + offset = 0; + Module.setValue(ptr, buffer + offset, 'i8*'); + offset += acousticModelLen; + + Module.setValue(ptr + 4, buffer + offset, 'i8*'); + offset += vocoderLen; + + Module.setValue(ptr + 8, buffer + offset, 'i8*'); + offset += lexiconLen; + + Module.setValue(ptr + 12, buffer + offset, 'i8*'); + offset += tokensLen; + + Module.setValue(ptr + 16, buffer + offset, 'i8*'); + offset += dataDirLen; + + Module.setValue(ptr + 20, config.noiseScale || 0.667, 'float'); + Module.setValue(ptr + 24, config.lengthScale || 1.0, 'float'); + Module.setValue(ptr + 28, buffer + offset, 'i8*'); + offset += dictDirLen; + + return { + buffer: buffer, ptr: ptr, len: len, + }; + } + + /** + * Initialize Kokoro model configuration + * @param {Object} config - Kokoro configuration + * @param {Object} Module - WebAssembly module + * @returns {Object} - Configuration with pointers + */ + function initSherpaOnnxOfflineTtsKokoroModelConfig(config, Module) { + const modelLen = Module.lengthBytesUTF8(config.model || '') + 1; + const voicesLen = Module.lengthBytesUTF8(config.voices || '') + 1; + const tokensLen = Module.lengthBytesUTF8(config.tokens || '') + 1; + const dataDirLen = Module.lengthBytesUTF8(config.dataDir || '') + 1; + const dictDirLen = Module.lengthBytesUTF8(config.dictDir || '') + 1; + const lexiconLen = Module.lengthBytesUTF8(config.lexicon || '') + 1; + + const n = modelLen + voicesLen + tokensLen + dataDirLen + dictDirLen + lexiconLen; + const buffer = Module._malloc(n); + + const len = 7 * 4; + const ptr = 
Module._malloc(len); + + let offset = 0; + Module.stringToUTF8(config.model || '', buffer + offset, modelLen); + offset += modelLen; + + Module.stringToUTF8(config.voices || '', buffer + offset, voicesLen); + offset += voicesLen; + + Module.stringToUTF8(config.tokens || '', buffer + offset, tokensLen); + offset += tokensLen; + + Module.stringToUTF8(config.dataDir || '', buffer + offset, dataDirLen); + offset += dataDirLen; + + Module.stringToUTF8(config.dictDir || '', buffer + offset, dictDirLen); + offset += dictDirLen; + + Module.stringToUTF8(config.lexicon || '', buffer + offset, lexiconLen); + offset += lexiconLen; + + offset = 0; + Module.setValue(ptr, buffer + offset, 'i8*'); + offset += modelLen; + + Module.setValue(ptr + 4, buffer + offset, 'i8*'); + offset += voicesLen; + + Module.setValue(ptr + 8, buffer + offset, 'i8*'); + offset += tokensLen; + + Module.setValue(ptr + 12, buffer + offset, 'i8*'); + offset += dataDirLen; + + Module.setValue(ptr + 16, config.lengthScale || 1.0, 'float'); + + Module.setValue(ptr + 20, buffer + offset, 'i8*'); + offset += dictDirLen; + + Module.setValue(ptr + 24, buffer + offset, 'i8*'); + offset += lexiconLen; + + return { + buffer: buffer, ptr: ptr, len: len, + }; + } + + /** + * Initialize offline TTS model configuration + * @param {Object} config - Model configuration + * @param {Object} Module - WebAssembly module + * @returns {Object} - Configuration with pointers + */ + function initSherpaOnnxOfflineTtsModelConfig(config, Module) { + if (!('offlineTtsVitsModelConfig' in config)) { + config.offlineTtsVitsModelConfig = { + model: './model.onnx', + lexicon: '', + tokens: './tokens.txt', + dataDir: './espeak-ng-data', // Use relative path in the model directory + dictDir: '', + noiseScale: 0.667, + noiseScaleW: 0.8, + lengthScale: 1.0, + }; + } + + if (!('offlineTtsMatchaModelConfig' in config)) { + config.offlineTtsMatchaModelConfig = { + acousticModel: '', + vocoder: '', + lexicon: '', + tokens: '', + dataDir: '', + dictDir: '', + noiseScale: 0.667, + lengthScale: 1.0, + }; + } + + if (!('offlineTtsKokoroModelConfig' in config)) { + config.offlineTtsKokoroModelConfig = { + model: '', + voices: '', + tokens: '', + lengthScale: 1.0, + dataDir: '', + dictDir: '', + lexicon: '', + }; + } + + const vitsModelConfig = initSherpaOnnxOfflineTtsVitsModelConfig( + config.offlineTtsVitsModelConfig, Module); + + const matchaModelConfig = initSherpaOnnxOfflineTtsMatchaModelConfig( + config.offlineTtsMatchaModelConfig, Module); + + const kokoroModelConfig = initSherpaOnnxOfflineTtsKokoroModelConfig( + config.offlineTtsKokoroModelConfig, Module); + + const len = vitsModelConfig.len + matchaModelConfig.len + + kokoroModelConfig.len + 3 * 4; + + const ptr = Module._malloc(len); + + let offset = 0; + Module._CopyHeap(vitsModelConfig.ptr, vitsModelConfig.len, ptr + offset); + offset += vitsModelConfig.len; + + Module.setValue(ptr + offset, config.numThreads || 1, 'i32'); + offset += 4; + + Module.setValue(ptr + offset, config.debug || 0, 'i32'); + offset += 4; + + const providerLen = Module.lengthBytesUTF8(config.provider || 'cpu') + 1; + const buffer = Module._malloc(providerLen); + Module.stringToUTF8(config.provider || 'cpu', buffer, providerLen); + Module.setValue(ptr + offset, buffer, 'i8*'); + offset += 4; + + Module._CopyHeap(matchaModelConfig.ptr, matchaModelConfig.len, ptr + offset); + offset += matchaModelConfig.len; + + Module._CopyHeap(kokoroModelConfig.ptr, kokoroModelConfig.len, ptr + offset); + offset += kokoroModelConfig.len; + + return { + 
buffer: buffer, ptr: ptr, len: len, config: vitsModelConfig, + matcha: matchaModelConfig, kokoro: kokoroModelConfig, + }; + } + + /** + * Initialize the TTS configuration + * @param {Object} config - TTS configuration + * @param {Object} Module - WebAssembly module + * @returns {Object} - Configuration with pointers + */ + function initSherpaOnnxOfflineTtsConfig(config, Module) { + const modelConfig = + initSherpaOnnxOfflineTtsModelConfig(config.offlineTtsModelConfig, Module); + const len = modelConfig.len + 4 * 4; + const ptr = Module._malloc(len); + + let offset = 0; + Module._CopyHeap(modelConfig.ptr, modelConfig.len, ptr + offset); + offset += modelConfig.len; + + const ruleFstsLen = Module.lengthBytesUTF8(config.ruleFsts || '') + 1; + const ruleFarsLen = Module.lengthBytesUTF8(config.ruleFars || '') + 1; + + const buffer = Module._malloc(ruleFstsLen + ruleFarsLen); + Module.stringToUTF8(config.ruleFsts || '', buffer, ruleFstsLen); + Module.stringToUTF8(config.ruleFars || '', buffer + ruleFstsLen, ruleFarsLen); + + Module.setValue(ptr + offset, buffer, 'i8*'); + offset += 4; + + Module.setValue(ptr + offset, config.maxNumSentences || 1, 'i32'); + offset += 4; + + Module.setValue(ptr + offset, buffer + ruleFstsLen, 'i8*'); + offset += 4; + + Module.setValue(ptr + offset, config.silenceScale || 1.0, 'float'); + offset += 4; + + return { + buffer: buffer, ptr: ptr, len: len, config: modelConfig, + }; + } + + /** + * OfflineTts class for text-to-speech synthesis + */ + global.OfflineTts = global.OfflineTts || function(configObj, Module) { + if (Module.debug) { + console.log("Creating OfflineTts with config:", JSON.stringify(configObj)); + } + + const config = initSherpaOnnxOfflineTtsConfig(configObj, Module); + + if (Module.debug) { + try { + Module._MyPrintTTS(config.ptr); + } catch (e) { + console.warn("Failed to print TTS config:", e); + } + } + + const handle = Module._SherpaOnnxCreateOfflineTts(config.ptr); + + if (!handle) { + const error = new Error("Failed to create TTS engine - null handle returned"); + freeConfig(config, Module); + throw error; + } + + freeConfig(config, Module); + + this.handle = handle; + this.sampleRate = Module._SherpaOnnxOfflineTtsSampleRate(this.handle); + this.numSpeakers = Module._SherpaOnnxOfflineTtsNumSpeakers(this.handle); + this.Module = Module; + this.generatedAudios = []; // Track generated audios for cleanup + + /** + * Generate speech from text + * @param {string} text - Text to synthesize + * @param {number} sid - Speaker ID (0 to numSpeakers-1) + * @param {number} speed - Speed factor (1.0 is normal speed) + * @returns {Object} - Object containing audio samples and sample rate + */ + this.generate = function(text, sid = 0, speed = 1.0) { + const textLen = this.Module.lengthBytesUTF8(text) + 1; + const textPtr = this.Module._malloc(textLen); + this.Module.stringToUTF8(text, textPtr, textLen); + + const h = this.Module._SherpaOnnxOfflineTtsGenerate( + this.handle, textPtr, sid, speed); + + this.Module._free(textPtr); + + if (!h) { + throw new Error("Failed to generate speech - null pointer returned"); + } + + const numSamples = this.Module.HEAP32[h / 4 + 1]; + const sampleRate = this.Module.HEAP32[h / 4 + 2]; + + const samplesPtr = this.Module.HEAP32[h / 4] / 4; + const samples = new Float32Array(numSamples); + for (let i = 0; i < numSamples; i++) { + samples[i] = this.Module.HEAPF32[samplesPtr + i]; + } + + // Add to our tracking list + this.generatedAudios.push(h); + + return { + samples: samples, + sampleRate: sampleRate, + // Add a cleanup 
function for this specific audio + free: () => { + const index = this.generatedAudios.indexOf(h); + if (index !== -1) { + this.Module._SherpaOnnxDestroyOfflineTtsGeneratedAudio(h); + this.generatedAudios.splice(index, 1); + } + } + }; + }; + + /** + * Save generated audio to a WAV file (for browser environments) + * @param {Float32Array} samples - Audio samples + * @param {number} sampleRate - Sample rate + * @returns {Blob} - WAV file as Blob + */ + this.saveAsWav = function(samples, sampleRate) { + // Create WAV file in memory + const numSamples = samples.length; + const dataSize = numSamples * 2; // 16-bit samples + const bufferSize = 44 + dataSize; + + const buffer = new ArrayBuffer(bufferSize); + const view = new DataView(buffer); + + // WAV header (http://soundfile.sapp.org/doc/WaveFormat/) + view.setUint32(0, 0x46464952, true); // 'RIFF' + view.setUint32(4, bufferSize - 8, true); // chunk size + view.setUint32(8, 0x45564157, true); // 'WAVE' + view.setUint32(12, 0x20746d66, true); // 'fmt ' + view.setUint32(16, 16, true); // subchunk1 size + view.setUint16(20, 1, true); // PCM format + view.setUint16(22, 1, true); // mono + view.setUint32(24, sampleRate, true); // sample rate + view.setUint32(28, sampleRate * 2, true); // byte rate + view.setUint16(32, 2, true); // block align + view.setUint16(34, 16, true); // bits per sample + view.setUint32(36, 0x61746164, true); // 'data' + view.setUint32(40, dataSize, true); // subchunk2 size + + // Write audio data + for (let i = 0; i < numSamples; i++) { + // Convert float to 16-bit PCM + let sample = samples[i]; + if (sample > 1.0) sample = 1.0; + if (sample < -1.0) sample = -1.0; + + const pcm = Math.floor(sample * 32767); + view.setInt16(44 + i * 2, pcm, true); + } + + return new Blob([buffer], { type: 'audio/wav' }); + }; + + /** + * Free the TTS engine and all generated audios + */ + this.free = function() { + // Free all generated audios first + for (let i = this.generatedAudios.length - 1; i >= 0; i--) { + if (this.generatedAudios[i]) { + this.Module._SherpaOnnxDestroyOfflineTtsGeneratedAudio(this.generatedAudios[i]); + } + } + this.generatedAudios = []; + + // Free the TTS engine + if (this.handle) { + this.Module._SherpaOnnxDestroyOfflineTts(this.handle); + this.handle = 0; + } + }; + }; + + // For Node.js environments + if (typeof module !== 'undefined' && module.exports) { + module.exports = SherpaOnnx; + } +})(typeof window !== 'undefined' ? 
window : global); + +/** + * Global helper function to create an OfflineTts instance + */ +function createOfflineTts(Module, config) { + // Use provided config or create default + if (config) return new OfflineTts(config, Module); + + // Default configuration pointing to extracted espeak-ng-data + const defaultConfig = { + offlineTtsModelConfig: { + offlineTtsVitsModelConfig: { + model: './model.onnx', + lexicon: '', + tokens: './tokens.txt', + dataDir: './espeak-ng-data', // Use relative path in the model directory + dictDir: '', + noiseScale: 0.667, + noiseScaleW: 0.8, + lengthScale: 1.0, + }, + offlineTtsMatchaModelConfig: { + acousticModel: '', + vocoder: '', + lexicon: '', + tokens: '', + dataDir: '', + dictDir: '', + noiseScale: 0.667, + lengthScale: 1.0, + }, + offlineTtsKokoroModelConfig: { + model: '', + voices: '', + tokens: '', + dataDir: '', + lengthScale: 1.0, + dictDir: '', + lexicon: '', + }, + numThreads: 1, + debug: 1, + provider: 'cpu', + }, + ruleFsts: '', + ruleFars: '', + maxNumSentences: 1, + silenceScale: 1.0 + }; + + return new OfflineTts(defaultConfig, Module); +} \ No newline at end of file diff --git a/wasm/combined/sherpa-onnx-vad.js b/wasm/combined/sherpa-onnx-vad.js new file mode 100644 index 0000000000..05bb4e8c9b --- /dev/null +++ b/wasm/combined/sherpa-onnx-vad.js @@ -0,0 +1,297 @@ +/** + * sherpa-onnx-vad.js + * + * Voice Activity Detection functionality for SherpaOnnx + * Requires sherpa-onnx-core.js to be loaded first + */ + +(function(global) { + // Ensure the namespace exists + if (!global.SherpaOnnx) { + console.error('SherpaOnnx namespace not found. Make sure to load sherpa-onnx-core.js first.'); + return; + } + + // Get a reference to the SherpaOnnx namespace + const SherpaOnnx = global.SherpaOnnx; + + // Internal class for voice activity detection + class VoiceActivityDetector { + constructor(handle, Module) { + this.handle = handle; + this.Module = Module; + } + + /** + * Accept audio waveform data + * @param {Float32Array} samples - Audio samples in [-1, 1] range + */ + acceptWaveform(samples) { + const pointer = this.Module._malloc(samples.length * samples.BYTES_PER_ELEMENT); + this.Module.HEAPF32.set(samples, pointer / samples.BYTES_PER_ELEMENT); + + this.Module._SherpaOnnxVoiceActivityDetectorAcceptWaveform( + this.handle, pointer, samples.length + ); + + this.Module._free(pointer); + } + + /** + * Check if there are no speech segments available + * @returns {boolean} - True if no segments available, false otherwise + */ + isEmpty() { + return this.Module._SherpaOnnxVoiceActivityDetectorEmpty(this.handle) === 1; + } + + /** + * Check if voice is detected + * @returns {boolean} - True if voice detected, false otherwise + */ + detected() { + return this.Module._SherpaOnnxVoiceActivityDetectorDetected(this.handle) === 1; + } + + /** + * Reset the detector + */ + reset() { + this.Module._SherpaOnnxVoiceActivityDetectorReset(this.handle); + } + + /** + * Free the detector + */ + free() { + if (this.handle) { + this.Module._SherpaOnnxDestroyVoiceActivityDetector(this.handle); + this.handle = 0; + } + } + } + + // Define the VAD module functionality + SherpaOnnx.VAD = { + /** + * Load a VAD model from URL + * @param {Object} modelConfig - Configuration for the model + * @returns {Promise} - Information about the loaded model + */ + loadModel: async function(modelConfig) { + const debug = modelConfig.debug !== false; + + if (debug) console.log("VAD.loadModel: ModelConfig received:", JSON.stringify(modelConfig)); + + // Use configurable model directory 
with default + const modelDir = modelConfig.modelDir || 'vad-models'; + const fileName = modelConfig.fileName || 'silero_vad.onnx'; + const destPath = `${modelDir}/${fileName}`; + + if (debug) console.log(`VAD.loadModel: Using model directory: ${modelDir}`); + if (debug) console.log(`VAD.loadModel: Target file path: ${destPath}`); + + try { + // Clean up existing path if needed for fresh start + if (modelConfig.cleanStart) { + if (debug) console.log(`VAD.loadModel: Clean start requested, removing existing paths`); + SherpaOnnx.FileSystem.removePath(modelDir, debug); + } + + // Load the model using the safe loader + if (debug) console.log(`VAD.loadModel: Loading model from ${modelConfig.model || 'assets/vad/silero_vad.onnx'} to ${destPath}`); + + const loadResult = await SherpaOnnx.FileSystem.safeLoadFile( + modelConfig.model || 'assets/vad/silero_vad.onnx', + destPath, + debug + ); + + if (!loadResult || (typeof loadResult === 'object' && !loadResult.success)) { + throw new Error(`Failed to load model from ${modelConfig.model || 'assets/vad/silero_vad.onnx'} to ${destPath}`); + } + + // Update the path if it was changed + const actualPath = (typeof loadResult === 'object' && loadResult.path) ? loadResult.path : destPath; + if (actualPath !== destPath) { + if (debug) console.log(`VAD.loadModel: Note - Model loaded to alternate path: ${actualPath}`); + } + + if (debug) console.log(`VAD.loadModel: Model loaded successfully`); + + // Return model information with the actual path used + return { + modelDir, + fileName, + modelPath: actualPath + }; + } catch (error) { + console.error(`VAD.loadModel: Error loading model:`, error); + throw error; + } + }, + + /** + * Create a Voice Activity Detector with the loaded model + * @param {Object} loadedModel - Model information returned by loadModel + * @param {Object} options - Additional configuration options + * @returns {VoiceActivityDetector} - A VAD instance + */ + createVoiceActivityDetector: function(loadedModel, options = {}) { + const debug = options.debug !== false; + + try { + // Get the model path from loaded model info + const modelPath = loadedModel.modelPath || `${loadedModel.modelDir}/${loadedModel.fileName || 'silero_vad.onnx'}`; + + if (debug) console.log(`VAD.createVoiceActivityDetector: Using model at ${modelPath}`); + + // Verify model file exists before proceeding + if (!SherpaOnnx.FileSystem.fileExists(modelPath)) { + throw new Error(`Model file not found at ${modelPath}`); + } + + // Initialize the silero VAD config + const sileroVadConfig = this._initSileroVadConfig({ + model: modelPath, + threshold: options.threshold || 0.5, + minSilenceDuration: options.minSilenceDuration || 0.3, + minSpeechDuration: options.minSpeechDuration || 0.1, + windowSize: options.windowSize || 512, + maxSpeechDuration: options.maxSpeechDuration || 30.0, + }, global.Module); + + // Initialize the full VAD config + const vadConfig = this._initVadModelConfig({ + sileroVad: { + model: modelPath, + threshold: options.threshold || 0.5, + minSilenceDuration: options.minSilenceDuration || 0.3, + minSpeechDuration: options.minSpeechDuration || 0.1, + windowSize: options.windowSize || 512, + maxSpeechDuration: options.maxSpeechDuration || 30.0, + }, + sampleRate: options.sampleRate || 16000, + numThreads: options.numThreads || 1, + provider: options.provider || 'cpu', + debug: debug ? 
1 : 0, + }, global.Module); + + // Debug print the config if requested + if (debug) { + try { + global.Module._MyPrintVAD(vadConfig.ptr); + } catch (printErr) { + console.warn("Could not print VAD config:", printErr); + } + } + + // Create the detector + if (debug) console.log("VAD.createVoiceActivityDetector: Creating detector"); + const vadPtr = global.Module.ccall( + 'SherpaOnnxCreateVoiceActivityDetector', + 'number', + ['number', 'number'], + [vadConfig.ptr, options.bufferSizeInSeconds || 5.0] + ); + + if (!vadPtr) { + throw new Error("Failed to create voice activity detector"); + } + + if (debug) console.log("VAD.createVoiceActivityDetector: Detector created successfully"); + + // Free configuration memory + SherpaOnnx.Utils.freeConfig(vadConfig, global.Module); + + return new VoiceActivityDetector(vadPtr, global.Module); + } catch (error) { + console.error("Error creating VAD detector:", error); + throw error; + } + }, + + /** + * Initialize SileroVad configuration in WASM + * @param {Object} config - SileroVad configuration + * @param {Object} Module - WebAssembly module + * @returns {Object} Configuration with WASM pointers + * @private + */ + _initSileroVadConfig: function(config, Module) { + const modelString = SherpaOnnx.Utils.allocateString(config.model, Module); + + const len = 6 * 4; + const ptr = Module._malloc(len); + + let offset = 0; + Module.setValue(ptr, modelString.ptr, 'i8*'); + offset += 4; + + Module.setValue(ptr + offset, config.threshold || 0.5, 'float'); + offset += 4; + + Module.setValue(ptr + offset, config.minSilenceDuration || 0.3, 'float'); + offset += 4; + + Module.setValue(ptr + offset, config.minSpeechDuration || 0.1, 'float'); + offset += 4; + + Module.setValue(ptr + offset, config.windowSize || 512, 'i32'); + offset += 4; + + Module.setValue(ptr + offset, config.maxSpeechDuration || 30.0, 'float'); + offset += 4; + + return { + buffer: modelString.ptr, + ptr: ptr, + len: len + }; + }, + + /** + * Initialize VAD model configuration in WASM + * @param {Object} config - VAD configuration + * @param {Object} Module - WebAssembly module + * @returns {Object} Configuration with WASM pointers + * @private + */ + _initVadModelConfig: function(config, Module) { + if (!('sileroVad' in config)) { + throw new Error("Missing sileroVad configuration"); + } + + const sileroVad = this._initSileroVadConfig(config.sileroVad, Module); + + const providerString = SherpaOnnx.Utils.allocateString(config.provider || 'cpu', Module); + + const len = sileroVad.len + 4 * 4; + const ptr = Module._malloc(len); + + let offset = 0; + Module._CopyHeap(sileroVad.ptr, sileroVad.len, ptr + offset); + offset += sileroVad.len; + + Module.setValue(ptr + offset, config.sampleRate || 16000, 'i32'); + offset += 4; + + Module.setValue(ptr + offset, config.numThreads || 1, 'i32'); + offset += 4; + + Module.setValue(ptr + offset, providerString.ptr, 'i8*'); // provider + offset += 4; + + Module.setValue(ptr + offset, config.debug !== undefined ? config.debug : 1, 'i32'); + offset += 4; + + return { + buffer: providerString.ptr, + ptr: ptr, + len: len, + sileroVad: sileroVad + }; + } + }; + +})(typeof window !== 'undefined' ? 
window : global); \ No newline at end of file diff --git a/wasm/combined/sherpa-onnx-wasm-combined.cc b/wasm/combined/sherpa-onnx-wasm-combined.cc new file mode 100644 index 0000000000..7ba5da23ea --- /dev/null +++ b/wasm/combined/sherpa-onnx-wasm-combined.cc @@ -0,0 +1,292 @@ +// wasm/combined/sherpa-onnx-wasm-combined.cc +// +// Copyright (c) 2024 Xiaomi Corporation + +#include +#include +#include + +#include "sherpa-onnx/c-api/c-api.h" + +// This is a combined implementation that provides all the necessary C functions +// for the WASM module, incorporating debug printing for all supported features. + +extern "C" { + +// ============================================================================ +// Verify memory layouts with static assertions +// ============================================================================ + +// ASR memory layout verification +static_assert(sizeof(SherpaOnnxOnlineTransducerModelConfig) == 3 * 4, ""); +static_assert(sizeof(SherpaOnnxOnlineParaformerModelConfig) == 2 * 4, ""); +static_assert(sizeof(SherpaOnnxOnlineZipformer2CtcModelConfig) == 1 * 4, ""); +static_assert(sizeof(SherpaOnnxOnlineModelConfig) == + sizeof(SherpaOnnxOnlineTransducerModelConfig) + + sizeof(SherpaOnnxOnlineParaformerModelConfig) + + sizeof(SherpaOnnxOnlineZipformer2CtcModelConfig) + 9 * 4, + ""); +static_assert(sizeof(SherpaOnnxFeatureConfig) == 2 * 4, ""); +static_assert(sizeof(SherpaOnnxOnlineCtcFstDecoderConfig) == 2 * 4, ""); +static_assert(sizeof(SherpaOnnxOnlineRecognizerConfig) == + sizeof(SherpaOnnxFeatureConfig) + + sizeof(SherpaOnnxOnlineModelConfig) + 8 * 4 + + sizeof(SherpaOnnxOnlineCtcFstDecoderConfig) + 5 * 4, + ""); + +// VAD memory layout verification +static_assert(sizeof(SherpaOnnxSileroVadModelConfig) == 6 * 4, ""); +static_assert(sizeof(SherpaOnnxVadModelConfig) == + sizeof(SherpaOnnxSileroVadModelConfig) + 4 * 4, + ""); + +// TTS memory layout verification +static_assert(sizeof(SherpaOnnxOfflineTtsVitsModelConfig) == 8 * 4, ""); +static_assert(sizeof(SherpaOnnxOfflineTtsMatchaModelConfig) == 8 * 4, ""); +static_assert(sizeof(SherpaOnnxOfflineTtsKokoroModelConfig) == 7 * 4, ""); +static_assert(sizeof(SherpaOnnxOfflineTtsModelConfig) == + sizeof(SherpaOnnxOfflineTtsVitsModelConfig) + + sizeof(SherpaOnnxOfflineTtsMatchaModelConfig) + + sizeof(SherpaOnnxOfflineTtsKokoroModelConfig) + 3 * 4, + ""); +static_assert(sizeof(SherpaOnnxOfflineTtsConfig) == + sizeof(SherpaOnnxOfflineTtsModelConfig) + 4 * 4, + ""); + +// Speaker Diarization memory layout verification +static_assert(sizeof(SherpaOnnxOfflineSpeakerSegmentationPyannoteModelConfig) == + 1 * 4, + ""); +static_assert( + sizeof(SherpaOnnxOfflineSpeakerSegmentationModelConfig) == + sizeof(SherpaOnnxOfflineSpeakerSegmentationPyannoteModelConfig) + 3 * 4, + ""); +static_assert(sizeof(SherpaOnnxFastClusteringConfig) == 2 * 4, ""); +static_assert(sizeof(SherpaOnnxSpeakerEmbeddingExtractorConfig) == 4 * 4, ""); +static_assert(sizeof(SherpaOnnxOfflineSpeakerDiarizationConfig) == + sizeof(SherpaOnnxOfflineSpeakerSegmentationModelConfig) + + sizeof(SherpaOnnxSpeakerEmbeddingExtractorConfig) + + sizeof(SherpaOnnxFastClusteringConfig) + 2 * 4, + ""); + +// Speech Enhancement memory layout verification +static_assert(sizeof(SherpaOnnxOfflineSpeechDenoiserGtcrnModelConfig) == 1 * 4, + ""); +static_assert(sizeof(SherpaOnnxOfflineSpeechDenoiserModelConfig) == + sizeof(SherpaOnnxOfflineSpeechDenoiserGtcrnModelConfig) + + 3 * 4, + ""); +static_assert(sizeof(SherpaOnnxOfflineSpeechDenoiserConfig) == + 
sizeof(SherpaOnnxOfflineSpeechDenoiserModelConfig), + ""); + +// Keyword Spotting memory layout verification +static_assert(sizeof(SherpaOnnxKeywordSpotterConfig) == + sizeof(SherpaOnnxFeatureConfig) + + sizeof(SherpaOnnxOnlineModelConfig) + 7 * 4, + ""); + +// ============================================================================ +// Debug printing functions for all model types +// ============================================================================ + +// Helper function to copy between heap locations +void CopyHeap(const char *src, int32_t num_bytes, char *dst) { + std::copy(src, src + num_bytes, dst); +} + +// Debug printing for Online ASR configuration +void MyPrintOnlineASR(SherpaOnnxOnlineRecognizerConfig *config) { + auto model_config = &config->model_config; + auto feat = &config->feat_config; + auto transducer_model_config = &model_config->transducer; + auto paraformer_model_config = &model_config->paraformer; + auto ctc_model_config = &model_config->zipformer2_ctc; + + fprintf(stdout, "----------Online ASR Configuration----------\n"); + fprintf(stdout, "----------online transducer model config----------\n"); + fprintf(stdout, "encoder: %s\n", transducer_model_config->encoder); + fprintf(stdout, "decoder: %s\n", transducer_model_config->decoder); + fprintf(stdout, "joiner: %s\n", transducer_model_config->joiner); + + fprintf(stdout, "----------online parformer model config----------\n"); + fprintf(stdout, "encoder: %s\n", paraformer_model_config->encoder); + fprintf(stdout, "decoder: %s\n", paraformer_model_config->decoder); + + fprintf(stdout, "----------online ctc model config----------\n"); + fprintf(stdout, "model: %s\n", ctc_model_config->model); + fprintf(stdout, "tokens: %s\n", model_config->tokens); + fprintf(stdout, "num_threads: %d\n", model_config->num_threads); + fprintf(stdout, "provider: %s\n", model_config->provider); + fprintf(stdout, "debug: %d\n", model_config->debug); + fprintf(stdout, "model type: %s\n", model_config->model_type); + fprintf(stdout, "modeling unit: %s\n", model_config->modeling_unit); + fprintf(stdout, "bpe vocab: %s\n", model_config->bpe_vocab); + fprintf(stdout, "tokens_buf: %s\n", + model_config->tokens_buf ? 
model_config->tokens_buf : ""); + fprintf(stdout, "tokens_buf_size: %d\n", model_config->tokens_buf_size); + + fprintf(stdout, "----------feat config----------\n"); + fprintf(stdout, "sample rate: %d\n", feat->sample_rate); + fprintf(stdout, "feat dim: %d\n", feat->feature_dim); + + fprintf(stdout, "----------recognizer config----------\n"); + fprintf(stdout, "decoding method: %s\n", config->decoding_method); + fprintf(stdout, "max active paths: %d\n", config->max_active_paths); + fprintf(stdout, "enable_endpoint: %d\n", config->enable_endpoint); + fprintf(stdout, "rule1_min_trailing_silence: %.2f\n", + config->rule1_min_trailing_silence); + fprintf(stdout, "rule2_min_trailing_silence: %.2f\n", + config->rule2_min_trailing_silence); + fprintf(stdout, "rule3_min_utterance_length: %.2f\n", + config->rule3_min_utterance_length); + fprintf(stdout, "hotwords_file: %s\n", config->hotwords_file); + fprintf(stdout, "hotwords_score: %.2f\n", config->hotwords_score); + fprintf(stdout, "rule_fsts: %s\n", config->rule_fsts); + fprintf(stdout, "rule_fars: %s\n", config->rule_fars); + fprintf(stdout, "blank_penalty: %f\n", config->blank_penalty); + + fprintf(stdout, "----------ctc fst decoder config----------\n"); + fprintf(stdout, "graph: %s\n", config->ctc_fst_decoder_config.graph); + fprintf(stdout, "max_active: %d\n", + config->ctc_fst_decoder_config.max_active); +} + +// Debug printing for VAD configuration +void MyPrintVAD(SherpaOnnxVadModelConfig *config) { + auto silero_vad = &config->silero_vad; + + fprintf(stdout, "----------Voice Activity Detection Configuration----------\n"); + fprintf(stdout, "----------silero_vad config----------\n"); + fprintf(stdout, "model: %s\n", silero_vad->model); + fprintf(stdout, "threshold: %.3f\n", silero_vad->threshold); + fprintf(stdout, "min_silence_duration: %.3f\n", + silero_vad->min_silence_duration); + fprintf(stdout, "min_speech_duration: %.3f\n", + silero_vad->min_speech_duration); + fprintf(stdout, "window_size: %d\n", silero_vad->window_size); + fprintf(stdout, "max_speech_duration: %.3f\n", + silero_vad->max_speech_duration); + + fprintf(stdout, "----------config----------\n"); + fprintf(stdout, "sample_rate: %d\n", config->sample_rate); + fprintf(stdout, "num_threads: %d\n", config->num_threads); + fprintf(stdout, "provider: %s\n", config->provider); + fprintf(stdout, "debug: %d\n", config->debug); +} + +// Debug printing for TTS configuration +void MyPrintTTS(SherpaOnnxOfflineTtsConfig *tts_config) { + auto tts_model_config = &tts_config->model; + auto vits_model_config = &tts_model_config->vits; + auto matcha_model_config = &tts_model_config->matcha; + auto kokoro = &tts_model_config->kokoro; + + fprintf(stdout, "----------Text-to-Speech Configuration----------\n"); + fprintf(stdout, "----------vits model config----------\n"); + fprintf(stdout, "model: %s\n", vits_model_config->model); + fprintf(stdout, "lexicon: %s\n", vits_model_config->lexicon); + fprintf(stdout, "tokens: %s\n", vits_model_config->tokens); + fprintf(stdout, "data_dir: %s\n", vits_model_config->data_dir); + fprintf(stdout, "noise scale: %.3f\n", vits_model_config->noise_scale); + fprintf(stdout, "noise scale w: %.3f\n", vits_model_config->noise_scale_w); + fprintf(stdout, "length scale: %.3f\n", vits_model_config->length_scale); + fprintf(stdout, "dict_dir: %s\n", vits_model_config->dict_dir); + + fprintf(stdout, "----------matcha model config----------\n"); + fprintf(stdout, "acoustic_model: %s\n", matcha_model_config->acoustic_model); + fprintf(stdout, "vocoder: %s\n", 
matcha_model_config->vocoder); + fprintf(stdout, "lexicon: %s\n", matcha_model_config->lexicon); + fprintf(stdout, "tokens: %s\n", matcha_model_config->tokens); + fprintf(stdout, "data_dir: %s\n", matcha_model_config->data_dir); + fprintf(stdout, "noise scale: %.3f\n", matcha_model_config->noise_scale); + fprintf(stdout, "length scale: %.3f\n", matcha_model_config->length_scale); + fprintf(stdout, "dict_dir: %s\n", matcha_model_config->dict_dir); + + fprintf(stdout, "----------kokoro model config----------\n"); + fprintf(stdout, "model: %s\n", kokoro->model); + fprintf(stdout, "voices: %s\n", kokoro->voices); + fprintf(stdout, "tokens: %s\n", kokoro->tokens); + fprintf(stdout, "data_dir: %s\n", kokoro->data_dir); + fprintf(stdout, "length scale: %.3f\n", kokoro->length_scale); + fprintf(stdout, "dict_dir: %s\n", kokoro->dict_dir); + fprintf(stdout, "lexicon: %s\n", kokoro->lexicon); + + fprintf(stdout, "----------tts model config----------\n"); + fprintf(stdout, "num threads: %d\n", tts_model_config->num_threads); + fprintf(stdout, "debug: %d\n", tts_model_config->debug); + fprintf(stdout, "provider: %s\n", tts_model_config->provider); + + fprintf(stdout, "----------tts config----------\n"); + fprintf(stdout, "rule_fsts: %s\n", tts_config->rule_fsts); + fprintf(stdout, "rule_fars: %s\n", tts_config->rule_fars); + fprintf(stdout, "max num sentences: %d\n", tts_config->max_num_sentences); + fprintf(stdout, "silence scale: %.3f\n", tts_config->silence_scale); +} + +// Debug printing for Speaker Diarization configuration +void MyPrintSpeakerDiarization(const SherpaOnnxOfflineSpeakerDiarizationConfig *sd_config) { + const auto &segmentation = sd_config->segmentation; + const auto &embedding = sd_config->embedding; + const auto &clustering = sd_config->clustering; + + fprintf(stdout, "----------Speaker Diarization Configuration----------\n"); + fprintf(stdout, "----------segmentation config----------\n"); + fprintf(stdout, "pyannote model: %s\n", segmentation.pyannote.model); + fprintf(stdout, "num threads: %d\n", segmentation.num_threads); + fprintf(stdout, "debug: %d\n", segmentation.debug); + fprintf(stdout, "provider: %s\n", segmentation.provider); + + fprintf(stdout, "----------embedding config----------\n"); + fprintf(stdout, "model: %s\n", embedding.model); + fprintf(stdout, "num threads: %d\n", embedding.num_threads); + fprintf(stdout, "debug: %d\n", embedding.debug); + fprintf(stdout, "provider: %s\n", embedding.provider); + + fprintf(stdout, "----------clustering config----------\n"); + fprintf(stdout, "num_clusters: %d\n", clustering.num_clusters); + fprintf(stdout, "threshold: %.3f\n", clustering.threshold); + + fprintf(stdout, "min_duration_on: %.3f\n", sd_config->min_duration_on); + fprintf(stdout, "min_duration_off: %.3f\n", sd_config->min_duration_off); +} + +// Debug printing for Speech Enhancement configuration +void MyPrintSpeechEnhancement(SherpaOnnxOfflineSpeechDenoiserConfig *config) { + auto model = &config->model; + auto gtcrn = &model->gtcrn; + + fprintf(stdout, "----------Speech Enhancement Configuration----------\n"); + fprintf(stdout, "----------offline speech denoiser model config----------\n"); + fprintf(stdout, "gtcrn: %s\n", gtcrn->model); + fprintf(stdout, "num threads: %d\n", model->num_threads); + fprintf(stdout, "debug: %d\n", model->debug); + fprintf(stdout, "provider: %s\n", model->provider); +} + +// Debug printing for Keyword Spotting configuration +void MyPrintKeywordSpotting(SherpaOnnxKeywordSpotterConfig *config) { + auto feat = &config->feat_config; + 
auto model = &config->model_config; + auto transducer = &model->transducer; + + fprintf(stdout, "----------Keyword Spotting Configuration----------\n"); + fprintf(stdout, "model_config.transducer.encoder: %s\n", transducer->encoder); + fprintf(stdout, "model_config.transducer.decoder: %s\n", transducer->decoder); + fprintf(stdout, "model_config.transducer.joiner: %s\n", transducer->joiner); + fprintf(stdout, "model_config.tokens: %s\n", model->tokens); + fprintf(stdout, "model_config.num_threads: %d\n", model->num_threads); + fprintf(stdout, "model_config.provider: %s\n", model->provider); + fprintf(stdout, "model_config.debug: %d\n", model->debug); + + fprintf(stdout, "feat_config.sample_rate: %d\n", feat->sample_rate); + fprintf(stdout, "feat_config.feature_dim: %d\n", feat->feature_dim); + + fprintf(stdout, "max_active_paths: %d\n", config->max_active_paths); + fprintf(stdout, "num_trailing_blanks: %d\n", config->num_trailing_blanks); + fprintf(stdout, "keywords_score: %.3f\n", config->keywords_score); + fprintf(stdout, "keywords_threshold: %.3f\n", config->keywords_threshold); + fprintf(stdout, "keywords_file: %s\n", config->keywords_file ? config->keywords_file : ""); +} + +} // extern "C" \ No newline at end of file From ddee01f8e32b0db00f12dbf2a4557ec28bc7e282 Mon Sep 17 00:00:00 2001 From: abretonc7s Date: Mon, 7 Apr 2025 17:51:49 +0800 Subject: [PATCH 2/9] Update README to reflect new WebAssembly build process and output locations --- wasm/combined/README.md | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/wasm/combined/README.md b/wasm/combined/README.md index b4f5ee54d7..3f0d15f678 100644 --- a/wasm/combined/README.md +++ b/wasm/combined/README.md @@ -83,17 +83,14 @@ The included `index.html` demonstrates how to use the combined module. It shows: ## Building the Module -The WebAssembly module is built using Emscripten. To rebuild it: +The WebAssembly module can be built using the provided build script: ```bash cd /path/to/sherpa-onnx -mkdir -p build-wasm-combined -cd build-wasm-combined -emcmake cmake -DCMAKE_BUILD_TYPE=Release -DSHERPA_ONNX_ENABLE_WASM=ON -DSHERPA_ONNX_ENABLE_CHECK=OFF .. -make -j$(nproc) +./build-wasm-combined.sh ``` -The built files will be located in `build-wasm-combined/wasm/combined/`. +The built files will be located in `bin/wasm/combined/` and are also copied to `wasm/combined/`. 
## Setting Up Models From e0a9eb72b31136be2fc3db404e14916df68785f7 Mon Sep 17 00:00:00 2001 From: abretonc7s Date: Tue, 8 Apr 2025 13:48:47 +0800 Subject: [PATCH 3/9] Implement build script for WebAssembly combined output and update .gitignore to include build artifacts --- .gitignore | 7 +++++++ build-wasm-combined.sh | 47 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+) create mode 100755 build-wasm-combined.sh diff --git a/.gitignore b/.gitignore index f84da661b2..782d315709 100644 --- a/.gitignore +++ b/.gitignore @@ -137,6 +137,13 @@ sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16 cmake-build-debug README-DEV.txt +# WASM combined build artifacts +wasm/combined/*.wasm +wasm/combined/sherpa-onnx-wasm-combined.js +build-wasm-combined/ +# Don't ignore the build script +!build-wasm-combined.sh + ##clion .idea scripts/dotnet/examples/obj/Debug/net8.0/Common.AssemblyInfo.cs diff --git a/build-wasm-combined.sh b/build-wasm-combined.sh new file mode 100755 index 0000000000..4f23f0cd68 --- /dev/null +++ b/build-wasm-combined.sh @@ -0,0 +1,47 @@ +#!/bin/bash +# +# Copyright (c) 2024 Xiaomi Corporation + +# Exit on error and print commands +set -ex + +echo "=== Starting build process for sherpa-onnx WASM combined ===" + +# Set environment flag to indicate we're using this script +export SHERPA_ONNX_IS_USING_BUILD_WASM_SH=1 + +# Create build directory +mkdir -p build-wasm-combined +cd build-wasm-combined + +echo "=== Running CMake configuration ===" +# Configure with CMake +emcmake cmake \ + -DCMAKE_BUILD_TYPE=Release \ + -DSHERPA_ONNX_ENABLE_WASM=ON \ + -DSHERPA_ONNX_ENABLE_CHECK=OFF \ + -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \ + -DSHERPA_ONNX_ENABLE_BINARY=OFF \ + -DSHERPA_ONNX_ENABLE_PYTHON=OFF \ + -DSHERPA_ONNX_ENABLE_JNI=OFF \ + -DSHERPA_ONNX_ENABLE_C_API=ON \ + -DSHERPA_ONNX_ENABLE_TEST=OFF \ + -DSHERPA_ONNX_ENABLE_WASM_COMBINED=ON \ + -DSHERPA_ONNX_INSTALL_TO_REPO=ON \ + .. + +echo "=== Building the target ===" +# Build the target with full path to the target +emmake make -j $(nproc) sherpa-onnx-wasm-combined + +echo "=== Installing the files ===" +# Install the files +emmake make install/strip + +if [ $? -eq 0 ]; then + echo "=== Build completed successfully! ===" + echo "Files have been installed to bin/wasm/combined and copied to wasm/combined/" +else + echo "=== Build failed! Check the error messages above ===" + exit 1 +fi \ No newline at end of file From 3b8fef17034bdfb2d0144396184120ea8605cd0a Mon Sep 17 00:00:00 2001 From: abretonc7s Date: Tue, 8 Apr 2025 13:59:53 +0800 Subject: [PATCH 4/9] Add .gitkeep file to ensure assets directory is tracked --- wasm/combined/assets/.gitkeep | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 wasm/combined/assets/.gitkeep diff --git a/wasm/combined/assets/.gitkeep b/wasm/combined/assets/.gitkeep new file mode 100644 index 0000000000..e69de29bb2 From 25c6a2c60fafb02ac1d8409156dbd1376c62934f Mon Sep 17 00:00:00 2001 From: abretonc7s Date: Wed, 9 Apr 2025 14:53:58 +0800 Subject: [PATCH 5/9] Update README to include instructions for testing demos using Python's built-in HTTP server --- wasm/combined/README.md | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/wasm/combined/README.md b/wasm/combined/README.md index 3f0d15f678..27f7071910 100644 --- a/wasm/combined/README.md +++ b/wasm/combined/README.md @@ -103,6 +103,17 @@ cd /path/to/sherpa-onnx/wasm/combined This script will download necessary model files to the `assets/` directory. 
+## Testing the Demos + +After building the module and setting up the models, you can test the demos using Python's built-in HTTP server: + +```bash +cd /path/to/sherpa-onnx/wasm/combined +python3 -m http.server 8080 +``` + +Then navigate to http://localhost:8080/demos/ in your web browser to access the demos. + ## Troubleshooting - **Module load errors**: Ensure the WASM module is loaded before any other scripts From 490e8f785a58517eaab16b73a4c8566b7812ab37 Mon Sep 17 00:00:00 2001 From: Arthur Breton Date: Fri, 25 Apr 2025 20:54:41 +0800 Subject: [PATCH 6/9] feat: tts with preloaded models --- wasm/combined/.gitignore | 2 +- wasm/combined/CMakeLists.txt | 62 +- wasm/combined/README.md | 132 +-- wasm/combined/assets/setup-assets.sh | 257 ++++-- wasm/combined/demos/common.css | 201 +++++ wasm/combined/demos/common.js | 195 +++++ wasm/combined/demos/index.html | 37 +- wasm/combined/demos/tts.html | 524 ++++++++++-- wasm/combined/index.html | 928 -------------------- wasm/combined/sherpa-onnx-asr.js | 99 ++- wasm/combined/sherpa-onnx-core.js | 952 +++++---------------- wasm/combined/sherpa-onnx-tts.js | 738 ++++++++++------ wasm/combined/sherpa-onnx-vad.js | 107 ++- wasm/combined/sherpa-onnx-wasm-combined.cc | 73 -- 14 files changed, 2017 insertions(+), 2290 deletions(-) mode change 100755 => 100644 wasm/combined/assets/setup-assets.sh delete mode 100644 wasm/combined/index.html diff --git a/wasm/combined/.gitignore b/wasm/combined/.gitignore index 4b5b215ffd..d055e657c8 100644 --- a/wasm/combined/.gitignore +++ b/wasm/combined/.gitignore @@ -1,7 +1,7 @@ # Generated WASM files *.wasm sherpa-onnx-wasm-combined.js - +sherpa-onnx-wasm-combined.data # Local model files *.onnx *tokens.txt \ No newline at end of file diff --git a/wasm/combined/CMakeLists.txt b/wasm/combined/CMakeLists.txt index b553dfd3a9..4cb9b65e99 100644 --- a/wasm/combined/CMakeLists.txt +++ b/wasm/combined/CMakeLists.txt @@ -1,5 +1,22 @@ if(NOT $ENV{SHERPA_ONNX_IS_USING_BUILD_WASM_SH}) - message(FATAL_ERROR "Please use ./build-wasm-combined.sh to build") + message(FATAL_ERROR "Please use ./build-wasm-combined.sh to build for wasm combined module") +endif() + +# Check for asset directories +if(NOT IS_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/assets/asr") + message(WARNING "ASR assets directory not found at ${CMAKE_CURRENT_SOURCE_DIR}/assets/asr") +endif() + +if(NOT IS_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/assets/vad") + message(WARNING "VAD assets directory not found at ${CMAKE_CURRENT_SOURCE_DIR}/assets/vad") +endif() + +if(NOT IS_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/assets/tts") + message(WARNING "TTS assets directory not found at ${CMAKE_CURRENT_SOURCE_DIR}/assets/tts") +endif() + +if(NOT IS_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/assets/kws") + message(WARNING "KWS assets directory not found at ${CMAKE_CURRENT_SOURCE_DIR}/assets/kws") endif() # Collect all exported functions from all modules @@ -100,6 +117,31 @@ string(APPEND MY_FLAGS " -sEXPORTED_FUNCTIONS=[_malloc,_free,${all_exported_func # No preloaded assets - all models will be loaded dynamically string(APPEND MY_FLAGS " -sEXPORTED_RUNTIME_METHODS=['ccall','stringToUTF8','setValue','getValue','lengthBytesUTF8','UTF8ToString','FS'] ") +# Load precompiled assets using structured paths +if(IS_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/assets/asr") + string(APPEND MY_FLAGS "--preload-file ${CMAKE_CURRENT_SOURCE_DIR}/assets/asr@/sherpa_assets/asr ") +endif() + +if(IS_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/assets/vad") + string(APPEND MY_FLAGS "--preload-file 
${CMAKE_CURRENT_SOURCE_DIR}/assets/vad@/sherpa_assets/vad ") +endif() + +if(IS_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/assets/tts") + string(APPEND MY_FLAGS "--preload-file ${CMAKE_CURRENT_SOURCE_DIR}/assets/tts@/sherpa_assets/tts ") +endif() + +if(IS_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/assets/kws") + string(APPEND MY_FLAGS "--preload-file ${CMAKE_CURRENT_SOURCE_DIR}/assets/kws@/sherpa_assets/kws ") +endif() + +if(IS_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/assets/speakers") + string(APPEND MY_FLAGS "--preload-file ${CMAKE_CURRENT_SOURCE_DIR}/assets/speakers@/sherpa_assets/speakers ") +endif() + +if(IS_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/assets/enhancement") + string(APPEND MY_FLAGS "--preload-file ${CMAKE_CURRENT_SOURCE_DIR}/assets/enhancement@/sherpa_assets/enhancement ") +endif() + message(STATUS "MY_FLAGS: ${MY_FLAGS}") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${MY_FLAGS}") @@ -113,9 +155,16 @@ install(TARGETS sherpa-onnx-wasm-combined DESTINATION bin/wasm/combined) install( FILES "$/sherpa-onnx-wasm-combined.js" - "index.html" - "sherpa-onnx-combined.js" "$/sherpa-onnx-wasm-combined.wasm" + "$/sherpa-onnx-wasm-combined.data" + "sherpa-onnx-core.js" + "sherpa-onnx-asr.js" + "sherpa-onnx-vad.js" + "sherpa-onnx-tts.js" + "sherpa-onnx-kws.js" + "sherpa-onnx-speaker.js" + "sherpa-onnx-enhancement.js" + "sherpa-onnx-combined.js" DESTINATION bin/wasm/combined ) @@ -144,6 +193,13 @@ if(SHERPA_ONNX_INSTALL_TO_REPO) -DCOPY_FILES="sherpa-onnx-wasm-combined.wasm" -P ${CMAKE_CURRENT_SOURCE_DIR}/copy_with_confirm.cmake + # Copy the DATA file + COMMAND ${CMAKE_COMMAND} + -DSRC_DIR=${CMAKE_BINARY_DIR}/bin + -DDEST_DIR=${SHERPA_ONNX_REPO_PATH} + -DCOPY_FILES="sherpa-onnx-wasm-combined.data" + -P ${CMAKE_CURRENT_SOURCE_DIR}/copy_with_confirm.cmake + # Copy the index.html file COMMAND ${CMAKE_COMMAND} -DSRC_DIR=${CMAKE_CURRENT_SOURCE_DIR} diff --git a/wasm/combined/README.md b/wasm/combined/README.md index 27f7071910..503f0ce09c 100644 --- a/wasm/combined/README.md +++ b/wasm/combined/README.md @@ -1,122 +1,60 @@ -# Sherpa-ONNX Combined WebAssembly Module - -This directory contains a combined WebAssembly module for the Sherpa-ONNX project, which integrates multiple features: +# Sherpa-ONNX WASM Combined Module +This directory contains the WebAssembly (WASM) combined module for Sherpa-ONNX, which includes support for: - Automatic Speech Recognition (ASR) - Voice Activity Detection (VAD) -- Text-to-Speech Synthesis (TTS) -- Speech Enhancement +- Text-to-Speech (TTS) +- Keyword Spotting (KWS) - Speaker Diarization -- Keyword Spotting - -## How to Use - -### Loading the Module - -You can use the combined module in two ways: - -#### Option 1: Load Individual Modules (Recommended) - -This approach loads only the components you need: - -```html - - - - - - - - - - - -``` - -#### Option 2: Load All Modules via the Combined Loader - -This approach loads all available modules: - -```html - - - - - - - -``` - -### Module Structure - -The codebase has been organized into modular files: - -- `sherpa-onnx-core.js`: Core functionality, utilities, and file system operations -- `sherpa-onnx-vad.js`: Voice Activity Detection functionality -- `sherpa-onnx-combined.js`: Loader that loads all individual modules - -Additional modules will be added in the future: -- `sherpa-onnx-asr.js`: Automatic Speech Recognition functionality -- `sherpa-onnx-tts.js`: Text-to-Speech functionality -- And more... - -## Demo Application +- Speech Enhancement -The included `index.html` demonstrates how to use the combined module. 
It shows: +## File Structure -1. How to load models from URLs -2. How to initialize each component (ASR, VAD, TTS) -3. How to stream audio from the microphone -4. How to get results from each component +When built, the following files are generated: +- `sherpa-onnx-wasm-combined.js` - The main JavaScript glue code +- `sherpa-onnx-wasm-combined.wasm` - The WebAssembly binary +- `sherpa-onnx-wasm-combined.data` - The preloaded assets (models) +- JS library files: + - `sherpa-onnx-core.js` - Core functionality + - `sherpa-onnx-asr.js` - ASR functionality + - `sherpa-onnx-vad.js` - VAD functionality + - `sherpa-onnx-tts.js` - TTS functionality + - `sherpa-onnx-kws.js` - Keyword Spotting functionality + - `sherpa-onnx-speaker.js` - Speaker Diarization functionality + - `sherpa-onnx-enhancement.js` - Speech Enhancement functionality + - `sherpa-onnx-combined.js` - Combined functionality wrapper -## Building the Module +## Building -The WebAssembly module can be built using the provided build script: +To build the WASM module: ```bash cd /path/to/sherpa-onnx ./build-wasm-combined.sh ``` -The built files will be located in `bin/wasm/combined/` and are also copied to `wasm/combined/`. - -## Setting Up Models - -Before using the demo, you need to set up model files: +This script will: +1. Create a `build-wasm-combined` directory +2. Configure CMake with the necessary options +3. Build the WASM module +4. Install the files to `bin/wasm/combined` +5. Copy the files to the original repo at `wasm/combined` -```bash -cd /path/to/sherpa-onnx/wasm/combined -./setup-assets.sh -``` +## Important Notes -This script will download necessary model files to the `assets/` directory. +1. **Large Asset Bundle**: The `.data` file can be very large (300MB+) as it contains all preloaded models. For production, consider using dynamic loading of models instead. -## Testing the Demos +2. **File Locations**: All files must be in the same directory for the WASM module to work correctly. The `.data` file MUST be in the same directory as the `.js` and `.wasm` files. -After building the module and setting up the models, you can test the demos using Python's built-in HTTP server: +3. **Local Testing**: To test locally, run a web server from the `wasm/combined` directory: ```bash cd /path/to/sherpa-onnx/wasm/combined -python3 -m http.server 8080 +python -m http.server 8000 ``` -Then navigate to http://localhost:8080/demos/ in your web browser to access the demos. +Then open `http://localhost:8000` in your browser. -## Troubleshooting +## License -- **Module load errors**: Ensure the WASM module is loaded before any other scripts -- **Model load errors**: Check the browser console for specific error messages -- **Audio capture issues**: Make sure your browser has permission to access the microphone -- **Performance issues**: Try reducing buffer sizes or using smaller models \ No newline at end of file +Same as Sherpa-ONNX. 
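The notes above expect `sherpa-onnx-wasm-combined.data` to sit next to the `.js` and `.wasm` files, which is awkward once the bundle grows to several hundred megabytes. Emscripten's standard `Module.locateFile` hook lets a page fetch the bundle from another location instead; the CDN base URL below is an assumption for illustration only:

```js
// Minimal sketch: define Module before loading sherpa-onnx-wasm-combined.js.
var Module = {
  locateFile: function (path, scriptDirectory) {
    if (path.endsWith('.data')) {
      // Serve only the large preloaded-assets bundle from a separate host.
      return 'https://cdn.example.com/sherpa-onnx/' + path;
    }
    // Keep the default resolution for the .wasm and other runtime files.
    return scriptDirectory + path;
  }
};
```

This patch also adds `SherpaOnnx.Config.setDataFileLocation(...)` in `sherpa-onnx-core.js`, which wraps the same hook for the `.data` file.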
\ No newline at end of file diff --git a/wasm/combined/assets/setup-assets.sh b/wasm/combined/assets/setup-assets.sh old mode 100755 new mode 100644 index 0fc25a9aab..69ea49d11e --- a/wasm/combined/assets/setup-assets.sh +++ b/wasm/combined/assets/setup-assets.sh @@ -5,115 +5,194 @@ set -e -# Create a tmp directory for downloads -mkdir -p tmp -cd tmp +# Parse command line arguments +FORCE=false +for arg in "$@" +do + case $arg in + --force) + FORCE=true + shift + ;; + esac +done echo "===== Setting up assets for Sherpa-ONNX Combined WASM Demo =====" echo "" -# Function to check if a file exists and download only if needed -download_if_missing() { - local target_file="../$1" - local download_url="$2" - local is_archive="$3" - local extract_dir="$4" - - if [ -f "$target_file" ]; then - echo "File $target_file already exists. Skipping download." - return 0 - fi - - echo "Downloading $download_url..." - - if [ "$is_archive" = "yes" ]; then - wget -q "$download_url" - local file=$(basename "$download_url") - - echo "Extracting $file..." - tar xvf "$file" - rm "$file" - - if [ ! -z "$extract_dir" ]; then - mv "$extract_dir" "../$(dirname "$target_file")" - fi - else - wget -q -O "$target_file" "$download_url" - fi - - echo "Downloaded and setup $target_file" -} +if [ "$FORCE" = true ]; then + echo "Force mode enabled - will delete existing assets" +fi # Create subdirectories for each model type -mkdir -p ../asr ../vad ../tts ../speakers ../enhancement ../kws +mkdir -p asr vad tts speakers enhancement kws + +# Function to check if a directory exists and has content +check_dir_not_empty() { + local dir="$1" + if [ -d "$dir" ] && [ "$(ls -A "$dir" 2>/dev/null)" ]; then + return 0 # Directory exists and not empty + else + return 1 # Directory doesn't exist or is empty + fi +} -echo "1. Setting up ASR Models (Speech Recognition)..." +# Create a tmp directory for downloads +mkdir -p tmp +cd tmp # Download ASR models -wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2 -tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2 -rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2 - -# Rename for compatibility -mv sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx ../asr/encoder.onnx -mv sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx ../asr/decoder.onnx -mv sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.int8.onnx ../asr/joiner.onnx -mv sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt ../asr/tokens.txt -rm -rf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/ - -echo "2. Setting up VAD Models (Voice Activity Detection)..." +echo "1. Setting up ASR Models (Speech Recognition)..." +if [ "$FORCE" = true ] || ! 
check_dir_not_empty "../asr"; then + # Clean up if force is enabled + if [ "$FORCE" = true ] && [ -d "../asr" ]; then + rm -rf "../asr" + mkdir -p "../asr" + fi + + # Download and extract ASR models + wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2 + tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2 + rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2 + + # Rename for compatibility + mv sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx ../asr/encoder.onnx + mv sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx ../asr/decoder.onnx + mv sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.int8.onnx ../asr/joiner.onnx + mv sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt ../asr/tokens.txt + rm -rf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/ + echo "ASR models downloaded and set up." +else + echo "ASR models already exist. Skipping download. Use --force to re-download." +fi # Download VAD model -wget -q -O ../vad/silero_vad.onnx https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx - -echo "3. Setting up TTS Models (Text-to-Speech)..." +echo "2. Setting up VAD Models (Voice Activity Detection)..." +if [ "$FORCE" = true ] || ! [ -f "../vad/silero_vad.onnx" ]; then + # Clean up if force is enabled + if [ "$FORCE" = true ] && [ -d "../vad" ]; then + rm -rf "../vad" + mkdir -p "../vad" + fi + + wget -q -O ../vad/silero_vad.onnx https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx + echo "VAD model downloaded and set up." +else + echo "VAD model already exists. Skipping download. Use --force to re-download." +fi # Download TTS models -wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2 -tar xvf vits-piper-en_US-amy-low.tar.bz2 -rm vits-piper-en_US-amy-low.tar.bz2 - -mv vits-piper-en_US-amy-low/en_US-amy-low.onnx ../tts/model.onnx -mv vits-piper-en_US-amy-low/tokens.txt ../tts/tokens.txt -# Create a zip archive of the espeak-ng-data directory for efficient loading in WASM -mv vits-piper-en_US-amy-low/espeak-ng-data ../tts/ - -# Create zip archive of espeak-ng-data -echo "Creating zip archive of espeak-ng-data..." -cd ../tts -zip -r espeak-ng-data.zip espeak-ng-data/ -cd ../../tmp - -rm -rf vits-piper-en_US-amy-low/ +echo "3. Setting up TTS Models (Text-to-Speech)..." +if [ "$FORCE" = true ] || ! check_dir_not_empty "../tts"; then + # Clean up if force is enabled + if [ "$FORCE" = true ] && [ -d "../tts" ]; then + rm -rf "../tts" + mkdir -p "../tts" + fi + + wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2 + tar xvf vits-piper-en_US-amy-low.tar.bz2 + rm vits-piper-en_US-amy-low.tar.bz2 -echo "4. Setting up Speaker Diarization Models..." + # Move required files to TTS directory + mv vits-piper-en_US-amy-low/en_US-amy-low.onnx ../tts/model.onnx + mv vits-piper-en_US-amy-low/tokens.txt ../tts/tokens.txt + + # Handle espeak-ng-data directory safely + if [ -d "../tts/espeak-ng-data" ] && [ "$FORCE" = false ]; then + echo "espeak-ng-data directory already exists. Skipping..." 
+ else + # Remove existing directory if it exists and we're forcing + if [ -d "../tts/espeak-ng-data" ]; then + rm -rf "../tts/espeak-ng-data" + fi + mv vits-piper-en_US-amy-low/espeak-ng-data ../tts/ + fi + + # Create zip archive of espeak-ng-data if needed + if [ ! -f "../tts/espeak-ng-data.zip" ] || [ "$FORCE" = true ]; then + echo "Creating zip archive of espeak-ng-data..." + cd ../tts + # Remove existing zip if force is enabled + if [ -f "espeak-ng-data.zip" ] && [ "$FORCE" = true ]; then + rm espeak-ng-data.zip + fi + zip -r espeak-ng-data.zip espeak-ng-data/ + cd ../tmp + else + echo "espeak-ng-data.zip already exists. Skipping..." + fi + + rm -rf vits-piper-en_US-amy-low/ + echo "TTS models downloaded and set up." +else + echo "TTS models already exist. Skipping download. Use --force to re-download." +fi # Download speaker diarization models -wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2 -tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2 -rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2 -mv sherpa-onnx-pyannote-segmentation-3-0/model.onnx ../speakers/segmentation.onnx -rm -rf sherpa-onnx-pyannote-segmentation-3-0 - -wget -q -O ../speakers/embedding.onnx https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx - -echo "5. Setting up Speech Enhancement Models..." +echo "4. Setting up Speaker Diarization Models..." +if [ "$FORCE" = true ] || ! check_dir_not_empty "../speakers"; then + # Clean up if force is enabled + if [ "$FORCE" = true ] && [ -d "../speakers" ]; then + rm -rf "../speakers" + mkdir -p "../speakers" + fi + + # Download segmentation model + if [ "$FORCE" = true ] || ! [ -f "../speakers/segmentation.onnx" ]; then + wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2 + tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2 + rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2 + mv sherpa-onnx-pyannote-segmentation-3-0/model.onnx ../speakers/segmentation.onnx + rm -rf sherpa-onnx-pyannote-segmentation-3-0 + fi + + # Download embedding model + if [ "$FORCE" = true ] || ! [ -f "../speakers/embedding.onnx" ]; then + wget -q -O ../speakers/embedding.onnx https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx + fi + echo "Speaker diarization models downloaded and set up." +else + echo "Speaker diarization models already exist. Skipping download. Use --force to re-download." +fi # Download speech enhancement model -wget -q -O ../enhancement/gtcrn.onnx https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/gtcrn_simple.onnx - -echo "6. Setting up Keyword Spotting Models..." +echo "5. Setting up Speech Enhancement Models..." +if [ "$FORCE" = true ] || ! [ -f "../enhancement/gtcrn.onnx" ]; then + # Clean up if force is enabled + if [ "$FORCE" = true ] && [ -d "../enhancement" ]; then + rm -rf "../enhancement" + mkdir -p "../enhancement" + fi + + wget -q -O ../enhancement/gtcrn.onnx https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/gtcrn_simple.onnx + echo "Speech enhancement model downloaded and set up." +else + echo "Speech enhancement model already exists. Skipping download. Use --force to re-download." 
+fi # Download keyword spotting models -wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/kws-models/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2 -tar xvf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2 -rm sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2 - -mv sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/encoder-epoch-12-avg-2-chunk-16-left-64.onnx ../kws/encoder.onnx -mv sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/decoder-epoch-12-avg-2-chunk-16-left-64.onnx ../kws/decoder.onnx -mv sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/joiner-epoch-12-avg-2-chunk-16-left-64.onnx ../kws/joiner.onnx -mv sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/tokens.txt ../kws/tokens.txt -rm -rf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01 +echo "6. Setting up Keyword Spotting Models..." +if [ "$FORCE" = true ] || ! check_dir_not_empty "../kws"; then + # Clean up if force is enabled + if [ "$FORCE" = true ] && [ -d "../kws" ]; then + rm -rf "../kws" + mkdir -p "../kws" + fi + + wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/kws-models/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2 + tar xvf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2 + rm sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2 + + mv sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/encoder-epoch-12-avg-2-chunk-16-left-64.onnx ../kws/encoder.onnx + mv sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/decoder-epoch-12-avg-2-chunk-16-left-64.onnx ../kws/decoder.onnx + mv sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/joiner-epoch-12-avg-2-chunk-16-left-64.onnx ../kws/joiner.onnx + mv sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/tokens.txt ../kws/tokens.txt + rm -rf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01 + echo "Keyword spotting models downloaded and set up." +else + echo "Keyword spotting models already exist. Skipping download. Use --force to re-download." +fi # Clean up tmp directory cd .. 
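A minimal usage sketch of the updated script (run from `wasm/combined/assets/`, invoked through `bash` since this patch drops the executable bit):

```bash
cd /path/to/sherpa-onnx/wasm/combined/assets

# First run: downloads only the model files that are missing.
bash setup-assets.sh

# Re-download everything from scratch.
bash setup-assets.sh --force
```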
diff --git a/wasm/combined/demos/common.css b/wasm/combined/demos/common.css index 1d6a8efa33..a06a3fc5bb 100644 --- a/wasm/combined/demos/common.css +++ b/wasm/combined/demos/common.css @@ -169,3 +169,204 @@ select { .nav-menu a.active { background-color: #4285f4; } + +/* Filesystem Validation Styles */ +#fs-validation-section { + margin-top: 30px; + border-top: 1px solid #ddd; + padding-top: 20px; +} + +.inspect-button { + background-color: #009688; +} + +.inspect-button:hover { + background-color: #00796b; +} + +.filesystem-inspector { + font-family: monospace; + background-color: #f5f5f5; + border: 1px solid #ddd; + border-radius: 4px; + padding: 10px; + margin: 10px 0; + max-height: 400px; + overflow-y: auto; +} + +.fs-section { + margin-bottom: 15px; + padding: 8px; + border-bottom: 1px dashed #ccc; +} + +.fs-section:last-child { + border-bottom: none; +} + +.fs-section h3 { + margin: 0 0 10px 0; + font-size: 1.1em; + color: #333; + background-color: #e0e0e0; + padding: 5px; + border-radius: 3px; +} + +.path-info { + margin: 8px 0; + padding: 5px 0 5px 10px; + border-left: 3px solid #4285f4; +} + +.error { + color: #cc0000; + font-weight: bold; + padding: 5px; + background-color: #ffeeee; + border-radius: 3px; +} + +.warning { + color: #cc7700; + padding: 5px; + background-color: #fff8e1; + border-radius: 3px; +} + +.filesystem-inspector ul { + margin: 5px 0; + padding-left: 20px; + list-style-type: square; +} + +.filesystem-inspector li { + padding: 2px 0; +} + +.info-box { + background-color: #e8f5e9; + border-left: 4px solid #4caf50; + padding: 12px 15px; + margin-bottom: 20px; + border-radius: 4px; +} + +.info-box p { + margin: 5px 0; + color: #2e7d32; +} + +.info-box code { + background-color: rgba(0, 0, 0, 0.06); + border-radius: 3px; + padding: 2px 4px; + font-family: monospace; +} + +.debug-button { + background-color: #673ab7; +} + +.debug-button:hover { + background-color: #512da8; +} + +#debug-console { + margin-top: 20px; + border: 1px solid #ccc; + border-radius: 4px; + padding: 10px; + background-color: #f8f8f8; +} + +.debug-log { + height: 300px; + overflow-y: auto; + font-family: monospace; + background-color: #1e1e1e; + color: #d4d4d4; + padding: 10px; + border-radius: 4px; + margin: 10px 0; + white-space: pre-wrap; + word-wrap: break-word; +} + +.log-entry { + margin-bottom: 5px; + border-bottom: 1px solid #333; + padding-bottom: 5px; +} + +.log-entry.error { + color: #f44336; + font-weight: bold; +} + +/* Tabs styling */ +.tabs { + display: flex; + margin-bottom: 20px; + border-bottom: 1px solid #ccc; +} + +.tab-button { + padding: 10px 20px; + background-color: #f0f0f0; + border: none; + border-radius: 4px 4px 0 0; + cursor: pointer; + margin-right: 5px; + color: #333; + font-weight: 500; +} + +.tab-button:hover { + background-color: #e0e0e0; +} + +.tab-button.active { + background-color: #4285f4; + color: white; +} + +.tab-content { + display: none; + padding: 15px 0; +} + +.tab-content.active { + display: block; +} + +/* Range slider styling */ +input[type="range"] { + width: 100%; + margin: 5px 0; +} + +input[type="range"] + span { + display: inline-block; + width: 40px; + text-align: center; + font-weight: bold; + color: #4285f4; +} + +/* Model parameters styling */ +.model-params { + background-color: #f9f9f9; + padding: 15px; + border-radius: 4px; + margin-bottom: 15px; +} + +#model-config h3 { + margin-top: 0; + margin-bottom: 15px; + padding-bottom: 8px; + border-bottom: 1px solid #e0e0e0; +} diff --git a/wasm/combined/demos/common.js 
b/wasm/combined/demos/common.js index c22dc35a9e..fb53619861 100644 --- a/wasm/combined/demos/common.js +++ b/wasm/combined/demos/common.js @@ -77,3 +77,198 @@ function createUnloadButton(container, modelType, resource, statusElem) { container.appendChild(button); return button; } + +// Validate WASM filesystem assets +function validateAssets(targetElement, moduleTypes = ['vad', 'tts', 'asr', 'kws']) { + if (!window.SherpaOnnx || !window.SherpaOnnx.FileSystem) { + targetElement.innerHTML = '
SherpaOnnx FileSystem not available
'; + return false; + } + + const fs = window.SherpaOnnx.FileSystem; + const container = document.createElement('div'); + container.className = 'filesystem-inspector'; + + // Check root directories + const rootSection = document.createElement('div'); + rootSection.className = 'fs-section'; + rootSection.innerHTML = '

Root Directory

'; + + try { + const rootFiles = fs.listFiles('/'); + + if (rootFiles.length === 0) { + rootSection.innerHTML += '
No files found in root directory
'; + } else { + const rootList = document.createElement('ul'); + rootFiles.forEach(file => { + const item = document.createElement('li'); + item.textContent = file; + rootList.appendChild(item); + }); + rootSection.appendChild(rootList); + } + } catch (e) { + rootSection.innerHTML += `
Error listing root directory: ${e.message}
`; + } + + container.appendChild(rootSection); + + // Check each module type + moduleTypes.forEach(moduleType => { + const section = document.createElement('div'); + section.className = 'fs-section'; + section.innerHTML = `

${moduleType.toUpperCase()} Assets

`; + + // Check various possible asset paths - updated to include sherpa_assets + const assetPaths = [ + `/sherpa_assets/${moduleType}`, // Added - This is the correct path per CMakeLists.txt + `/assets/${moduleType}`, + `/assets/${moduleType}/models`, + `/preloaded/${moduleType}` + ]; + + let assetsFound = false; + + assetPaths.forEach(assetPath => { + if (fs.fileExists(assetPath)) { + assetsFound = true; + const files = fs.listFiles(assetPath); + + const pathDiv = document.createElement('div'); + pathDiv.className = 'path-info'; + pathDiv.innerHTML = `${assetPath}:`; + + if (files.length === 0) { + pathDiv.innerHTML += ' Directory exists but is empty'; + } else { + const fileList = document.createElement('ul'); + files.forEach(file => { + const item = document.createElement('li'); + item.textContent = file; + fileList.appendChild(item); + }); + pathDiv.appendChild(fileList); + } + + section.appendChild(pathDiv); + } + }); + + if (!assetsFound) { + section.innerHTML += `
No ${moduleType} asset directories found
`; + } + + container.appendChild(section); + }); + + // Also check if sherpa_assets directory exists + try { + if (fs.fileExists('/sherpa_assets')) { + const sherpaSection = document.createElement('div'); + sherpaSection.className = 'fs-section'; + sherpaSection.innerHTML = '

Sherpa Assets Directory

'; + + const sherpaFiles = fs.listFiles('/sherpa_assets'); + if (sherpaFiles.length === 0) { + sherpaSection.innerHTML += '
Directory exists but is empty
'; + } else { + const pathDiv = document.createElement('div'); + pathDiv.className = 'path-info'; + pathDiv.innerHTML = '/sherpa_assets:'; + + const fileList = document.createElement('ul'); + sherpaFiles.forEach(file => { + const item = document.createElement('li'); + item.textContent = file; + + // Recursively show contents for each subdirectory + if (fs.fileExists(`/sherpa_assets/${file}`)) { + try { + const subFiles = fs.listFiles(`/sherpa_assets/${file}`); + if (subFiles.length > 0) { + const subList = document.createElement('ul'); + subFiles.forEach(subFile => { + const subItem = document.createElement('li'); + subItem.textContent = subFile; + subList.appendChild(subItem); + }); + item.appendChild(subList); + } + } catch (e) { + // Ignore errors for subdir listing + } + } + + fileList.appendChild(item); + }); + + pathDiv.appendChild(fileList); + sherpaSection.appendChild(pathDiv); + } + + container.appendChild(sherpaSection); + } + } catch (e) { + // Ignore errors if sherpa_assets doesn't exist + } + + // Clear and update target element + targetElement.innerHTML = ''; + targetElement.appendChild(container); + + // Add some basic styling + const style = document.createElement('style'); + style.textContent = ` + .filesystem-inspector { + font-family: monospace; + background-color: #f5f5f5; + border: 1px solid #ddd; + border-radius: 4px; + padding: 10px; + margin: 10px 0; + max-height: 400px; + overflow-y: auto; + } + .fs-section { + margin-bottom: 15px; + } + .fs-section h3 { + margin: 0 0 5px 0; + font-size: 1em; + color: #333; + } + .path-info { + margin: 5px 0; + padding-left: 10px; + } + .error { + color: #cc0000; + font-weight: bold; + } + .warning { + color: #cc7700; + } + ul { + margin: 5px 0; + padding-left: 20px; + } + `; + targetElement.appendChild(style); + + return true; +} + +// Create inspect assets button +function createInspectAssetsButton(container, targetElement) { + const button = document.createElement('button'); + button.textContent = 'Inspect Filesystem Assets'; + button.classList.add('inspect-button'); + + button.addEventListener('click', function() { + validateAssets(targetElement); + }); + + container.appendChild(button); + return button; +} diff --git a/wasm/combined/demos/index.html b/wasm/combined/demos/index.html index 6bb709f620..044250c2b9 100644 --- a/wasm/combined/demos/index.html +++ b/wasm/combined/demos/index.html @@ -9,7 +9,10 @@ - + + + +

Sherpa-ONNX Demos

@@ -37,5 +40,37 @@

Memory Management

  • Prevents memory leaks in long-running applications
  • + +
    +

    WASM Filesystem Validation

    +

    Use this tool to verify that assets are correctly loaded in the WASM virtual filesystem.

    + +
    + +
    + +
    +
    + + diff --git a/wasm/combined/demos/tts.html b/wasm/combined/demos/tts.html index 60909fd80b..01dab9660f 100644 --- a/wasm/combined/demos/tts.html +++ b/wasm/combined/demos/tts.html @@ -31,40 +31,92 @@

    Sherpa-ONNX TTS Demo

    Text-to-Speech (TTS)

    -
    +
    +

    This demo uses either the preloaded TTS models from the /sherpa_assets/tts directory in the WASM filesystem or a custom model you upload.

    +

    Use the "Inspect Filesystem Assets" button below to verify available models.

    +
    + +
    + + +
    + +
    +

    Use the preloaded VITS model included with Sherpa-ONNX.

    +
    + +
    +
    + + + The archive should contain the model file (.onnx), tokens.txt, and other required files. +
    +
    + +
    + +
    +

    WASM Filesystem Validation

    +

    Use this tool to verify that TTS assets are correctly loaded in the WASM virtual filesystem.

    + +
    + +
    + +
    - - - - - - - -

    Sherpa-ONNX Combined Demo

    -
    Loading WebAssembly module...
    - -
    -

    Modular Design

    -

    This demo uses a modular architecture. You can load modules individually:

    -
    -<script src="sherpa-onnx-core.js"></script>
    -<script src="sherpa-onnx-vad.js"></script>
    -    
    -

    Or load all modules at once:

    -
    -<script src="sherpa-onnx-combined.js"></script>
    -    
    -
    - -
    -

    Voice Activity Detection (VAD)

    - -
    -

    Model Configuration

    -
    - - -
    -
    - - - (The directory where model files will be stored) -
    -
    - - -
    -
    - - -
    -
    - -
    - - - -
    -
    Status: Not active
    -
    - - -
    -

    Automatic Speech Recognition (ASR)

    - -
    -

    Model Configuration

    -
    - - -
    -
    - - - (The directory where model files will be stored) -
    -
    - - -
    -
    - - -
    -
    - - -
    -
    - - -
    -
    - - -
    -
    - - -
    -
    - -
    - - - -
    -
    Status: Not active
    -
    -
    - - -
    -

    Text-to-Speech (TTS)

    - -
    -

    Model Configuration

    -
    - - -
    -
    - - - (The directory where model files will be stored) -
    -
    - - -
    -
    - - -
    -
    - - - (Required for VITS models) -
    -
    - - -
    -
    - - -
    -
    - -
    - - -
    - -
    - - -
    -
    Status: Not active
    -
    -
    - - -
    -

    Keyword Spotting (KWS)

    - -
    -

    Model Configuration

    -
    - - - (The directory where model files will be stored) -
    -
    - - -
    -
    - - -
    -
    - - -
    -
    - - -
    -
    - - - Format: Phonetic tokens with spaces between letters, followed by @ and the keyword label -
    -
    - - -
    -
    - - -
    -
    - -
    - - - -
    -
    Status: Not active
    -
    -
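The JavaScript changes that follow (`sherpa-onnx-asr.js` and `sherpa-onnx-core.js`) switch to checking the preloaded `/sherpa_assets` tree before falling back to downloads. A minimal sketch of how a page could use the same helpers once the combined module and `sherpa-onnx-core.js` are loaded (the fallback URL is illustrative only):

```js
// Prefer ASR models preloaded under /sherpa_assets, otherwise fetch into the WASM FS.
async function pickAsrModelDir() {
  const fs = SherpaOnnx.FileSystem;
  const preloaded = SherpaOnnx.Config.assetPaths.asr;  // '/sherpa_assets/asr'
  if (fs.fileExists(preloaded) && fs.listFiles(preloaded).includes('tokens.txt')) {
    return preloaded;  // use the bundled model files as-is
  }
  // Fallback: download one file by URL into a writable directory.
  const result = await fs.loadFile('assets/asr/tokens.txt', 'asr-models/tokens.txt', true);
  return result.success ? 'asr-models' : null;
}
```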
    - - - - \ No newline at end of file diff --git a/wasm/combined/sherpa-onnx-asr.js b/wasm/combined/sherpa-onnx-asr.js index 04c0fdafc1..842a23c72b 100644 --- a/wasm/combined/sherpa-onnx-asr.js +++ b/wasm/combined/sherpa-onnx-asr.js @@ -26,13 +26,77 @@ * @returns {Promise} - Information about the loaded model */ loadModel: async function(modelConfig) { + const debug = modelConfig.debug || false; const modelDir = modelConfig.modelDir || 'asr-models'; + // First check for preloaded assets + if (!modelConfig.forceDownload) { + const assetPath = SherpaOnnx.Config.assetPaths.asr; + if (debug) console.log(`Checking for preloaded ASR assets at ${assetPath}`); + + if (SherpaOnnx.FileSystem.fileExists(assetPath)) { + const files = SherpaOnnx.FileSystem.listFiles(assetPath); + if (debug) console.log(`Found preloaded files: ${files.join(', ')}`); + + // Check for model files based on type + if (modelConfig.type === 'transducer' || !modelConfig.type) { + if (files.includes('encoder.onnx') && + files.includes('decoder.onnx') && + files.includes('joiner.onnx') && + files.includes('tokens.txt')) { + if (debug) console.log("Using preloaded transducer model"); + return { + modelDir: assetPath, + type: 'transducer', + actualPaths: { + encoder: `${assetPath}/encoder.onnx`, + decoder: `${assetPath}/decoder.onnx`, + joiner: `${assetPath}/joiner.onnx`, + tokens: `${assetPath}/tokens.txt` + }, + preloaded: true + }; + } + } else if (modelConfig.type === 'ctc') { + if (files.includes('model.onnx') && files.includes('tokens.txt')) { + if (debug) console.log("Using preloaded CTC model"); + return { + modelDir: assetPath, + type: 'ctc', + actualPaths: { + model: `${assetPath}/model.onnx`, + tokens: `${assetPath}/tokens.txt` + }, + preloaded: true + }; + } + } else if (modelConfig.type === 'paraformer') { + if (files.includes('encoder.onnx') && + files.includes('decoder.onnx') && + files.includes('tokens.txt')) { + if (debug) console.log("Using preloaded paraformer model"); + return { + modelDir: assetPath, + type: 'paraformer', + actualPaths: { + encoder: `${assetPath}/encoder.onnx`, + decoder: `${assetPath}/decoder.onnx`, + tokens: `${assetPath}/tokens.txt` + }, + preloaded: true + }; + } + } + + if (debug) console.log("Preloaded ASR assets found but missing required files for model type"); + } + } + // Create directory if it doesn't exist try { - global.Module.FS.mkdir(modelDir, 0o777); + SherpaOnnx.FileSystem.ensureDirectory(modelDir); } catch(e) { - if (e.code !== 'EEXIST') throw e; + console.error(`Failed to create directory ${modelDir}:`, e); } // Collection for actual file paths @@ -41,10 +105,10 @@ // Load model files based on type if (modelConfig.type === 'transducer') { const results = await Promise.all([ - SherpaOnnx.FileSystem.safeLoadFile(modelConfig.encoder || 'assets/asr/encoder.onnx', `${modelDir}/encoder.onnx`, modelConfig.debug), - SherpaOnnx.FileSystem.safeLoadFile(modelConfig.decoder || 'assets/asr/decoder.onnx', `${modelDir}/decoder.onnx`, modelConfig.debug), - SherpaOnnx.FileSystem.safeLoadFile(modelConfig.joiner || 'assets/asr/joiner.onnx', `${modelDir}/joiner.onnx`, modelConfig.debug), - SherpaOnnx.FileSystem.safeLoadFile(modelConfig.tokens || 'assets/asr/tokens.txt', `${modelDir}/tokens.txt`, modelConfig.debug) + SherpaOnnx.FileSystem.loadFile(modelConfig.encoder || 'assets/asr/encoder.onnx', `${modelDir}/encoder.onnx`, debug), + SherpaOnnx.FileSystem.loadFile(modelConfig.decoder || 'assets/asr/decoder.onnx', `${modelDir}/decoder.onnx`, debug), + 
SherpaOnnx.FileSystem.loadFile(modelConfig.joiner || 'assets/asr/joiner.onnx', `${modelDir}/joiner.onnx`, debug), + SherpaOnnx.FileSystem.loadFile(modelConfig.tokens || 'assets/asr/tokens.txt', `${modelDir}/tokens.txt`, debug) ]); // Collect actual paths @@ -55,9 +119,9 @@ } else if (modelConfig.type === 'paraformer') { const results = await Promise.all([ - SherpaOnnx.FileSystem.safeLoadFile(modelConfig.encoder || 'assets/asr/encoder.onnx', `${modelDir}/encoder.onnx`, modelConfig.debug), - SherpaOnnx.FileSystem.safeLoadFile(modelConfig.decoder || 'assets/asr/decoder.onnx', `${modelDir}/decoder.onnx`, modelConfig.debug), - SherpaOnnx.FileSystem.safeLoadFile(modelConfig.tokens || 'assets/asr/tokens.txt', `${modelDir}/tokens.txt`, modelConfig.debug) + SherpaOnnx.FileSystem.loadFile(modelConfig.encoder || 'assets/asr/encoder.onnx', `${modelDir}/encoder.onnx`, debug), + SherpaOnnx.FileSystem.loadFile(modelConfig.decoder || 'assets/asr/decoder.onnx', `${modelDir}/decoder.onnx`, debug), + SherpaOnnx.FileSystem.loadFile(modelConfig.tokens || 'assets/asr/tokens.txt', `${modelDir}/tokens.txt`, debug) ]); // Collect actual paths @@ -67,8 +131,8 @@ } else if (modelConfig.type === 'ctc') { const results = await Promise.all([ - SherpaOnnx.FileSystem.safeLoadFile(modelConfig.model || 'assets/asr/model.onnx', `${modelDir}/model.onnx`, modelConfig.debug), - SherpaOnnx.FileSystem.safeLoadFile(modelConfig.tokens || 'assets/asr/tokens.txt', `${modelDir}/tokens.txt`, modelConfig.debug) + SherpaOnnx.FileSystem.loadFile(modelConfig.model || 'assets/asr/model.onnx', `${modelDir}/model.onnx`, debug), + SherpaOnnx.FileSystem.loadFile(modelConfig.tokens || 'assets/asr/tokens.txt', `${modelDir}/tokens.txt`, debug) ]); // Collect actual paths @@ -76,19 +140,10 @@ actualPaths.tokens = results[1].path; } - // Get base directory from the tokens path - let effectiveModelDir = modelDir; - if (actualPaths.tokens) { - const lastSlash = actualPaths.tokens.lastIndexOf('/'); - if (lastSlash > 0) { - effectiveModelDir = actualPaths.tokens.substring(0, lastSlash); - } - } - return { - modelDir: effectiveModelDir, + modelDir, type: modelConfig.type, - actualPaths: actualPaths + actualPaths }; }, diff --git a/wasm/combined/sherpa-onnx-core.js b/wasm/combined/sherpa-onnx-core.js index 13386234d7..cef7038aef 100644 --- a/wasm/combined/sherpa-onnx-core.js +++ b/wasm/combined/sherpa-onnx-core.js @@ -5,6 +5,9 @@ */ (function(global) { + // Create the SherpaOnnx namespace if it doesn't exist + global.SherpaOnnx = global.SherpaOnnx || {}; + // Create main namespace const SherpaOnnx = {}; @@ -18,6 +21,34 @@ }; } + // Configuration for SherpaOnnx + SherpaOnnx.Config = { + // Paths to preloaded assets + assetPaths: { + vad: '/sherpa_assets/vad', + tts: '/sherpa_assets/tts', + asr: '/sherpa_assets/asr', + kws: '/sherpa_assets/kws', + speakers: '/sherpa_assets/speakers', + enhancement: '/sherpa_assets/enhancement' + }, + + // Allow users to override the location of the data file + setDataFileLocation: function(location) { + if (global.Module) { + const originalLocateFile = global.Module.locateFile; + global.Module.locateFile = function(path) { + if (path.endsWith('.data')) { + return location; + } + return typeof originalLocateFile === 'function' + ? 
originalLocateFile(path) + : path; + }; + } + } + }; + // Common utilities for memory management and shared functionality SherpaOnnx.Utils = { /** @@ -59,191 +90,13 @@ // File system utilities for model loading SherpaOnnx.FileSystem = { /** - * Safely create a directory in the WASM filesystem - * Handles cases where the path already exists as a file - * @param {string} dirPath - Path of the directory to create - * @param {boolean} debug - Whether to output debug logs - * @returns {boolean|Object} - True if successful or object with alternative path - */ - safeCreateDirectory: function(dirPath, debug = false) { - try { - // Skip empty paths - if (!dirPath || dirPath === '') { - if (debug) console.log("Empty directory path, skipping"); - return true; - } - - if (debug) console.log(`Creating directory: ${dirPath}`); - - // Generate a unique directory path to avoid conflicts - const timestamp = Date.now(); - const random = Math.floor(Math.random() * 100000); - const uniquePath = `temp_${timestamp}_${random}`; - - try { - // Create the temporary directory first - if (debug) console.log(`Creating unique temporary directory: ${uniquePath}`); - global.Module.FS.mkdir(uniquePath, 0o777); - - // Then create our target directory inside the unique temp directory - const safePath = `${uniquePath}/${dirPath}`; - if (debug) console.log(`Creating directory in safe location: ${safePath}`); - - // Create all directories in the path - let currentPath = uniquePath; - const parts = dirPath.split('/'); - - for (const part of parts) { - if (!part) continue; - currentPath += '/' + part; - try { - global.Module.FS.mkdir(currentPath, 0o777); - if (debug) console.log(`Created directory component: ${currentPath}`); - } catch (mkErr) { - if (mkErr.errno !== 17) { // Not EEXIST - console.error(`Error creating directory component ${currentPath}:`, mkErr); - throw mkErr; - } - } - } - - if (debug) console.log(`Successfully created nested directory at ${safePath}`); - - // Return the full alternative path - return { - success: true, - altPath: safePath - }; - } catch (nestErr) { - console.error(`Failed to create nested directory structure:`, nestErr); - - // Try a different approach - directly creating a unique directory - const directUniquePath = `${dirPath}_${timestamp}_${random}`; - try { - if (debug) console.log(`Trying direct unique path creation: ${directUniquePath}`); - global.Module.FS.mkdir(directUniquePath, 0o777); - if (debug) console.log(`Created unique directory: ${directUniquePath}`); - - return { - success: true, - altPath: directUniquePath - }; - } catch (directErr) { - console.error(`Failed to create directory with unique name:`, directErr); - - // Last attempt - try creating the original directory - try { - global.Module.FS.mkdir(dirPath, 0o777); - if (debug) console.log(`Successfully created original directory: ${dirPath}`); - return true; - } catch (origErr) { - // If it exists and is a directory, that's fine - if (origErr.errno === 17) { // EEXIST - try { - const stat = global.Module.FS.stat(dirPath); - if (stat.isDir) { - if (debug) console.log(`Directory ${dirPath} already exists`); - return true; - } - } catch (statErr) { - console.error(`Error checking if ${dirPath} is a directory:`, statErr); - } - } - - console.error(`All attempts to create directory failed:`, origErr); - throw origErr; - } - } - } - } catch (error) { - console.error(`Failed to create directory ${dirPath}:`, error); - return false; - } - }, - - /** - * Safely load a file from a URL into the WASM file system - * Takes care of 
creating parent directories and verifying the file was written - * @param {string} url - URL to fetch the file from - * @param {string} localPath - Path where to save the file in WASM filesystem - * @param {boolean} debug - Whether to output debug logs - * @returns {Promise} - True if successful, false otherwise - */ - safeLoadFile: async function(url, localPath, debug = false) { - try { - if (debug) console.log(`Loading file from ${url} to ${localPath}`); - - // Get the directory - const lastSlash = localPath.lastIndexOf('/'); - let targetPath = localPath; - - if (lastSlash > 0) { - const dirPath = localPath.substring(0, lastSlash); - if (debug) console.log(`Ensuring directory exists: ${dirPath}`); - - const dirResult = this.safeCreateDirectory(dirPath, debug); - - // Check if we need to use an alternate path - if (dirResult && typeof dirResult === 'object' && dirResult.altPath) { - // Adjust the target path to use the alternate directory path - targetPath = `${dirResult.altPath}/${localPath.substring(lastSlash + 1)}`; - if (debug) console.log(`Using alternate target path: ${targetPath}`); - } else if (!dirResult) { - throw new Error(`Failed to create directory ${dirPath}`); - } - } - - // Fetch the file - if (debug) console.log(`Fetching ${url}`); - const response = await fetch(url); - - if (!response.ok) { - throw new Error(`Failed to fetch ${url}: ${response.status} ${response.statusText}`); - } - - const buffer = await response.arrayBuffer(); - - if (!buffer || buffer.byteLength === 0) { - throw new Error(`Empty response from ${url}`); - } - - if (debug) console.log(`Downloaded ${url}, size: ${buffer.byteLength} bytes`); - - // Write the file - try { - global.Module.FS.writeFile(targetPath, new Uint8Array(buffer)); - - // Verify the file was written - try { - const stat = global.Module.FS.stat(targetPath); - if (debug) console.log(`File written to ${targetPath}, size: ${stat.size} bytes`); - } catch (statErr) { - throw new Error(`Failed to verify file was written: ${statErr.message}`); - } - - // Return both the success status and the actual path used - return { - success: true, - path: targetPath - }; - } catch (writeErr) { - console.error(`Error writing file to ${targetPath}:`, writeErr); - throw writeErr; - } - } catch (error) { - console.error(`Error loading ${url}:`, error); - return false; - } - }, - - /** - * Check if a file exists in the WASM filesystem + * Check if a file exists in the filesystem * @param {string} path - Path to check - * @returns {boolean} - True if file exists, false otherwise + * @returns {boolean} - Whether the file exists */ fileExists: function(path) { try { - global.Module.FS.stat(path); + global.Module.FS.lookupPath(path); return true; } catch (e) { return false; @@ -251,121 +104,120 @@ }, /** - * Check if path exists and is a directory - * @param {string} path - Path to check - * @returns {boolean} - True if path exists and is a directory, false otherwise + * Get a valid asset path for a given module type and filename + * @param {string} moduleType - Type of module (vad, tts, kws, asr) + * @param {string} filename - Name of the file to look for + * @returns {string} - The first valid path where the asset exists */ - isDirectory: function(path) { - try { - const stat = global.Module.FS.stat(path); - return stat.isDir; - } catch (e) { - return false; + getAssetPath: function(moduleType, filename) { + // Check in the preloaded assets directory structure + const paths = [ + `/assets/${moduleType}/${filename}`, + 
`/assets/${moduleType}/models/${filename}`, + `/preloaded/${moduleType}/${filename}` + ]; + + // Return first path that exists + for (const path of paths) { + if (this.fileExists(path)) { + return path; + } } + + // Default fallback + return `/assets/${moduleType}/${filename}`; }, /** - * Remove a file or directory from the WASM filesystem - * @param {string} path - Path to remove - * @param {boolean} debug - Whether to output debug logs - * @returns {boolean} - True if successful, false otherwise + * List files in a directory in the filesystem + * @param {string} dirPath - Directory path + * @returns {Array} List of files */ - removePath: function(path, debug = false) { + listFiles: function(dirPath) { try { - if (!this.fileExists(path)) { - if (debug) console.log(`Path ${path} doesn't exist, nothing to remove`); - return true; - } - - if (this.isDirectory(path)) { - if (debug) console.log(`Removing directory ${path}`); - global.Module.FS.rmdir(path); - } else { - if (debug) console.log(`Removing file ${path}`); - global.Module.FS.unlink(path); - } - - return true; - } catch (error) { - console.error(`Error removing path ${path}:`, error); - return false; + if (!global.Module || !global.Module.FS) return []; + return global.Module.FS.readdir(dirPath).filter( + name => name !== '.' && name !== '..' + ); + } catch (e) { + console.warn(`Error listing files in ${dirPath}: ${e.message}`); + return []; } }, - // Backward compatibility aliases - DIRECTLY COPY FUNCTIONALITY to avoid any reference issues - ensureDirectory: function(dirPath) { - console.log(`Using legacy ensureDirectory on path: ${dirPath}`); + /** + * Safely load a file with error handling and fallback options + * @param {string} path - Path to load + * @param {string} moduleType - Type of module (vad, tts, kws, asr) for alternative paths + * @param {object} options - Options for loading + * @param {boolean} [options.tryAlternativePaths=true] - Whether to try alternative paths if first load fails + * @param {any} [options.defaultValue=null] - Default value to return if loading fails + * @returns {object} - Result object with success flag, data, and error message + */ + safeLoadFile: function(path, moduleType, options = {}) { + const { tryAlternativePaths = true, defaultValue = null } = options; + let result = { + success: false, + data: defaultValue, + error: null + }; try { - // Skip empty paths - if (!dirPath || dirPath === '') { - console.log("Empty directory path, skipping"); - return true; + // First try loading from the original path + if (this.fileExists(path)) { + const data = global.Module.FS.readFile(path); + result.success = true; + result.data = data; + console.log(`Successfully loaded file from: ${path}`); + return result; } - console.log(`Creating directory: ${dirPath}`); - - // First check if the path exists - try { - const info = global.Module.FS.analyzePath(dirPath); - if (info.exists) { - const stat = global.Module.FS.stat(dirPath); - if (stat.isDir) { - console.log(`Directory ${dirPath} already exists`); - return true; - } else { - // It exists as a file, remove it first - console.log(`Path ${dirPath} exists as a file, removing it`); - global.Module.FS.unlink(dirPath); - // Then create as directory - global.Module.FS.mkdir(dirPath, 0o777); - console.log(`Successfully created directory at ${dirPath}`); - return true; - } - } else { - // Path doesn't exist, create it - global.Module.FS.mkdir(dirPath, 0o777); - console.log(`Created new directory at ${dirPath}`); - return true; - } - } catch (e) { - if (e.errno === 
44 || e.errno === 2) { // ENOENT - path doesn't exist - // Create the directory - global.Module.FS.mkdir(dirPath, 0o777); - console.log(`Created directory at ${dirPath}`); - return true; - } else if (e.errno === 17) { // EEXIST - already exists - console.log(`Directory ${dirPath} already exists`); - return true; - } else { - console.error(`Error creating directory ${dirPath}:`, e); - throw e; + // If the file doesn't exist at the original path and we should try alternatives + if (tryAlternativePaths && moduleType) { + // Extract filename from path + const filename = path.split('/').pop(); + const alternativePath = this.getAssetPath(moduleType, filename); + + if (this.fileExists(alternativePath) && alternativePath !== path) { + const data = global.Module.FS.readFile(alternativePath); + result.success = true; + result.data = data; + console.log(`Loaded file from alternative path: ${alternativePath}`); + return result; } } + + // If we get here, we couldn't find the file anywhere + result.error = `File not found at path: ${path} or any alternative locations`; + console.warn(result.error); + return result; } catch (error) { - console.error(`Failed to create directory ${dirPath}:`, error); - return false; + result.error = `Error loading file: ${error.message || error}`; + console.error(result.error); + return result; } }, - loadFile: async function(url, localPath) { - console.log(`DEBUG: DIRECT loadFile called with url: ${url}, localPath: ${localPath}`); - + /** + * Safely load a file from a URL into the WASM file system + * @param {string} url - URL to fetch the file from + * @param {string} localPath - Path where to save the file in WASM filesystem + * @param {boolean} debug - Whether to output debug logs + * @returns {Promise} - Info about the loaded file + */ + loadFile: async function(url, localPath, debug = false) { try { - console.log(`Loading file from ${url} to ${localPath}`); + if (debug) console.log(`Loading file from ${url} to ${localPath}`); - // Get the directory + // Create parent directory if needed const lastSlash = localPath.lastIndexOf('/'); if (lastSlash > 0) { const dirPath = localPath.substring(0, lastSlash); - console.log(`Ensuring directory exists: ${dirPath}`); - - // Use the ensureDirectory function directly to avoid any reference issues this.ensureDirectory(dirPath); } // Fetch the file - console.log(`Fetching ${url}`); + if (debug) console.log(`Fetching ${url}`); const response = await fetch(url); if (!response.ok) { @@ -378,101 +230,53 @@ throw new Error(`Empty response from ${url}`); } - console.log(`Downloaded ${url}, size: ${buffer.byteLength} bytes`); + if (debug) console.log(`Downloaded ${url}, size: ${buffer.byteLength} bytes`); // Write the file - try { - global.Module.FS.writeFile(localPath, new Uint8Array(buffer)); - - // Verify the file was written - try { - const stat = global.Module.FS.stat(localPath); - console.log(`File written to ${localPath}, size: ${stat.size} bytes`); - } catch (statErr) { - throw new Error(`Failed to verify file was written: ${statErr.message}`); - } - - return true; - } catch (writeErr) { - console.error(`Error writing file to ${localPath}:`, writeErr); - throw writeErr; - } + global.Module.FS.writeFile(localPath, new Uint8Array(buffer)); + + return { + success: true, + path: localPath + }; } catch (error) { console.error(`Error loading ${url}:`, error); - return false; + return { + success: false, + error: error.message + }; } }, /** - * Create a model directory with a guaranteed unique path to avoid conflicts - * Will create a 
completely new unique directory for models - * - * @param {string} baseName - Base name for the model directory - * @param {boolean} debug - Whether to enable debug logging - * @returns {Promise} - Object with success status and path information + * Create directory and parents if needed + * @param {string} dirPath - Directory path */ - createModelDirectory: async function(baseName, debug = false) { + ensureDirectory: function(dirPath) { + if (!dirPath) return; + + // Skip if it's the root directory + if (dirPath === '/') return; + try { - if (!baseName || typeof baseName !== 'string') { - baseName = 'model-dir'; - } - - // Generate a unique path with timestamp and random ID - const timestamp = Date.now(); - const randomId = Math.floor(Math.random() * 1000000); - const uniqueDirName = `${baseName}_${timestamp}_${randomId}`; - - if (debug) console.log(`Creating unique model directory: ${uniqueDirName}`); - - try { - // Create the directory - global.Module.FS.mkdir(uniqueDirName, 0o777); - - if (debug) console.log(`Successfully created unique model directory: ${uniqueDirName}`); - - return { - success: true, - baseName: baseName, - uniquePath: uniqueDirName, - timestamp: timestamp, - randomId: randomId - }; - } catch (error) { - console.error(`Failed to create unique model directory: ${uniqueDirName}`, error); - - // Try a different random ID - const newRandomId = Math.floor(Math.random() * 1000000); - const backupDirName = `backup_${baseName}_${timestamp}_${newRandomId}`; - - if (debug) console.log(`Trying backup directory name: ${backupDirName}`); + // Check if directory exists + const stat = global.Module.FS.stat(dirPath); + if (stat.isDir) return; // Already exists + throw new Error(`Path exists but is not a directory: ${dirPath}`); + } catch (error) { + // If error is that the path doesn't exist, create it + if (error.errno === 44 || error.errno === 2 || error.message.includes('No such file or directory')) { + // Ensure parent directory exists first + const parentDir = dirPath.substring(0, dirPath.lastIndexOf('/')); + if (parentDir) this.ensureDirectory(parentDir); - try { - global.Module.FS.mkdir(backupDirName, 0o777); - - if (debug) console.log(`Successfully created backup model directory: ${backupDirName}`); - - return { - success: true, - baseName: baseName, - uniquePath: backupDirName, - timestamp: timestamp, - randomId: newRandomId, - isBackup: true - }; - } catch (backupError) { - console.error(`Failed to create backup model directory: ${backupDirName}`, backupError); - return { - success: false, - error: backupError - }; - } + // Create this directory + global.Module.FS.mkdir(dirPath); + return; } - } catch (error) { - console.error(`Error in createModelDirectory:`, error); - return { - success: false, - error: error - }; + + // For other errors, rethrow + throw error; } }, @@ -487,29 +291,8 @@ if (debug) console.log(`Extracting zip to ${targetPath}`); try { - // Force clean the target path if it exists as a file - try { - const stat = global.Module.FS.stat(targetPath); - const isFile = (stat.mode & 61440) === 32768; - if (isFile) { - if (debug) console.log(`Target path ${targetPath} exists as a FILE - removing it`); - global.Module.FS.unlink(targetPath); - } - } catch (err) { - // Path doesn't exist, which is fine - } - // Make sure the base directory exists - try { - this.mkdirp(targetPath); - if (debug) console.log(`Created base directory ${targetPath}`); - } catch (dirErr) { - console.error(`Failed to create base directory ${targetPath}: ${dirErr.message}`); - return { - 
success: false, - error: `Failed to create target directory: ${dirErr.message}` - }; - } + this.ensureDirectory(targetPath); // Load JSZip from CDN if needed if (typeof JSZip === 'undefined') { @@ -532,33 +315,19 @@ const extractedFiles = []; // First, create all directories - const directories = new Set(); for (const path in zip.files) { const file = zip.files[path]; if (file.dir) { - // Add directory path - directories.add(`${targetPath}/${path}`); + this.ensureDirectory(`${targetPath}/${path}`); } else { - // Add parent directory path for files const dirPath = path.substring(0, path.lastIndexOf('/')); if (dirPath) { - directories.add(`${targetPath}/${dirPath}`); + this.ensureDirectory(`${targetPath}/${dirPath}`); } } } - // Create directories in sorted order to ensure parents are created first - if (debug) console.log(`Creating ${directories.size} directories`); - const sortedDirs = [...directories].sort((a, b) => a.split('/').length - b.split('/').length); - for (const dir of sortedDirs) { - try { - this.mkdirp(dir); - } catch (e) { - console.warn(`Error creating directory ${dir}: ${e.message}`); - } - } - // Now extract all files for (const path in zip.files) { const file = zip.files[path]; @@ -570,12 +339,8 @@ // Extract and write the file const content = await file.async('arraybuffer'); - FS.writeFile(fullPath, new Uint8Array(content)); + global.Module.FS.writeFile(fullPath, new Uint8Array(content)); extractedFiles.push(fullPath); - - if (debug && extractedFiles.length % 50 === 0) { - console.log(`Extracted ${extractedFiles.length} files so far...`); - } } catch (fileErr) { console.error(`Error extracting file ${path}: ${fileErr.message}`); } @@ -590,373 +355,84 @@ }, /** - * Create directory and parents if needed - * @param {string} dirPath - Directory path + * Debug the filesystem + * @param {string} [path="/"] - Path to list */ - mkdirp: function(dirPath) { - if (!dirPath || dirPath === '/') return; - - const parts = dirPath.split('/').filter(p => p); - let current = ''; - - for (const part of parts) { - current += '/' + part; - try { - const stat = global.Module.FS.stat(current); - // Only continue if it's a directory - if ((stat.mode & 61440) !== 16384) { // Not a directory (S_IFDIR = 16384) - console.error(`Path ${current} exists but is not a directory`); - - // Try to delete it if it's a file - if ((stat.mode & 61440) === 32768) { // Is a file (S_IFREG = 32768) - console.log(`Removing file at ${current} to create directory`); - global.Module.FS.unlink(current); - global.Module.FS.mkdir(current); - } else { - throw new Error(`Path exists but is not a directory: ${current}`); - } - } - } catch (e) { - // ENOENT error means directory doesn't exist, so create it - if (e.errno === 44 || e.errno === 2 || e.message.includes('No such file or directory')) { - try { - global.Module.FS.mkdir(current); - } catch (mkdirErr) { - console.error(`Failed to create directory ${current}:`, mkdirErr); - throw mkdirErr; - } - } else { - console.error(`Error processing path ${current}:`, e); - throw e; // Rethrow other errors - } - } - } - - // Verify the directory was created + debugFilesystem: function(path = "/") { try { - const stat = global.Module.FS.stat(dirPath); - if ((stat.mode & 61440) !== 16384) { // Not a directory - throw new Error(`Path ${dirPath} was created but is not a directory`); + console.log(`--- Filesystem contents of ${path} ---`); + if (!global.Module || !global.Module.FS) { + console.log("Module.FS not available"); + return; } - } catch (verifyErr) { - console.error(`Failed 
to verify directory ${dirPath}:`, verifyErr); - throw verifyErr; - } - }, - - /** - * Prepare a model directory and load files - * @param {Array} files - List of files to prepare - * @param {string} baseDir - Base directory for the model - * @param {boolean} debug - Enable debug logging - * @returns {Promise} - Result of the preparation - */ - prepareModelDirectory: async function(files, baseDir = 'models', debug = false) { - if (debug) console.log(`Preparing model directory with base: ${baseDir}`); - - try { - // Create a unique directory name with random suffix to avoid conflicts - const uniqueSuffix = Math.random().toString(36).substring(2, 10); - const uniqueDir = `${baseDir}-${uniqueSuffix}`; - - if (debug) console.log(`Creating model directory: ${uniqueDir}`); - - // Force clean any problematic paths before creating new directories - this.forceCleanPaths(baseDir, uniqueDir, debug); - // Track results for each file - const fileResults = []; + const entries = this.listFiles(path); + console.log(entries); - // Process each file in the file list - const archiveFiles = files.filter(f => f.isZip); - const regularFiles = files.filter(f => !f.isZip); - - // First process all regular files to ensure the model directory is created - for (const file of regularFiles) { - try { - if (file.content) { - // Write string content directly to file - const filename = this.joinPaths(uniqueDir, file.filename); - const directoryPath = filename.substring(0, filename.lastIndexOf('/')); - - if (debug) console.log(`Writing content to ${filename}`); - - // Ensure the directory exists - this.mkdirp(directoryPath); - - // Write the file - FS.writeFile(filename, file.content); - - fileResults.push({ - success: true, - path: filename, - original: file - }); - } else if (file.url) { - // Load file from URL - if (debug) console.log(`Fetching file from ${file.url}`); - const response = await fetch(file.url); - - if (!response.ok) { - console.error(`Failed to fetch ${file.url}: ${response.status} ${response.statusText}`); - fileResults.push({ - success: false, - error: `HTTP error: ${response.status}`, - original: file - }); - continue; - } - - // Write the downloaded file - const filename = this.joinPaths(uniqueDir, file.filename); - const directoryPath = filename.substring(0, filename.lastIndexOf('/')); - - if (debug) console.log(`Writing downloaded file to ${filename}`); - - // Ensure the directory exists - this.mkdirp(directoryPath); - - // Get binary data and write to file - const arrayBuffer = await response.arrayBuffer(); - FS.writeFile(filename, new Uint8Array(arrayBuffer), { encoding: 'binary' }); - - fileResults.push({ - success: true, - path: filename, - original: file - }); - } else { - console.error('Invalid file specification: no content or URL'); - fileResults.push({ - success: false, - error: 'Invalid file specification', - original: file - }); - } - } catch (error) { - console.error(`Error processing file: ${error.message}`); - fileResults.push({ - success: false, - error: error.message, - original: file - }); - } - } - - // Now process archives with the correct model directory path - for (const file of archiveFiles) { - try { - if (debug) console.log(`Fetching archive from ${file.url}`); - const response = await fetch(file.url); - - if (!response.ok) { - console.error(`Failed to fetch ${file.url}: ${response.status} ${response.statusText}`); - fileResults.push({ - success: false, - error: `HTTP error: ${response.status}`, - original: file - }); - continue; - } - - if (debug) console.log(`Processing 
archive ${file.url}`); - const zipData = await response.arrayBuffer(); - - // Set the extract path to the created model directory if not specified - const extractPath = file.extractToPath || uniqueDir; - - // Clean existing files if requested - if (file.cleanBeforeExtract) { - if (debug) console.log(`Cleaning before extraction at ${extractPath}`); - try { - // Create the directory if it doesn't exist - this.mkdirp(extractPath); - - // Remove any existing espeak-ng-data directory - const espeakDir = `${extractPath}/espeak-ng-data`; - try { - FS.stat(espeakDir); - if (debug) console.log(`Removing existing directory: ${espeakDir}`); - this.removePath(espeakDir); - } catch (e) { - // Directory doesn't exist, which is fine - } - } catch (cleanErr) { - console.warn(`Could not clean extraction path: ${cleanErr.message}`); - } - } - - const extractResult = await this.extractZip(zipData, extractPath, debug); - - if (extractResult.success) { - fileResults.push({ - success: true, - path: extractPath, - original: file, - extractedFiles: extractResult.files - }); - } else { - fileResults.push({ - success: false, - error: extractResult.error, - original: file - }); - } - } catch (error) { - console.error(`Error processing archive file: ${error.message}`); - fileResults.push({ - success: false, - error: error.message, - original: file - }); + // Show preloaded asset directories + Object.values(SherpaOnnx.Config.assetPaths).forEach(assetPath => { + if (this.fileExists(assetPath)) { + console.log(`--- ${assetPath} contents ---`); + console.log(this.listFiles(assetPath)); } - } - - // Check if any files failed to load - const success = fileResults.some(result => result.success); - - if (debug) { - console.log(`Model preparation ${success ? 'successful' : 'partially successful'}`); - console.log(`Loaded ${fileResults.filter(r => r.success).length} of ${fileResults.length} files`); - } - - return { - modelDir: uniqueDir, - success, - files: fileResults - }; - } catch (error) { - console.error(`Error in prepareModelDirectory:`, error); - return { - success: false, - error: error.message - }; - } - }, - - /** - * Join path segments properly - * @param {...string} paths - Path segments to join - * @returns {string} - Joined path - */ - joinPaths: function(...paths) { - return paths.join('/').replace(/\/+/g, '/'); - }, - - /** - * Ensure a directory exists, creating it if necessary - * @param {string} dirPath - Directory path to ensure - */ - ensureDirectory: function(dirPath) { - if (!dirPath) return; - - // Skip if it's the root directory - if (dirPath === '/') return; - - try { - // Check if directory exists - const stat = FS.stat(dirPath); - if (stat.isDir) return; // Already exists - using isDir property, not a function - throw new Error(`Path exists but is not a directory: ${dirPath}`); - } catch (error) { - // If error is that the path doesn't exist, create it - if (error.errno === 44 || error.message.includes('No such file or directory')) { - // Ensure parent directory exists first - const parentDir = dirPath.split('/').slice(0, -1).join('/'); - if (parentDir) this.ensureDirectory(parentDir); - - // Create this directory - FS.mkdir(dirPath); - return; - } - - // For other errors, rethrow - throw error; + }); + } catch (err) { + console.error("Error debugging filesystem:", err); } + } + }; + + // Resource tracking for cleanup + SherpaOnnx.Resources = { + // List of active resources by type + active: { + asr: [], + vad: [], + tts: [], + kws: [], + speakers: [], + enhancement: [] }, /** - * Debug the 
filesystem by listing key directories - * @param {boolean} debug - Enable debug output + * Track a resource for later cleanup + * @param {string} type - Resource type + * @param {Object} resource - Resource to track + * @returns {Object} The resource (for chaining) */ - debugFilesystem: function(debug = true) { - if (!debug) return; - - try { - console.log("--- FILESYSTEM DEBUG ---"); - - // List root directory - const rootEntries = global.Module.FS.readdir('/'); - console.log("Root directory contents:", rootEntries); - - // Check relevant TTS model directories - for (const entry of rootEntries) { - // Only check TTS-related directories - if (entry === 'tts-models' || entry.startsWith('tts-models-')) { - try { - const stat = global.Module.FS.stat('/' + entry); - const isDir = (stat.mode & 61440) === 16384; // S_IFDIR - - if (isDir) { - const subEntries = global.Module.FS.readdir('/' + entry); - console.log(`Contents of /${entry}:`, subEntries); - } else { - console.log(`/${entry}: Not a directory`); - } - } catch (err) { - console.log(`Error checking /${entry}:`, err.message); - } - } - } - - console.log("--- END FILESYSTEM DEBUG ---"); - } catch (err) { - console.log("Filesystem debug error:", err.message); + track: function(type, resource) { + if (this.active[type]) { + this.active[type].push(resource); } + return resource; }, /** - * Clean up paths that may cause conflicts - * @param {string} modelDir - The model directory - * @param {string} uniqueDir - The unique model directory - * @param {boolean} debug - Enable debug output + * Clean up resources of a specific type + * @param {string} [type] - Resource type (if omitted, clean all types) */ - forceCleanPaths: function(modelDir, uniqueDir, debug = false) { - try { - if (debug) console.log(`Cleaning paths: ${modelDir}, ${uniqueDir}`); - - // Clean up common problematic paths - const pathsToClean = [ - '/espeak-ng-data', - `/${modelDir}`, - `/${uniqueDir}` - ]; - - for (const path of pathsToClean) { - try { - // Check if path exists - const stat = global.Module.FS.stat(path); - const type = stat.mode & 61440; - const isFile = type === 32768; - const isDir = type === 16384; - - if (isFile) { - if (debug) console.log(`Removing file at ${path}`); - global.Module.FS.unlink(path); - } else if (isDir) { - if (debug) console.log(`Removing directory at ${path}`); - this.removePath(path, debug); + cleanup: function(type) { + if (type) { + // Clean up specific type + if (this.active[type]) { + this.active[type].forEach(resource => { + if (resource && typeof resource.free === 'function') { + resource.free(); } - } catch (err) { - // Path doesn't exist, which is fine - } + }); + this.active[type] = []; } - } catch (err) { - console.error("Error cleaning paths:", err.message); + } else { + // Clean up all types + Object.keys(this.active).forEach(t => this.cleanup(t)); } } }; + // For convenience, add alias methods + SherpaOnnx.trackResource = SherpaOnnx.Resources.track.bind(SherpaOnnx.Resources); + SherpaOnnx.cleanup = SherpaOnnx.Resources.cleanup.bind(SherpaOnnx.Resources); + // Expose SherpaOnnx to the global object global.SherpaOnnx = SherpaOnnx; })(typeof window !== 'undefined' ? 
window : global); \ No newline at end of file diff --git a/wasm/combined/sherpa-onnx-tts.js b/wasm/combined/sherpa-onnx-tts.js index 4b22a3a104..d6f04aa606 100644 --- a/wasm/combined/sherpa-onnx-tts.js +++ b/wasm/combined/sherpa-onnx-tts.js @@ -21,159 +21,160 @@ // Define the TTS module functionality SherpaOnnx.TTS = { /** - * Load a TTS model from URLs + * Load a Text-to-Speech model * @param {Object} modelConfig - Configuration for the model * @returns {Promise} - Information about the loaded model */ loadModel: async function(modelConfig) { const debug = modelConfig.debug || false; - const modelDir = modelConfig.modelDir || 'tts-models'; + if (debug) console.log("TTS.loadModel: ModelConfig:", JSON.stringify(modelConfig)); - if (debug) console.log(`TTS.loadModel: Starting with base dir ${modelDir}`); + // Handle custom model upload case + if (modelConfig.customModel) { + if (debug) console.log("Using custom uploaded model"); + + // Validate basic requirements + if (!modelConfig.customModel.model && !modelConfig.customModel.acousticModel) { + throw new Error("Missing required model file in custom model"); + } + + if (!modelConfig.customModel.tokens) { + throw new Error("Missing required tokens.txt file in custom model"); + } + + return { + modelDir: modelConfig.customModel.dataDir || + (modelConfig.customModel.model + ? modelConfig.customModel.model.split('/').slice(0, -1).join('/') + : modelConfig.customModel.acousticModel.split('/').slice(0, -1).join('/')), + modelType: modelConfig.modelType || 'vits', + actualPaths: modelConfig.customModel, + preloaded: false, + options: modelConfig.options || {} + }; + } - try { - // Always use clean start to avoid conflicts - if (debug) console.log(`Cleaning model directory to prevent conflicts: ${modelDir}`); - SherpaOnnx.FileSystem.removePath(modelDir, debug); - - // Flag to track if we need espeak data - let needsEspeakData = false; - - // Prepare file list based on model type - const files = []; - - if (modelConfig.type === 'vits') { - // Add model file - files.push({ - url: modelConfig.model || 'assets/tts/model.onnx', - filename: 'model.onnx' - }); + // Default model directory and type handling + const modelDir = modelConfig.modelDir || 'tts-models'; + const modelType = modelConfig.modelType || 'vits'; + + // First check for preloaded assets + if (!modelConfig.forceDownload) { + const assetPath = SherpaOnnx.Config.assetPaths.tts; + if (debug) console.log(`Checking for preloaded TTS assets at ${assetPath}`); + + if (SherpaOnnx.FileSystem.fileExists(assetPath)) { + const files = SherpaOnnx.FileSystem.listFiles(assetPath); + if (debug) console.log(`Found preloaded files: ${files.join(', ')}`); - // Add tokens file - files.push({ - url: modelConfig.tokens || 'assets/tts/tokens.txt', - filename: 'tokens.txt' - }); + // Check for required model files based on type + let hasRequiredFiles = false; + const actualPaths = {}; - // Add lexicon if provided - if (modelConfig.lexicon) { - files.push({ - url: modelConfig.lexicon, - filename: 'lexicon.txt' - }); + if (modelType === 'vits') { + // VITS model requires model, lexicon, and tokens files + const modelFile = files.find(f => f.endsWith('.onnx')); + const tokensFile = files.find(f => f === 'tokens.txt'); + + // Check for espeak data directory or zip + let hasEspeakData = files.find(f => f === 'espeak-ng-data' || f === 'espeak-ng-data.zip'); + + if (modelFile && tokensFile) { + hasRequiredFiles = true; + actualPaths.model = `${assetPath}/${modelFile}`; + actualPaths.tokens = `${assetPath}/${tokensFile}`; 
+ + // Add espeak data if found + if (hasEspeakData) { + if (hasEspeakData === 'espeak-ng-data') { + actualPaths.dataDir = `${assetPath}/espeak-ng-data`; + } else { + // Will need to extract this later + actualPaths.espeakZip = `${assetPath}/espeak-ng-data.zip`; + } + } + } } - // Flag that we need espeak-ng-data - if (debug) console.log("Will load espeak-ng-data after model directory creation"); - needsEspeakData = true; - } else if (modelConfig.type === 'matcha') { - // Add required files for matcha - files.push({ - url: modelConfig.acousticModel || 'assets/tts/acoustic_model.onnx', - filename: 'acoustic_model.onnx' - }); - - files.push({ - url: modelConfig.vocoder || 'assets/tts/vocoder.onnx', - filename: 'vocoder.onnx' - }); - - files.push({ - url: modelConfig.tokens || 'assets/tts/tokens.txt', - filename: 'tokens.txt' - }); - - if (modelConfig.lexicon) { - files.push({ - url: modelConfig.lexicon, - filename: 'lexicon.txt' - }); + if (hasRequiredFiles) { + if (debug) console.log("Using preloaded TTS model with paths:", actualPaths); + return { + modelDir: assetPath, + modelType, + actualPaths, + preloaded: true, + options: modelConfig.options || {} + }; } - } else if (modelConfig.type === 'kokoro') { - // Add required files for kokoro - files.push({ - url: modelConfig.model || 'assets/tts/kokoro/model.onnx', - filename: 'kokoro_model.onnx' - }); - - files.push({ - url: modelConfig.tokens || 'assets/tts/kokoro/tokens.txt', - filename: 'tokens.txt' - }); - if (modelConfig.voices) { - files.push({ - url: modelConfig.voices, - filename: 'voices.txt' - }); - } + if (debug) console.log("Preloaded TTS assets found but missing required files"); + } else if (debug) { + console.log(`Asset path ${assetPath} not found, will need to download models`); } - if (debug) console.log(`Prepared ${files.length} files to load for TTS model`); - - // Create unique model directory and load files - const result = await SherpaOnnx.FileSystem.prepareModelDirectory( - files, - modelDir, - debug - ); - - if (!result.success) { - console.error("Failed to load model files:", result); - throw new Error("Failed to load TTS model files"); - } + // Also check alternative locations for preloaded assets + const alternativePaths = [ + `/sherpa_assets/tts`, + `/assets/tts`, + `/preloaded/tts` + ]; - // Handle espeak-ng-data for VITS models - if (modelConfig.type === 'vits' && needsEspeakData) { - if (debug) console.log(`Loading espeak-ng-data.zip into ${result.modelDir}`); + for (const altPath of alternativePaths) { + if (altPath === assetPath) continue; // Skip if we've already checked this path + + if (debug) console.log(`Checking alternative path: ${altPath}`); - try { - // Use configurable URL if provided, otherwise use default - const espeakZipUrl = modelConfig.espeakDataZip || 'assets/tts/espeak-ng-data.zip'; - if (debug) console.log(`Fetching espeak-ng-data from ${espeakZipUrl}`); + if (SherpaOnnx.FileSystem.fileExists(altPath)) { + const files = SherpaOnnx.FileSystem.listFiles(altPath); + if (debug) console.log(`Found files at ${altPath}: ${files.join(', ')}`); - const zipResponse = await fetch(espeakZipUrl); - const zipData = await zipResponse.arrayBuffer(); + // Similar check for required files + let hasRequiredFiles = false; + const actualPaths = {}; - await SherpaOnnx.FileSystem.extractZip( - zipData, - result.modelDir, - debug - ); - } catch (zipError) { - console.error("Error processing espeak-ng-data.zip:", zipError); + if (modelType === 'vits') { + const modelFile = files.find(f => f.endsWith('.onnx')); + 
const tokensFile = files.find(f => f === 'tokens.txt'); + + // Check for espeak data directory or zip + let hasEspeakData = files.find(f => f === 'espeak-ng-data' || f === 'espeak-ng-data.zip'); + + if (modelFile && tokensFile) { + hasRequiredFiles = true; + actualPaths.model = `${altPath}/${modelFile}`; + actualPaths.tokens = `${altPath}/${tokensFile}`; + + // Add espeak data if found + if (hasEspeakData) { + if (hasEspeakData === 'espeak-ng-data') { + actualPaths.dataDir = `${altPath}/espeak-ng-data`; + } else { + // Will need to extract this later + actualPaths.espeakZip = `${altPath}/espeak-ng-data.zip`; + } + } + } + } + + if (hasRequiredFiles) { + if (debug) console.log(`Using alternative preloaded TTS model path: ${altPath}`); + // Update the config to use this path in the future + SherpaOnnx.Config.assetPaths.tts = altPath; + + return { + modelDir: altPath, + modelType, + actualPaths, + preloaded: true, + options: modelConfig.options || {} + }; + } } } - - // Organize files by type - const modelFiles = {}; - const successFiles = result.files.filter(f => f.success); - - if (debug) console.log(`Successfully loaded ${successFiles.length} of ${result.files.length} files`); - - // Map files to their proper keys - successFiles.forEach(file => { - const filename = file.original.filename; - - if (filename === 'model.onnx') modelFiles.model = file.path; - else if (filename === 'acoustic_model.onnx') modelFiles.acousticModel = file.path; - else if (filename === 'vocoder.onnx') modelFiles.vocoder = file.path; - else if (filename === 'tokens.txt') modelFiles.tokens = file.path; - else if (filename === 'lexicon.txt') modelFiles.lexicon = file.path; - else if (filename === 'voices.txt') modelFiles.voices = file.path; - else if (filename === 'kokoro_model.onnx') modelFiles.kokoroModel = file.path; - }); - - // Return the model information - return { - modelDir: result.modelDir, - type: modelConfig.type, - files: modelFiles - }; - } catch(e) { - console.error(`TTS.loadModel: Error loading model:`, e); - throw e; } + + // If we reached here, we couldn't find preloaded assets + throw new Error("No preloaded TTS model found and dynamic loading is not implemented"); }, /** @@ -189,35 +190,120 @@ console.log("Creating TTS engine with loaded model:", loadedModel); } + // Always use a single consistent property name for model type + const modelType = loadedModel.modelType || 'vits'; + + if (debug) { + console.log(`Using model type: ${modelType}`); + } + + // Merge options from loadedModel with function options, prioritizing function options + const mergedOptions = { + ...loadedModel.options, + ...options + }; + + if (debug) { + console.log("Using merged options:", mergedOptions); + } + let config = null; - if (loadedModel.type === 'vits') { - if (!loadedModel.files || !loadedModel.files.model || !loadedModel.files.tokens) { + if (modelType === 'vits') { + // For preloaded assets, we use actualPaths + const paths = loadedModel.actualPaths || loadedModel.files || {}; + + if (debug) { + console.log("Using model paths:", paths); + } + + if (!paths.model || !paths.tokens) { throw new Error("Missing required files for VITS model configuration"); } const offlineTtsVitsModelConfig = { - model: loadedModel.files.model, - lexicon: loadedModel.files.lexicon || '', - tokens: loadedModel.files.tokens, - dataDir: `${loadedModel.modelDir}/espeak-ng-data`, // Path to espeak-ng-data in model directory - dictDir: '', - noiseScale: options.noiseScale || 0.667, - noiseScaleW: options.noiseScaleW || 0.8, - lengthScale: 
options.lengthScale || 1.0, + model: paths.model, + lexicon: paths.lexicon || '', + tokens: paths.tokens, + dataDir: paths.dataDir || `${loadedModel.modelDir}/espeak-ng-data`, // Path to espeak-ng-data in model directory + dictDir: paths.dictDir || '', + noiseScale: mergedOptions.noiseScale || 0.667, + noiseScaleW: mergedOptions.noiseScaleW || 0.8, + lengthScale: mergedOptions.lengthScale || 1.0, }; + if (debug) { + console.log("VITS model config:", offlineTtsVitsModelConfig); + } + const offlineTtsMatchaModelConfig = { - acousticModel: '', - vocoder: '', + acousticModel: paths.acousticModel || '', + vocoder: paths.vocoder || '', + lexicon: paths.lexicon || '', + tokens: paths.tokens || '', + dataDir: paths.dataDir || '', + dictDir: paths.dictDir || '', + noiseScale: mergedOptions.noiseScale || 0.667, + lengthScale: mergedOptions.lengthScale || 1.0, + }; + + const offlineTtsKokoroModelConfig = { + model: paths.model || '', + voices: paths.voices || '', + tokens: paths.tokens || '', + dataDir: paths.dataDir || '', + lengthScale: mergedOptions.lengthScale || 1.0, + dictDir: paths.dictDir || '', + lexicon: paths.lexicon || '', + }; + + // Use the correct field names expected by the C API + const offlineTtsModelConfig = { + offlineTtsVitsModelConfig: offlineTtsVitsModelConfig, + offlineTtsMatchaModelConfig: offlineTtsMatchaModelConfig, + offlineTtsKokoroModelConfig: offlineTtsKokoroModelConfig, + numThreads: mergedOptions.numThreads || 1, + debug: debug ? 1 : 0, + provider: mergedOptions.provider || 'cpu', + }; + + config = { + offlineTtsModelConfig: offlineTtsModelConfig, + ruleFsts: mergedOptions.ruleFsts || '', + ruleFars: mergedOptions.ruleFars || '', + maxNumSentences: mergedOptions.maxNumSentences || 1, + silenceScale: mergedOptions.silenceScale || 1.0 + }; + } else if (modelType === 'matcha') { + // Similar configuration for matcha... + const paths = loadedModel.actualPaths || loadedModel.files || {}; + + if (!paths.acousticModel || !paths.vocoder || !paths.tokens) { + throw new Error("Missing required files for Matcha model configuration"); + } + + const offlineTtsVitsModelConfig = { + model: '', lexicon: '', tokens: '', dataDir: '', dictDir: '', noiseScale: 0.667, + noiseScaleW: 0.8, lengthScale: 1.0, }; + const offlineTtsMatchaModelConfig = { + acousticModel: paths.acousticModel, + vocoder: paths.vocoder, + lexicon: paths.lexicon || '', + tokens: paths.tokens, + dataDir: paths.dataDir || '', + dictDir: paths.dictDir || '', + noiseScale: mergedOptions.noiseScale || 0.667, + lengthScale: mergedOptions.lengthScale || 1.0, + }; + const offlineTtsKokoroModelConfig = { model: '', voices: '', @@ -232,26 +318,76 @@ offlineTtsVitsModelConfig: offlineTtsVitsModelConfig, offlineTtsMatchaModelConfig: offlineTtsMatchaModelConfig, offlineTtsKokoroModelConfig: offlineTtsKokoroModelConfig, - numThreads: options.numThreads || 1, + numThreads: mergedOptions.numThreads || 1, debug: debug ? 1 : 0, - provider: 'cpu', + provider: mergedOptions.provider || 'cpu', }; config = { offlineTtsModelConfig: offlineTtsModelConfig, - ruleFsts: '', - ruleFars: '', - maxNumSentences: 1, - silenceScale: options.silenceScale || 1.0 + ruleFsts: mergedOptions.ruleFsts || '', + ruleFars: mergedOptions.ruleFars || '', + maxNumSentences: mergedOptions.maxNumSentences || 1, + silenceScale: mergedOptions.silenceScale || 1.0 }; - } else if (loadedModel.type === 'matcha') { - // Similar configuration for matcha... 
- // (Omitted for brevity) - } else if (loadedModel.type === 'kokoro') { + } else if (modelType === 'kokoro') { // Similar configuration for kokoro... - // (Omitted for brevity) + const paths = loadedModel.actualPaths || loadedModel.files || {}; + + if (!paths.model || !paths.voices || !paths.tokens) { + throw new Error("Missing required files for Kokoro model configuration"); + } + + const offlineTtsVitsModelConfig = { + model: '', + lexicon: '', + tokens: '', + dataDir: '', + dictDir: '', + noiseScale: 0.667, + noiseScaleW: 0.8, + lengthScale: 1.0, + }; + + const offlineTtsMatchaModelConfig = { + acousticModel: '', + vocoder: '', + lexicon: '', + tokens: '', + dataDir: '', + dictDir: '', + noiseScale: 0.667, + lengthScale: 1.0, + }; + + const offlineTtsKokoroModelConfig = { + model: paths.model, + voices: paths.voices, + tokens: paths.tokens, + dataDir: paths.dataDir || '', + lengthScale: mergedOptions.lengthScale || 1.0, + dictDir: paths.dictDir || '', + lexicon: paths.lexicon || '', + }; + + const offlineTtsModelConfig = { + offlineTtsVitsModelConfig: offlineTtsVitsModelConfig, + offlineTtsMatchaModelConfig: offlineTtsMatchaModelConfig, + offlineTtsKokoroModelConfig: offlineTtsKokoroModelConfig, + numThreads: mergedOptions.numThreads || 1, + debug: debug ? 1 : 0, + provider: mergedOptions.provider || 'cpu', + }; + + config = { + offlineTtsModelConfig: offlineTtsModelConfig, + ruleFsts: mergedOptions.ruleFsts || '', + ruleFars: mergedOptions.ruleFars || '', + maxNumSentences: mergedOptions.maxNumSentences || 1, + silenceScale: mergedOptions.silenceScale || 1.0 + }; } else { - throw new Error(`Unsupported TTS model type: ${loadedModel.type}`); + throw new Error(`Unsupported TTS model type: ${modelType}`); } if (debug) { @@ -520,52 +656,51 @@ * @returns {Object} - Configuration with pointers */ function initSherpaOnnxOfflineTtsModelConfig(config, Module) { - if (!('offlineTtsVitsModelConfig' in config)) { - config.offlineTtsVitsModelConfig = { - model: './model.onnx', - lexicon: '', - tokens: './tokens.txt', - dataDir: './espeak-ng-data', // Use relative path in the model directory - dictDir: '', - noiseScale: 0.667, - noiseScaleW: 0.8, - lengthScale: 1.0, - }; + if (Module.debug) { + console.log("Initializing offline TTS model config:", JSON.stringify(config)); } + + // Get configurations, supporting both old and new formats + const vitsConfig = config.vits || config.offlineTtsVitsModelConfig || { + model: './model.onnx', + lexicon: '', + tokens: './tokens.txt', + dataDir: './espeak-ng-data', // Use relative path in the model directory + dictDir: '', + noiseScale: 0.667, + noiseScaleW: 0.8, + lengthScale: 1.0, + }; - if (!('offlineTtsMatchaModelConfig' in config)) { - config.offlineTtsMatchaModelConfig = { - acousticModel: '', - vocoder: '', - lexicon: '', - tokens: '', - dataDir: '', - dictDir: '', - noiseScale: 0.667, - lengthScale: 1.0, - }; - } + const matchaConfig = config.matcha || config.offlineTtsMatchaModelConfig || { + acousticModel: '', + vocoder: '', + lexicon: '', + tokens: '', + dataDir: '', + dictDir: '', + noiseScale: 0.667, + lengthScale: 1.0, + }; - if (!('offlineTtsKokoroModelConfig' in config)) { - config.offlineTtsKokoroModelConfig = { - model: '', - voices: '', - tokens: '', - lengthScale: 1.0, - dataDir: '', - dictDir: '', - lexicon: '', - }; - } + const kokoroConfig = config.kokoro || config.offlineTtsKokoroModelConfig || { + model: '', + voices: '', + tokens: '', + dataDir: '', + lengthScale: 1.0, + dictDir: '', + lexicon: '', + }; const vitsModelConfig = 
initSherpaOnnxOfflineTtsVitsModelConfig( - config.offlineTtsVitsModelConfig, Module); + vitsConfig, Module); const matchaModelConfig = initSherpaOnnxOfflineTtsMatchaModelConfig( - config.offlineTtsMatchaModelConfig, Module); + matchaConfig, Module); const kokoroModelConfig = initSherpaOnnxOfflineTtsKokoroModelConfig( - config.offlineTtsKokoroModelConfig, Module); + kokoroConfig, Module); const len = vitsModelConfig.len + matchaModelConfig.len + kokoroModelConfig.len + 3 * 4; @@ -607,14 +742,64 @@ * @returns {Object} - Configuration with pointers */ function initSherpaOnnxOfflineTtsConfig(config, Module) { - const modelConfig = + // Log for debugging + if (Module.debug) { + console.log("Initializing TTS config:", JSON.stringify(config)); + } + + // Make sure we have an offlineTtsModelConfig + if (!config.offlineTtsModelConfig) { + if (Module.debug) { + console.log("No offlineTtsModelConfig found, creating default"); + } + + // Use provided defaults or create new ones + config.offlineTtsModelConfig = { + offlineTtsVitsModelConfig: { + model: './model.onnx', + lexicon: '', + tokens: './tokens.txt', + dataDir: './espeak-ng-data', + dictDir: '', + noiseScale: 0.667, + noiseScaleW: 0.8, + lengthScale: 1.0, + }, + offlineTtsMatchaModelConfig: { + acousticModel: '', + vocoder: '', + lexicon: '', + tokens: '', + dataDir: '', + dictDir: '', + noiseScale: 0.667, + lengthScale: 1.0, + }, + offlineTtsKokoroModelConfig: { + model: '', + voices: '', + tokens: '', + dataDir: '', + lengthScale: 1.0, + dictDir: '', + lexicon: '', + }, + numThreads: 1, + debug: Module.debug ? 1 : 0, + provider: 'cpu', + }; + } + + // Initialize model config + const initializedModelConfig = initSherpaOnnxOfflineTtsModelConfig(config.offlineTtsModelConfig, Module); - const len = modelConfig.len + 4 * 4; + + const len = initializedModelConfig.len + 4 * 4; const ptr = Module._malloc(len); let offset = 0; - Module._CopyHeap(modelConfig.ptr, modelConfig.len, ptr + offset); - offset += modelConfig.len; + Module._CopyHeap(initializedModelConfig.ptr, initializedModelConfig.len, ptr + offset); + offset += initializedModelConfig.len; const ruleFstsLen = Module.lengthBytesUTF8(config.ruleFsts || '') + 1; const ruleFarsLen = Module.lengthBytesUTF8(config.ruleFars || '') + 1; @@ -636,7 +821,7 @@ offset += 4; return { - buffer: buffer, ptr: ptr, len: len, config: modelConfig, + buffer: buffer, ptr: ptr, len: len, config: initializedModelConfig, }; } @@ -644,35 +829,74 @@ * OfflineTts class for text-to-speech synthesis */ global.OfflineTts = global.OfflineTts || function(configObj, Module) { - if (Module.debug) { - console.log("Creating OfflineTts with config:", JSON.stringify(configObj)); + if (!Module) { + throw new Error("WASM Module is required for OfflineTts"); } - const config = initSherpaOnnxOfflineTtsConfig(configObj, Module); + this.Module = Module; + this.handle = null; + this.sampleRate = 0; + this.numSpeakers = 0; + this.generatedAudios = []; // Track generated audios for cleanup - if (Module.debug) { - try { - Module._MyPrintTTS(config.ptr); - } catch (e) { - console.warn("Failed to print TTS config:", e); - } - } + const debug = Module.debug || (configObj && configObj.debug); - const handle = Module._SherpaOnnxCreateOfflineTts(config.ptr); + if (debug) { + console.log("Creating OfflineTts with config:", JSON.stringify(configObj)); + } - if (!handle) { - const error = new Error("Failed to create TTS engine - null handle returned"); + try { + // Initialize the TTS configuration + const config = 
initSherpaOnnxOfflineTtsConfig(configObj, Module); + + if (debug) { + try { + Module._MyPrintTTS(config.ptr); + } catch (e) { + console.warn("Failed to print TTS config:", e); + } + } + + // Create the TTS engine + const handle = Module._SherpaOnnxCreateOfflineTts(config.ptr); + + if (!handle || handle === 0) { + const error = new Error("Failed to create TTS engine - null handle returned"); + freeConfig(config, Module); + throw error; + } + + // Free the configuration memory now that we have the handle freeConfig(config, Module); - throw error; + + // Store the handle and get basic information about the TTS engine + this.handle = handle; + + try { + this.sampleRate = Module._SherpaOnnxOfflineTtsSampleRate(this.handle); + this.numSpeakers = Module._SherpaOnnxOfflineTtsNumSpeakers(this.handle); + + if (debug) { + console.log(`TTS engine initialized. Sample rate: ${this.sampleRate}Hz, Number of speakers: ${this.numSpeakers}`); + } + } catch (e) { + console.error("Error getting TTS engine information:", e); + // Don't throw here, we can continue with defaults + } + } catch (e) { + // Clean up any resources if initialization failed + if (this.handle) { + try { + Module._SherpaOnnxDestroyOfflineTts(this.handle); + } catch (cleanupError) { + console.error("Error cleaning up after failed initialization:", cleanupError); + } + this.handle = null; + } + + // Re-throw the original error + throw e; } - - freeConfig(config, Module); - - this.handle = handle; - this.sampleRate = Module._SherpaOnnxOfflineTtsSampleRate(this.handle); - this.numSpeakers = Module._SherpaOnnxOfflineTtsNumSpeakers(this.handle); - this.Module = Module; - this.generatedAudios = []; // Track generated audios for cleanup /** * Generate speech from text @@ -682,6 +906,10 @@ * @returns {Object} - Object containing audio samples and sample rate */ this.generate = function(text, sid = 0, speed = 1.0) { + if (this.Module.debug) { + console.log(`Generating speech for text: "${text}", sid: ${sid}, speed: ${speed}`); + } + const textLen = this.Module.lengthBytesUTF8(text) + 1; const textPtr = this.Module._malloc(textLen); this.Module.stringToUTF8(text, textPtr, textLen); @@ -691,34 +919,62 @@ this.Module._free(textPtr); - if (!h) { + if (!h || h === 0) { throw new Error("Failed to generate speech - null pointer returned"); } - const numSamples = this.Module.HEAP32[h / 4 + 1]; - const sampleRate = this.Module.HEAP32[h / 4 + 2]; - - const samplesPtr = this.Module.HEAP32[h / 4] / 4; - const samples = new Float32Array(numSamples); - for (let i = 0; i < numSamples; i++) { - samples[i] = this.Module.HEAPF32[samplesPtr + i]; - } + // Access the generated audio structure + // The structure has this format in C: + // struct SherpaOnnxOfflineTtsGeneratedAudio { + // float *samples; + // int32_t n; + // int32_t sample_rate; + // }; + try { + // Read the number of samples and sample rate from memory + const numSamples = this.Module.getValue(h + 4, 'i32'); + const sampleRate = this.Module.getValue(h + 8, 'i32'); + + if (this.Module.debug) { + console.log(`Generated ${numSamples} samples at ${sampleRate}Hz`); + } + + // Get the pointer to the audio samples array + const samplesPtr = this.Module.getValue(h, '*'); + + if (!samplesPtr) { + throw new Error("Failed to read audio samples pointer"); + } + + // Copy samples to a new Float32Array + const samples = new Float32Array(numSamples); + for (let i = 0; i < numSamples; i++) { + samples[i] = this.Module.getValue(samplesPtr + (i * 4), 'float'); + } - // Add to our tracking list - 
this.generatedAudios.push(h); - - return { - samples: samples, - sampleRate: sampleRate, - // Add a cleanup function for this specific audio - free: () => { - const index = this.generatedAudios.indexOf(h); - if (index !== -1) { - this.Module._SherpaOnnxDestroyOfflineTtsGeneratedAudio(h); - this.generatedAudios.splice(index, 1); + // Add to our tracking list + this.generatedAudios.push(h); + + return { + samples: samples, + sampleRate: sampleRate, + // Add a cleanup function for this specific audio + free: () => { + const index = this.generatedAudios.indexOf(h); + if (index !== -1) { + this.Module._SherpaOnnxDestroyOfflineTtsGeneratedAudio(h); + this.generatedAudios.splice(index, 1); + } } + }; + } catch (error) { + // Clean up on error to avoid memory leaks + if (h) { + this.Module._SherpaOnnxDestroyOfflineTtsGeneratedAudio(h); } - }; + console.error("Error accessing generated audio data:", error); + throw new Error("Failed to process generated audio: " + error.message); + } }; /** diff --git a/wasm/combined/sherpa-onnx-vad.js b/wasm/combined/sherpa-onnx-vad.js index 05bb4e8c9b..a84046210f 100644 --- a/wasm/combined/sherpa-onnx-vad.js +++ b/wasm/combined/sherpa-onnx-vad.js @@ -15,6 +15,9 @@ // Get a reference to the SherpaOnnx namespace const SherpaOnnx = global.SherpaOnnx; + // Create or use existing VAD namespace + SherpaOnnx.VAD = SherpaOnnx.VAD || {}; + // Internal class for voice activity detection class VoiceActivityDetector { constructor(handle, Module) { @@ -74,61 +77,63 @@ // Define the VAD module functionality SherpaOnnx.VAD = { /** - * Load a VAD model from URL + * Load a Voice Activity Detection model * @param {Object} modelConfig - Configuration for the model * @returns {Promise} - Information about the loaded model */ loadModel: async function(modelConfig) { - const debug = modelConfig.debug !== false; - - if (debug) console.log("VAD.loadModel: ModelConfig received:", JSON.stringify(modelConfig)); - - // Use configurable model directory with default + const debug = modelConfig.debug || false; const modelDir = modelConfig.modelDir || 'vad-models'; - const fileName = modelConfig.fileName || 'silero_vad.onnx'; - const destPath = `${modelDir}/${fileName}`; - - if (debug) console.log(`VAD.loadModel: Using model directory: ${modelDir}`); - if (debug) console.log(`VAD.loadModel: Target file path: ${destPath}`); - try { - // Clean up existing path if needed for fresh start - if (modelConfig.cleanStart) { - if (debug) console.log(`VAD.loadModel: Clean start requested, removing existing paths`); - SherpaOnnx.FileSystem.removePath(modelDir, debug); - } + // First check for preloaded assets + if (!modelConfig.forceDownload) { + const assetPath = SherpaOnnx.Config.assetPaths.vad; + if (debug) console.log(`Checking for preloaded VAD assets at ${assetPath}`); - // Load the model using the safe loader - if (debug) console.log(`VAD.loadModel: Loading model from ${modelConfig.model || 'assets/vad/silero_vad.onnx'} to ${destPath}`); - - const loadResult = await SherpaOnnx.FileSystem.safeLoadFile( - modelConfig.model || 'assets/vad/silero_vad.onnx', - destPath, - debug - ); - - if (!loadResult || (typeof loadResult === 'object' && !loadResult.success)) { - throw new Error(`Failed to load model from ${modelConfig.model || 'assets/vad/silero_vad.onnx'} to ${destPath}`); - } - - // Update the path if it was changed - const actualPath = (typeof loadResult === 'object' && loadResult.path) ? 
loadResult.path : destPath; - if (actualPath !== destPath) { - if (debug) console.log(`VAD.loadModel: Note - Model loaded to alternate path: ${actualPath}`); + if (SherpaOnnx.FileSystem.fileExists(assetPath)) { + const files = SherpaOnnx.FileSystem.listFiles(assetPath); + if (debug) console.log(`Found preloaded files: ${files.join(', ')}`); + + // Check for required model file + if (files.includes('silero_vad.onnx')) { + if (debug) console.log("Using preloaded VAD model"); + return { + modelDir: assetPath, + actualPaths: { + model: `${assetPath}/silero_vad.onnx` + }, + preloaded: true + }; + } + + if (debug) console.log("Preloaded VAD assets found but missing required files"); } - - if (debug) console.log(`VAD.loadModel: Model loaded successfully`); - - // Return model information with the actual path used - return { - modelDir, - fileName, - modelPath: actualPath - }; - } catch (error) { - console.error(`VAD.loadModel: Error loading model:`, error); - throw error; } + + // Create directory if it doesn't exist + try { + SherpaOnnx.FileSystem.ensureDirectory(modelDir); + } catch(e) { + console.error(`Failed to create directory ${modelDir}:`, e); + } + + // Collection for actual file paths + const actualPaths = {}; + + // Load VAD model file + const result = await SherpaOnnx.FileSystem.loadFile( + modelConfig.model || 'assets/vad/silero_vad.onnx', + `${modelDir}/silero_vad.onnx`, + debug + ); + + // Collect actual path + actualPaths.model = result.path; + + return { + modelDir, + actualPaths + }; }, /** @@ -204,7 +209,15 @@ // Free configuration memory SherpaOnnx.Utils.freeConfig(vadConfig, global.Module); - return new VoiceActivityDetector(vadPtr, global.Module); + // Create the detector object + const detector = new VoiceActivityDetector(vadPtr, global.Module); + + // Track the resource for cleanup if tracking function is available + if (SherpaOnnx.trackResource) { + SherpaOnnx.trackResource('vad', detector); + } + + return detector; } catch (error) { console.error("Error creating VAD detector:", error); throw error; diff --git a/wasm/combined/sherpa-onnx-wasm-combined.cc b/wasm/combined/sherpa-onnx-wasm-combined.cc index 7ba5da23ea..82253ebae2 100644 --- a/wasm/combined/sherpa-onnx-wasm-combined.cc +++ b/wasm/combined/sherpa-onnx-wasm-combined.cc @@ -13,79 +13,6 @@ extern "C" { -// ============================================================================ -// Verify memory layouts with static assertions -// ============================================================================ - -// ASR memory layout verification -static_assert(sizeof(SherpaOnnxOnlineTransducerModelConfig) == 3 * 4, ""); -static_assert(sizeof(SherpaOnnxOnlineParaformerModelConfig) == 2 * 4, ""); -static_assert(sizeof(SherpaOnnxOnlineZipformer2CtcModelConfig) == 1 * 4, ""); -static_assert(sizeof(SherpaOnnxOnlineModelConfig) == - sizeof(SherpaOnnxOnlineTransducerModelConfig) + - sizeof(SherpaOnnxOnlineParaformerModelConfig) + - sizeof(SherpaOnnxOnlineZipformer2CtcModelConfig) + 9 * 4, - ""); -static_assert(sizeof(SherpaOnnxFeatureConfig) == 2 * 4, ""); -static_assert(sizeof(SherpaOnnxOnlineCtcFstDecoderConfig) == 2 * 4, ""); -static_assert(sizeof(SherpaOnnxOnlineRecognizerConfig) == - sizeof(SherpaOnnxFeatureConfig) + - sizeof(SherpaOnnxOnlineModelConfig) + 8 * 4 + - sizeof(SherpaOnnxOnlineCtcFstDecoderConfig) + 5 * 4, - ""); - -// VAD memory layout verification -static_assert(sizeof(SherpaOnnxSileroVadModelConfig) == 6 * 4, ""); -static_assert(sizeof(SherpaOnnxVadModelConfig) == - 
sizeof(SherpaOnnxSileroVadModelConfig) + 4 * 4, - ""); - -// TTS memory layout verification -static_assert(sizeof(SherpaOnnxOfflineTtsVitsModelConfig) == 8 * 4, ""); -static_assert(sizeof(SherpaOnnxOfflineTtsMatchaModelConfig) == 8 * 4, ""); -static_assert(sizeof(SherpaOnnxOfflineTtsKokoroModelConfig) == 7 * 4, ""); -static_assert(sizeof(SherpaOnnxOfflineTtsModelConfig) == - sizeof(SherpaOnnxOfflineTtsVitsModelConfig) + - sizeof(SherpaOnnxOfflineTtsMatchaModelConfig) + - sizeof(SherpaOnnxOfflineTtsKokoroModelConfig) + 3 * 4, - ""); -static_assert(sizeof(SherpaOnnxOfflineTtsConfig) == - sizeof(SherpaOnnxOfflineTtsModelConfig) + 4 * 4, - ""); - -// Speaker Diarization memory layout verification -static_assert(sizeof(SherpaOnnxOfflineSpeakerSegmentationPyannoteModelConfig) == - 1 * 4, - ""); -static_assert( - sizeof(SherpaOnnxOfflineSpeakerSegmentationModelConfig) == - sizeof(SherpaOnnxOfflineSpeakerSegmentationPyannoteModelConfig) + 3 * 4, - ""); -static_assert(sizeof(SherpaOnnxFastClusteringConfig) == 2 * 4, ""); -static_assert(sizeof(SherpaOnnxSpeakerEmbeddingExtractorConfig) == 4 * 4, ""); -static_assert(sizeof(SherpaOnnxOfflineSpeakerDiarizationConfig) == - sizeof(SherpaOnnxOfflineSpeakerSegmentationModelConfig) + - sizeof(SherpaOnnxSpeakerEmbeddingExtractorConfig) + - sizeof(SherpaOnnxFastClusteringConfig) + 2 * 4, - ""); - -// Speech Enhancement memory layout verification -static_assert(sizeof(SherpaOnnxOfflineSpeechDenoiserGtcrnModelConfig) == 1 * 4, - ""); -static_assert(sizeof(SherpaOnnxOfflineSpeechDenoiserModelConfig) == - sizeof(SherpaOnnxOfflineSpeechDenoiserGtcrnModelConfig) + - 3 * 4, - ""); -static_assert(sizeof(SherpaOnnxOfflineSpeechDenoiserConfig) == - sizeof(SherpaOnnxOfflineSpeechDenoiserModelConfig), - ""); - -// Keyword Spotting memory layout verification -static_assert(sizeof(SherpaOnnxKeywordSpotterConfig) == - sizeof(SherpaOnnxFeatureConfig) + - sizeof(SherpaOnnxOnlineModelConfig) + 7 * 4, - ""); - // ============================================================================ // Debug printing functions for all model types // ============================================================================ From 720ce683d2a2f251fbd42099bb0b7d95b54aca9d Mon Sep 17 00:00:00 2001 From: Arthur Breton Date: Sat, 26 Apr 2025 17:34:08 +0800 Subject: [PATCH 7/9] wip --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index 782d315709..cceb72f628 100644 --- a/.gitignore +++ b/.gitignore @@ -149,5 +149,8 @@ build-wasm-combined/ scripts/dotnet/examples/obj/Debug/net8.0/Common.AssemblyInfo.cs scripts/dotnet/examples/obj/Debug/net8.0/Common.GeneratedMSBuildEditorConfig.editorconfig scripts/dotnet/examples/obj/Debug/net8.0/Common.AssemblyInfoInputs.cache +wasm/asr/sherpa-onnx-wasm-main-asr.data +wasm/asr/sherpa-onnx-wasm-main-asr.js +wasm/asr/sherpa-onnx-wasm-main-asr.wasm sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02 From d9d76499ad7abe6441bfb4cba6b4f160c6d7d51a Mon Sep 17 00:00:00 2001 From: Arthur Breton Date: Sat, 26 Apr 2025 21:34:42 +0800 Subject: [PATCH 8/9] wip: failure state --- wasm/combined/README.md | 14 +- wasm/combined/demos/asr.html | 108 +++++- wasm/combined/demos/common.js | 17 + ...nnx-asr.js => sherpa-onnx-combined-asr.js} | 336 ++++++++++++++---- ...x-core.js => sherpa-onnx-combined-core.js} | 223 +++++++++++- ...js => sherpa-onnx-combined-enhancement.js} | 0 ...nnx-kws.js => sherpa-onnx-combined-kws.js} | 0 ...ker.js => sherpa-onnx-combined-speaker.js} | 0 ...nnx-tts.js => sherpa-onnx-combined-tts.js} | 0 
...nnx-vad.js => sherpa-onnx-combined-vad.js} | 0 wasm/combined/sherpa-onnx-combined.js | 114 ++++-- 11 files changed, 683 insertions(+), 129 deletions(-) rename wasm/combined/{sherpa-onnx-asr.js => sherpa-onnx-combined-asr.js} (68%) rename wasm/combined/{sherpa-onnx-core.js => sherpa-onnx-combined-core.js} (52%) rename wasm/combined/{sherpa-onnx-enhancement.js => sherpa-onnx-combined-enhancement.js} (100%) rename wasm/combined/{sherpa-onnx-kws.js => sherpa-onnx-combined-kws.js} (100%) rename wasm/combined/{sherpa-onnx-speaker.js => sherpa-onnx-combined-speaker.js} (100%) rename wasm/combined/{sherpa-onnx-tts.js => sherpa-onnx-combined-tts.js} (100%) rename wasm/combined/{sherpa-onnx-vad.js => sherpa-onnx-combined-vad.js} (100%) diff --git a/wasm/combined/README.md b/wasm/combined/README.md index 503f0ce09c..87006f52fd 100644 --- a/wasm/combined/README.md +++ b/wasm/combined/README.md @@ -15,13 +15,13 @@ When built, the following files are generated: - `sherpa-onnx-wasm-combined.wasm` - The WebAssembly binary - `sherpa-onnx-wasm-combined.data` - The preloaded assets (models) - JS library files: - - `sherpa-onnx-core.js` - Core functionality - - `sherpa-onnx-asr.js` - ASR functionality - - `sherpa-onnx-vad.js` - VAD functionality - - `sherpa-onnx-tts.js` - TTS functionality - - `sherpa-onnx-kws.js` - Keyword Spotting functionality - - `sherpa-onnx-speaker.js` - Speaker Diarization functionality - - `sherpa-onnx-enhancement.js` - Speech Enhancement functionality + - `sherpa-onnx-combined-core.js` - Core functionality + - `sherpa-onnx-combined-asr.js` - ASR functionality + - `sherpa-onnx-combined-vad.js` - VAD functionality + - `sherpa-onnx-combined-tts.js` - TTS functionality + - `sherpa-onnx-combined-kws.js` - Keyword Spotting functionality + - `sherpa-onnx-combined-speaker.js` - Speaker Diarization functionality + - `sherpa-onnx-combined-enhancement.js` - Speech Enhancement functionality - `sherpa-onnx-combined.js` - Combined functionality wrapper ## Building diff --git a/wasm/combined/demos/asr.html b/wasm/combined/demos/asr.html index 5b287bc4d3..355963c3af 100644 --- a/wasm/combined/demos/asr.html +++ b/wasm/combined/demos/asr.html @@ -89,11 +89,44 @@
Model Configuration
    let asrProcessor = null; function initializeUI() { + console.log('initializeUI called. Setting up ASR functionality...'); document.getElementById('status').textContent = 'WebAssembly module loaded. Ready to load models.'; setupASR(); + console.log('setupASR called from initializeUI.'); } + // Define the callback that sherpa-onnx-combined.js will execute + window.onSherpaOnnxReady = function(success, errorOrMissing) { + console.log('onSherpaOnnxReady callback triggered. Success status:', success); + if (success) { + console.log("Sherpa-ONNX modules ready, initializing UI."); + // Additional check for HEAPF32 availability before enabling UI + if (window.Module && window.Module.HEAPF32) { + console.log("HEAPF32 confirmed available. UI fully enabled."); + document.getElementById('status').textContent = 'WebAssembly module and HEAPF32 ready. Ready to load models.'; + initializeUI(); + document.getElementById('load-asr-model').disabled = false; + } else { + console.error("HEAPF32 not available despite modules being ready. UI partially enabled with warning."); + document.getElementById('status').textContent = 'Warning: WebAssembly module ready but HEAPF32 not available. Some functions may not work.'; + initializeUI(); + document.getElementById('load-asr-model').disabled = false; // Enable anyway for testing + } + } else { + console.error("Failed to load Sherpa-ONNX modules:", errorOrMissing); + document.getElementById('status').textContent = 'Error: Failed to load necessary modules. Check console.'; + // Disable controls if loading failed + document.getElementById('load-asr-model').disabled = true; + document.getElementById('start-asr').disabled = true; + document.getElementById('stop-asr').disabled = true; + } + // Force enable the button for testing purposes + console.log('Forcing enable of Load ASR Model button for debugging purposes.'); + document.getElementById('load-asr-model').disabled = false; + }; + function setupASR() { + console.log('setupASR function entered. Setting up event listeners...'); const loadBtn = document.getElementById('load-asr-model'); const startBtn = document.getElementById('start-asr'); const stopBtn = document.getElementById('stop-asr'); @@ -102,10 +135,14 @@
Model Configuration
    const modelTypeSelect = document.getElementById('asr-model-type'); const controlsDiv = document.querySelector('.controls'); + console.log('DOM elements retrieved. Load button:', loadBtn); + let unloadBtn = null; // Update UI based on selected model type + console.log('Setting up model type change listener...'); modelTypeSelect.addEventListener('change', () => { + console.log('Model type changed to:', modelTypeSelect.value); const modelType = modelTypeSelect.value; const transducerElements = document.querySelectorAll('.transducer-model'); @@ -117,12 +154,20 @@
Model Configuration
    } }); }); + console.log('Model type change listener setup complete.'); + console.log('Setting up click event listener for Load ASR Model button...'); loadBtn.addEventListener('click', async () => { + console.log('Load ASR Model button clicked. Starting model loading process.'); loadBtn.disabled = true; loadBtn.textContent = 'Loading...'; statusElem.textContent = 'Status: Loading model...'; - + console.log('Current SherpaOnnx readiness status:', !!SherpaOnnx.isReady); + console.log('window.Module status:', !!window.Module); + if (window.Module) { + console.log('window.Module.HEAPF32 status:', !!window.Module.HEAPF32); + } + console.log('Attempting to load model with current configuration...'); try { // Get options from UI const modelType = modelTypeSelect.value; @@ -158,22 +203,33 @@
Model Configuration
    // Load the model const loadedModel = await SherpaOnnx.ASR.loadModel(modelConfig); - // Create the ASR recognizer - asr = SherpaOnnx.ASR.createOnlineRecognizer(loadedModel, { - debug: debug - }); - - loadBtn.textContent = 'Model Loaded'; - statusElem.textContent = 'Status: Model loaded successfully'; - startBtn.disabled = false; - - // Add unload button if it doesn't exist - if (!unloadBtn) { - unloadBtn = createUnloadButton(controlsDiv, 'ASR', asr, statusElem); - unloadBtn.id = 'unload-asr-model'; - } else { - unloadBtn.disabled = false; - } + // Use setTimeout to delay recognizer creation slightly + setTimeout(() => { + try { + // Create the ASR recognizer + asr = SherpaOnnx.ASR.createOnlineRecognizer(loadedModel, { + debug: debug + }); + + loadBtn.textContent = 'Model Loaded'; + statusElem.textContent = 'Status: Model loaded successfully'; + startBtn.disabled = false; + + // Add unload button if it doesn't exist + if (!unloadBtn) { + unloadBtn = createUnloadButton(controlsDiv, 'ASR', asr, statusElem); + unloadBtn.id = 'unload-asr-model'; + } else { + unloadBtn.disabled = false; + } + } catch (innerError) { + console.error('Failed to create ASR recognizer (inside setTimeout):', innerError); + loadBtn.textContent = 'Load Failed'; + statusElem.textContent = `Status: Error (delayed init) - ${innerError.message}`; + loadBtn.disabled = false; // Re-enable load button on inner error + } + }, 0); // Delay of 0ms allows event loop tick + } catch (error) { console.error('Failed to load ASR model:', error); loadBtn.textContent = 'Load Failed'; @@ -184,6 +240,24 @@
Model Configuration
    startBtn.addEventListener('click', async () => { try { + // Check if WebAssembly module is fully initialized + if (!window.Module) { + statusElem.textContent = 'Status: Waiting for WebAssembly module to initialize...'; + await new Promise((resolve, reject) => { + const checkInterval = setInterval(() => { + if (window.Module) { + clearInterval(checkInterval); + resolve(); + } + }, 100); + setTimeout(() => { + clearInterval(checkInterval); + reject(new Error('WebAssembly module initialization timed out after 30 seconds')); + }, 30000); + }); + } + statusElem.textContent = 'Status: WebAssembly module initialized. Starting audio processing...'; + await getMicrophoneInput(); // Create an online stream diff --git a/wasm/combined/demos/common.js b/wasm/combined/demos/common.js index fb53619861..05c06adfb0 100644 --- a/wasm/combined/demos/common.js +++ b/wasm/combined/demos/common.js @@ -1,3 +1,20 @@ +// common.js - Shared utilities for Sherpa-ONNX WASM demos + +// --- Emscripten Module configuration --- +// This MUST run before sherpa-onnx-wasm-combined.js is loaded +var Module = Module || {}; +Module.locateFile = function(path, prefix) { + // If the path is the wasm or data file, load it from the parent directory + if (path.endsWith('.wasm') || path.endsWith('.data')) { + // Assumes demos are in a subdirectory (like /demos/) + // Adjust this path if your structure is different + return `../${path}`; + } + // Otherwise, use the default logic (usually prefix + path) + return prefix + path; +}; +// --- End Emscripten Module configuration --- + // Set up initialization callback window.onSherpaOnnxReady = function(success, error) { if (success) { diff --git a/wasm/combined/sherpa-onnx-asr.js b/wasm/combined/sherpa-onnx-combined-asr.js similarity index 68% rename from wasm/combined/sherpa-onnx-asr.js rename to wasm/combined/sherpa-onnx-combined-asr.js index 842a23c72b..84ec2a487e 100644 --- a/wasm/combined/sherpa-onnx-asr.js +++ b/wasm/combined/sherpa-onnx-combined-asr.js @@ -18,6 +18,125 @@ // Create or use existing ASR namespace SherpaOnnx.ASR = SherpaOnnx.ASR || {}; + // Add readiness promise for WebAssembly module + SherpaOnnx.ASR.ready = new Promise((resolve, reject) => { + console.log('Waiting for SherpaOnnx core module initialization...'); + let attempt = 0; + const checkInterval = setInterval(() => { + attempt++; + console.log(`Attempt ${attempt}: Checking SherpaOnnx readiness status...`); + console.log(`SherpaOnnx.isReady: ${!!SherpaOnnx.isReady}`); + console.log(`window.Module exists: ${!!window.Module}`); + if (window.Module) { + console.log(`window.Module.calledRun: ${!!window.Module.calledRun}`); + console.log(`window.Module.HEAPF32 exists: ${!!window.Module.HEAPF32}`); + console.log(`window.Module properties:`, Object.keys(window.Module).slice(0, 10), `... 
(first 10 shown)`); + // Enhanced workaround for HEAPF32 not available + if (!window.Module.HEAPF32) { + try { + if (window.Module.HEAP8) { + window.Module.HEAPF32 = new Float32Array(window.Module.HEAP8.buffer); + console.log('Successfully initialized HEAPF32 dynamically from HEAP8.'); + } else if (window.Module.asm && window.Module.asm.memory) { + window.Module.HEAPF32 = new Float32Array(window.Module.asm.memory.buffer); + console.log('Successfully initialized HEAPF32 directly from WebAssembly memory.'); + } else if (window.Module.memory) { + window.Module.HEAPF32 = new Float32Array(window.Module.memory.buffer); + console.log('Successfully initialized HEAPF32 from Module.memory.'); + } else if (window.Module._memory) { + window.Module.HEAPF32 = new Float32Array(window.Module._memory.buffer); + console.log('Successfully initialized HEAPF32 from Module._memory.'); + } else if (typeof WebAssembly !== 'undefined' && WebAssembly.Memory && window.Module.asm) { + // Attempt to find memory instance in WebAssembly runtime + for (const prop in window.Module.asm) { + if (window.Module.asm[prop] instanceof WebAssembly.Memory) { + window.Module.HEAPF32 = new Float32Array(window.Module.asm[prop].buffer); + console.log(`Successfully initialized HEAPF32 from WebAssembly.Memory found in asm.${prop}.`); + break; + } + } + if (!window.Module.HEAPF32) { + console.warn('No WebAssembly.Memory found in asm properties.'); + } + } else { + console.warn('No suitable method found to initialize HEAPF32. Logging detailed Module info for debugging.'); + // Log detailed information about Module for debugging + console.log('Detailed Module properties:', Object.keys(window.Module)); + if (window.Module.asm) { + console.log('Module.asm properties:', Object.keys(window.Module.asm).slice(0, 10), '... (first 10 shown)'); + } + } + } catch (e) { + console.error('Failed to initialize HEAPF32 dynamically:', e); + } + // Log post-initialization status + console.log(`Post-workaround - window.Module.HEAPF32 exists: ${!!window.Module.HEAPF32}`); + } + } + if (SherpaOnnx.isReady || (window.Module && window.Module.calledRun)) { + console.log('Proceeding with ASR initialization. SherpaOnnx core module is ready or Module.calledRun is true.'); + SherpaOnnx.isReady = true; // Force set readiness flag + console.log('SherpaOnnx readiness flag manually set to true in ASR module.'); + clearInterval(checkInterval); + resolve(window.Module); + } else { + console.log('Still waiting for SherpaOnnx core module...'); + } + }, 500); + setTimeout(() => { + clearInterval(checkInterval); + console.error('SherpaOnnx core module initialization timed out after 60 seconds. 
Proceeding anyway if Module exists.'); + if (window.Module) { + // Enhanced workaround for HEAPF32 not available on timeout + if (!window.Module.HEAPF32) { + try { + if (window.Module.HEAP8) { + window.Module.HEAPF32 = new Float32Array(window.Module.HEAP8.buffer); + console.log('Successfully initialized HEAPF32 dynamically from HEAP8 on timeout.'); + } else if (window.Module.asm && window.Module.asm.memory) { + window.Module.HEAPF32 = new Float32Array(window.Module.asm.memory.buffer); + console.log('Successfully initialized HEAPF32 directly from WebAssembly memory on timeout.'); + } else if (window.Module.memory) { + window.Module.HEAPF32 = new Float32Array(window.Module.memory.buffer); + console.log('Successfully initialized HEAPF32 from Module.memory on timeout.'); + } else if (window.Module._memory) { + window.Module.HEAPF32 = new Float32Array(window.Module._memory.buffer); + console.log('Successfully initialized HEAPF32 from Module._memory on timeout.'); + } else if (typeof WebAssembly !== 'undefined' && WebAssembly.Memory && window.Module.asm) { + // Attempt to find memory instance in WebAssembly runtime + for (const prop in window.Module.asm) { + if (window.Module.asm[prop] instanceof WebAssembly.Memory) { + window.Module.HEAPF32 = new Float32Array(window.Module.asm[prop].buffer); + console.log(`Successfully initialized HEAPF32 from WebAssembly.Memory found in asm.${prop} on timeout.`); + break; + } + } + if (!window.Module.HEAPF32) { + console.warn('No WebAssembly.Memory found in asm properties on timeout.'); + } + } else { + console.warn('No suitable method found to initialize HEAPF32 on timeout. Logging detailed Module info for debugging.'); + // Log detailed information about Module for debugging + console.log('Detailed Module properties on timeout:', Object.keys(window.Module)); + if (window.Module.asm) { + console.log('Module.asm properties on timeout:', Object.keys(window.Module.asm).slice(0, 10), '... 
(first 10 shown)'); + } + } + } catch (e) { + console.error('Failed to initialize HEAPF32 dynamically on timeout:', e); + } + // Log post-initialization status + console.log(`Post-workaround on timeout - window.Module.HEAPF32 exists: ${!!window.Module.HEAPF32}`); + } + SherpaOnnx.isReady = true; // Force set readiness flag on timeout + console.log('SherpaOnnx readiness flag manually set to true on timeout in ASR module.'); + resolve(window.Module); + } else { + reject(new Error('SherpaOnnx core module initialization timed out after 60 seconds and Module not found')); + } + }, 60000); + }); + // Define the ASR module functionality SherpaOnnx.ASR = { /** @@ -161,154 +280,154 @@ } try { + // Use window.Module instead of the parameter Module + const M = window.Module; + // First, allocate all the strings we need const allocatedStrings = {}; // Transducer model config if (config.modelConfig.transducer) { - allocatedStrings.encoder = SherpaOnnx.Utils.allocateString(config.modelConfig.transducer.encoder, Module); - allocatedStrings.decoder = SherpaOnnx.Utils.allocateString(config.modelConfig.transducer.decoder, Module); - allocatedStrings.joiner = SherpaOnnx.Utils.allocateString(config.modelConfig.transducer.joiner, Module); + allocatedStrings.encoder = SherpaOnnx.Utils.allocateString(config.modelConfig.transducer.encoder, M); + allocatedStrings.decoder = SherpaOnnx.Utils.allocateString(config.modelConfig.transducer.decoder, M); + allocatedStrings.joiner = SherpaOnnx.Utils.allocateString(config.modelConfig.transducer.joiner, M); } else { - allocatedStrings.encoder = SherpaOnnx.Utils.allocateString('', Module); - allocatedStrings.decoder = SherpaOnnx.Utils.allocateString('', Module); - allocatedStrings.joiner = SherpaOnnx.Utils.allocateString('', Module); + allocatedStrings.encoder = SherpaOnnx.Utils.allocateString('', M); + allocatedStrings.decoder = SherpaOnnx.Utils.allocateString('', M); + allocatedStrings.joiner = SherpaOnnx.Utils.allocateString('', M); } // Paraformer model config if (config.modelConfig.paraformer) { - allocatedStrings.paraEncoder = SherpaOnnx.Utils.allocateString(config.modelConfig.paraformer.encoder, Module); - allocatedStrings.paraDecoder = SherpaOnnx.Utils.allocateString(config.modelConfig.paraformer.decoder, Module); + allocatedStrings.paraEncoder = SherpaOnnx.Utils.allocateString(config.modelConfig.paraformer.encoder, M); + allocatedStrings.paraDecoder = SherpaOnnx.Utils.allocateString(config.modelConfig.paraformer.decoder, M); } else { - allocatedStrings.paraEncoder = SherpaOnnx.Utils.allocateString('', Module); - allocatedStrings.paraDecoder = SherpaOnnx.Utils.allocateString('', Module); + allocatedStrings.paraEncoder = SherpaOnnx.Utils.allocateString('', M); + allocatedStrings.paraDecoder = SherpaOnnx.Utils.allocateString('', M); } // Zipformer2 CTC model config if (config.modelConfig.zipformer2Ctc) { - allocatedStrings.zipformerModel = SherpaOnnx.Utils.allocateString(config.modelConfig.zipformer2Ctc.model, Module); + allocatedStrings.zipformerModel = SherpaOnnx.Utils.allocateString(config.modelConfig.zipformer2Ctc.model, M); } else { - allocatedStrings.zipformerModel = SherpaOnnx.Utils.allocateString('', Module); + allocatedStrings.zipformerModel = SherpaOnnx.Utils.allocateString('', M); } // Tokens, provider, model_type, modeling_unit, bpe_vocab - allocatedStrings.tokens = SherpaOnnx.Utils.allocateString(config.modelConfig.tokens, Module); - allocatedStrings.provider = SherpaOnnx.Utils.allocateString(config.modelConfig.provider || 'cpu', Module); - 
allocatedStrings.modelType = SherpaOnnx.Utils.allocateString('', Module); // Not used in JS API - allocatedStrings.modelingUnit = SherpaOnnx.Utils.allocateString('', Module); // Not used in JS API - allocatedStrings.bpeVocab = SherpaOnnx.Utils.allocateString('', Module); // Not used in JS API + allocatedStrings.tokens = SherpaOnnx.Utils.allocateString(config.modelConfig.tokens, M); + allocatedStrings.provider = SherpaOnnx.Utils.allocateString(config.modelConfig.provider || 'cpu', M); + allocatedStrings.modelType = SherpaOnnx.Utils.allocateString('', M); // Not used in JS API + allocatedStrings.modelingUnit = SherpaOnnx.Utils.allocateString('', M); // Not used in JS API + allocatedStrings.bpeVocab = SherpaOnnx.Utils.allocateString('', M); // Not used in JS API // Token buffer is not used in JS API - allocatedStrings.tokensBuffer = SherpaOnnx.Utils.allocateString('', Module); + allocatedStrings.tokensBuffer = SherpaOnnx.Utils.allocateString('', M); // Decoding method - allocatedStrings.decodingMethod = SherpaOnnx.Utils.allocateString(config.decodingMethod || 'greedy_search', Module); + allocatedStrings.decodingMethod = SherpaOnnx.Utils.allocateString(config.decodingMethod || 'greedy_search', M); // Hotwords - allocatedStrings.hotwordsFile = SherpaOnnx.Utils.allocateString('', Module); // Not used in JS API - allocatedStrings.hotwordsBuffer = SherpaOnnx.Utils.allocateString('', Module); // Not used in JS API + allocatedStrings.hotwordsFile = SherpaOnnx.Utils.allocateString('', M); // Not used in JS API + allocatedStrings.hotwordsBuffer = SherpaOnnx.Utils.allocateString('', M); // Not used in JS API // Rule FSTs and FARs - allocatedStrings.ruleFsts = SherpaOnnx.Utils.allocateString('', Module); // Not used in JS API - allocatedStrings.ruleFars = SherpaOnnx.Utils.allocateString('', Module); // Not used in JS API + allocatedStrings.ruleFsts = SherpaOnnx.Utils.allocateString('', M); // Not used in JS API + allocatedStrings.ruleFars = SherpaOnnx.Utils.allocateString('', M); // Not used in JS API // Now allocate the main config structure // Size needs to match the C structure size const configSize = 200; // Adjust if needed to match C struct - const configPtr = Module._malloc(configSize); - - // Zero out the memory - Module.HEAP8.fill(0, configPtr, configPtr + configSize); + const configPtr = M._malloc(configSize); - // Set feat_config fields + // Set feat_config fields (Starts populating the allocated memory) let offset = 0; - Module.setValue(configPtr + offset, config.featConfig.sampleRate || 16000, 'i32'); + M.setValue(configPtr + offset, config.featConfig.sampleRate || 16000, 'i32'); offset += 4; - Module.setValue(configPtr + offset, config.featConfig.featureDim || 80, 'i32'); + M.setValue(configPtr + offset, config.featConfig.featureDim || 80, 'i32'); offset += 4; // Set model_config fields - transducer - Module.setValue(configPtr + offset, allocatedStrings.encoder.ptr, 'i8*'); + M.setValue(configPtr + offset, allocatedStrings.encoder.ptr, 'i8*'); offset += 4; - Module.setValue(configPtr + offset, allocatedStrings.decoder.ptr, 'i8*'); + M.setValue(configPtr + offset, allocatedStrings.decoder.ptr, 'i8*'); offset += 4; - Module.setValue(configPtr + offset, allocatedStrings.joiner.ptr, 'i8*'); + M.setValue(configPtr + offset, allocatedStrings.joiner.ptr, 'i8*'); offset += 4; // Set model_config fields - paraformer - Module.setValue(configPtr + offset, allocatedStrings.paraEncoder.ptr, 'i8*'); + M.setValue(configPtr + offset, allocatedStrings.paraEncoder.ptr, 'i8*'); offset += 4; - 
Module.setValue(configPtr + offset, allocatedStrings.paraDecoder.ptr, 'i8*'); + M.setValue(configPtr + offset, allocatedStrings.paraDecoder.ptr, 'i8*'); offset += 4; // Set model_config fields - zipformer2_ctc - Module.setValue(configPtr + offset, allocatedStrings.zipformerModel.ptr, 'i8*'); + M.setValue(configPtr + offset, allocatedStrings.zipformerModel.ptr, 'i8*'); offset += 4; // Set remaining model_config fields - Module.setValue(configPtr + offset, allocatedStrings.tokens.ptr, 'i8*'); + M.setValue(configPtr + offset, allocatedStrings.tokens.ptr, 'i8*'); offset += 4; - Module.setValue(configPtr + offset, config.modelConfig.numThreads || 1, 'i32'); + M.setValue(configPtr + offset, config.modelConfig.numThreads || 1, 'i32'); offset += 4; - Module.setValue(configPtr + offset, allocatedStrings.provider.ptr, 'i8*'); + M.setValue(configPtr + offset, allocatedStrings.provider.ptr, 'i8*'); offset += 4; - Module.setValue(configPtr + offset, config.modelConfig.debug || 0, 'i32'); + M.setValue(configPtr + offset, config.modelConfig.debug || 0, 'i32'); offset += 4; - Module.setValue(configPtr + offset, allocatedStrings.modelType.ptr, 'i8*'); + M.setValue(configPtr + offset, allocatedStrings.modelType.ptr, 'i8*'); offset += 4; - Module.setValue(configPtr + offset, allocatedStrings.modelingUnit.ptr, 'i8*'); + M.setValue(configPtr + offset, allocatedStrings.modelingUnit.ptr, 'i8*'); offset += 4; - Module.setValue(configPtr + offset, allocatedStrings.bpeVocab.ptr, 'i8*'); + M.setValue(configPtr + offset, allocatedStrings.bpeVocab.ptr, 'i8*'); offset += 4; - Module.setValue(configPtr + offset, allocatedStrings.tokensBuffer.ptr, 'i8*'); + M.setValue(configPtr + offset, allocatedStrings.tokensBuffer.ptr, 'i8*'); offset += 4; - Module.setValue(configPtr + offset, 0, 'i32'); // tokens_buf_size + M.setValue(configPtr + offset, 0, 'i32'); // tokens_buf_size offset += 4; // Set recognizer config fields - Module.setValue(configPtr + offset, allocatedStrings.decodingMethod.ptr, 'i8*'); + M.setValue(configPtr + offset, allocatedStrings.decodingMethod.ptr, 'i8*'); offset += 4; - Module.setValue(configPtr + offset, config.maxActivePaths || 4, 'i32'); + M.setValue(configPtr + offset, config.maxActivePaths || 4, 'i32'); offset += 4; - Module.setValue(configPtr + offset, config.enableEndpoint || 1, 'i32'); + M.setValue(configPtr + offset, config.enableEndpoint || 1, 'i32'); offset += 4; - Module.setValue(configPtr + offset, config.rule1MinTrailingSilence || 2.4, 'float'); + M.setValue(configPtr + offset, config.rule1MinTrailingSilence || 2.4, 'float'); offset += 4; - Module.setValue(configPtr + offset, config.rule2MinTrailingSilence || 1.2, 'float'); + M.setValue(configPtr + offset, config.rule2MinTrailingSilence || 1.2, 'float'); offset += 4; - Module.setValue(configPtr + offset, config.rule3MinUtteranceLength || 300, 'float'); + M.setValue(configPtr + offset, config.rule3MinUtteranceLength || 300, 'float'); offset += 4; // Set hotwords fields - Module.setValue(configPtr + offset, allocatedStrings.hotwordsFile.ptr, 'i8*'); + M.setValue(configPtr + offset, allocatedStrings.hotwordsFile.ptr, 'i8*'); offset += 4; - Module.setValue(configPtr + offset, 0.0, 'float'); // hotwords_score + M.setValue(configPtr + offset, 0.0, 'float'); // hotwords_score offset += 4; // Set CTC FST decoder config - graph and max_active - Module.setValue(configPtr + offset, 0, 'i8*'); // graph + M.setValue(configPtr + offset, 0, 'i8*'); // graph offset += 4; - Module.setValue(configPtr + offset, 0, 'i32'); // max_active + M.setValue(configPtr 
+ offset, 0, 'i32'); // max_active offset += 4; // Set rule FSTs and FARs - Module.setValue(configPtr + offset, allocatedStrings.ruleFsts.ptr, 'i8*'); + M.setValue(configPtr + offset, allocatedStrings.ruleFsts.ptr, 'i8*'); offset += 4; - Module.setValue(configPtr + offset, allocatedStrings.ruleFars.ptr, 'i8*'); + M.setValue(configPtr + offset, allocatedStrings.ruleFars.ptr, 'i8*'); offset += 4; // Set blank penalty - Module.setValue(configPtr + offset, 0.0, 'float'); // blank_penalty + M.setValue(configPtr + offset, 0.0, 'float'); // blank_penalty offset += 4; // Set hotwords buffer and size - Module.setValue(configPtr + offset, allocatedStrings.hotwordsBuffer.ptr, 'i8*'); + M.setValue(configPtr + offset, allocatedStrings.hotwordsBuffer.ptr, 'i8*'); offset += 4; - Module.setValue(configPtr + offset, 0, 'i32'); // hotwords_buf_size + M.setValue(configPtr + offset, 0, 'i32'); // hotwords_buf_size offset += 4; // Save the allocated strings for freeing later - Module.SherpaOnnxAllocatedStrings = allocatedStrings; + M.SherpaOnnxAllocatedStrings = allocatedStrings; return configPtr; } catch (error) { @@ -348,9 +467,12 @@ * Create an online ASR recognizer with the loaded model * @param {Object} loadedModel - Model information returned by loadModel * @param {Object} options - Additional configuration options - * @returns {OnlineRecognizer} - An instance of OnlineRecognizer + * @returns {Promise} - A promise resolving to an instance of OnlineRecognizer */ - createOnlineRecognizer: function(loadedModel, options = {}) { + createOnlineRecognizer: async function(loadedModel, options = {}) { + // Wait for WebAssembly module to be ready + await SherpaOnnx.ASR.ready; + const config = { featConfig: { sampleRate: options.sampleRate || 16000, @@ -359,7 +481,7 @@ modelConfig: { tokens: loadedModel.actualPaths.tokens || `${loadedModel.modelDir}/tokens.txt`, numThreads: options.numThreads || 1, - provider: options.provider || 'cpu', + provider: 'cpu', // Force to cpu to avoid issues with quantized ONNX in WebAssembly debug: options.debug !== undefined ? options.debug : 1, // Configurable debug }, decodingMethod: options.decodingMethod || 'greedy_search', @@ -387,7 +509,37 @@ }; } - const recognizer = new global.OnlineRecognizer(config, global.Module); + // Add readiness check using Module.calledRun + if (!window.Module || !window.Module.calledRun) { + console.error('CRITICAL: Emscripten runtime not initialized (Module.calledRun is not true) when creating recognizer.'); + throw new Error('WASM runtime not ready'); + } + console.log('Module.calledRun is true. Proceeding with recognizer creation.'); + console.log('Inspecting window.Module inside createOnlineRecognizer:', window.Module); + + const recognizer = new global.OnlineRecognizer(config, window.Module); // Use window.Module explicitly + + // Add detailed logging to inspect the recognizer object + console.log('Recognizer object created:', recognizer); + console.log('Checking if createStream method exists on recognizer:', typeof recognizer.createStream === 'function'); + if (typeof recognizer.createStream !== 'function') { + console.error('createStream method not found on recognizer. 
Attempting fallback instantiation.'); + // Fallback: Manually attach methods if they are missing due to instantiation issues + recognizer.createStream = function() { + console.log('Using fallback createStream method.'); + const streamHandle = window.Module.ccall( + 'SherpaOnnxCreateOnlineStream', + 'number', + ['number'], + [this.handle] + ); + const stream = new global.OnlineStream(streamHandle, this.Module); + // Track the stream for cleanup + this.streams.push(stream); + return stream; + }; + console.log('Fallback createStream method attached to recognizer.'); + } // Track the resource for cleanup if tracking function is available if (SherpaOnnx.trackResource) { @@ -519,10 +671,7 @@ const configSize = 200; // Adjust if needed to match C struct const configPtr = Module._malloc(configSize); - // Zero out the memory - Module.HEAP8.fill(0, configPtr, configPtr + configSize); - - // Set feat_config fields + // Set feat_config fields (Starts populating the allocated memory) let offset = 0; Module.setValue(configPtr + offset, config.featConfig.sampleRate || 16000, 'i32'); offset += 4; @@ -624,15 +773,19 @@ * OnlineRecognizer class for streaming speech recognition */ global.OnlineRecognizer = global.OnlineRecognizer || function(config, Module) { - this.Module = Module; + this.Module = window.Module; // Explicitly use window.Module this.config = config; this.streams = []; // Track streams created by this recognizer - // Initialize the configuration in WASM - const configPtr = SherpaOnnx.ASR._initOnlineRecognizerConfig(config, Module); + // Initialize the configuration in WASM, explicitly passing window.Module + const configPtr = SherpaOnnx.ASR._initOnlineRecognizerConfig(config, window.Module); - // Create the recognizer - this.handle = Module.ccall( + if (!configPtr) { + throw new Error("Failed to initialize ASR config pointer."); + } + + // Create the recognizer using window.Module + this.handle = window.Module.ccall( 'SherpaOnnxCreateOnlineRecognizer', 'number', ['number'], @@ -771,7 +924,10 @@ */ global.OnlineStream = global.OnlineStream || function(handle, Module) { this.handle = handle; - this.Module = Module; + this.Module = Module || window.Module; // Use passed Module or fallback to window.Module + if (!this.Module || !this.Module.HEAPF32) { + console.warn('WebAssembly module not fully initialized in OnlineStream constructor. Will retry on method calls.'); + } this.pointer = null; // buffer this.n = 0; // buffer size @@ -781,6 +937,36 @@ * @param {Float32Array} samples - Audio samples in [-1, 1] range */ this.acceptWaveform = function(sampleRate, samples) { + if (!this.Module || !this.Module.HEAPF32) { + console.warn('WebAssembly module or HEAPF32 not available. Attempting to find initialized module.'); + this.Module = window.Module || global.Module; + if (!this.Module || !this.Module.HEAPF32) { + console.error('HEAPF32 still not available. 
Attempting to initialize memory view.'); + // Attempt to access or initialize HEAPF32 dynamically + if (this.Module && this.Module.HEAP8) { + try { + this.Module.HEAPF32 = new Float32Array(this.Module.HEAP8.buffer); + console.log('Successfully initialized HEAPF32 dynamically from HEAP8.'); + } catch (e) { + console.error('Failed to initialize HEAPF32 dynamically from HEAP8:', e); + // Last resort: Try to access WebAssembly memory directly + if (this.Module && this.Module.asm && this.Module.asm.memory) { + try { + this.Module.HEAPF32 = new Float32Array(this.Module.asm.memory.buffer); + console.log('Successfully initialized HEAPF32 directly from WebAssembly memory.'); + } catch (e2) { + console.error('Failed to initialize HEAPF32 directly from WebAssembly memory:', e2); + throw new Error('WebAssembly module or HEAPF32 not available after all retries. Ensure the WASM module is fully initialized.'); + } + } else { + throw new Error('WebAssembly module or HEAPF32 not available after retry. Ensure the WASM module is fully initialized.'); + } + } + } else { + throw new Error('WebAssembly module or HEAPF32 not available after retry. Ensure the WASM module is fully initialized.'); + } + } + } if (this.n < samples.length) { if (this.pointer) { this.Module._free(this.pointer); diff --git a/wasm/combined/sherpa-onnx-core.js b/wasm/combined/sherpa-onnx-combined-core.js similarity index 52% rename from wasm/combined/sherpa-onnx-core.js rename to wasm/combined/sherpa-onnx-combined-core.js index cef7038aef..0e35a7e7fc 100644 --- a/wasm/combined/sherpa-onnx-core.js +++ b/wasm/combined/sherpa-onnx-combined-core.js @@ -12,13 +12,224 @@ const SherpaOnnx = {}; // Check if Module already exists and extend it - if (typeof global.Module !== 'undefined') { - const originalOnRuntimeInitialized = global.Module.onRuntimeInitialized; - global.Module.onRuntimeInitialized = function() { - console.log("SherpaOnnx Core module initialized"); - if (originalOnRuntimeInitialized) originalOnRuntimeInitialized(); - if (global.onModuleReady) global.onModuleReady(); + if (typeof window.Module !== 'undefined') { + console.log('Module already defined at script load time. Checking initialization status...'); + console.log('Module properties at load:', Object.keys(window.Module).slice(0, 10), '... 
(first 10 shown)'); + console.log('Module.onRuntimeInitialized exists:', !!window.Module.onRuntimeInitialized); + console.log('Module.calledRun status at load:', !!window.Module.calledRun); + // Immediate attempt to initialize HEAPF32 at load time + if (!window.Module.HEAPF32) { + try { + if (window.Module.HEAP8) { + window.Module.HEAPF32 = new Float32Array(window.Module.HEAP8.buffer); + console.log('Successfully initialized HEAPF32 dynamically from HEAP8 at load time in core module.'); + } else if (window.Module.asm && window.Module.asm.memory) { + window.Module.HEAPF32 = new Float32Array(window.Module.asm.memory.buffer); + console.log('Successfully initialized HEAPF32 directly from WebAssembly memory at load time in core module.'); + } else if (window.Module.memory) { + window.Module.HEAPF32 = new Float32Array(window.Module.memory.buffer); + console.log('Successfully initialized HEAPF32 from Module.memory at load time in core module.'); + } else if (window.Module._memory) { + window.Module.HEAPF32 = new Float32Array(window.Module._memory.buffer); + console.log('Successfully initialized HEAPF32 from Module._memory at load time in core module.'); + } else if (typeof WebAssembly !== 'undefined' && WebAssembly.Memory && window.Module.asm) { + for (const prop in window.Module.asm) { + if (window.Module.asm[prop] instanceof WebAssembly.Memory) { + window.Module.HEAPF32 = new Float32Array(window.Module.asm[prop].buffer); + console.log(`Successfully initialized HEAPF32 from WebAssembly.Memory found in asm.${prop} at load time in core module.`); + break; + } + } + if (!window.Module.HEAPF32) { + console.warn('No WebAssembly.Memory found in asm properties at load time in core module.'); + } + } else { + console.warn('No standard method found to initialize HEAPF32 at load time in core module.'); + // Simplified deeper inspection of window.Module for any memory buffer + console.log('Inspecting window.Module for potential memory buffers...'); + let foundBuffer = false; + for (const prop in window.Module) { + try { + if (window.Module[prop] && typeof window.Module[prop] === 'object') { + if (window.Module[prop] instanceof ArrayBuffer) { + window.Module.HEAPF32 = new Float32Array(window.Module[prop]); + console.log(`Initialized HEAPF32 from ArrayBuffer in Module.${prop} at load time.`); + foundBuffer = true; + break; + } else if (window.Module[prop].buffer && window.Module[prop].buffer instanceof ArrayBuffer) { + window.Module.HEAPF32 = new Float32Array(window.Module[prop].buffer); + console.log(`Initialized HEAPF32 from buffer in Module.${prop}.buffer at load time.`); + foundBuffer = true; + break; + } + } + } catch (e) { + console.error(`Error inspecting Module.${prop} at load time:`, e.message); + } + } + if (!foundBuffer) { + console.log('No suitable memory buffer found in deep inspection at load time.'); + } + } + } catch (e) { + console.error('Failed to initialize HEAPF32 dynamically at load time in core module:', e.message); + } + console.log(`Post-workaround at load time - HEAPF32 exists: ${!!window.Module.HEAPF32}`); + } + const originalOnRuntimeInitialized = window.Module.onRuntimeInitialized; + window.Module.onRuntimeInitialized = function() { + console.log('onRuntimeInitialized triggered. 
SherpaOnnx Core module initialized.'); + console.log('Module.calledRun status when onRuntimeInitialized triggered:', !!window.Module.calledRun); + console.log('Checking for HEAPF32 availability after initialization:', !!window.Module.HEAPF32); + global.SherpaOnnx.isReady = true; // Custom readiness flag + console.log('SherpaOnnx readiness flag set to true'); + if (originalOnRuntimeInitialized) { + console.log('Calling original onRuntimeInitialized callback.'); + originalOnRuntimeInitialized(); + } + if (global.onModuleReady) { + console.log('Calling global.onModuleReady callback.'); + global.onModuleReady(); + } }; + console.log('onRuntimeInitialized hook set. Waiting for initialization...'); + // Additional check if calledRun is already true but onRuntimeInitialized hasn't fired + if (window.Module.calledRun && !global.SherpaOnnx.isReady) { + console.warn('Module.calledRun is true but onRuntimeInitialized has not fired. Forcing readiness check.'); + // Start a continuous check for HEAPF32 availability + let heapCheckAttempts = 0; + const heapCheckInterval = setInterval(() => { + heapCheckAttempts++; + console.log(`HEAPF32 check attempt ${heapCheckAttempts}: HEAPF32 exists: ${!!window.Module.HEAPF32}`); + if (!window.Module.HEAPF32) { + try { + if (window.Module.HEAP8) { + window.Module.HEAPF32 = new Float32Array(window.Module.HEAP8.buffer); + console.log('Initialized HEAPF32 from HEAP8 during continuous check.'); + } else if (window.Module.asm && window.Module.asm.memory) { + window.Module.HEAPF32 = new Float32Array(window.Module.asm.memory.buffer); + console.log('Initialized HEAPF32 from WebAssembly memory during continuous check.'); + } else if (window.Module.memory) { + window.Module.HEAPF32 = new Float32Array(window.Module.memory.buffer); + console.log('Initialized HEAPF32 from Module.memory during continuous check.'); + } else if (window.Module._memory) { + window.Module.HEAPF32 = new Float32Array(window.Module._memory.buffer); + console.log('Initialized HEAPF32 from Module._memory during continuous check.'); + } else if (typeof WebAssembly !== 'undefined' && WebAssembly.Memory && window.Module.asm) { + for (const prop in window.Module.asm) { + if (window.Module.asm[prop] instanceof WebAssembly.Memory) { + window.Module.HEAPF32 = new Float32Array(window.Module.asm[prop].buffer); + console.log(`Initialized HEAPF32 from WebAssembly.Memory in asm.${prop} during continuous check.`); + break; + } + } + if (!window.Module.HEAPF32) { + console.warn('No WebAssembly.Memory found in asm properties during continuous check.'); + } + } else { + console.warn('No standard method found to initialize HEAPF32 during continuous check.'); + // Simplified deeper inspection during continuous check + console.log(`Check ${heapCheckAttempts}: Inspecting window.Module for memory buffers...`); + let foundBuffer = false; + for (const prop in window.Module) { + try { + if (window.Module[prop] && typeof window.Module[prop] === 'object') { + if (window.Module[prop] instanceof ArrayBuffer) { + window.Module.HEAPF32 = new Float32Array(window.Module[prop]); + console.log(`Initialized HEAPF32 from ArrayBuffer in Module.${prop} during check ${heapCheckAttempts}.`); + foundBuffer = true; + break; + } else if (window.Module[prop].buffer && window.Module[prop].buffer instanceof ArrayBuffer) { + window.Module.HEAPF32 = new Float32Array(window.Module[prop].buffer); + console.log(`Initialized HEAPF32 from buffer in Module.${prop}.buffer during check ${heapCheckAttempts}.`); + foundBuffer = true; + break; + } + } + } catch 
(e) { + console.error(`Error inspecting Module.${prop} during check ${heapCheckAttempts}:`, e.message); + } + } + if (!foundBuffer) { + console.log(`Check ${heapCheckAttempts}: No suitable memory buffer found in deep inspection.`); + } + } + } catch (e) { + console.error('Failed to initialize HEAPF32 during continuous check:', e.message); + } + } + if (window.Module.HEAPF32 || heapCheckAttempts >= 10) { + clearInterval(heapCheckInterval); + console.log(`Stopping HEAPF32 checks after ${heapCheckAttempts} attempts. Final status - HEAPF32 exists: ${!!window.Module.HEAPF32}`); + if (!window.Module.HEAPF32) { + console.error('HEAPF32 initialization failed after maximum attempts. Proceeding anyway to unblock UI.'); + } + if (!global.SherpaOnnx.isReady) { + global.SherpaOnnx.isReady = true; + console.log('SherpaOnnx readiness flag manually set to true after HEAPF32 check.'); + if (global.onModuleReady) { + console.log('Calling global.onModuleReady callback after HEAPF32 check.'); + global.onModuleReady(); + } + } + } + }, 500); // Check every 500ms up to 10 attempts (5 seconds) + setTimeout(() => { + if (!global.SherpaOnnx.isReady) { + console.error('onRuntimeInitialized still not triggered after extended delay. Manually setting readiness flag.'); + global.SherpaOnnx.isReady = true; + console.log('SherpaOnnx readiness flag manually set to true due to timeout.'); + if (!window.Module.HEAPF32) { + console.log('HEAPF32 still not available after timeout. Final attempt to initialize.'); + try { + if (window.Module.HEAP8) { + window.Module.HEAPF32 = new Float32Array(window.Module.HEAP8.buffer); + console.log('Initialized HEAPF32 from HEAP8 during final timeout check.'); + } else if (window.Module.asm && window.Module.asm.memory) { + window.Module.HEAPF32 = new Float32Array(window.Module.asm.memory.buffer); + console.log('Initialized HEAPF32 from WebAssembly memory during final timeout check.'); + } + } catch (e) { + console.error('Final attempt to initialize HEAPF32 failed:', e.message); + } + console.log(`Final status after timeout - HEAPF32 exists: ${!!window.Module.HEAPF32}`); + } + if (global.onModuleReady) { + console.log('Calling global.onModuleReady callback due to forced readiness.'); + global.onModuleReady(); + } + } + }, 10000); // Wait 10 seconds before forcing readiness + } + } else { + console.log('Module not defined at script load time. Setting up property trap...'); + console.log("Waiting for Module to be defined..."); + Object.defineProperty(global, 'Module', { + set: function(mod) { + console.log('Module being set. Capturing initialization...'); + console.log('Module properties at set:', Object.keys(mod).slice(0, 10), '... (first 10 shown)'); + this._Module = mod; + console.log("Module defined, waiting for runtime initialization"); + const originalOnRuntimeInitialized = mod.onRuntimeInitialized; + mod.onRuntimeInitialized = function() { + console.log("onRuntimeInitialized triggered from setter. SherpaOnnx Core module initialized."); + global.SherpaOnnx.isReady = true; // Custom readiness flag + console.log("SherpaOnnx readiness flag set to true from setter"); + if (originalOnRuntimeInitialized) { + console.log('Calling original onRuntimeInitialized callback from setter.'); + originalOnRuntimeInitialized(); + } + if (global.onModuleReady) { + console.log('Calling global.onModuleReady callback from setter.'); + global.onModuleReady(); + } + }; + console.log('onRuntimeInitialized hook set in setter. 
Waiting for initialization...'); + }, + get: function() { + return this._Module; + } + }); } // Configuration for SherpaOnnx diff --git a/wasm/combined/sherpa-onnx-enhancement.js b/wasm/combined/sherpa-onnx-combined-enhancement.js similarity index 100% rename from wasm/combined/sherpa-onnx-enhancement.js rename to wasm/combined/sherpa-onnx-combined-enhancement.js diff --git a/wasm/combined/sherpa-onnx-kws.js b/wasm/combined/sherpa-onnx-combined-kws.js similarity index 100% rename from wasm/combined/sherpa-onnx-kws.js rename to wasm/combined/sherpa-onnx-combined-kws.js diff --git a/wasm/combined/sherpa-onnx-speaker.js b/wasm/combined/sherpa-onnx-combined-speaker.js similarity index 100% rename from wasm/combined/sherpa-onnx-speaker.js rename to wasm/combined/sherpa-onnx-combined-speaker.js diff --git a/wasm/combined/sherpa-onnx-tts.js b/wasm/combined/sherpa-onnx-combined-tts.js similarity index 100% rename from wasm/combined/sherpa-onnx-tts.js rename to wasm/combined/sherpa-onnx-combined-tts.js diff --git a/wasm/combined/sherpa-onnx-vad.js b/wasm/combined/sherpa-onnx-combined-vad.js similarity index 100% rename from wasm/combined/sherpa-onnx-vad.js rename to wasm/combined/sherpa-onnx-combined-vad.js diff --git a/wasm/combined/sherpa-onnx-combined.js b/wasm/combined/sherpa-onnx-combined.js index ac37fc3a1c..283abbc941 100644 --- a/wasm/combined/sherpa-onnx-combined.js +++ b/wasm/combined/sherpa-onnx-combined.js @@ -28,13 +28,13 @@ // Define module paths relative to the base path const defaultModules = [ - 'sherpa-onnx-core.js', - 'sherpa-onnx-vad.js', - 'sherpa-onnx-asr.js', - 'sherpa-onnx-tts.js', - 'sherpa-onnx-speaker.js', - 'sherpa-onnx-enhancement.js', - 'sherpa-onnx-kws.js' + 'sherpa-onnx-combined-core.js', + 'sherpa-onnx-combined-vad.js', + 'sherpa-onnx-combined-asr.js', + 'sherpa-onnx-combined-tts.js', + 'sherpa-onnx-combined-speaker.js', + 'sherpa-onnx-combined-enhancement.js', + 'sherpa-onnx-combined-kws.js' ]; // Use custom module paths if provided, otherwise use defaults with base path @@ -198,25 +198,25 @@ // Main initialization function const initialize = function() { + console.log("initialize() function called. Starting module loading process."); // Browser environment: load scripts if (typeof window !== 'undefined') { - // Set up a backup timeout to ensure callback is called even if loading fails - const timeoutPromise = new Promise((resolve) => { - setTimeout(() => { - console.warn("Module loading timeout reached - some modules may not have loaded correctly"); - resolve(); - }, 30000); // 30 second timeout - }); - - // Load modules with timeout protection - Promise.race([loadModulesSequentially(), timeoutPromise]) + console.log("Browser environment detected. Proceeding to load modules sequentially."); + // Load modules sequentially and handle completion/errors + loadModulesSequentially() .catch(error => { console.error("Module loading failed:", error); - + // Ensure the callback is still called on failure, passing the error if (global.onSherpaOnnxReady) { - global.onSherpaOnnxReady(false, error); + console.log("Calling onSherpaOnnxReady with failure status due to error."); + // Determine if any modules loaded successfully before the error + let anyLoaded = Object.values(loadedModules).some(status => status === true); + let missingModules = modulePaths.filter(path => !loadedModules[path]); + global.onSherpaOnnxReady(anyLoaded && missingModules.length < modulePaths.length, error || missingModules); } }); + } else { + console.log("Non-browser environment detected. 
Skipping module loading."); } }; @@ -225,22 +225,88 @@ const originalOnRuntimeInitialized = global.Module.onRuntimeInitialized; global.Module.onRuntimeInitialized = function() { - console.log("WASM module runtime initialized, now loading JavaScript modules"); + console.log("WASM module runtime initialized, checking for full initialization including HEAPF32..."); if (originalOnRuntimeInitialized) { originalOnRuntimeInitialized(); } - initialize(); + // Wait for full initialization including HEAPF32 + let attempt = 0; + const checkHeapInterval = setInterval(() => { + attempt++; + console.log(`Attempt ${attempt}: Checking if HEAPF32 is available...`); + console.log(`global.Module.HEAPF32 exists: ${!!global.Module.HEAPF32}`); + if (global.Module.HEAPF32) { + console.log("HEAPF32 is available. Proceeding with JavaScript module initialization."); + clearInterval(checkHeapInterval); + initialize(); + } else if (attempt > 120) { // Wait up to 60 seconds (120 * 500ms) + console.error("HEAPF32 not available after 60 seconds. Proceeding anyway with potential issues."); + clearInterval(checkHeapInterval); + initialize(); + } + }, 500); }; } else { // No WASM module yet, set up a listener global.onModuleReady = function() { console.log("WASM module ready, proceeding with module initialization"); - initialize(); + // Ensure HEAPF32 is available before proceeding + if (global.Module && global.Module.HEAPF32) { + console.log("HEAPF32 confirmed available via onModuleReady."); + initialize(); + } else { + console.error("onModuleReady called but HEAPF32 not available. Waiting for initialization."); + let attempt = 0; + const readyCheckInterval = setInterval(() => { + attempt++; + console.log(`Ready check attempt ${attempt}: Waiting for HEAPF32...`); + if (global.Module && global.Module.HEAPF32) { + console.log("HEAPF32 now available. Proceeding with initialization."); + clearInterval(readyCheckInterval); + initialize(); + } else if (attempt > 120) { + console.error("HEAPF32 still not available after 60 seconds in onModuleReady. Proceeding with risk."); + clearInterval(readyCheckInterval); + initialize(); + } + }, 500); + } }; - // Also start loading anyway in case the event was missed - setTimeout(initialize, 1000); + // Since HEAPF32 availability was logged, check if it's already available and proceed + if (typeof global.Module !== 'undefined' && global.Module.HEAPF32) { + console.log("HEAPF32 already available. Triggering initialization immediately."); + initialize(); + } else { + console.log("Waiting for WASM module initialization or HEAPF32 availability before loading dependent scripts."); + // Force initialization after a short timeout if no response + console.log("Checking for HEAPF32 availability immediately for debugging."); + if (typeof global.Module !== 'undefined') { + console.log("Module exists. Current HEAPF32 status: ", !!global.Module.HEAPF32); + if (global.Module.HEAPF32) { + console.log("HEAPF32 detected. Proceeding with initialization NOW."); + initialize(); + } else { + console.log("No HEAPF32 yet. Waiting a very short period before forcing initialization."); + setTimeout(() => { + console.log("Immediate timeout reached. 
Forcing initialization regardless of HEAPF32 status to debug.");
+                if (typeof global.Module !== 'undefined') {
+                  console.log("Module status at force: ", !!global.Module, "HEAPF32 status: ", !!global.Module.HEAPF32);
+                } else {
+                  console.log("Module still not defined at force time.");
+                }
+                initialize();
+              }, 1000); // Force after just 1 second for faster debugging
+            }
+          } else {
+            console.log("Module not yet defined. Waiting for it to appear.");
+            setTimeout(() => {
+              console.log("Secondary timeout reached. Forcing initialization regardless of status.");
+              initialize();
+            }, 1000); // Force after just 1 second if Module isn't even defined
+          }
+        }
+      }
   }
 })(typeof window !== 'undefined' ? window : global);
\ No newline at end of file

From 365747a66feb8a3969c25d5277c7906cbc5bc530 Mon Sep 17 00:00:00 2001
From: Arthur Breton
Date: Sat, 26 Apr 2025 21:47:20 +0800
Subject: [PATCH 9/9] docs: add ISSUE.md detailing the architectural limitations of the Sherpa-ONNX WASM combined module regarding shared context and HEAPF32 access failures

---
 wasm/combined/ISSUE.md | 42 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 42 insertions(+)
 create mode 100644 wasm/combined/ISSUE.md

diff --git a/wasm/combined/ISSUE.md b/wasm/combined/ISSUE.md
new file mode 100644
index 0000000000..cdd8bec6ae
--- /dev/null
+++ b/wasm/combined/ISSUE.md
@@ -0,0 +1,42 @@
+# Sherpa-ONNX WASM Combined Module Issue: Inconsistent Shared Module Context & HEAPF32 Access Failure
+
+## Problem Description
+
+The core issue is a fundamental limitation in the current Sherpa-ONNX WASM combined module architecture: **it fails to establish a reliably shared and synchronized WebAssembly (WASM) runtime context across the multiple, sequentially loaded JavaScript component files** (`sherpa-onnx-combined-core.js`, `sherpa-onnx-combined-asr.js`, etc.). Specifically, essential JavaScript views onto the WASM memory, like `HEAPF32`, are not consistently accessible across these script boundaries.
+
+### Background: WASM Memory and HEAP Views
+
+- **WASM Linear Memory**: WebAssembly modules operate on a contiguous block of memory.
+- **Emscripten HEAP Views**: To allow JavaScript to interact with this memory, Emscripten (the compiler used) creates typed array views (e.g., `Float32Array`, `Int8Array`) pointing directly into this memory block. These views are assigned to the global `Module` object as properties like `Module.HEAPF32`, `Module.HEAP8`, `Module.HEAPU8`, etc.
+- **Initialization**: These `HEAP*` views are crucial for JS-WASM communication. They are normally initialized by the main Emscripten glue code (`sherpa-onnx-wasm-combined.js` in this case) *after* the WASM memory buffer is allocated but *before* or *during* the `Module.onRuntimeInitialized` callback, signifying the runtime is ready.
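+
+For illustration only, the following minimal sketch (not code from this repository; the function name is illustrative) shows the kind of JS-to-WASM copy that depends on these views. It assumes only the standard Emscripten `Module` API (`_malloc`, `_free`, `HEAPF32`) and mirrors what `OnlineStream.acceptWaveform` has to do with audio samples:
+
+```javascript
+// Copy a Float32Array of audio samples into WASM linear memory.
+// If Module.HEAPF32 is undefined in the calling script's context,
+// this is exactly the step that fails.
+function copySamplesToWasm(Module, samples) {
+  const bytes = samples.length * samples.BYTES_PER_ELEMENT; // 4 bytes per float
+  const ptr = Module._malloc(bytes);                        // allocate in WASM memory
+  Module.HEAPF32.set(samples, ptr / 4);                     // offset is in elements, not bytes
+  return { ptr, free: () => Module._free(ptr) };            // caller frees after use
+}
+```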
+
+### Detailed Explanation of the Failure
+
+1. **Context/Scope Separation & HEAP Inaccessibility**: Despite all scripts referencing the global `window.Module`, they appear to operate within distinct execution contexts. Crucially, the standard `HEAP*` memory views (especially `HEAPF32`, essential for ASR audio data transfer) that *should* be initialized on `window.Module` by the main glue code are **not accessible or visible** within the context of the subsequently loaded component scripts (e.g., `sherpa-onnx-combined-core.js`). The repeated log messages `No suitable memory buffer found` and `HEAPF32 exists: false` from within `sherpa-onnx-combined-core.js` are direct evidence of this failure.
+
+2. **Sequential Loading Barrier**: The architecture loads functional components (ASR, VAD, etc.) as separate JS files *after* the main WASM module and its memory are expected to initialize. This sequential loading creates context boundaries that prevent the component scripts from accessing the already initialized `HEAP*` views on the `Module` object.
+
+3. **Initialization Callbacks Ineffective Across Contexts**: Callbacks like `onRuntimeInitialized` might fire in the main glue code's context, but this readiness state (including the availability of initialized `HEAP*` views) does not reliably propagate to the separate contexts of the component scripts.
+
+4. **Runtime Errors**: Consequently, operations requiring direct JS interaction with WASM memory via these views fail. For example, `OnlineStream.acceptWaveform` in ASR needs to write to `HEAPF32`. Since `HEAPF32` is inaccessible in the `asr.js` or `core.js` context, this fails, leading to downstream errors like `TypeError: asr.createStream is not a function` (the recognizer likely failed during its own initialization, which itself may require memory access).
+
+5. **Selective Functionality Failure (Evidence)**: Functionalities like TTS (`tts.html`) appear less affected. This suggests their JS-WASM interaction pattern does not critically rely on the *JavaScript context* having direct write access to `HEAPF32` in the way streaming ASR does, further supporting that the issue is specific to the accessibility of these memory views across script contexts.
+
+### Impact
+
+- **Unreliable Functionality**: Core features requiring JS access to WASM memory views (like streaming ASR via `HEAPF32`) consistently fail.
+- **Debugging Dead End**: Standard synchronization techniques are ineffective because the fundamental issue is the inaccessibility of the necessary `HEAP*` views due to context separation.
+
+### Architectural Root Cause
+
+The multi-file JavaScript approach, combined with Emscripten's standard output, fails to guarantee that the essential `HEAP*` memory views initialized on the `Module` object are accessible from the separate JavaScript files loaded later. Each script effectively gets a view of the `Module` object that may lack these critical, dynamically initialized properties.
+
+### Potential Solutions
+
+1. **Unified Script (Likely Viable but with Drawbacks)**: Combine *all* JavaScript glue code (core, ASR, VAD, TTS, etc.) and the main Emscripten `Module` interaction into a **single, large JavaScript file**. This forces all code into the same execution context, ensuring consistent access to the initialized `Module` object and its `HEAP*` views. **Drawback**: Creates a potentially very large initial JS file, impacting load performance.
+
+2. **WASM Module Re-architecture (Complex)**: Fundamentally change how the C++ code is compiled, perhaps using Emscripten features explicitly designed for better JS module interoperability (e.g., `MODULARIZE=1`, ES6 module output) that might handle state sharing differently. This likely requires significant changes to the build process and the C++/JS interface. (See the sketch at the end of this note.)
+
+3. ~~Delayed Functionality Binding~~ (Proven Ineffective): Delaying execution does not solve the problem; the necessary `HEAP*` views remain fundamentally inaccessible from within the component script contexts.
+
+This issue highlights a significant architectural challenge. The **Unified Script** approach appears the most practical path forward within the existing build system, despite the performance implications.
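+
+As a rough illustration of option 2 (an assumption about a possible future build, not the current CMake configuration), Emscripten's `MODULARIZE` output exports a factory function instead of mutating a global `Module`, so every component script can await the same instance and therefore see the same initialized memory. The export name below is hypothetical:
+
+```javascript
+// Hypothetical link flags: emcc ... -sMODULARIZE=1 -sEXPORT_NAME=createSherpaModule
+// The glue code then defines createSherpaModule(), which returns a Promise.
+const sherpaReady = createSherpaModule();   // single shared promise for all components
+sherpaReady.then((Module) => {
+  // The resolved instance carries its own runtime state; note that on newer
+  // Emscripten versions the HEAP* views may still need to be exported explicitly.
+  console.log('HEAPF32 view present:', !!Module.HEAPF32);
+});
+```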