From cd31498cdc4844a14e94ca943d7c6a12dbc8a8cf Mon Sep 17 00:00:00 2001 From: Bartosz Hanc Date: Tue, 7 Apr 2026 19:06:49 +0200 Subject: [PATCH 1/5] feat: add inference mutex for thread safety in VAD, Text Embeddings and Text-to-Image --- .../rnexecutorch/models/embeddings/text/TextEmbeddings.cpp | 6 ++++++ .../rnexecutorch/models/embeddings/text/TextEmbeddings.h | 3 +++ .../rnexecutorch/models/text_to_image/TextToImage.cpp | 2 ++ .../common/rnexecutorch/models/text_to_image/TextToImage.h | 2 ++ .../voice_activity_detection/VoiceActivityDetection.h | 1 + 5 files changed, 14 insertions(+) diff --git a/packages/react-native-executorch/common/rnexecutorch/models/embeddings/text/TextEmbeddings.cpp b/packages/react-native-executorch/common/rnexecutorch/models/embeddings/text/TextEmbeddings.cpp index d645d6afa3..0c9b997914 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/embeddings/text/TextEmbeddings.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/embeddings/text/TextEmbeddings.cpp @@ -35,8 +35,14 @@ TokenIdsWithAttentionMask TextEmbeddings::preprocess(const std::string &input) { return {.inputIds = inputIds64, .attentionMask = attentionMask}; } +void TextEmbeddings::unload() noexcept { + std::scoped_lock lock(generate_mutex_); + BaseModel::unload(); +} + std::shared_ptr TextEmbeddings::generate(const std::string input) { + std::scoped_lock lock(generate_mutex_); auto preprocessed = preprocess(input); std::vector tokenIdsShape = { diff --git a/packages/react-native-executorch/common/rnexecutorch/models/embeddings/text/TextEmbeddings.h b/packages/react-native-executorch/common/rnexecutorch/models/embeddings/text/TextEmbeddings.h index 28dacca365..8f2b8ea72b 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/embeddings/text/TextEmbeddings.h +++ b/packages/react-native-executorch/common/rnexecutorch/models/embeddings/text/TextEmbeddings.h @@ -1,6 +1,7 @@ #pragma once #include "rnexecutorch/metaprogramming/ConstructorHelpers.h" +#include #include #include @@ -20,8 +21,10 @@ class TextEmbeddings final : public BaseEmbeddings { [[nodiscard( "Registered non-void function")]] std::shared_ptr generate(const std::string input); + void unload() noexcept; private: + mutable std::mutex generate_mutex_; std::vector> inputShapes; TokenIdsWithAttentionMask preprocess(const std::string &input); std::unique_ptr tokenizer; diff --git a/packages/react-native-executorch/common/rnexecutorch/models/text_to_image/TextToImage.cpp b/packages/react-native-executorch/common/rnexecutorch/models/text_to_image/TextToImage.cpp index e8de58b708..568f7738f8 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/text_to_image/TextToImage.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/text_to_image/TextToImage.cpp @@ -58,6 +58,7 @@ std::shared_ptr TextToImage::generate(std::string input, int32_t imageSize, size_t numInferenceSteps, int32_t seed, std::shared_ptr callback) { + std::scoped_lock lock(generate_mutex_); setImageSize(imageSize); setSeed(seed); @@ -137,6 +138,7 @@ size_t TextToImage::getMemoryLowerBound() const noexcept { } void TextToImage::unload() noexcept { + std::scoped_lock lock(generate_mutex_); encoder->unload(); unet->unload(); decoder->unload(); diff --git a/packages/react-native-executorch/common/rnexecutorch/models/text_to_image/TextToImage.h b/packages/react-native-executorch/common/rnexecutorch/models/text_to_image/TextToImage.h index 18316217cd..1b69bc58f4 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/text_to_image/TextToImage.h +++ b/packages/react-native-executorch/common/rnexecutorch/models/text_to_image/TextToImage.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include @@ -49,6 +50,7 @@ class TextToImage final { static constexpr float guidanceScale = 7.5f; static constexpr float latentsScale = 0.18215f; bool interrupted = false; + mutable std::mutex generate_mutex_; std::shared_ptr callInvoker; std::unique_ptr scheduler; diff --git a/packages/react-native-executorch/common/rnexecutorch/models/voice_activity_detection/VoiceActivityDetection.h b/packages/react-native-executorch/common/rnexecutorch/models/voice_activity_detection/VoiceActivityDetection.h index c756bb6d3c..6cb39852d9 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/voice_activity_detection/VoiceActivityDetection.h +++ b/packages/react-native-executorch/common/rnexecutorch/models/voice_activity_detection/VoiceActivityDetection.h @@ -23,6 +23,7 @@ class VoiceActivityDetection : public BaseModel { std::shared_ptr callInvoker); [[nodiscard("Registered non-void function")]] std::vector generate(std::span waveform) const; + void unload() noexcept; void unload() noexcept; From c82605bfa5e6adc3e9a18500359398697c59c70a Mon Sep 17 00:00:00 2001 From: Bartosz Hanc Date: Tue, 7 Apr 2026 19:28:51 +0200 Subject: [PATCH 2/5] refactor: rename generate_mutex_ to inference_mutex_ --- .../rnexecutorch/models/embeddings/text/TextEmbeddings.cpp | 4 ++-- .../rnexecutorch/models/embeddings/text/TextEmbeddings.h | 2 +- .../common/rnexecutorch/models/text_to_image/TextToImage.cpp | 4 ++-- .../common/rnexecutorch/models/text_to_image/TextToImage.h | 2 +- .../models/voice_activity_detection/VoiceActivityDetection.h | 1 - 5 files changed, 6 insertions(+), 7 deletions(-) diff --git a/packages/react-native-executorch/common/rnexecutorch/models/embeddings/text/TextEmbeddings.cpp b/packages/react-native-executorch/common/rnexecutorch/models/embeddings/text/TextEmbeddings.cpp index 0c9b997914..f0f4108543 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/embeddings/text/TextEmbeddings.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/embeddings/text/TextEmbeddings.cpp @@ -36,13 +36,13 @@ TokenIdsWithAttentionMask TextEmbeddings::preprocess(const std::string &input) { } void TextEmbeddings::unload() noexcept { - std::scoped_lock lock(generate_mutex_); + std::scoped_lock lock(inference_mutex_); BaseModel::unload(); } std::shared_ptr TextEmbeddings::generate(const std::string input) { - std::scoped_lock lock(generate_mutex_); + std::scoped_lock lock(inference_mutex_); auto preprocessed = preprocess(input); std::vector tokenIdsShape = { diff --git a/packages/react-native-executorch/common/rnexecutorch/models/embeddings/text/TextEmbeddings.h b/packages/react-native-executorch/common/rnexecutorch/models/embeddings/text/TextEmbeddings.h index 8f2b8ea72b..93d0988c04 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/embeddings/text/TextEmbeddings.h +++ b/packages/react-native-executorch/common/rnexecutorch/models/embeddings/text/TextEmbeddings.h @@ -24,7 +24,7 @@ class TextEmbeddings final : public BaseEmbeddings { void unload() noexcept; private: - mutable std::mutex generate_mutex_; + mutable std::mutex inference_mutex_; std::vector> inputShapes; TokenIdsWithAttentionMask preprocess(const std::string &input); std::unique_ptr tokenizer; diff --git a/packages/react-native-executorch/common/rnexecutorch/models/text_to_image/TextToImage.cpp b/packages/react-native-executorch/common/rnexecutorch/models/text_to_image/TextToImage.cpp index 568f7738f8..22ad6f2ad8 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/text_to_image/TextToImage.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/text_to_image/TextToImage.cpp @@ -58,7 +58,7 @@ std::shared_ptr TextToImage::generate(std::string input, int32_t imageSize, size_t numInferenceSteps, int32_t seed, std::shared_ptr callback) { - std::scoped_lock lock(generate_mutex_); + std::scoped_lock lock(inference_mutex_); setImageSize(imageSize); setSeed(seed); @@ -138,7 +138,7 @@ size_t TextToImage::getMemoryLowerBound() const noexcept { } void TextToImage::unload() noexcept { - std::scoped_lock lock(generate_mutex_); + std::scoped_lock lock(inference_mutex_); encoder->unload(); unet->unload(); decoder->unload(); diff --git a/packages/react-native-executorch/common/rnexecutorch/models/text_to_image/TextToImage.h b/packages/react-native-executorch/common/rnexecutorch/models/text_to_image/TextToImage.h index 1b69bc58f4..e071a0c2ee 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/text_to_image/TextToImage.h +++ b/packages/react-native-executorch/common/rnexecutorch/models/text_to_image/TextToImage.h @@ -50,7 +50,7 @@ class TextToImage final { static constexpr float guidanceScale = 7.5f; static constexpr float latentsScale = 0.18215f; bool interrupted = false; - mutable std::mutex generate_mutex_; + mutable std::mutex inference_mutex_; std::shared_ptr callInvoker; std::unique_ptr scheduler; diff --git a/packages/react-native-executorch/common/rnexecutorch/models/voice_activity_detection/VoiceActivityDetection.h b/packages/react-native-executorch/common/rnexecutorch/models/voice_activity_detection/VoiceActivityDetection.h index 6cb39852d9..5bdcbd9352 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/voice_activity_detection/VoiceActivityDetection.h +++ b/packages/react-native-executorch/common/rnexecutorch/models/voice_activity_detection/VoiceActivityDetection.h @@ -29,7 +29,6 @@ class VoiceActivityDetection : public BaseModel { private: mutable std::mutex inference_mutex_; - std::vector> preprocess(std::span waveform) const; std::vector postprocess(const std::vector &scores, From bb52a43daf1fc98ddc017e3bf0a8b0e29af64db3 Mon Sep 17 00:00:00 2001 From: Bartosz Hanc Date: Tue, 7 Apr 2026 20:09:07 +0200 Subject: [PATCH 3/5] fix: capture model and callInvoker by value in GlobalThreadPool detach --- .../common/rnexecutorch/host_objects/ModelHostObject.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/packages/react-native-executorch/common/rnexecutorch/host_objects/ModelHostObject.h b/packages/react-native-executorch/common/rnexecutorch/host_objects/ModelHostObject.h index abfdb40dfc..04b0accd34 100644 --- a/packages/react-native-executorch/common/rnexecutorch/host_objects/ModelHostObject.h +++ b/packages/react-native-executorch/common/rnexecutorch/host_objects/ModelHostObject.h @@ -375,7 +375,9 @@ template class ModelHostObject : public JsiHostObject { // We need to dispatch a thread if we want the function to be // asynchronous. In this thread all accesses to jsi::Runtime need to // be done via the callInvoker. - threads::GlobalThreadPool::detach([this, promise, + threads::GlobalThreadPool::detach([model = this->model, + callInvoker = this->callInvoker, + promise, argsConverted = std::move(argsConverted)]() { try { From eb00055c7e7c4a8c670e0eb566c0af6b14bfda12 Mon Sep 17 00:00:00 2001 From: Bartosz Hanc Date: Wed, 8 Apr 2026 11:40:11 +0200 Subject: [PATCH 4/5] refactor: remove duplicate unload() declaration in VoiceActivityDetection --- .../models/voice_activity_detection/VoiceActivityDetection.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/packages/react-native-executorch/common/rnexecutorch/models/voice_activity_detection/VoiceActivityDetection.h b/packages/react-native-executorch/common/rnexecutorch/models/voice_activity_detection/VoiceActivityDetection.h index 5bdcbd9352..4b1c1ed163 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/voice_activity_detection/VoiceActivityDetection.h +++ b/packages/react-native-executorch/common/rnexecutorch/models/voice_activity_detection/VoiceActivityDetection.h @@ -25,10 +25,9 @@ class VoiceActivityDetection : public BaseModel { generate(std::span waveform) const; void unload() noexcept; - void unload() noexcept; - private: mutable std::mutex inference_mutex_; + std::vector> preprocess(std::span waveform) const; std::vector postprocess(const std::vector &scores, From dcad33b6adde67fb5a07fc0f091a45e9f0125fcb Mon Sep 17 00:00:00 2001 From: Bartosz Hanc Date: Wed, 8 Apr 2026 11:40:58 +0200 Subject: [PATCH 5/5] refactor: add newline --- .../models/voice_activity_detection/VoiceActivityDetection.h | 1 + 1 file changed, 1 insertion(+) diff --git a/packages/react-native-executorch/common/rnexecutorch/models/voice_activity_detection/VoiceActivityDetection.h b/packages/react-native-executorch/common/rnexecutorch/models/voice_activity_detection/VoiceActivityDetection.h index 4b1c1ed163..c756bb6d3c 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/voice_activity_detection/VoiceActivityDetection.h +++ b/packages/react-native-executorch/common/rnexecutorch/models/voice_activity_detection/VoiceActivityDetection.h @@ -23,6 +23,7 @@ class VoiceActivityDetection : public BaseModel { std::shared_ptr callInvoker); [[nodiscard("Registered non-void function")]] std::vector generate(std::span waveform) const; + void unload() noexcept; private: