diff --git a/packages/react-native-executorch/common/rnexecutorch/host_objects/ModelHostObject.h b/packages/react-native-executorch/common/rnexecutorch/host_objects/ModelHostObject.h index abfdb40dfc..04b0accd34 100644 --- a/packages/react-native-executorch/common/rnexecutorch/host_objects/ModelHostObject.h +++ b/packages/react-native-executorch/common/rnexecutorch/host_objects/ModelHostObject.h @@ -375,7 +375,9 @@ template class ModelHostObject : public JsiHostObject { // We need to dispatch a thread if we want the function to be // asynchronous. In this thread all accesses to jsi::Runtime need to // be done via the callInvoker. - threads::GlobalThreadPool::detach([this, promise, + threads::GlobalThreadPool::detach([model = this->model, + callInvoker = this->callInvoker, + promise, argsConverted = std::move(argsConverted)]() { try { diff --git a/packages/react-native-executorch/common/rnexecutorch/models/embeddings/text/TextEmbeddings.cpp b/packages/react-native-executorch/common/rnexecutorch/models/embeddings/text/TextEmbeddings.cpp index d645d6afa3..f0f4108543 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/embeddings/text/TextEmbeddings.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/embeddings/text/TextEmbeddings.cpp @@ -35,8 +35,14 @@ TokenIdsWithAttentionMask TextEmbeddings::preprocess(const std::string &input) { return {.inputIds = inputIds64, .attentionMask = attentionMask}; } +void TextEmbeddings::unload() noexcept { + std::scoped_lock lock(inference_mutex_); + BaseModel::unload(); +} + std::shared_ptr TextEmbeddings::generate(const std::string input) { + std::scoped_lock lock(inference_mutex_); auto preprocessed = preprocess(input); std::vector tokenIdsShape = { diff --git a/packages/react-native-executorch/common/rnexecutorch/models/embeddings/text/TextEmbeddings.h b/packages/react-native-executorch/common/rnexecutorch/models/embeddings/text/TextEmbeddings.h index 28dacca365..93d0988c04 100644 --- 
a/packages/react-native-executorch/common/rnexecutorch/models/embeddings/text/TextEmbeddings.h +++ b/packages/react-native-executorch/common/rnexecutorch/models/embeddings/text/TextEmbeddings.h @@ -1,6 +1,7 @@ #pragma once #include "rnexecutorch/metaprogramming/ConstructorHelpers.h" +#include <mutex> #include #include @@ -20,8 +21,10 @@ class TextEmbeddings final : public BaseEmbeddings { [[nodiscard( "Registered non-void function")]] std::shared_ptr generate(const std::string input); + void unload() noexcept; private: + mutable std::mutex inference_mutex_; std::vector> inputShapes; TokenIdsWithAttentionMask preprocess(const std::string &input); std::unique_ptr tokenizer; diff --git a/packages/react-native-executorch/common/rnexecutorch/models/llm/LLM.cpp b/packages/react-native-executorch/common/rnexecutorch/models/llm/LLM.cpp index 64e94c2ff0..10aff5cc77 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/llm/LLM.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/llm/LLM.cpp @@ -20,7 +20,7 @@ using executorch::runtime::Error; LLM::LLM(const std::string &modelSource, const std::string &tokenizerSource, std::vector capabilities, std::shared_ptr callInvoker) - : BaseModel(modelSource, callInvoker, Module::LoadMode::File) { + : BaseModel(modelSource, callInvoker, Module::LoadMode::Mmap) { if (capabilities.empty()) { runner_ = @@ -42,8 +42,12 @@ LLM::LLM(const std::string &modelSource, const std::string &tokenizerSource, throw RnExecutorchError(loadResult, "Failed to load LLM runner"); } - memorySizeLowerBound = fs::file_size(fs::path(modelSource)) + - fs::file_size(fs::path(tokenizerSource)); + // I am purposefully not adding file size of the model here. The reason is + // that Hermes would crash the app if we try to alloc too much memory here. + // Also, given we're using mmap, the true memory consumption of a model is not + // really equal to the size of the model. 
The size of the tokenizer file is a + // hint to the GC that this object might be worth getting rid of. + memorySizeLowerBound = fs::file_size(fs::path(tokenizerSource)); } std::string LLM::generate(std::string input, diff --git a/packages/react-native-executorch/common/rnexecutorch/models/text_to_image/TextToImage.cpp b/packages/react-native-executorch/common/rnexecutorch/models/text_to_image/TextToImage.cpp index e8de58b708..22ad6f2ad8 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/text_to_image/TextToImage.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/text_to_image/TextToImage.cpp @@ -58,6 +58,7 @@ std::shared_ptr TextToImage::generate(std::string input, int32_t imageSize, size_t numInferenceSteps, int32_t seed, std::shared_ptr callback) { + std::scoped_lock lock(inference_mutex_); setImageSize(imageSize); setSeed(seed); @@ -137,6 +138,7 @@ size_t TextToImage::getMemoryLowerBound() const noexcept { } void TextToImage::unload() noexcept { + std::scoped_lock lock(inference_mutex_); encoder->unload(); unet->unload(); decoder->unload(); diff --git a/packages/react-native-executorch/common/rnexecutorch/models/text_to_image/TextToImage.h b/packages/react-native-executorch/common/rnexecutorch/models/text_to_image/TextToImage.h index 18316217cd..e071a0c2ee 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/text_to_image/TextToImage.h +++ b/packages/react-native-executorch/common/rnexecutorch/models/text_to_image/TextToImage.h @@ -1,6 +1,7 @@ #pragma once #include +#include <mutex> #include #include @@ -49,6 +50,7 @@ class TextToImage final { static constexpr float guidanceScale = 7.5f; static constexpr float latentsScale = 0.18215f; bool interrupted = false; + mutable std::mutex inference_mutex_; std::shared_ptr callInvoker; std::unique_ptr scheduler; diff --git a/packages/react-native-executorch/common/rnexecutorch/models/voice_activity_detection/VoiceActivityDetection.cpp 
b/packages/react-native-executorch/common/rnexecutorch/models/voice_activity_detection/VoiceActivityDetection.cpp index a1252edfee..49971cba8c 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/voice_activity_detection/VoiceActivityDetection.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/voice_activity_detection/VoiceActivityDetection.cpp @@ -54,8 +54,14 @@ VoiceActivityDetection::preprocess(std::span waveform) const { return frameBuffer; } +void VoiceActivityDetection::unload() noexcept { + std::scoped_lock lock(inference_mutex_); + BaseModel::unload(); +} + std::vector VoiceActivityDetection::generate(std::span waveform) const { + std::scoped_lock lock(inference_mutex_); auto windowedInput = preprocess(waveform); auto [chunksNumber, remainder] = std::div( diff --git a/packages/react-native-executorch/common/rnexecutorch/models/voice_activity_detection/VoiceActivityDetection.h b/packages/react-native-executorch/common/rnexecutorch/models/voice_activity_detection/VoiceActivityDetection.h index e692889305..c756bb6d3c 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/voice_activity_detection/VoiceActivityDetection.h +++ b/packages/react-native-executorch/common/rnexecutorch/models/voice_activity_detection/VoiceActivityDetection.h @@ -5,6 +5,7 @@ #include #include #include +#include <mutex> #include #include "rnexecutorch/metaprogramming/ConstructorHelpers.h" @@ -23,7 +24,11 @@ class VoiceActivityDetection : public BaseModel { [[nodiscard("Registered non-void function")]] std::vector generate(std::span waveform) const; + void unload() noexcept; + private: + mutable std::mutex inference_mutex_; + std::vector> preprocess(std::span waveform) const; std::vector postprocess(const std::vector &scores, diff --git a/packages/react-native-executorch/package.json b/packages/react-native-executorch/package.json index ba8cb30bb6..2e76688f90 100644 --- a/packages/react-native-executorch/package.json +++ 
b/packages/react-native-executorch/package.json @@ -1,6 +1,6 @@ { "name": "react-native-executorch", - "version": "0.8.2", + "version": "0.8.3", "description": "An easy way to run AI models in React Native with ExecuTorch", "source": "./src/index.ts", "main": "./lib/module/index.js",