mybigday
diff --git a/‎android/src/main/java/com/rnwhisper/RNWhisper.java
Lines changed: 9 additions & 0 deletions b/‎android/src/main/java/com/rnwhisper/RNWhisper.java
Lines changed: 9 additions & 0 deletions
diff --git a/‎android/src/main/java/com/rnwhisper/WhisperContext.java
Lines changed: 5 additions & 0 deletions b/‎android/src/main/java/com/rnwhisper/WhisperContext.java
Lines changed: 5 additions & 0 deletions
diff --git a/‎android/src/main/jni.cpp
Lines changed: 13 additions & 0 deletions b/‎android/src/main/jni.cpp
Lines changed: 13 additions & 0 deletions
diff --git a/‎android/src/newarch/java/com/rnwhisper/RNWhisperModule.java
Lines changed: 5 additions & 0 deletions b/‎android/src/newarch/java/com/rnwhisper/RNWhisperModule.java
Lines changed: 5 additions & 0 deletions
diff --git a/‎android/src/oldarch/java/com/rnwhisper/RNWhisperModule.java
Lines changed: 5 additions & 0 deletions b/‎android/src/oldarch/java/com/rnwhisper/RNWhisperModule.java
Lines changed: 5 additions & 0 deletions
diff --git a/‎cpp/ggml-metal.m
Lines changed: 24 additions & 2 deletions b/‎cpp/ggml-metal.m
Lines changed: 24 additions & 2 deletions
diff --git a/‎cpp/rn-whisper.cpp
Lines changed: 91 additions & 0 deletions b/‎cpp/rn-whisper.cpp
Lines changed: 91 additions & 0 deletions
diff --git a/‎cpp/rn-whisper.h
Lines changed: 2 additions & 0 deletions b/‎cpp/rn-whisper.h
Lines changed: 2 additions & 0 deletions
diff --git a/‎cpp/whisper.cpp
Lines changed: 33 additions & 10 deletions b/‎cpp/whisper.cpp
Lines changed: 33 additions & 10 deletions
diff --git a/‎cpp/whisper.h
Lines changed: 18 additions & 0 deletions b/‎cpp/whisper.h
Lines changed: 18 additions & 0 deletions
@@ -235,6 +235,15 @@ protected void onPostExecute(Void result) {
     tasks.put(task, "abortTranscribe-" + id);
   }
 
+  /**
+   * Benchmarks the whisper context registered under {@code id}.
+   *
+   * Both numeric arguments arrive as doubles and are truncated to int. The promise is
+   * rejected with "Context not found" when no context is registered for the id; otherwise
+   * it is resolved with the value returned by the context's {@code bench} method.
+   */
+  public void bench(double id, double nThreads, Promise promise) {
+    final int contextId = (int) id;
+    final WhisperContext whisperContext = contexts.get(contextId);
+    if (whisperContext != null) {
+      promise.resolve(whisperContext.bench((int) nThreads));
+      return;
+    }
+    promise.reject("Context not found");
+  }
+
   public void releaseContext(double id, Promise promise) {
     final int contextId = (int) id;
     AsyncTask task = new AsyncTask<Void, Void, Void>() {
 
@@ -423,6 +423,10 @@ public void stopCurrentTranscribe() {
     stopTranscribe(this.jobId);
   }
 
+  /**
+   * Runs the native benchmark on this instance's context handle.
+   *
+   * @param n_threads thread count forwarded unchanged to the native {@code bench} call
+   * @return the string produced by the native {@code bench(long, int)} method
+   */
+  public String bench(int n_threads) {
+    final String report = bench(this.context, n_threads);
+    return report;
+  }
+
   public void release() {
     stopCurrentTranscribe();
     freeContext(context);
@@ -527,4 +531,5 @@ protected static native int fullWithJob(
     int slice_index,
     int n_samples
   );
+  protected static native String bench(long context, int n_threads);
 }
@@ -508,4 +508,17 @@ Java_com_rnwhisper_WhisperContext_getTextSegmentSpeakerTurnNext(
     return whisper_full_get_segment_speaker_turn_next(context, index);
 }
 
+// JNI bridge for WhisperContext.bench(long, int): reinterprets the Java-side handle as a
+// whisper_context pointer, runs rnwhisper::bench on it and returns the resulting text as a
+// new Java string.
+JNIEXPORT jstring JNICALL
+Java_com_rnwhisper_WhisperContext_bench(
+    JNIEnv *env,
+    jobject thiz,
+    jlong context_ptr,
+    jint n_threads
+) {
+    UNUSED(thiz);
+    auto *whisper_ctx = reinterpret_cast<struct whisper_context *>(context_ptr);
+    const std::string report = rnwhisper::bench(whisper_ctx, n_threads);
+    return env->NewStringUTF(report.c_str());
+}
+
 } // extern "C"
@@ -57,6 +57,11 @@ public void abortTranscribe(double contextId, double jobId, Promise promise) {
     rnwhisper.abortTranscribe(contextId, jobId, promise);
   }
 
+  /** Delegates to {@code rnwhisper.bench} with the arguments passed through unchanged. */
+  @ReactMethod
+  public void bench(double id, double nThreads, Promise promise) {
+    this.rnwhisper.bench(id, nThreads, promise);
+  }
+
   @ReactMethod
   public void releaseContext(double id, Promise promise) {
     rnwhisper.releaseContext(id, promise);
 
@@ -57,6 +57,11 @@ public void abortTranscribe(double contextId, double jobId, Promise promise) {
     rnwhisper.abortTranscribe(contextId, jobId, promise);
   }
 
+  /** Delegates to {@code rnwhisper.bench} with the arguments passed through unchanged. */
+  @ReactMethod
+  public void bench(double id, double nThreads, Promise promise) {
+    this.rnwhisper.bench(id, nThreads, promise);
+  }
+
   @ReactMethod
   public void releaseContext(double id, Promise promise) {
     rnwhisper.releaseContext(id, promise);
 
@@ -76,6 +76,7 @@ static void wsp_ggml_backend_metal_device_rel(struct wsp_ggml_backend_metal_devi
     ctx->mtl_device_ref_count--;
 
     if (ctx->mtl_device_ref_count == 0) {
+        [ctx->mtl_device release];
         ctx->mtl_device = nil;
     }
 }
@@ -520,8 +521,10 @@ @implementation WSPGGMLMetalClass
             struct wsp_ggml_metal_kernel * kernel = &ctx->kernels[e]; \
             id<MTLFunction> metal_function = [metal_library newFunctionWithName:@"kernel_"#name]; \
             kernel->pipeline = [device newComputePipelineStateWithFunction:metal_function error:&error]; \
+            [metal_function release]; \
             if (error) { \
                 WSP_GGML_LOG_ERROR("%s: error: load pipeline error: %s\n", __func__, [[error description] UTF8String]); \
+                [metal_library release]; \
                 return NULL; \
             } \
         } else { \
@@ -723,12 +726,24 @@ @implementation WSPGGMLMetalClass
         WSP_GGML_METAL_ADD_KERNEL(WSP_GGML_METAL_KERNEL_TYPE_POOL_2D_MAX_F32,               pool_2d_max_f32,                true);
     }
 
+    [metal_library release];
+
     return ctx;
 }
 
 static void wsp_ggml_metal_free(struct wsp_ggml_backend_metal_context * ctx) {
     WSP_GGML_LOG_INFO("%s: deallocating\n", __func__);
 
+    for (int i = 0; i < WSP_GGML_METAL_KERNEL_TYPE_COUNT; ++i) {
+        [ctx->kernels[i].pipeline release];
+    }
+
+    Block_release(ctx->encode_async);
+
+    [ctx->queue release];
+
+    dispatch_release(ctx->d_queue);
+
     free(ctx);
 }
 
@@ -3241,6 +3256,9 @@ static enum wsp_ggml_status wsp_ggml_metal_graph_compute(
 static void wsp_ggml_backend_metal_buffer_free_buffer(wsp_ggml_backend_buffer_t buffer) {
     struct wsp_ggml_backend_metal_buffer_context * ctx = (struct wsp_ggml_backend_metal_buffer_context *)buffer->context;
 
+    for (int i = 0; i < ctx->n_buffers; i++) {
+        [ctx->buffers[i].metal release];
+    }
     wsp_ggml_backend_metal_device_rel(buffer->buft->device->context);
 
     if (ctx->owned) {
@@ -3534,7 +3552,11 @@ static void wsp_ggml_backend_metal_set_n_cb(wsp_ggml_backend_t backend, int n_cb
         }
     }
 
-    ctx->encode_async = ^(size_t iter) {
+    if (ctx->encode_async) {
+        Block_release(ctx->encode_async);
+    }
+
+    ctx->encode_async = Block_copy(^(size_t iter) {
         const int cb_idx = iter;
         const int n_cb_l = ctx->n_cb;
 
@@ -3573,7 +3595,7 @@ static void wsp_ggml_backend_metal_set_n_cb(wsp_ggml_backend_t backend, int n_cb
         if (cb_idx < 2 || ctx->abort_callback == NULL) {
             [command_buffer commit];
         }
-    };
+    });
 }
 
 static struct wsp_ggml_backend_i wsp_ggml_backend_metal_i = {
 
@@ -8,6 +8,97 @@
 
 namespace rnwhisper {
 
+// Builds a space-separated list of the features reported by the wsp_ggml_cpu_has_* probes
+// (plus COREML when WHISPER_USE_COREML is defined), without a trailing space.
+// The returned pointer refers to a function-local static string that is rebuilt on every
+// call, so the text is only valid until the next call.
+const char * system_info(void) {
+  static std::string features;
+  features.clear();
+
+  // Appends "<name> " when the probe reported exactly 1.
+  const auto add_feature = [](int probe_result, const char * name) {
+    if (probe_result == 1) {
+      features += name;
+      features += ' ';
+    }
+  };
+
+  add_feature(wsp_ggml_cpu_has_avx(), "AVX");
+  add_feature(wsp_ggml_cpu_has_avx2(), "AVX2");
+  add_feature(wsp_ggml_cpu_has_avx512(), "AVX512");
+  add_feature(wsp_ggml_cpu_has_fma(), "FMA");
+  add_feature(wsp_ggml_cpu_has_neon(), "NEON");
+  add_feature(wsp_ggml_cpu_has_arm_fma(), "ARM_FMA");
+  add_feature(wsp_ggml_cpu_has_metal(), "METAL");
+  add_feature(wsp_ggml_cpu_has_f16c(), "F16C");
+  add_feature(wsp_ggml_cpu_has_fp16_va(), "FP16_VA");
+  add_feature(wsp_ggml_cpu_has_blas(), "BLAS");
+  add_feature(wsp_ggml_cpu_has_sse3(), "SSE3");
+  add_feature(wsp_ggml_cpu_has_ssse3(), "SSSE3");
+  add_feature(wsp_ggml_cpu_has_vsx(), "VSX");
+#ifdef WHISPER_USE_COREML
+  features += "COREML ";
+#endif
+
+  // Trim the trailing separator; for an empty list npos + 1 wraps to 0 and nothing is removed.
+  const std::string::size_type last_char = features.find_last_not_of(" ");
+  features.erase(last_char + 1);
+  return features.c_str();
+}
+
+// Benchmarks ctx with a fixed synthetic workload and returns the result as a string.
+//
+// The context is warmed up first (one encode, one prompt decode, one single-token decode),
+// the timings are reset, and then one encode, 256 text-generation decodes, 64 batched
+// decodes and 16 prompt decodes are measured.
+//
+// On success the result has the form
+//   ["<system_info()>",<n_threads>,<encode ms>,<decode ms>,<batchd ms>,<prompt ms>]
+// where every time is the per-run average in milliseconds. On failure an "error: ..."
+// message carrying the failing call's return code is returned instead.
+std::string bench(struct whisper_context * ctx, int n_threads) {
+    const int n_mels = whisper_model_n_mels(ctx);
+
+    // NOTE: the conditions below deliberately omit a trailing `!= 0`. Writing
+    // `int ret = f() != 0` would store the comparison result (always 1 on failure)
+    // in `ret`, so the error message would lose the real status code.
+    if (int ret = whisper_set_mel(ctx, nullptr, 0, n_mels)) {
+        return "error: failed to set mel: " + std::to_string(ret);
+    }
+    // heat encoder
+    if (int ret = whisper_encode(ctx, 0, n_threads)) {
+        return "error: failed to encode: " + std::to_string(ret);
+    }
+
+    whisper_token tokens[512];
+    memset(tokens, 0, sizeof(tokens));
+
+    // prompt heat
+    if (int ret = whisper_decode(ctx, tokens, 256, 0, n_threads)) {
+        return "error: failed to decode: " + std::to_string(ret);
+    }
+
+    // text-generation heat
+    if (int ret = whisper_decode(ctx, tokens, 1, 256, n_threads)) {
+        return "error: failed to decode: " + std::to_string(ret);
+    }
+
+    // Discard everything measured during warm-up.
+    whisper_reset_timings(ctx);
+
+    // actual run
+    if (int ret = whisper_encode(ctx, 0, n_threads)) {
+        return "error: failed to encode: " + std::to_string(ret);
+    }
+
+    // text-generation
+    for (int i = 0; i < 256; i++) {
+        if (int ret = whisper_decode(ctx, tokens, 1, i, n_threads)) {
+            return "error: failed to decode: " + std::to_string(ret);
+        }
+    }
+
+    // batched decoding
+    for (int i = 0; i < 64; i++) {
+        if (int ret = whisper_decode(ctx, tokens, 5, 0, n_threads)) {
+            return "error: failed to decode: " + std::to_string(ret);
+        }
+    }
+
+    // prompt processing
+    for (int i = 0; i < 16; i++) {
+        if (int ret = whisper_decode(ctx, tokens, 256, 0, n_threads)) {
+            return "error: failed to decode: " + std::to_string(ret);
+        }
+    }
+
+    // whisper_get_timings() returns nullptr when the context has no state.
+    const struct whisper_timings * timings = whisper_get_timings(ctx);
+    if (timings == nullptr) {
+        return "error: failed to get timings";
+    }
+
+    // Clamp the run counts to at least 1 so the averages never divide by zero.
+    const int32_t n_encode = std::max(1, timings->n_encode);
+    const int32_t n_decode = std::max(1, timings->n_decode);
+    const int32_t n_batchd = std::max(1, timings->n_batchd);
+    const int32_t n_prompt = std::max(1, timings->n_prompt);
+
+    const std::string result = std::string("[") +
+        "\"" + system_info() + "\"," +
+        std::to_string(n_threads) + "," +
+        std::to_string(1e-3f * timings->t_encode_us / n_encode) + "," +
+        std::to_string(1e-3f * timings->t_decode_us / n_decode) + "," +
+        std::to_string(1e-3f * timings->t_batchd_us / n_batchd) + "," +
+        std::to_string(1e-3f * timings->t_prompt_us / n_prompt) + "]";
+
+    // The snapshot is allocated with `new` by whisper_get_timings(); free it once the
+    // report has been built so repeated benchmarks do not leak.
+    delete timings;
+    return result;
+}
+
 void high_pass_filter(std::vector<float> & data, float cutoff, float sample_rate) {
     const float rc = 1.0f / (2.0f * M_PI * cutoff);
     const float dt = 1.0f / sample_rate;
 
@@ -9,6 +9,8 @@
 
 namespace rnwhisper {
 
+std::string bench(whisper_context * ctx, int n_threads);
+
 struct vad_params {
     bool use_vad = false;
     float vad_thold = 0.6f;
 
@@ -4190,28 +4190,51 @@ whisper_token whisper_token_transcribe(struct whisper_context * ctx) {
     return ctx->vocab.token_transcribe;
 }
 
+// Copies the timing counters of ctx and its default state into a newly allocated
+// whisper_timings. Returns nullptr when ctx->state is null. The snapshot is created with
+// `new` and is not stored anywhere else, so the caller has to delete it.
+struct whisper_timings * whisper_get_timings(struct whisper_context * ctx) {
+    const auto * state = ctx->state;
+    if (state == nullptr) {
+        return nullptr;
+    }
+
+    whisper_timings * snapshot = new whisper_timings;
+
+    // Context-level values.
+    snapshot->load_us     = ctx->t_load_us;
+    snapshot->t_start_us  = ctx->t_start_us;
+
+    // Fallback counters and mel time from the state.
+    snapshot->fail_p      = state->n_fail_p;
+    snapshot->fail_h      = state->n_fail_h;
+    snapshot->t_mel_us    = state->t_mel_us;
+
+    // Run counts per stage.
+    snapshot->n_sample    = state->n_sample;
+    snapshot->n_encode    = state->n_encode;
+    snapshot->n_decode    = state->n_decode;
+    snapshot->n_batchd    = state->n_batchd;
+    snapshot->n_prompt    = state->n_prompt;
+
+    // Accumulated time per stage.
+    snapshot->t_sample_us = state->t_sample_us;
+    snapshot->t_encode_us = state->t_encode_us;
+    snapshot->t_decode_us = state->t_decode_us;
+    snapshot->t_batchd_us = state->t_batchd_us;
+    snapshot->t_prompt_us = state->t_prompt_us;
+
+    return snapshot;
+}
+
 void whisper_print_timings(struct whisper_context * ctx) {
     const int64_t t_end_us = wsp_ggml_time_us();
+    const struct whisper_timings * timings = whisper_get_timings(ctx);
 
     WHISPER_LOG_INFO("\n");
-    WHISPER_LOG_INFO("%s:     load time = %8.2f ms\n", __func__, ctx->t_load_us / 1000.0f);
+    WHISPER_LOG_INFO("%s:     load time = %8.2f ms\n", __func__, timings->load_us / 1000.0f);
     if (ctx->state != nullptr) {
-
         const int32_t n_sample = std::max(1, ctx->state->n_sample);
         const int32_t n_encode = std::max(1, ctx->state->n_encode);
         const int32_t n_decode = std::max(1, ctx->state->n_decode);
         const int32_t n_batchd = std::max(1, ctx->state->n_batchd);
         const int32_t n_prompt = std::max(1, ctx->state->n_prompt);
 
-        WHISPER_LOG_INFO("%s:     fallbacks = %3d p / %3d h\n", __func__, ctx->state->n_fail_p, ctx->state->n_fail_h);
-        WHISPER_LOG_INFO("%s:      mel time = %8.2f ms\n", __func__, ctx->state->t_mel_us / 1000.0f);
-        WHISPER_LOG_INFO("%s:   sample time = %8.2f ms / %5d runs (%8.2f ms per run)\n", __func__, 1e-3f * ctx->state->t_sample_us, n_sample, 1e-3f * ctx->state->t_sample_us / n_sample);
-        WHISPER_LOG_INFO("%s:   encode time = %8.2f ms / %5d runs (%8.2f ms per run)\n", __func__, 1e-3f * ctx->state->t_encode_us, n_encode, 1e-3f * ctx->state->t_encode_us / n_encode);
-        WHISPER_LOG_INFO("%s:   decode time = %8.2f ms / %5d runs (%8.2f ms per run)\n", __func__, 1e-3f * ctx->state->t_decode_us, n_decode, 1e-3f * ctx->state->t_decode_us / n_decode);
-        WHISPER_LOG_INFO("%s:   batchd time = %8.2f ms / %5d runs (%8.2f ms per run)\n", __func__, 1e-3f * ctx->state->t_batchd_us, n_batchd, 1e-3f * ctx->state->t_batchd_us / n_batchd);
-        WHISPER_LOG_INFO("%s:   prompt time = %8.2f ms / %5d runs (%8.2f ms per run)\n", __func__, 1e-3f * ctx->state->t_prompt_us, n_prompt, 1e-3f * ctx->state->t_prompt_us / n_prompt);
+        WHISPER_LOG_INFO("%s:     fallbacks = %3d p / %3d h\n", __func__, timings->fail_p, timings->fail_h);
+        WHISPER_LOG_INFO("%s:      mel time = %8.2f ms\n", __func__, timings->t_mel_us/1000.0f);
+        WHISPER_LOG_INFO("%s:   sample time = %8.2f ms / %5d runs (%8.2f ms per run)\n", __func__, 1e-3f * timings->t_sample_us, n_sample, 1e-3f * timings->t_sample_us / n_sample);
+        WHISPER_LOG_INFO("%s:   encode time = %8.2f ms / %5d runs (%8.2f ms per run)\n", __func__, 1e-3f * timings->t_encode_us, n_encode, 1e-3f * timings->t_encode_us / n_encode);
+        WHISPER_LOG_INFO("%s:   decode time = %8.2f ms / %5d runs (%8.2f ms per run)\n", __func__, 1e-3f * timings->t_decode_us, n_decode, 1e-3f * timings->t_decode_us / n_decode);
+        WHISPER_LOG_INFO("%s:   batchd time = %8.2f ms / %5d runs (%8.2f ms per run)\n", __func__, 1e-3f * timings->t_batchd_us, n_batchd, 1e-3f * timings->t_batchd_us / n_batchd);
+        WHISPER_LOG_INFO("%s:   prompt time = %8.2f ms / %5d runs (%8.2f ms per run)\n", __func__, 1e-3f * timings->t_prompt_us, n_prompt, 1e-3f * timings->t_prompt_us / n_prompt);
     }
-    WHISPER_LOG_INFO("%s:    total time = %8.2f ms\n", __func__, (t_end_us - ctx->t_start_us)/1000.0f);
+    WHISPER_LOG_INFO("%s:    total time = %8.2f ms\n", __func__, (t_end_us - timings->t_start_us)/1000.0f);
 }
 
 void whisper_reset_timings(struct whisper_context * ctx) {
 
@@ -424,6 +424,24 @@ extern "C" {
     WHISPER_API whisper_token whisper_token_transcribe(struct whisper_context * ctx);
 
     // Performance information from the default state.
+    struct whisper_timings { // snapshot filled in by whisper_get_timings(); *_us values are microseconds
+        int64_t load_us;     // copied from ctx->t_load_us (printed as "load time")
+        int64_t t_start_us;  // copied from ctx->t_start_us (start point for "total time")
+        int32_t fail_p;      // copied from state->n_fail_p (printed as fallbacks "p")
+        int32_t fail_h;      // copied from state->n_fail_h (printed as fallbacks "h")
+        int64_t t_mel_us;    // copied from state->t_mel_us (printed as "mel time")
+        int32_t n_sample;    // number of sample runs
+        int32_t n_encode;    // number of encode runs
+        int32_t n_decode;    // number of decode runs
+        int32_t n_batchd;    // number of batchd runs
+        int32_t n_prompt;    // number of prompt runs
+        int64_t t_sample_us; // accumulated sample time
+        int64_t t_encode_us; // accumulated encode time
+        int64_t t_decode_us; // accumulated decode time
+        int64_t t_batchd_us; // accumulated batchd time
+        int64_t t_prompt_us; // accumulated prompt time
+    };
+    WHISPER_API struct whisper_timings * whisper_get_timings(struct whisper_context * ctx);
     WHISPER_API void whisper_print_timings(struct whisper_context * ctx);
     WHISPER_API void whisper_reset_timings(struct whisper_context * ctx);