Skip to content

Commit

Permalink
Merge pull request ggerganov#4 from bobqianic/update
Browse files: browse the repository at this point in the history
Update main
  • Loading branch information
bobqianic authored Feb 2, 2024
2 parents 6a2674c + 6766747 commit e2e5177
Showing 1 changed file with 11 additions and 3 deletions.
14 changes: 11 additions & 3 deletions examples/main/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,9 +66,10 @@ struct whisper_params {
int32_t best_of = whisper_full_default_params(WHISPER_SAMPLING_GREEDY).greedy.best_of;
int32_t beam_size = whisper_full_default_params(WHISPER_SAMPLING_BEAM_SEARCH).beam_search.beam_size;

float word_thold = 0.01f;
float entropy_thold = 2.40f;
float logprob_thold = -1.00f;
float word_thold = 0.01f;
float entropy_thold = 2.40f;
float logprob_thold = -1.00f;
float no_speech_thold = 0.60f;

bool speed_up = false;
bool debug_mode = false;
Expand All @@ -91,6 +92,7 @@ struct whisper_params {
bool print_colors = false;
bool print_progress = false;
bool no_timestamps = false;
bool suppress_nst = true; // suppress non speech tokens
bool log_score = false;
bool use_gpu = true;

Expand Down Expand Up @@ -140,8 +142,10 @@ bool whisper_params_parse(int argc, const char ** argv, whisper_params & params)
else if (arg == "-wt" || arg == "--word-thold") { params.word_thold = std::stof(argv[++i]); }
else if (arg == "-et" || arg == "--entropy-thold") { params.entropy_thold = std::stof(argv[++i]); }
else if (arg == "-lpt" || arg == "--logprob-thold") { params.logprob_thold = std::stof(argv[++i]); }
else if (arg == "-nst" || arg == "--nospeech-thold") { params.no_speech_thold = std::stof(argv[++i]); }
// else if (arg == "-su" || arg == "--speed-up") { params.speed_up = true; }
else if (arg == "-debug"|| arg == "--debug-mode") { params.debug_mode = true; }
else if (arg == "-snst" || arg == "--suppress-nst") { params.suppress_nst = true; }
else if (arg == "-tr" || arg == "--translate") { params.translate = true; }
else if (arg == "-di" || arg == "--diarize") { params.diarize = true; }
else if (arg == "-tdrz" || arg == "--tinydiarize") { params.tinydiarize = true; }
Expand Down Expand Up @@ -199,8 +203,10 @@ void whisper_print_usage(int /*argc*/, const char ** argv, const whisper_params
fprintf(stderr, " -wt N, --word-thold N [%-7.2f] word timestamp probability threshold\n", params.word_thold);
fprintf(stderr, " -et N, --entropy-thold N [%-7.2f] entropy threshold for decoder fail\n", params.entropy_thold);
fprintf(stderr, " -lpt N, --logprob-thold N [%-7.2f] log probability threshold for decoder fail\n", params.logprob_thold);
fprintf(stderr, " -nst N, --nospeech-thold N [%-7.2f] no-speech threshold for decoder fail\n", params.no_speech_thold);
// fprintf(stderr, " -su, --speed-up [%-7s] speed up audio by x2 (reduced accuracy)\n", params.speed_up ? "true" : "false");
fprintf(stderr, " -debug, --debug-mode [%-7s] enable debug mode (eg. dump log_mel)\n", params.debug_mode ? "true" : "false");
fprintf(stderr, " -snst, --suppress-nst [%-7s] suppress non-speech tokens\n", params.suppress_nst ? "true" : "false");
fprintf(stderr, " -tr, --translate [%-7s] translate from source language to english\n", params.translate ? "true" : "false");
fprintf(stderr, " -di, --diarize [%-7s] stereo audio diarization\n", params.diarize ? "true" : "false");
fprintf(stderr, " -tdrz, --tinydiarize [%-7s] enable tinydiarize (requires a tdrz model)\n", params.tinydiarize ? "true" : "false");
Expand Down Expand Up @@ -1025,8 +1031,10 @@ int run(int argc, const char ** argv) {
wparams.temperature_inc = params.no_fallback ? 0.0f : wparams.temperature_inc;
wparams.entropy_thold = params.entropy_thold;
wparams.logprob_thold = params.logprob_thold;
wparams.no_speech_thold = params.no_speech_thold;

wparams.no_timestamps = params.no_timestamps;
wparams.suppress_non_speech_tokens = params.suppress_nst;

whisper_print_user_data user_data = { &params, &pcmf32s, 0 };

Expand Down

0 comments on commit e2e5177

Please sign in to comment.