Skip to content

Commit 0245023

Browse files
authored
refactor: Update version to 0.3.5 and clear current caption in transcription filter callbacks (#164)
* refactor: Update version to 0.3.5 and clear current caption in transcription filter callbacks
* feat: Refactor whisper-processing.cpp for improved VAD segmentation and token buffer thread
* feat: Update prebuilt Whispercpp version to 0.0.6
* refactor: Remove trailing whitespace in translation-language-utils.h
* refactor: Add case-insensitive flag to regex in set_text_callback

The code change adds the `std::regex_constants::icase` flag to the regex used in the `set_text_callback` function in `transcription-filter-callbacks.cpp`. This allows for case-insensitive matching when replacing filter words in the `str_copy` string.

Refactor the code to improve VAD segmentation and token buffer thread in whisper-processing.cpp

The code change refactors the `whisper-processing.cpp` file to improve the VAD (Voice Activity Detection) segmentation and token buffer thread. This aims to enhance the performance and accuracy of the transcription filtering process.

refactor: Add prepopulated filter options and corresponding map entries in FilterReplaceDialog

The code change adds prepopulated filter options, such as "English Swear Words," "English Hallucinations," and "Korean Hallucinations," to the `FilterReplaceDialog` UI. It also adds the corresponding map entries to the `filter_words_replace` map, allowing users to easily add predefined filter patterns and replacement values.

refactor: Update version to 0.3.5 and clear current caption in transcription filter callbacks

The code change updates the version to 0.3.5 and clears the current caption in the transcription filter callbacks. This ensures that the correct version is displayed and any previous captions are removed.

refactor: Remove trailing whitespace in translation-language-utils.h

The code change removes trailing whitespace in the `translation-language-utils.h` file, improving code readability and consistency.
1 parent abe678b commit 0245023

12 files changed

+138
-47
lines changed

buildspec.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@
3838
},
3939
"name": "obs-localvocal",
4040
"displayName": "OBS Localvocal",
41-
"version": "0.3.4",
41+
"version": "0.3.5",
4242
"author": "Roy Shilkrot",
4343
"website": "https://github.com/occ-ai/obs-localvocal",
4444
"email": "[email protected]",

cmake/BuildWhispercpp.cmake

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,16 @@
11
include(ExternalProject)
22
include(FetchContent)
33

4-
set(PREBUILT_WHISPERCPP_VERSION "0.0.5")
4+
set(PREBUILT_WHISPERCPP_VERSION "0.0.6")
55
set(PREBUILT_WHISPERCPP_URL_BASE
66
"https://github.com/occ-ai/occ-ai-dep-whispercpp/releases/download/${PREBUILT_WHISPERCPP_VERSION}")
77

88
if(APPLE)
99
# check the "MACOS_ARCH" env var to figure out if this is x86 or arm64
1010
if($ENV{MACOS_ARCH} STREQUAL "x86_64")
11-
set(WHISPER_CPP_HASH "da61500b9a37f8630b9e4ed49bc3fe7858729d7a28a2e80bf6cfa4cb97523546")
11+
set(WHISPER_CPP_HASH "454abee900a96a0a10a91f631ff797bdbdf2df0d2a819479a409634c9be1e12c")
1212
elseif($ENV{MACOS_ARCH} STREQUAL "arm64")
13-
set(WHISPER_CPP_HASH "ef1e2628ba09414c0848d58c471440f38b8393cb5d428edf82b9e78aeeecdd15")
13+
set(WHISPER_CPP_HASH "f726388cc494f6fca864c860af6c1bc2932c3dc823ef92197b1e29f088425668")
1414
else()
1515
message(
1616
FATAL_ERROR
@@ -54,13 +54,13 @@ elseif(WIN32)
5454
set(WHISPER_CPP_URL
5555
"${PREBUILT_WHISPERCPP_URL_BASE}/whispercpp-windows-${ARCH_PREFIX}-${PREBUILT_WHISPERCPP_VERSION}.zip")
5656
if(${ACCELERATION} STREQUAL "cpu")
57-
set(WHISPER_CPP_HASH "2b1cfa0dd764132c4cde60e112a8e6328d28d158d91a8845080baa3e9d2dcdcd")
57+
set(WHISPER_CPP_HASH "126c5d859e902b4cd0f2cd09304a68750f1dbc6a7aa62e280cfd56c51a6a1c95")
5858
add_compile_definitions("LOCALVOCAL_WITH_CPU")
5959
elseif(${ACCELERATION} STREQUAL "cuda")
60-
set(WHISPER_CPP_HASH "011e813742fddf0911c4a36d2080d7a388cf78738081297088e7d50023e4f9bc")
60+
set(WHISPER_CPP_HASH "5b9592c311a7f1612894ca0b36f6bd4effb6a46acd03d33924df56c52f566779")
6161
add_compile_definitions("LOCALVOCAL_WITH_CUDA")
6262
elseif(${ACCELERATION} STREQUAL "hipblas")
63-
set(WHISPER_CPP_HASH "f2980d6cd3df9cac464378d26d2c19d827bcac995c8d0398a39230a9be936013")
63+
set(WHISPER_CPP_HASH "c306ecce16cd10f377fdefbf7bb252abac8e6638a2637f82b1f1f32dd2cb4e39")
6464
add_compile_definitions("LOCALVOCAL_WITH_HIPBLAS")
6565
else()
6666
message(

src/transcription-filter-callbacks.cpp

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -218,7 +218,8 @@ void set_text_callback(struct transcription_filter_data *gf,
218218
for (const auto &filter_words : gf->filter_words_replace) {
219219
// if filter exists within str_copy, replace it with the replacement
220220
str_copy = std::regex_replace(str_copy,
221-
std::regex(std::get<0>(filter_words)),
221+
std::regex(std::get<0>(filter_words),
222+
std::regex_constants::icase),
222223
std::get<1>(filter_words));
223224
}
224225
// if the text was modified, log the original and modified text
@@ -322,7 +323,7 @@ void recording_state_callback(enum obs_frontend_event event, void *data)
322323
}
323324
}
324325

325-
void reset_caption_state(transcription_filter_data *gf_)
326+
void clear_current_caption(transcription_filter_data *gf_)
326327
{
327328
if (gf_->captions_monitor.isEnabled()) {
328329
gf_->captions_monitor.clear();
@@ -336,6 +337,12 @@ void reset_caption_state(transcription_filter_data *gf_)
336337
gf_->translation_ctx.last_input_tokens.clear();
337338
gf_->translation_ctx.last_translation_tokens.clear();
338339
gf_->last_transcription_sentence.clear();
340+
gf_->cleared_last_sub = true;
341+
}
342+
343+
void reset_caption_state(transcription_filter_data *gf_)
344+
{
345+
clear_current_caption(gf_);
339346
// flush the buffer
340347
{
341348
std::lock_guard<std::mutex> lock(gf_->whisper_buf_mutex);

src/transcription-filter-callbacks.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@ void audio_chunk_callback(struct transcription_filter_data *gf, const float *pcm
1717
void set_text_callback(struct transcription_filter_data *gf,
1818
const DetectionResultWithText &resultIn);
1919

20+
void clear_current_caption(transcription_filter_data *gf_);
21+
2022
void recording_state_callback(enum obs_frontend_event event, void *data);
2123

2224
void media_play_callback(void *data_, calldata_t *cd);

src/transcription-filter-data.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,7 @@ struct transcription_filter_audio_info {
152152

153153
// Callback sent when the transcription has a new result
154154
void set_text_callback(struct transcription_filter_data *gf, const DetectionResultWithText &str);
155+
void clear_current_caption(transcription_filter_data *gf_);
155156

156157
// Callback sent when the VAD finds an audio chunk. Sample rate = WHISPER_SAMPLE_RATE, channels = 1
157158
// The audio chunk is in 32-bit float format

src/transcription-filter-properties.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -622,7 +622,7 @@ void transcription_filter_defaults(obs_data_t *s)
622622
obs_data_set_default_double(s, "thold_ptsum", 0.01);
623623
obs_data_set_default_int(s, "max_len", 0);
624624
obs_data_set_default_bool(s, "split_on_word", true);
625-
obs_data_set_default_int(s, "max_tokens", 0);
625+
obs_data_set_default_int(s, "max_tokens", 50);
626626
obs_data_set_default_bool(s, "suppress_blank", false);
627627
obs_data_set_default_bool(s, "suppress_non_speech_tokens", true);
628628
obs_data_set_default_double(s, "temperature", 0.1);

src/transcription-filter.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -396,6 +396,7 @@ void transcription_filter_update(void *data, obs_data_t *s)
396396
gf->whisper_params.temperature = (float)obs_data_get_double(s, "temperature");
397397
gf->whisper_params.max_initial_ts = (float)obs_data_get_double(s, "max_initial_ts");
398398
gf->whisper_params.length_penalty = (float)obs_data_get_double(s, "length_penalty");
399+
gf->whisper_params.no_timestamps = true;
399400

400401
if (gf->vad) {
401402
const float vad_threshold = (float)obs_data_get_double(s, "vad_threshold");

src/translation/translation-language-utils.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,4 +5,4 @@
55

66
std::string remove_start_punctuation(const std::string &text);
77

8-
#endif // TRANSLATION_LANGUAGE_UTILS_H
8+
#endif // TRANSLATION_LANGUAGE_UTILS_H

src/ui/filter-replace-dialog.cpp

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,9 @@ FilterReplaceDialog::FilterReplaceDialog(QWidget *parent, transcription_filter_d
2727
// connect edit triggers
2828
connect(ui->tableWidget, &QTableWidget::itemChanged, this,
2929
&FilterReplaceDialog::editFilter);
30+
// connect toolButton_addPrepopulatedFilter
31+
connect(ui->toolButton_addPrepopulatedFilter, &QToolButton::clicked, this,
32+
&FilterReplaceDialog::addPrepopulatedFilter);
3033
}
3134

3235
FilterReplaceDialog::~FilterReplaceDialog()
@@ -73,3 +76,28 @@ void FilterReplaceDialog::editFilter(QTableWidgetItem *item)
7376
// use the row number to update the filter_words_replace map
7477
ctx->filter_words_replace[item->row()] = std::make_tuple(key, value);
7578
}
79+
80+
void FilterReplaceDialog::addPrepopulatedFilter()
81+
{
82+
// add a prepopulated filter_words_replace map entry
83+
// check the value of the comboBox_selectPrepopulatedFilter
84+
// and add the corresponding filter_words_replace map entry
85+
std::string replace_value = "";
86+
std::string replace_pattern;
87+
const std::string selected =
88+
ui->comboBox_selectPrepopulatedFilter->currentText().toStdString();
89+
if (selected == "English Swear Words") {
90+
replace_pattern = "(fuck|shit|bitch|cunt|cock|dick|pussy)";
91+
replace_value = "****";
92+
} else if (selected == "English Hallucinations") {
93+
replace_pattern = "(Thank you|Thanks for watching|Please subscribe)";
94+
} else if (selected == "Korean Hallucinations") {
95+
replace_pattern = "MBC.*";
96+
}
97+
ctx->filter_words_replace.push_back(std::make_tuple(replace_pattern, replace_value));
98+
ui->tableWidget->insertRow(ui->tableWidget->rowCount());
99+
ui->tableWidget->setItem(ui->tableWidget->rowCount() - 1, 0,
100+
new QTableWidgetItem(QString::fromStdString(replace_pattern)));
101+
ui->tableWidget->setItem(ui->tableWidget->rowCount() - 1, 1,
102+
new QTableWidgetItem(QString::fromStdString(replace_value)));
103+
}

src/ui/filter-replace-dialog.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ private slots:
2525
void addFilter();
2626
void removeFilter();
2727
void editFilter(QTableWidgetItem *item);
28+
void addPrepopulatedFilter();
2829
};
2930

3031
#endif // FILTERREPLACEDIALOG_H

0 commit comments

Comments
 (0)