Skip to content

Commit 16b5a17

Browse files
authored
Merge pull request #11 from royshil/roy.add_file_save_output_option
Add file save output option
2 parents 4341653 + 251e793 · commit 16b5a17

File tree

2 files changed

+77
-30
lines changed

2 files changed

+77
-30
lines changed

src/transcription-filter-data.h

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -58,6 +58,8 @@ struct transcription_filter_data {
5858
std::mutex *text_source_mutex = nullptr;
5959
// Callback to set the text in the output text source (subtitles)
6060
std::function<void(const std::string &str)> setTextCallback;
61+
// Output file path to write the subtitles
62+
std::string output_file_path;
6163

6264
// Use std for thread and mutex
6365
std::thread whisper_thread;

src/transcription-filter.cpp

Lines changed: 75 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,7 @@
88
#include "model-utils/model-downloader.h"
99

1010
#include <algorithm>
11+
#include <fstream>
1112

1213
inline enum speaker_layout convert_speaker_layout(uint8_t channels)
1314
{
@@ -176,31 +177,38 @@ void acquire_weak_text_source_ref(struct transcription_filter_data *gf)
176177

177178
void set_text_callback(struct transcription_filter_data *gf, const std::string &str)
178179
{
179-
if (!gf->text_source_mutex) {
180-
obs_log(LOG_ERROR, "text_source_mutex is null");
181-
return;
182-
}
180+
if (gf->output_file_path != "" && !gf->text_source_name) {
181+
// Write to file, do not append
182+
std::ofstream output_file(gf->output_file_path, std::ios::out | std::ios::trunc);
183+
output_file << str;
184+
output_file.close();
185+
} else {
186+
if (!gf->text_source_mutex) {
187+
obs_log(LOG_ERROR, "text_source_mutex is null");
188+
return;
189+
}
183190

184-
if (!gf->text_source) {
185-
// attempt to acquire a weak ref to the text source if it's yet available
186-
acquire_weak_text_source_ref(gf);
187-
}
191+
if (!gf->text_source) {
192+
// attempt to acquire a weak ref to the text source if it's yet available
193+
acquire_weak_text_source_ref(gf);
194+
}
188195

189-
std::lock_guard<std::mutex> lock(*gf->text_source_mutex);
196+
std::lock_guard<std::mutex> lock(*gf->text_source_mutex);
190197

191-
if (!gf->text_source) {
192-
obs_log(LOG_ERROR, "text_source is null");
193-
return;
194-
}
195-
auto target = obs_weak_source_get_source(gf->text_source);
196-
if (!target) {
197-
obs_log(LOG_ERROR, "text_source target is null");
198-
return;
198+
if (!gf->text_source) {
199+
obs_log(LOG_ERROR, "text_source is null");
200+
return;
201+
}
202+
auto target = obs_weak_source_get_source(gf->text_source);
203+
if (!target) {
204+
obs_log(LOG_ERROR, "text_source target is null");
205+
return;
206+
}
207+
auto text_settings = obs_source_get_settings(target);
208+
obs_data_set_string(text_settings, "text", str.c_str());
209+
obs_source_update(target, text_settings);
210+
obs_source_release(target);
199211
}
200-
auto text_settings = obs_source_get_settings(target);
201-
obs_data_set_string(text_settings, "text", str.c_str());
202-
obs_source_update(target, text_settings);
203-
obs_source_release(target);
204212
};
205213

206214
void transcription_filter_update(void *data, obs_data_t *s)
@@ -218,8 +226,9 @@ void transcription_filter_update(void *data, obs_data_t *s)
218226
const char *new_text_source_name = obs_data_get_string(s, "subtitle_sources");
219227
obs_weak_source_t *old_weak_text_source = NULL;
220228

221-
if (strcmp(new_text_source_name, "none") == 0 ||
222-
strcmp(new_text_source_name, "(null)") == 0 || strcmp(new_text_source_name, "") == 0) {
229+
if (new_text_source_name == nullptr || strcmp(new_text_source_name, "none") == 0 ||
230+
strcmp(new_text_source_name, "(null)") == 0 ||
231+
strcmp(new_text_source_name, "text_file") == 0 || strlen(new_text_source_name) == 0) {
223232
// new selected text source is not valid, release the old one
224233
if (gf->text_source) {
225234
if (!gf->text_source_mutex) {
@@ -234,6 +243,15 @@ void transcription_filter_update(void *data, obs_data_t *s)
234243
bfree(gf->text_source_name);
235244
gf->text_source_name = nullptr;
236245
}
246+
gf->output_file_path = "";
247+
if (strcmp(new_text_source_name, "text_file") == 0) {
248+
// set the output file path
249+
const char *output_file_path =
250+
obs_data_get_string(s, "subtitle_output_filename");
251+
if (output_file_path != nullptr && strlen(output_file_path) > 0) {
252+
gf->output_file_path = output_file_path;
253+
}
254+
}
237255
} else {
238256
// new selected text source is valid, check if it's different from the old one
239257
if (gf->text_source_name == nullptr ||
@@ -248,6 +266,11 @@ void transcription_filter_update(void *data, obs_data_t *s)
248266
old_weak_text_source = gf->text_source;
249267
gf->text_source = nullptr;
250268
}
269+
if (gf->text_source_name) {
270+
// free the old text source name
271+
bfree(gf->text_source_name);
272+
gf->text_source_name = nullptr;
273+
}
251274
gf->text_source_name = bstrdup(new_text_source_name);
252275
}
253276
}
@@ -343,7 +366,7 @@ void transcription_filter_update(void *data, obs_data_t *s)
343366
void *transcription_filter_create(obs_data_t *settings, obs_source_t *filter)
344367
{
345368
struct transcription_filter_data *gf = static_cast<struct transcription_filter_data *>(
346-
bmalloc(sizeof(struct transcription_filter_data)));
369+
bzalloc(sizeof(struct transcription_filter_data)));
347370

348371
// Get the number of channels for the input source
349372
gf->channels = audio_output_get_channels(obs_get_audio());
@@ -364,7 +387,7 @@ void *transcription_filter_create(obs_data_t *settings, obs_source_t *filter)
364387
}
365388

366389
gf->context = filter;
367-
gf->whisper_model_path = obs_data_get_string(settings, "whisper_model_path");
390+
gf->whisper_model_path = std::string(obs_data_get_string(settings, "whisper_model_path"));
368391
gf->whisper_context = init_whisper_context(gf->whisper_model_path);
369392
if (gf->whisper_context == nullptr) {
370393
obs_log(LOG_ERROR, "Failed to load whisper model");
@@ -395,6 +418,7 @@ void *transcription_filter_create(obs_data_t *settings, obs_source_t *filter)
395418
gf->text_source_mutex = new std::mutex();
396419
gf->text_source = nullptr;
397420
gf->text_source_name = bstrdup(obs_data_get_string(settings, "subtitle_sources"));
421+
gf->output_file_path = std::string("");
398422

399423
obs_log(gf->log_level, "transcription_filter: run update");
400424
// get the settings updated on the filter data struct
@@ -430,7 +454,7 @@ void transcription_filter_deactivate(void *data)
430454
void transcription_filter_defaults(obs_data_t *s)
431455
{
432456
obs_data_set_default_bool(s, "vad_enabled", true);
433-
obs_data_set_default_int(s, "log_level", LOG_DEBUG);
457+
obs_data_set_default_int(s, "log_level", LOG_INFO);
434458
obs_data_set_default_bool(s, "log_words", true);
435459
obs_data_set_default_string(s, "whisper_model_path", "models/ggml-tiny.en.bin");
436460
obs_data_set_default_string(s, "whisper_language_select", "en");
@@ -474,13 +498,34 @@ obs_properties_t *transcription_filter_properties(void *data)
474498
obs_property_list_add_int(list, "WARNING", LOG_WARNING);
475499
obs_properties_add_bool(ppts, "log_words", "Log output words");
476500

477-
obs_property_t *sources =
478-
obs_properties_add_list(ppts, "subtitle_sources", "Subtitles Text Source",
501+
obs_property_t *subs_output =
502+
obs_properties_add_list(ppts, "subtitle_sources", "Subtitles Output",
479503
OBS_COMBO_TYPE_LIST, OBS_COMBO_FORMAT_STRING);
480504
// Add "none" option
481-
obs_property_list_add_string(sources, "None / No output", "none");
505+
obs_property_list_add_string(subs_output, "None / No output", "none");
506+
obs_property_list_add_string(subs_output, "Text File output", "text_file");
482507
// Add text sources
483-
obs_enum_sources(add_sources_to_list, sources);
508+
obs_enum_sources(add_sources_to_list, subs_output);
509+
510+
obs_properties_add_path(ppts, "subtitle_output_filename", "Output filename",
511+
OBS_PATH_FILE_SAVE, "Text (*.txt)", NULL);
512+
513+
obs_property_set_modified_callback(subs_output, [](obs_properties_t *props,
514+
obs_property_t *property,
515+
obs_data_t *settings) {
516+
UNUSED_PARAMETER(property);
517+
const char *new_output = obs_data_get_string(settings, "subtitle_sources");
518+
if (strcmp(new_output, "text_file") == 0) {
519+
// Show the output filename selection input
520+
obs_property_set_visible(
521+
obs_properties_get(props, "subtitle_output_filename"), true);
522+
} else {
523+
// Hide the output filename selection input
524+
obs_property_set_visible(
525+
obs_properties_get(props, "subtitle_output_filename"), false);
526+
}
527+
return true;
528+
});
484529

485530
// Add a list of available whisper models to download
486531
obs_property_t *whisper_models_list =

0 commit comments

Comments
 (0)