21
21
#include " whisper-utils/whisper-utils.h"
22
22
#include " whisper-utils/whisper-model-utils.h"
23
23
#include " translation/language_codes.h"
24
+ #include " translation/cloud-translation/translation-cloud.h"
24
25
25
26
void send_caption_to_source (const std::string &target_source_name, const std::string &caption,
26
27
struct transcription_filter_data *gf)
@@ -80,30 +81,60 @@ std::string send_sentence_to_translation(const std::string &sentence,
80
81
return " " ;
81
82
}
82
83
84
+ void send_sentence_to_cloud_translation_async (const std::string &sentence,
85
+ struct transcription_filter_data *gf,
86
+ const std::string &source_language,
87
+ std::function<void (const std::string &)> callback)
88
+ {
89
+ std::thread ([sentence, gf, source_language, callback]() {
90
+ const std::string last_text = gf->last_text_for_cloud_translation ;
91
+ gf->last_text_for_cloud_translation = sentence;
92
+ if (gf->translate_cloud && !sentence.empty ()) {
93
+ obs_log (gf->log_level , " Translating text with cloud provider %s. %s -> %s" ,
94
+ gf->translate_cloud_provider .c_str (), source_language.c_str (),
95
+ gf->translate_cloud_target_language .c_str ());
96
+ std::string translated_text;
97
+ if (sentence == last_text) {
98
+ // do not translate the same sentence twice
99
+ callback (gf->last_text_cloud_translation );
100
+ return ;
101
+ }
102
+ CloudTranslatorConfig config;
103
+ config.provider = gf->translate_cloud_provider ;
104
+ config.access_key = gf->translate_cloud_api_key ;
105
+ config.secret_key = gf->translate_cloud_secret_key ;
106
+ config.free = gf->translate_cloud_deepl_free ;
107
+ config.region = gf->translate_cloud_region ;
108
+
109
+ translated_text = translate_cloud (config, sentence,
110
+ gf->translate_cloud_target_language ,
111
+ source_language);
112
+ if (!translated_text.empty ()) {
113
+ if (gf->log_words ) {
114
+ obs_log (LOG_INFO, " Cloud Translation: '%s' -> '%s'" ,
115
+ sentence.c_str (), translated_text.c_str ());
116
+ }
117
+ gf->last_text_translation = translated_text;
118
+ callback (translated_text);
119
+ return ;
120
+ } else {
121
+ obs_log (gf->log_level , " Failed to translate text" );
122
+ }
123
+ }
124
+ callback (" " );
125
+ }).detach ();
126
+ }
127
+
83
128
void send_sentence_to_file (struct transcription_filter_data *gf,
84
- const DetectionResultWithText &result, const std::string &str_copy ,
85
- const std::string &translated_sentence )
129
+ const DetectionResultWithText &result, const std::string &sentence ,
130
+ const std::string &file_path, bool bump_sentence_number )
86
131
{
87
132
// Check if we should save the sentence
88
133
if (gf->save_only_while_recording && !obs_frontend_recording_active ()) {
89
134
// We are not recording, do not save the sentence to file
90
135
return ;
91
136
}
92
137
93
- std::string translated_file_path = " " ;
94
- bool write_translations = gf->translate && !translated_sentence.empty ();
95
-
96
- // if translation is enabled, save the translated sentence to another file
97
- if (write_translations) {
98
- // add a postfix to the file name (without extension) with the translation target language
99
- std::string output_file_path = gf->output_file_path ;
100
- std::string file_extension =
101
- output_file_path.substr (output_file_path.find_last_of (" ." ) + 1 );
102
- std::string file_name =
103
- output_file_path.substr (0 , output_file_path.find_last_of (" ." ));
104
- translated_file_path = file_name + " _" + gf->target_lang + " ." + file_extension;
105
- }
106
-
107
138
// should the file be truncated?
108
139
std::ios_base::openmode openmode = std::ios::out;
109
140
if (gf->truncate_output_file ) {
@@ -114,15 +145,9 @@ void send_sentence_to_file(struct transcription_filter_data *gf,
114
145
if (!gf->save_srt ) {
115
146
// Write raw sentence to file
116
147
try {
117
- std::ofstream output_file (gf-> output_file_path , openmode);
118
- output_file << str_copy << std::endl;
148
+ std::ofstream output_file (file_path , openmode);
149
+ output_file << sentence << std::endl;
119
150
output_file.close ();
120
- if (write_translations) {
121
- std::ofstream translated_output_file (translated_file_path,
122
- openmode);
123
- translated_output_file << translated_sentence << std::endl;
124
- translated_output_file.close ();
125
- }
126
151
} catch (const std::ofstream::failure &e) {
127
152
obs_log (LOG_ERROR, " Exception opening/writing/closing file: %s" , e.what ());
128
153
}
@@ -133,9 +158,9 @@ void send_sentence_to_file(struct transcription_filter_data *gf,
133
158
}
134
159
135
160
obs_log (gf->log_level , " Saving sentence to file %s, sentence #%d" ,
136
- gf-> output_file_path .c_str (), gf->sentence_number );
161
+ file_path .c_str (), gf->sentence_number );
137
162
// Append sentence to file in .srt format
138
- std::ofstream output_file (gf-> output_file_path , openmode);
163
+ std::ofstream output_file (file_path , openmode);
139
164
output_file << gf->sentence_number << std::endl;
140
165
// use the start and end timestamps to calculate the start and end time in srt format
141
166
auto format_ts_for_srt = [](std::ofstream &output_stream, uint64_t ts) {
@@ -156,28 +181,34 @@ void send_sentence_to_file(struct transcription_filter_data *gf,
156
181
format_ts_for_srt (output_file, result.end_timestamp_ms );
157
182
output_file << std::endl;
158
183
159
- output_file << str_copy << std::endl;
184
+ output_file << sentence << std::endl;
160
185
output_file << std::endl;
161
186
output_file.close ();
162
187
163
- if (write_translations) {
164
- obs_log (gf->log_level , " Saving translation to file %s, sentence #%d" ,
165
- translated_file_path.c_str (), gf->sentence_number );
166
-
167
- // Append translated sentence to file in .srt format
168
- std::ofstream translated_output_file (translated_file_path, openmode);
169
- translated_output_file << gf->sentence_number << std::endl;
170
- format_ts_for_srt (translated_output_file, result.start_timestamp_ms );
171
- translated_output_file << " --> " ;
172
- format_ts_for_srt (translated_output_file, result.end_timestamp_ms );
173
- translated_output_file << std::endl;
174
-
175
- translated_output_file << translated_sentence << std::endl;
176
- translated_output_file << std::endl;
177
- translated_output_file.close ();
188
+ if (bump_sentence_number) {
189
+ gf->sentence_number ++;
178
190
}
191
+ }
192
+ }
179
193
180
- gf->sentence_number ++;
194
+ void send_translated_sentence_to_file (struct transcription_filter_data *gf,
195
+ const DetectionResultWithText &result,
196
+ const std::string &translated_sentence,
197
+ const std::string &target_lang)
198
+ {
199
+ // if translation is enabled, save the translated sentence to another file
200
+ if (translated_sentence.empty ()) {
201
+ obs_log (gf->log_level , " Translation is empty, not saving to file" );
202
+ } else {
203
+ // add a postfix to the file name (without extension) with the translation target language
204
+ std::string translated_file_path = " " ;
205
+ std::string output_file_path = gf->output_file_path ;
206
+ std::string file_extension =
207
+ output_file_path.substr (output_file_path.find_last_of (" ." ) + 1 );
208
+ std::string file_name =
209
+ output_file_path.substr (0 , output_file_path.find_last_of (" ." ));
210
+ translated_file_path = file_name + " _" + target_lang + " ." + file_extension;
211
+ send_sentence_to_file (gf, result, translated_sentence, translated_file_path, false );
181
212
}
182
213
}
183
214
@@ -235,41 +266,76 @@ void set_text_callback(struct transcription_filter_data *gf,
235
266
}
236
267
}
237
268
238
- bool should_translate =
269
+ bool should_translate_local =
239
270
gf->translate_only_full_sentences ? result.result == DETECTION_RESULT_SPEECH : true ;
240
271
241
272
// send the sentence to translation (if enabled)
242
- std::string translated_sentence =
243
- should_translate ? send_sentence_to_translation (str_copy, gf, result.language ) : " " ;
273
+ std::string translated_sentence_local =
274
+ should_translate_local ? send_sentence_to_translation (str_copy, gf, result.language )
275
+ : " " ;
244
276
245
277
if (gf->translate ) {
246
278
if (gf->translation_output == " none" ) {
247
279
// overwrite the original text with the translated text
248
- str_copy = translated_sentence ;
280
+ str_copy = translated_sentence_local ;
249
281
} else {
250
282
if (gf->buffered_output ) {
251
283
// buffered output - add the sentence to the monitor
252
284
gf->translation_monitor .addSentenceFromStdString (
253
- translated_sentence ,
285
+ translated_sentence_local ,
254
286
get_time_point_from_ms (result.start_timestamp_ms ),
255
287
get_time_point_from_ms (result.end_timestamp_ms ),
256
288
result.result == DETECTION_RESULT_PARTIAL);
257
289
} else {
258
290
// non-buffered output - send the sentence to the selected source
259
- send_caption_to_source (gf->translation_output , translated_sentence,
260
- gf);
291
+ send_caption_to_source (gf->translation_output ,
292
+ translated_sentence_local, gf);
261
293
}
262
294
}
295
+ if (gf->save_to_file && gf->output_file_path != " " ) {
296
+ send_translated_sentence_to_file (gf, result, translated_sentence_local,
297
+ gf->target_lang );
298
+ }
263
299
}
264
300
265
- if (gf->buffered_output ) {
266
- gf->captions_monitor .addSentenceFromStdString (
267
- str_copy, get_time_point_from_ms (result.start_timestamp_ms ),
268
- get_time_point_from_ms (result.end_timestamp_ms ),
269
- result.result == DETECTION_RESULT_PARTIAL);
270
- } else {
271
- // non-buffered output - send the sentence to the selected source
272
- send_caption_to_source (gf->text_source_name , str_copy, gf);
301
+ bool should_translate_cloud = (gf->translate_cloud_only_full_sentences
302
+ ? result.result == DETECTION_RESULT_SPEECH
303
+ : true ) &&
304
+ gf->translate_cloud ;
305
+
306
+ if (should_translate_cloud) {
307
+ send_sentence_to_cloud_translation_async (
308
+ str_copy, gf, result.language ,
309
+ [gf, result](const std::string &translated_sentence_cloud) {
310
+ if (gf->translate_cloud_output != " none" ) {
311
+ send_caption_to_source (gf->translate_cloud_output ,
312
+ translated_sentence_cloud, gf);
313
+ } else {
314
+ // overwrite the original text with the translated text
315
+ send_caption_to_source (gf->text_source_name ,
316
+ translated_sentence_cloud, gf);
317
+ }
318
+ if (gf->save_to_file && gf->output_file_path != " " ) {
319
+ send_translated_sentence_to_file (
320
+ gf, result, translated_sentence_cloud,
321
+ gf->translate_cloud_target_language );
322
+ }
323
+ });
324
+ }
325
+
326
+ // send the original text to the output
327
+ // unless the translation is enabled and set to overwrite the original text
328
+ if (!((should_translate_cloud && gf->translate_cloud_output == " none" ) ||
329
+ (should_translate_local && gf->translation_output == " none" ))) {
330
+ if (gf->buffered_output ) {
331
+ gf->captions_monitor .addSentenceFromStdString (
332
+ str_copy, get_time_point_from_ms (result.start_timestamp_ms ),
333
+ get_time_point_from_ms (result.end_timestamp_ms ),
334
+ result.result == DETECTION_RESULT_PARTIAL);
335
+ } else {
336
+ // non-buffered output - send the sentence to the selected source
337
+ send_caption_to_source (gf->text_source_name , str_copy, gf);
338
+ }
273
339
}
274
340
275
341
if (gf->caption_to_stream && result.result == DETECTION_RESULT_SPEECH) {
@@ -279,7 +345,7 @@ void set_text_callback(struct transcription_filter_data *gf,
279
345
280
346
if (gf->save_to_file && gf->output_file_path != " " &&
281
347
result.result == DETECTION_RESULT_SPEECH) {
282
- send_sentence_to_file (gf, result, str_copy, translated_sentence );
348
+ send_sentence_to_file (gf, result, str_copy, gf-> output_file_path , true );
283
349
}
284
350
285
351
if (!result.text .empty () && (result.result == DETECTION_RESULT_SPEECH ||
0 commit comments