Skip to content

Commit 04a6f6a

Browse files
authored
Add cloud translation support with multiple providers and configuration options (#183)
* Add cloud translation support with multiple providers and configuration options
* Refactor CMakeLists.txt for cloud translation sources formatting
* Add support for translating only full sentences in cloud translation
* Update ICU build configuration and fix header include case sensitivity
* Fix CURL helper function signatures and improve URL encoding
* Fix character type casting in DeepLTranslator for language conversion
* Refactor file saving logic in transcription filter to streamline sentence handling and add support for saving translated sentences
* Add support for Deepl Free API endpoint and enhance cloud translation configuration
* Add ccache detection to ICU build configuration for improved compilation speed
* Enhance ICU build configuration to use ccache as a compiler wrapper for improved performance
1 parent b7ab6a9 commit 04a6f6a

28 files changed

+1798
-73
lines changed

CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,8 @@ target_sources(
127127
src/translation/translation-language-utils.cpp
128128
src/ui/filter-replace-dialog.cpp)
129129

130+
add_subdirectory(src/translation/cloud-translation)
131+
130132
set_target_properties_plugin(${CMAKE_PROJECT_NAME} PROPERTIES OUTPUT_NAME ${_name})
131133

132134
if(ENABLE_TESTS)

cmake/BuildICU.cmake

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -48,26 +48,38 @@ if(WIN32)
4848
"${ICU_LIB_${lib}}")
4949
endforeach()
5050
else()
51+
# Pick the compiler commands handed to ICU's configure step.
#
# C_LAUNCHER and CXX_LAUNCHER are exported below as CC / CXX through
# ICU_BUILD_ENV_VARS. When ccache is installed the compilers are wrapped with
# it; otherwise the compilers CMake selected are passed through unchanged, so
# CC / CXX are never exported as empty strings.
find_program(CCACHE_PROGRAM ccache)
if(CCACHE_PROGRAM)
  message(STATUS "Found ccache: ${CCACHE_PROGRAM}")
  # Kept as one space-separated string so that "CC=<ccache> <compiler>" stays
  # a single argument to `cmake -E env`.
  set(C_LAUNCHER "${CCACHE_PROGRAM} ${CMAKE_C_COMPILER}")
  set(CXX_LAUNCHER "${CCACHE_PROGRAM} ${CMAKE_CXX_COMPILER}")
else()
  # No ccache: build ICU with the same compilers as the rest of the project.
  set(C_LAUNCHER "${CMAKE_C_COMPILER}")
  set(CXX_LAUNCHER "${CMAKE_CXX_COMPILER}")
endif()
59+
5160
set(ICU_URL
5261
"https://github.com/unicode-org/icu/releases/download/release-${ICU_VERSION_DASH}/icu4c-${ICU_VERSION_UNDERSCORE}-src.tgz"
5362
)
5463
set(ICU_HASH "SHA256=cb968df3e4d2e87e8b11c49a5d01c787bd13b9545280fc6642f826527618caef")
5564
if(APPLE)
5665
set(ICU_PLATFORM "MacOSX")
5766
set(TARGET_ARCH -arch\ $ENV{MACOS_ARCH})
58-
set(ICU_BUILD_ENV_VARS CFLAGS=${TARGET_ARCH} CXXFLAGS=${TARGET_ARCH} LDFLAGS=${TARGET_ARCH})
67+
set(ICU_BUILD_ENV_VARS CFLAGS=${TARGET_ARCH} CXXFLAGS=${TARGET_ARCH} LDFLAGS=${TARGET_ARCH} CC=${C_LAUNCHER}
68+
CXX=${CXX_LAUNCHER})
5969
else()
6070
set(ICU_PLATFORM "Linux")
61-
set(ICU_BUILD_ENV_VARS CFLAGS=-fPIC CXXFLAGS=-fPIC LDFLAGS=-fPIC)
71+
set(ICU_BUILD_ENV_VARS CFLAGS=-fPIC CXXFLAGS=-fPIC LDFLAGS=-fPIC CC=${C_LAUNCHER} CXX=${CXX_LAUNCHER})
6272
endif()
6373

6474
ExternalProject_Add(
6575
ICU_build
6676
DOWNLOAD_EXTRACT_TIMESTAMP true
6777
GIT_REPOSITORY "https://github.com/unicode-org/icu.git"
6878
GIT_TAG "release-${ICU_VERSION_DASH}"
69-
CONFIGURE_COMMAND ${CMAKE_COMMAND} -E env ${ICU_BUILD_ENV_VARS} <SOURCE_DIR>/icu4c/source/runConfigureICU
70-
${ICU_PLATFORM} --prefix=<INSTALL_DIR> --enable-static --disable-shared
79+
CONFIGURE_COMMAND
80+
${CMAKE_COMMAND} -E env ${ICU_BUILD_ENV_VARS} <SOURCE_DIR>/icu4c/source/runConfigureICU ${ICU_PLATFORM}
81+
--prefix=<INSTALL_DIR> --enable-static --disable-shared --disable-tools --disable-samples --disable-layout
82+
--disable-layoutex --disable-tests --disable-draft --disable-extras --disable-icuio
7183
BUILD_COMMAND make -j4
7284
BUILD_BYPRODUCTS
7385
<INSTALL_DIR>/lib/${CMAKE_STATIC_LIBRARY_PREFIX}icudata${CMAKE_STATIC_LIBRARY_SUFFIX}

data/locale/en-US.ini

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@ whisper_sampling_method="Whisper Sampling Method"
1616
n_threads="Number of threads"
1717
n_max_text_ctx="Max text context"
1818
translate="Translate"
19+
translate_local="Local Translation"
20+
translate_cloud="Cloud Translation"
1921
no_context="No context"
2022
single_segment="Single segment"
2123
print_special="Print special"
@@ -75,6 +77,11 @@ general_group="General"
7577
transcription_group="Transcription"
7678
file_output_group="File Output Configuration"
7779
translate_explaination="Enabling translation will increase the processing load on your machine, This feature uses additional resources to translate content in real-time, which may impact performance. <a href='#'>Learn More</a>"
80+
translate_cloud_explaination="Cloud translation requires an active internet connection and API keys to the translation provider."
81+
translate_cloud_provider="Translation Provider"
82+
translate_cloud_only_full_sentences="Translate only full sentences"
83+
translate_cloud_api_key="Access Key"
84+
translate_cloud_secret_key="Secret Key"
7885
log_group="Logging"
7986
advanced_group="Advanced Configuration"
8087
buffered_output_parameters="Buffered Output Configuration"
@@ -89,4 +96,20 @@ translate_only_full_sentences="Translate only full sentences"
8996
duration_filter_threshold="Duration filter"
9097
segment_duration="Segment duration"
9198
n_context_sentences="# Context sentences"
92-
max_sub_duration="Max. sub duration (ms)"
99+
max_sub_duration="Max. sub duration (ms)"
100+
Google-Cloud-Translation="Google Cloud Translation"
101+
Microsoft-Translator="Microsoft Azure Translator"
102+
Amazon-Translate="AWS Translate"
103+
IBM-Watson-Translate="IBM Watson Translate"
104+
Yandex-Translate="Yandex Translate"
105+
Baidu-Translate="Baidu Translate"
106+
Tencent-Translate="Tencent Translate"
107+
Alibaba-Translate="Alibaba Translate"
108+
Naver-Translate="Naver Translate"
109+
Kakao-Translate="Kakao Translate"
110+
Papago-Translate="Papago"
111+
Deepl-Translate="Deepl"
112+
Bing-Translate="Bing Translate"
113+
OpenAI-Translate="OpenAI"
114+
Claude-Translate="Claude"
115+
translate_cloud_deepl_free="Use Deepl Free API Endpoint"

src/transcription-filter-callbacks.cpp

Lines changed: 125 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#include "whisper-utils/whisper-utils.h"
2222
#include "whisper-utils/whisper-model-utils.h"
2323
#include "translation/language_codes.h"
24+
#include "translation/cloud-translation/translation-cloud.h"
2425

2526
void send_caption_to_source(const std::string &target_source_name, const std::string &caption,
2627
struct transcription_filter_data *gf)
@@ -80,30 +81,60 @@ std::string send_sentence_to_translation(const std::string &sentence,
8081
return "";
8182
}
8283

84+
void send_sentence_to_cloud_translation_async(const std::string &sentence,
85+
struct transcription_filter_data *gf,
86+
const std::string &source_language,
87+
std::function<void(const std::string &)> callback)
88+
{
89+
std::thread([sentence, gf, source_language, callback]() {
90+
const std::string last_text = gf->last_text_for_cloud_translation;
91+
gf->last_text_for_cloud_translation = sentence;
92+
if (gf->translate_cloud && !sentence.empty()) {
93+
obs_log(gf->log_level, "Translating text with cloud provider %s. %s -> %s",
94+
gf->translate_cloud_provider.c_str(), source_language.c_str(),
95+
gf->translate_cloud_target_language.c_str());
96+
std::string translated_text;
97+
if (sentence == last_text) {
98+
// do not translate the same sentence twice
99+
callback(gf->last_text_cloud_translation);
100+
return;
101+
}
102+
CloudTranslatorConfig config;
103+
config.provider = gf->translate_cloud_provider;
104+
config.access_key = gf->translate_cloud_api_key;
105+
config.secret_key = gf->translate_cloud_secret_key;
106+
config.free = gf->translate_cloud_deepl_free;
107+
config.region = gf->translate_cloud_region;
108+
109+
translated_text = translate_cloud(config, sentence,
110+
gf->translate_cloud_target_language,
111+
source_language);
112+
if (!translated_text.empty()) {
113+
if (gf->log_words) {
114+
obs_log(LOG_INFO, "Cloud Translation: '%s' -> '%s'",
115+
sentence.c_str(), translated_text.c_str());
116+
}
117+
gf->last_text_translation = translated_text;
118+
callback(translated_text);
119+
return;
120+
} else {
121+
obs_log(gf->log_level, "Failed to translate text");
122+
}
123+
}
124+
callback("");
125+
}).detach();
126+
}
127+
83128
void send_sentence_to_file(struct transcription_filter_data *gf,
84-
const DetectionResultWithText &result, const std::string &str_copy,
85-
const std::string &translated_sentence)
129+
const DetectionResultWithText &result, const std::string &sentence,
130+
const std::string &file_path, bool bump_sentence_number)
86131
{
87132
// Check if we should save the sentence
88133
if (gf->save_only_while_recording && !obs_frontend_recording_active()) {
89134
// We are not recording, do not save the sentence to file
90135
return;
91136
}
92137

93-
std::string translated_file_path = "";
94-
bool write_translations = gf->translate && !translated_sentence.empty();
95-
96-
// if translation is enabled, save the translated sentence to another file
97-
if (write_translations) {
98-
// add a postfix to the file name (without extension) with the translation target language
99-
std::string output_file_path = gf->output_file_path;
100-
std::string file_extension =
101-
output_file_path.substr(output_file_path.find_last_of(".") + 1);
102-
std::string file_name =
103-
output_file_path.substr(0, output_file_path.find_last_of("."));
104-
translated_file_path = file_name + "_" + gf->target_lang + "." + file_extension;
105-
}
106-
107138
// should the file be truncated?
108139
std::ios_base::openmode openmode = std::ios::out;
109140
if (gf->truncate_output_file) {
@@ -114,15 +145,9 @@ void send_sentence_to_file(struct transcription_filter_data *gf,
114145
if (!gf->save_srt) {
115146
// Write raw sentence to file
116147
try {
117-
std::ofstream output_file(gf->output_file_path, openmode);
118-
output_file << str_copy << std::endl;
148+
std::ofstream output_file(file_path, openmode);
149+
output_file << sentence << std::endl;
119150
output_file.close();
120-
if (write_translations) {
121-
std::ofstream translated_output_file(translated_file_path,
122-
openmode);
123-
translated_output_file << translated_sentence << std::endl;
124-
translated_output_file.close();
125-
}
126151
} catch (const std::ofstream::failure &e) {
127152
obs_log(LOG_ERROR, "Exception opening/writing/closing file: %s", e.what());
128153
}
@@ -133,9 +158,9 @@ void send_sentence_to_file(struct transcription_filter_data *gf,
133158
}
134159

135160
obs_log(gf->log_level, "Saving sentence to file %s, sentence #%d",
136-
gf->output_file_path.c_str(), gf->sentence_number);
161+
file_path.c_str(), gf->sentence_number);
137162
// Append sentence to file in .srt format
138-
std::ofstream output_file(gf->output_file_path, openmode);
163+
std::ofstream output_file(file_path, openmode);
139164
output_file << gf->sentence_number << std::endl;
140165
// use the start and end timestamps to calculate the start and end time in srt format
141166
auto format_ts_for_srt = [](std::ofstream &output_stream, uint64_t ts) {
@@ -156,28 +181,34 @@ void send_sentence_to_file(struct transcription_filter_data *gf,
156181
format_ts_for_srt(output_file, result.end_timestamp_ms);
157182
output_file << std::endl;
158183

159-
output_file << str_copy << std::endl;
184+
output_file << sentence << std::endl;
160185
output_file << std::endl;
161186
output_file.close();
162187

163-
if (write_translations) {
164-
obs_log(gf->log_level, "Saving translation to file %s, sentence #%d",
165-
translated_file_path.c_str(), gf->sentence_number);
166-
167-
// Append translated sentence to file in .srt format
168-
std::ofstream translated_output_file(translated_file_path, openmode);
169-
translated_output_file << gf->sentence_number << std::endl;
170-
format_ts_for_srt(translated_output_file, result.start_timestamp_ms);
171-
translated_output_file << " --> ";
172-
format_ts_for_srt(translated_output_file, result.end_timestamp_ms);
173-
translated_output_file << std::endl;
174-
175-
translated_output_file << translated_sentence << std::endl;
176-
translated_output_file << std::endl;
177-
translated_output_file.close();
188+
if (bump_sentence_number) {
189+
gf->sentence_number++;
178190
}
191+
}
192+
}
179193

180-
gf->sentence_number++;
194+
void send_translated_sentence_to_file(struct transcription_filter_data *gf,
195+
const DetectionResultWithText &result,
196+
const std::string &translated_sentence,
197+
const std::string &target_lang)
198+
{
199+
// if translation is enabled, save the translated sentence to another file
200+
if (translated_sentence.empty()) {
201+
obs_log(gf->log_level, "Translation is empty, not saving to file");
202+
} else {
203+
// add a postfix to the file name (without extension) with the translation target language
204+
std::string translated_file_path = "";
205+
std::string output_file_path = gf->output_file_path;
206+
std::string file_extension =
207+
output_file_path.substr(output_file_path.find_last_of(".") + 1);
208+
std::string file_name =
209+
output_file_path.substr(0, output_file_path.find_last_of("."));
210+
translated_file_path = file_name + "_" + target_lang + "." + file_extension;
211+
send_sentence_to_file(gf, result, translated_sentence, translated_file_path, false);
181212
}
182213
}
183214

@@ -235,41 +266,76 @@ void set_text_callback(struct transcription_filter_data *gf,
235266
}
236267
}
237268

238-
bool should_translate =
269+
bool should_translate_local =
239270
gf->translate_only_full_sentences ? result.result == DETECTION_RESULT_SPEECH : true;
240271

241272
// send the sentence to translation (if enabled)
242-
std::string translated_sentence =
243-
should_translate ? send_sentence_to_translation(str_copy, gf, result.language) : "";
273+
std::string translated_sentence_local =
274+
should_translate_local ? send_sentence_to_translation(str_copy, gf, result.language)
275+
: "";
244276

245277
if (gf->translate) {
246278
if (gf->translation_output == "none") {
247279
// overwrite the original text with the translated text
248-
str_copy = translated_sentence;
280+
str_copy = translated_sentence_local;
249281
} else {
250282
if (gf->buffered_output) {
251283
// buffered output - add the sentence to the monitor
252284
gf->translation_monitor.addSentenceFromStdString(
253-
translated_sentence,
285+
translated_sentence_local,
254286
get_time_point_from_ms(result.start_timestamp_ms),
255287
get_time_point_from_ms(result.end_timestamp_ms),
256288
result.result == DETECTION_RESULT_PARTIAL);
257289
} else {
258290
// non-buffered output - send the sentence to the selected source
259-
send_caption_to_source(gf->translation_output, translated_sentence,
260-
gf);
291+
send_caption_to_source(gf->translation_output,
292+
translated_sentence_local, gf);
261293
}
262294
}
295+
if (gf->save_to_file && gf->output_file_path != "") {
296+
send_translated_sentence_to_file(gf, result, translated_sentence_local,
297+
gf->target_lang);
298+
}
263299
}
264300

265-
if (gf->buffered_output) {
266-
gf->captions_monitor.addSentenceFromStdString(
267-
str_copy, get_time_point_from_ms(result.start_timestamp_ms),
268-
get_time_point_from_ms(result.end_timestamp_ms),
269-
result.result == DETECTION_RESULT_PARTIAL);
270-
} else {
271-
// non-buffered output - send the sentence to the selected source
272-
send_caption_to_source(gf->text_source_name, str_copy, gf);
301+
bool should_translate_cloud = (gf->translate_cloud_only_full_sentences
302+
? result.result == DETECTION_RESULT_SPEECH
303+
: true) &&
304+
gf->translate_cloud;
305+
306+
if (should_translate_cloud) {
307+
send_sentence_to_cloud_translation_async(
308+
str_copy, gf, result.language,
309+
[gf, result](const std::string &translated_sentence_cloud) {
310+
if (gf->translate_cloud_output != "none") {
311+
send_caption_to_source(gf->translate_cloud_output,
312+
translated_sentence_cloud, gf);
313+
} else {
314+
// overwrite the original text with the translated text
315+
send_caption_to_source(gf->text_source_name,
316+
translated_sentence_cloud, gf);
317+
}
318+
if (gf->save_to_file && gf->output_file_path != "") {
319+
send_translated_sentence_to_file(
320+
gf, result, translated_sentence_cloud,
321+
gf->translate_cloud_target_language);
322+
}
323+
});
324+
}
325+
326+
// send the original text to the output
327+
// unless the translation is enabled and set to overwrite the original text
328+
if (!((should_translate_cloud && gf->translate_cloud_output == "none") ||
329+
(should_translate_local && gf->translation_output == "none"))) {
330+
if (gf->buffered_output) {
331+
gf->captions_monitor.addSentenceFromStdString(
332+
str_copy, get_time_point_from_ms(result.start_timestamp_ms),
333+
get_time_point_from_ms(result.end_timestamp_ms),
334+
result.result == DETECTION_RESULT_PARTIAL);
335+
} else {
336+
// non-buffered output - send the sentence to the selected source
337+
send_caption_to_source(gf->text_source_name, str_copy, gf);
338+
}
273339
}
274340

275341
if (gf->caption_to_stream && result.result == DETECTION_RESULT_SPEECH) {
@@ -279,7 +345,7 @@ void set_text_callback(struct transcription_filter_data *gf,
279345

280346
if (gf->save_to_file && gf->output_file_path != "" &&
281347
result.result == DETECTION_RESULT_SPEECH) {
282-
send_sentence_to_file(gf, result, str_copy, translated_sentence);
348+
send_sentence_to_file(gf, result, str_copy, gf->output_file_path, true);
283349
}
284350

285351
if (!result.text.empty() && (result.result == DETECTION_RESULT_SPEECH ||

src/transcription-filter-data.h

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -89,9 +89,18 @@ struct transcription_filter_data {
8989
float duration_filter_threshold = 2.25f;
9090
int segment_duration = 7000;
9191

92-
// Last transcription result
93-
std::string last_text_for_translation;
94-
std::string last_text_translation;
92+
// Cloud translation options
// Master switch for cloud translation
bool translate_cloud = false;
// Provider identifier handed to the cloud translator configuration
std::string translate_cloud_provider;
// Language the cloud provider should translate into
std::string translate_cloud_target_language;
// Output for cloud translations; "none" means the translation replaces the
// original text in the main text source
std::string translate_cloud_output;
// Credentials handed to the provider as access key / secret key
std::string translate_cloud_api_key;
std::string translate_cloud_secret_key;
// When set, only full-sentence results are sent to the cloud translator
bool translate_cloud_only_full_sentences = true;
// Last sentence submitted for cloud translation, and the translation that is
// returned when the same sentence is submitted again
std::string last_text_for_cloud_translation;
std::string last_text_cloud_translation;
// Use the DeepL free API endpoint; initialized so it is never read while
// indeterminate
bool translate_cloud_deepl_free = false;
// Region handed to the cloud translator configuration
std::string translate_cloud_region;
95104

96105
// Transcription context sentences
97106
int n_context_sentences;
@@ -119,6 +128,9 @@ struct transcription_filter_data {
119128
std::string translation_model_index;
120129
std::string translation_model_path_external;
121130
bool translate_only_full_sentences;
131+
// Last transcription result
132+
std::string last_text_for_translation;
133+
std::string last_text_translation;
122134

123135
bool buffered_output = false;
124136
TokenBufferThread captions_monitor;

0 commit comments

Comments
 (0)