Skip to content

Commit

Permalink
feat(FgForrest#3): add chatgpt translator and configurations
Browse files Browse the repository at this point in the history
  • Loading branch information
Stepan Kamenik committed Mar 10, 2024
1 parent 769da9a commit 58ea24c
Show file tree
Hide file tree
Showing 12 changed files with 343 additions and 110 deletions.
6 changes: 5 additions & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,11 @@
</dependencyManagement>

<dependencies>

<dependency>
<groupId>com.theokanning.openai-gpt3-java</groupId>
<artifactId>service</artifactId>
<version>0.18.2</version>
</dependency>
<dependency>
<groupId>com.google.cloud</groupId>
<artifactId>google-cloud-translate</artifactId>
Expand Down
11 changes: 3 additions & 8 deletions src/main/java/one/edee/babylon/MainService.java
Original file line number Diff line number Diff line change
Expand Up @@ -29,20 +29,15 @@ public MainService(Exporter exporter,
this.importProcessor = importProcessor;
}

public void startTranslation(Action action, TranslationConfiguration configuration, String spreadsheetId, boolean combineSheets, String translatorApiKey) throws IOException, GeneralSecurityException, InterruptedException {
public void startTranslation(Action action, TranslationConfiguration configuration, String spreadsheetId, boolean combineSheets) throws IOException, GeneralSecurityException, InterruptedException {
long stTime = System.currentTimeMillis();
switch (action) {
case EXPORT:
log.info("Babylon starting...");
exporter.walkPathsAndWriteSheets(
configuration.getPath(),
configuration.getMutations(),
configuration,
spreadsheetId,
configuration.getSnapshotPath(),
configuration.getLockedCellEditors(),
combineSheets,
translatorApiKey,
configuration.getDefaultLang());
combineSheets);
break;
case IMPORT:
importProcessor.doImport(spreadsheetId);
Expand Down
15 changes: 3 additions & 12 deletions src/main/java/one/edee/babylon/SpringBootConsoleApplication.java
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ public void run(String... args) {
try {
log.info("Loading config file: '" + arguments.getConfigFileName() + "'");
TranslationConfiguration configuration = configurationReader.readAndCheckConfiguration(arguments.getConfigFileName());
mainService.startTranslation(arguments.getAction(), configuration, arguments.getGoogleSheetId(), arguments.isCombineSheets(), arguments.getTranslatorApiKey());
mainService.startTranslation(arguments.getAction(), configuration, arguments.getGoogleSheetId(), arguments.isCombineSheets());
} catch (Exception e) {
log.error("BABYLON ERROR: ", e);
System.exit(-1);
Expand Down Expand Up @@ -75,10 +75,7 @@ public static Arguments parseArguments(String... args) {
arguments.setConfigFileName(args[1]);
arguments.setGoogleSheetId(args[2]);
if (args.length > 3){
arguments.setTranslatorApiKey(args[3]);
if (args.length > 4){
arguments.setCombineSheets(Boolean.parseBoolean(args[4]));
}
arguments.setCombineSheets(Boolean.parseBoolean(args[3]));
}
return arguments;
}
Expand All @@ -87,8 +84,7 @@ private static void printRequiredArguments() {
log.info("1 - expected action (import, export)");
log.info("2 - path to translator-config.json file");
log.info("3 - ID of the google sheet (e.g. 1xhnBAOpy8-9KWhl8NP0ZIy6mhlgXKnKcLJwKcIeyjPc)");
log.info("4 - arg to specify translator api key");
log.info("5 - arg to specify combineSheets mode");
log.info("4 - arg to specify combineSheets mode");
}

/**
Expand Down Expand Up @@ -118,11 +114,6 @@ public static class Arguments {
* This mode is useful to correct duplicates, etc.
*/
private boolean combineSheets = false;

/**
* Translator api key (Google/Deepl).
*/
private String translatorApiKey;
}

}
30 changes: 30 additions & 0 deletions src/main/java/one/edee/babylon/config/SupportedTranslators.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
package one.edee.babylon.config;

/**
* Machine-translation engines that can be selected for the export phase.
* <p>
* The chosen value is held in the {@code translator} field of
* {@code TranslationConfiguration}. During export the translator bean whose
* {@code getSupportedTranslator()} equals the configured constant is used;
* when no translator is configured, {@link #GOOGLE} is the fallback.
*
* @author Štěpán Kameník ([email protected]), FG Forrest a.s. (c) 2024
**/
public enum SupportedTranslators {

/**
* Google translation engine; also the fallback when the configuration
* does not name a translator.
*
* @see one.edee.babylon.export.translator.GoogleTranslator
*/
GOOGLE,

/**
* DeepL translation engine.
*
* @see one.edee.babylon.export.translator.DeeplTranslator
*/
DEEPL,

/**
* OpenAI (ChatGPT) translation engine.
*
* @see one.edee.babylon.export.translator.OpenAiTranslator
*/
OPENAI
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import lombok.NoArgsConstructor;
import lombok.NonNull;
import lombok.RequiredArgsConstructor;
import org.jetbrains.annotations.Nullable;

import java.io.Serializable;
import java.nio.file.Path;
Expand Down Expand Up @@ -52,7 +53,13 @@ public class TranslationConfiguration implements Serializable {
/**
* Translation engine used to pre-translate texts during export; when not set, Google is used.
*/
@NonNull
@Nullable
private SupportedTranslators translator;

/**
* API key for the selected translation engine; texts are machine-translated only when this key is set.
*/
@Nullable
private String translatorApiKey;

@JsonIgnore
Expand Down
122 changes: 45 additions & 77 deletions src/main/java/one/edee/babylon/export/Exporter.java
Original file line number Diff line number Diff line change
@@ -1,20 +1,21 @@
package one.edee.babylon.export;

import com.deepl.api.DeepLException;
import com.deepl.api.Translator;
import com.google.api.client.http.HttpRequestInitializer;
import com.google.cloud.translate.Translate;
import com.google.cloud.translate.TranslateOptions;
import lombok.RequiredArgsConstructor;
import lombok.extern.apachecommons.CommonsLog;
import one.edee.babylon.config.SupportedTranslators;
import one.edee.babylon.config.TranslationConfiguration;
import one.edee.babylon.db.SnapshotUtils;
import one.edee.babylon.export.dto.ExportResult;
import one.edee.babylon.export.dto.TranslationSheet;
import one.edee.babylon.export.translator.Translator;
import one.edee.babylon.sheets.SheetsException;
import one.edee.babylon.sheets.gsheets.model.ASheet;
import one.edee.babylon.snapshot.TranslationSnapshotWriteContract;
import one.edee.babylon.util.AntPathResourceLoader;
import one.edee.babylon.util.PathUtils;
import org.jetbrains.annotations.NotNull;
import org.springframework.context.ApplicationContext;
import org.springframework.util.Assert;
import org.springframework.util.StringUtils;

import java.io.File;
Expand All @@ -25,66 +26,35 @@
import java.util.function.Function;
import java.util.stream.Collectors;

import static com.google.cloud.translate.Translate.TranslateOption.sourceLanguage;
import static com.google.cloud.translate.Translate.TranslateOption.targetLanguage;
import static java.util.Optional.ofNullable;

/**
* Performs the export phase that generates translation sheets.
*/
@CommonsLog
@RequiredArgsConstructor
public class Exporter {
private static final String COMBINING_SHEET_NAME = "ALL";


private final ApplicationContext applicationContext;
private final TranslationCollector translationCollector;
private final TranslationSnapshotWriteContract snapshot;
private final SheetContract gsc;
private final AntPathResourceLoader resourceLoader;
private final PathUtils pu;

public Exporter(TranslationCollector translationCollector, TranslationSnapshotWriteContract snapshot, SheetContract gsc, AntPathResourceLoader resourceLoader) {
this.translationCollector = translationCollector;
this.snapshot = snapshot;
this.gsc = gsc;
this.resourceLoader = resourceLoader;
this.pu = new PathUtils();
}
private final PathUtils pu = new PathUtils();

/**
* Walks message file paths, gathering messages and translations, producing translation sheets in given GSheet spreadsheet.
*
* @param patternPaths paths of message files to export
* @param translationLangs languages to translate messages to
* @param spreadsheetId id of GSheets spreadsheet, must be empty
* @param snapshotPath path to the translation snapshot file
*/
public void walkPathsAndWriteSheets(List<String> patternPaths,
List<String> translationLangs,
String spreadsheetId,
Path snapshotPath,
boolean combineSheets,
String translatorApiKey) {
walkPathsAndWriteSheets(patternPaths, translationLangs, spreadsheetId, snapshotPath, Collections.emptyList(), combineSheets, translatorApiKey, null);
}

/**
* Walks message file paths, gathering messages and translations, producing translation sheets in given GSheet spreadsheet.
*
* @param patternPaths paths of message files to export
* @param translationLangs languages to translate messages to
* @param configuration configuration of translation run
* @param spreadsheetId id of GSheets spreadsheet, must be empty
* @param snapshotPath path to the translation snapshot file
* @param lockedCellEditors list of Google account emails, these account will have the permission to edit locked cells
* @param translatorApiKey
*/
public void walkPathsAndWriteSheets(List<String> patternPaths,
List<String> translationLangs,
public void walkPathsAndWriteSheets(TranslationConfiguration configuration,
String spreadsheetId,
Path snapshotPath,
List<String> lockedCellEditors,
boolean combineSheets,
String translatorApiKey,
String defaultLang) {
boolean combineSheets) {
List<String> patternPaths = configuration.getPath();
warnDuplicatePaths(patternPaths);

List<ASheet> prevSheets = listAllSheets(spreadsheetId);
Expand All @@ -95,7 +65,7 @@ public void walkPathsAndWriteSheets(List<String> patternPaths,
throw new IllegalArgumentException("Please fix the message file paths in the configuration file.");
}

ExportResult result = translationCollector.walkPathsAndCollectTranslationSheets(allUniquePaths, translationLangs);
ExportResult result = translationCollector.walkPathsAndCollectTranslationSheets(allUniquePaths, configuration.getMutations());

if (combineSheets) {
// only for translation debugging
Expand All @@ -116,31 +86,43 @@ public void walkPathsAndWriteSheets(List<String> patternPaths,
original.add(new TranslationSheet(COMBINING_SHEET_NAME,combine));
}

Map<String, List<String>> changed = translateTextsByExternalTool(translatorApiKey, defaultLang, result);
Map<String, List<String>> changed = translateTextsByExternalTool(configuration, result);

uploadTranslations(result, spreadsheetId, lockedCellEditors, changed);
uploadTranslations(result, spreadsheetId, configuration.getLockedCellEditors(), changed);

updateSnapshotAndWriteToDisk(this.snapshot, result, snapshotPath);
updateSnapshotAndWriteToDisk(this.snapshot, result, configuration.getSnapshotPath());

List<Integer> prevSheetIds = prevSheets.stream().map(ASheet::getId).collect(Collectors.toList());
deleteOldSheets(prevSheetIds, spreadsheetId);
}

@NotNull
private static Map<String, List<String>> translateTextsByExternalTool(String translatorApiKey, String defaultLang, ExportResult result) {
private Map<String, List<String>> translateTextsByExternalTool(TranslationConfiguration configuration, ExportResult result) {
Map<String, List<String>> changed = new HashMap<>();

if (translatorApiKey != null) {
if (configuration.getTranslatorApiKey() != null) {
SupportedTranslators translatorType = ofNullable(configuration.getTranslator()).orElse(SupportedTranslators.GOOGLE);

Translator translator = applicationContext.getBeansOfType(Translator.class)
.values()
.stream()
.filter(i -> i.getSupportedTranslator().equals(translatorType))
.findFirst()
.orElseThrow(() -> new IllegalArgumentException("Cannot find translator bean for type" + translatorType));
translator.init(configuration.getTranslatorApiKey());

try {
// Translator translator = new Translator(translatorApiKey);
//noinspection deprecation
Translate translate = TranslateOptions.newBuilder().setApiKey(translatorApiKey).build().getService();
for (TranslationSheet sheet : result.getSheets()) {
log.info("Translating sheet " + sheet.getSheetName());

List<List<String>> rows = sheet.getRows();
List<String> header = rows.get(0);
List<String> originals = rows.stream().map(i->i.get(1)).map(i->StringUtils.hasText(i) ? i : "____DUMMY").collect(Collectors.toList());
Map<String, List<String>> translations = new HashMap<>();

for (String lang : header.stream().skip(2).collect(Collectors.toList())) {
translations.put(lang, translator.translate(configuration.getDefaultLang(), originals, lang));
}

for (int i = 1; i < rows.size(); i++) {
Map<Integer, String> toChange = new HashMap<>();
Expand All @@ -152,17 +134,17 @@ private static Map<String, List<String>> translateTextsByExternalTool(String tra

String lang = header.get(l);

if (lang.equals("en")) {
lang = "en-GB";
}

if (StringUtils.hasText(original)) {
String translatedText = getTranslatedTextByGoogle(defaultLang, translate, original, lang);
toChange.put(l, translatedText);

changed
.computeIfAbsent(sheet.getSheetName(), key -> new LinkedList<>())
.add(i + "_" + l);
String transOriginal = originals.get(i);
if (!Objects.equals(original, "____DUMMY")){
Assert.isTrue(Objects.equals(transOriginal, original), "Originals does not equals!");
String translatedText = translations.get(lang).get(i);
toChange.put(l, translatedText);

changed
.computeIfAbsent(sheet.getSheetName(), key -> new LinkedList<>())
.add(i + "_" + l);
}
}
}
}
Expand All @@ -181,20 +163,6 @@ private static Map<String, List<String>> translateTextsByExternalTool(String tra
return changed;
}

private static String getTranslatedTextByDeepl(String defaultLang, Translator translator, String original, String lang) throws DeepLException, InterruptedException {
return translator.translateText(original, defaultLang, lang).getText();
}

private static String getTranslatedTextByGoogle(String defaultLang, Translate translate, String original, String lang) {

return translate.translate(
original,
sourceLanguage(defaultLang),
targetLanguage(lang))
.getTranslatedText();
}


private void warnDuplicatePaths(List<String> patternPaths) {
List<String> duplicatePaths = detectDuplicatePatternPaths(patternPaths);
if (!duplicatePaths.isEmpty()) {
Expand Down
Loading

0 comments on commit 58ea24c

Please sign in to comment.