Skip to content

Commit 9bbd863

Browse files
committed
add more improvements for translations
1 parent 87b0070 commit 9bbd863

File tree

1 file changed

+69
-73
lines changed

1 file changed

+69
-73
lines changed

deepl/src/DeeplCommand.php

Lines changed: 69 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@
1111

1212
namespace Contao\Docs\DeeplTranslator;
1313

14-
use Gt\Dom\Element;
1514
use Gt\Dom\HTMLDocument;
1615
use League\HTMLToMarkdown\HtmlConverter;
1716
use Parsedown;
@@ -60,6 +59,11 @@ class DeeplCommand extends Command
6059
*/
6160
private $htmlConverter;
6261

62+
/**
63+
* @var array
64+
*/
65+
private $specialTagsIndex = [];
66+
6367
public function __construct()
6468
{
6569
parent::__construct();
@@ -154,19 +158,32 @@ private function translateFile(OutputInterface $output, string $sourceFilePath,
154158
// Process the meta data
155159
$meta = $this->processMeta($meta, $sourceLang, $targetLang, $targetFilePath);
156160

157-
// Temporarily replace refs
158-
$body = preg_replace('/{{< ref "(.+)" >}}/', 'REF::$1::REF', $body);
161+
// Replace short codes and insert tags, since they cause trouble during translation
162+
$body = $this->replaceSpecialTags($body);
159163

160164
// Parse markdown body to HTML
161-
$html = $this->parsedown->parse($body);
165+
$html = trim($this->parsedown->parse($body));
162166

163-
// Translate HTML nodes
164-
$doc = new HTMLDocument($html);
165-
166-
foreach ($doc->children as $child) {
167-
$this->translateNode($child, $sourceLang, $targetLang);
167+
if (empty($html)) {
168+
$output->writeln(' » no content, skipping.');
169+
return;
168170
}
169171

172+
// Translate the content of the node
173+
$translationConfig = new TranslationConfig(
174+
$html,
175+
strtoupper($targetLang),
176+
strtoupper($sourceLang),
177+
['xml'], [], ['code', 'pre'],
178+
TextHandlingEnum::SPLITSENTENCES_NONEWLINES,
179+
);
180+
181+
/** @var Translation $translationResult */
182+
$translationResult = $this->deepl->getTranslation($translationConfig);
183+
$html = $translationResult->getText();
184+
185+
$doc = new HTMLDocument($html);
186+
170187
// Translate alt attributes
171188
foreach ($doc->querySelectorAll('img[alt]') as $img) {
172189
$alt = $img->getAttribute('alt');
@@ -189,19 +206,25 @@ private function translateFile(OutputInterface $output, string $sourceFilePath,
189206
$img->setAttribute('alt', $translationResult->getText());
190207
}
191208

192-
$html = $doc->saveHTML();
209+
// Remove <br> from table cells
210+
foreach ($doc->querySelectorAll('td,th') as $td) {
211+
$td->innerHTML = str_replace('<br>', '', $td->innerHTML);
212+
}
213+
214+
$html = str_replace("\r", '', $doc->saveHTML());
193215

194216
// Convert back to markdown
195217
$markdown = $this->htmlConverter->convert($html);
196218

197-
// Fix some things
198-
$markdown = preg_replace('/{{&lt;(.+)&gt;}}/m', '{{<$1>}}', $markdown);
199-
$markdown = str_replace(['{{{% ', '{{{< '], ['{{% ', '{{< '], $markdown);
200-
$markdown = preg_replace('/({{% .+ %}}) ([^\s])/', "$1\n$2", $markdown);
201-
$markdown = preg_replace('@([^\s]) ({{% /.+ %}})@', "$1\n$2", $markdown);
219+
// Restore short codes and insert tags
220+
$markdown = $this->restoreSpecialTags($markdown);
202221

203-
// Restore and transform refs
204-
$markdown = preg_replace('/REF::(.+)\.'.$sourceLang.'\.md::REF/', '{{< ref "$1.'.$targetLang.'.md" >}}', $markdown);
222+
// Fix line breaks for short codes
223+
$markdown = preg_replace('/({{% .+ %}})([^\n])/', "$1\n$2", $markdown);
224+
$markdown = preg_replace('@([^\n])({{% /.+ %}})@', "$1\n$2", $markdown);
225+
226+
// Transform refs
227+
$markdown = preg_replace('/{{< ref "(.+)\.'.$sourceLang.'\.md" >}}/', '{{< ref "$1.'.$targetLang.'.md" >}}', $markdown);
205228

206229
// Add warning
207230
$markdown = self::MACHINE_TRANSLATED_WARNING."\n\n".$markdown;
@@ -215,74 +238,20 @@ private function translateFile(OutputInterface $output, string $sourceFilePath,
215238
$output->writeln(' » '.Path::makeRelative($targetFilePath, self::$manualPath));
216239
}
217240

218-
private function translateNode(\DOMNode $node, string $sourceLang, string $targetLang): void
219-
{
220-
// Do not translate code blocks
221-
if (\in_array($node->nodeName, ['pre', 'code'], true)) {
222-
return;
223-
}
224-
225-
// Recursively process child nodes, but not for certain elements like paragraphs or table cells
226-
if ($node->hasChildNodes() && !\in_array($node->nodeName, ['p', 'td', 'th', 'li'], true)) {
227-
foreach ($node->childNodes as $child) {
228-
$this->translateNode($child, $sourceLang, $targetLang);
229-
}
230-
231-
return;
232-
}
233-
234-
// Retrieve the inner HTML content of the node
235-
$inner = '';
236-
237-
if ($node instanceof Element) {
238-
$inner = $node->innerHTML;
239-
} else {
240-
$inner = $node->textContent;
241-
}
242-
243-
// Remove superfluous whitespace, as this interferes with the translation output
244-
$inner = str_replace(["\r", "\n"], ['', ' '], $inner);
245-
$inner = str_replace(' ', ' ', $inner);
246-
$inner = trim($inner);
247-
248-
if (empty($inner)) {
249-
return;
250-
}
251-
252-
// Translate the content of the node
253-
$translationConfig = new TranslationConfig(
254-
$inner,
255-
strtoupper($targetLang),
256-
strtoupper($sourceLang),
257-
['xml'], [], [],
258-
TextHandlingEnum::SPLITSENTENCES_NONEWLINES,
259-
);
260-
261-
/** @var Translation $translationResult */
262-
$translationResult = $this->deepl->getTranslation($translationConfig);
263-
$translatedText = $translationResult->getText();
264-
265-
// Write the translated content back into the node
266-
if ($node instanceof Element) {
267-
$node->innerHTML = $translatedText;
268-
} else {
269-
$node->textContent = $translatedText;
270-
}
271-
}
272-
273241
private function processMeta(string $meta, string $sourceLang, string $targetLang, string $targetFilePath): string
274242
{
275243
$inner = trim(substr($meta, 3, -3));
276244

277245
// Parse YAML data
278-
$data = Yaml::parse($inner);
246+
$data = array_filter(Yaml::parse($inner));
279247

280248
// Remove url
281249
unset($data['url']);
282250

283251
// Set alias
284252
$aliasPath = Path::makeRelative($targetFilePath, self::$manualPath);
285253
$aliasPath = str_replace('.'.$targetLang.'.md', '', $aliasPath);
254+
$aliasPath = str_replace('_index', '', $aliasPath);
286255
$aliasPath = Path::join($targetLang, $aliasPath);
287256
$data['aliases'] = ['/'.$aliasPath.'/'];
288257

@@ -311,4 +280,31 @@ private function processMeta(string $meta, string $sourceLang, string $targetLan
311280

312281
return "---\n".$yaml."---\n";
313282
}
283+
284+
/**
 * Replaces every `{{...}}` tag in the buffer with a numeric `%%N%%` placeholder.
 *
 * Each matched tag is appended to $this->specialTagsIndex, and N is the
 * position it was stored at, so the placeholder can later be mapped back to
 * the original tag text.
 *
 * The pattern only matches tags that contain no braces themselves. Nested
 * tags are therefore collapsed from the inside out: the inner tag becomes a
 * placeholder first, which makes the outer tag matchable on the next pass.
 *
 * @param string $buffer The text containing the tags
 *
 * @return string The text with all tags replaced by placeholders
 *
 * @throws \RuntimeException If the regular expression engine reports an error
 */
private function replaceSpecialTags(string $buffer): string
{
    do {
        $count = 0;

        $result = preg_replace_callback(
            '/{{[^{}]+}}/',
            function (array $matches): string {
                $index = \count($this->specialTagsIndex);
                $this->specialTagsIndex[] = $matches[0];

                return '%%'.$index.'%%';
            },
            $buffer,
            -1,
            $count
        );

        // preg_replace_callback() returns null on a PCRE error; fail loudly
        // instead of passing null on to the string return type.
        if (null === $result) {
            throw new \RuntimeException('Could not replace special tags (PCRE error code '.preg_last_error().').');
        }

        $buffer = $result;

        // Another pass is only needed if this one replaced something, since a
        // replacement may have exposed an enclosing tag.
    } while ($count > 0);

    return $buffer;
}
297+
298+
/**
 * Replaces `%%N%%` placeholders with the tags stored in $this->specialTagsIndex.
 *
 * A restored tag may itself contain placeholders (stored tags can wrap
 * earlier placeholders), so the buffer is processed repeatedly until a pass
 * restores nothing.
 *
 * Placeholders whose number has no entry in the index are left untouched
 * instead of triggering an undefined offset error. They also do not keep the
 * loop running, so unknown placeholders cannot cause an endless loop.
 *
 * @param string $buffer The text containing the placeholders
 *
 * @return string The text with all known placeholders replaced by their tags
 *
 * @throws \RuntimeException If the regular expression engine reports an error
 */
private function restoreSpecialTags(string $buffer): string
{
    do {
        $restored = false;

        $result = preg_replace_callback(
            '/%%(\d+)%%/',
            function (array $matches) use (&$restored): string {
                $index = (int) $matches[1];

                // Not one of our placeholders (or mangled on the way): keep the text as is
                if (!isset($this->specialTagsIndex[$index])) {
                    return $matches[0];
                }

                $restored = true;

                return $this->specialTagsIndex[$index];
            },
            $buffer
        );

        // preg_replace_callback() returns null on a PCRE error; fail loudly
        // instead of passing null on to the string return type.
        if (null === $result) {
            throw new \RuntimeException('Could not restore special tags (PCRE error code '.preg_last_error().').');
        }

        $buffer = $result;
    } while ($restored);

    return $buffer;
}
314310
}

0 commit comments

Comments
 (0)