11
11
12
12
namespace Contao \Docs \DeeplTranslator ;
13
13
14
- use Gt \Dom \Element ;
15
14
use Gt \Dom \HTMLDocument ;
16
15
use League \HTMLToMarkdown \HtmlConverter ;
17
16
use Parsedown ;
@@ -60,6 +59,11 @@ class DeeplCommand extends Command
60
59
*/
61
60
private $ htmlConverter ;
62
61
62
+ /**
63
+ * @var array
64
+ */
65
+ private $ specialTagsIndex = [];
66
+
63
67
public function __construct ()
64
68
{
65
69
parent ::__construct ();
@@ -154,19 +158,32 @@ private function translateFile(OutputInterface $output, string $sourceFilePath,
154
158
// Process the meta data
155
159
$ meta = $ this ->processMeta ($ meta , $ sourceLang , $ targetLang , $ targetFilePath );
156
160
157
- // Temporarily replace refs
158
- $ body = preg_replace ( ' /{{< ref "(.+)" >}}/ ' , ' REF::$1::REF ' , $ body );
161
+ // Replace short codes and insert tags, since they cause trouble during translation
162
+ $ body = $ this -> replaceSpecialTags ( $ body );
159
163
160
164
// Parse markdown body to HTML
161
- $ html = $ this ->parsedown ->parse ($ body );
165
+ $ html = trim ( $ this ->parsedown ->parse ($ body) );
162
166
163
- // Translate HTML nodes
164
- $ doc = new HTMLDocument ($ html );
165
-
166
- foreach ($ doc ->children as $ child ) {
167
- $ this ->translateNode ($ child , $ sourceLang , $ targetLang );
167
+ if (empty ($ html )) {
168
+ $ output ->writeln (' » no content, skipping. ' );
169
+ return ;
168
170
}
169
171
172
+ // Translate the content of the node
173
+ $ translationConfig = new TranslationConfig (
174
+ $ html ,
175
+ strtoupper ($ targetLang ),
176
+ strtoupper ($ sourceLang ),
177
+ ['xml ' ], [], ['code ' , 'pre ' ],
178
+ TextHandlingEnum::SPLITSENTENCES_NONEWLINES ,
179
+ );
180
+
181
+ /** @var Translation $translationResult */
182
+ $ translationResult = $ this ->deepl ->getTranslation ($ translationConfig );
183
+ $ html = $ translationResult ->getText ();
184
+
185
+ $ doc = new HTMLDocument ($ html );
186
+
170
187
// Translate alt attributes
171
188
foreach ($ doc ->querySelectorAll ('img[alt] ' ) as $ img ) {
172
189
$ alt = $ img ->getAttribute ('alt ' );
@@ -189,19 +206,25 @@ private function translateFile(OutputInterface $output, string $sourceFilePath,
189
206
$ img ->setAttribute ('alt ' , $ translationResult ->getText ());
190
207
}
191
208
192
- $ html = $ doc ->saveHTML ();
209
+ // Remove <br> from table cells
210
+ foreach ($ doc ->querySelectorAll ('td,th ' ) as $ td ) {
211
+ $ td ->innerHTML = str_replace ('<br> ' , '' , $ td ->innerHTML );
212
+ }
213
+
214
+ $ html = str_replace ("\r" , '' , $ doc ->saveHTML ());
193
215
194
216
// Convert back to markdown
195
217
$ markdown = $ this ->htmlConverter ->convert ($ html );
196
218
197
- // Fix some things
198
- $ markdown = preg_replace ('/{{<(.+)>}}/m ' , '{{<$1>}} ' , $ markdown );
199
- $ markdown = str_replace (['{{{% ' , '{{{< ' ], ['{{% ' , '{{< ' ], $ markdown );
200
- $ markdown = preg_replace ('/({{% .+ %}}) ([^\s])/ ' , "$1 \n$2 " , $ markdown );
201
- $ markdown = preg_replace ('@([^\s]) ({{% /.+ %}})@ ' , "$1 \n$2 " , $ markdown );
219
+ // Restore short codes and insert tags
220
+ $ markdown = $ this ->restoreSpecialTags ($ markdown );
202
221
203
- // Restore and transform refs
204
- $ markdown = preg_replace ('/REF::(.+)\. ' .$ sourceLang .'\.md::REF/ ' , '{{< ref "$1. ' .$ targetLang .'.md" >}} ' , $ markdown );
222
+ // Fix line breaks for short codes
223
+ $ markdown = preg_replace ('/({{% .+ %}})([^\n])/ ' , "$1 \n$2 " , $ markdown );
224
+ $ markdown = preg_replace ('@([^\n])({{% /.+ %}})@ ' , "$1 \n$2 " , $ markdown );
225
+
226
+ // Transform refs
227
+ $ markdown = preg_replace ('/{{< ref "(.+)\. ' .$ sourceLang .'\.md" >}}/ ' , '{{< ref "$1. ' .$ targetLang .'.md" >}} ' , $ markdown );
205
228
206
229
// Add warning
207
230
$ markdown = self ::MACHINE_TRANSLATED_WARNING ."\n\n" .$ markdown ;
@@ -215,74 +238,20 @@ private function translateFile(OutputInterface $output, string $sourceFilePath,
215
238
$ output ->writeln (' » ' .Path::makeRelative ($ targetFilePath , self ::$ manualPath ));
216
239
}
217
240
218
- private function translateNode (\DOMNode $ node , string $ sourceLang , string $ targetLang ): void
219
- {
220
- // Do not translate code blocks
221
- if (\in_array ($ node ->nodeName , ['pre ' , 'code ' ], true )) {
222
- return ;
223
- }
224
-
225
- // Recursively process child nodes, but not for certain elements like paragraphs or table cells
226
- if ($ node ->hasChildNodes () && !\in_array ($ node ->nodeName , ['p ' , 'td ' , 'th ' , 'li ' ], true )) {
227
- foreach ($ node ->childNodes as $ child ) {
228
- $ this ->translateNode ($ child , $ sourceLang , $ targetLang );
229
- }
230
-
231
- return ;
232
- }
233
-
234
- // Retrieve the inner HTML content of the node
235
- $ inner = '' ;
236
-
237
- if ($ node instanceof Element) {
238
- $ inner = $ node ->innerHTML ;
239
- } else {
240
- $ inner = $ node ->textContent ;
241
- }
242
-
243
- // Remove superfluous whitespace, as this interferes with the translation output
244
- $ inner = str_replace (["\r" , "\n" ], ['' , ' ' ], $ inner );
245
- $ inner = str_replace (' ' , ' ' , $ inner );
246
- $ inner = trim ($ inner );
247
-
248
- if (empty ($ inner )) {
249
- return ;
250
- }
251
-
252
- // Translate the content of the node
253
- $ translationConfig = new TranslationConfig (
254
- $ inner ,
255
- strtoupper ($ targetLang ),
256
- strtoupper ($ sourceLang ),
257
- ['xml ' ], [], [],
258
- TextHandlingEnum::SPLITSENTENCES_NONEWLINES ,
259
- );
260
-
261
- /** @var Translation $translationResult */
262
- $ translationResult = $ this ->deepl ->getTranslation ($ translationConfig );
263
- $ translatedText = $ translationResult ->getText ();
264
-
265
- // Write the translated content back into the node
266
- if ($ node instanceof Element) {
267
- $ node ->innerHTML = $ translatedText ;
268
- } else {
269
- $ node ->textContent = $ translatedText ;
270
- }
271
- }
272
-
273
241
private function processMeta (string $ meta , string $ sourceLang , string $ targetLang , string $ targetFilePath ): string
274
242
{
275
243
$ inner = trim (substr ($ meta , 3 , -3 ));
276
244
277
245
// Parse YAML data
278
- $ data = Yaml::parse ($ inner );
246
+ $ data = array_filter ( Yaml::parse ($ inner) );
279
247
280
248
// Remove url
281
249
unset($ data ['url ' ]);
282
250
283
251
// Set alias
284
252
$ aliasPath = Path::makeRelative ($ targetFilePath , self ::$ manualPath );
285
253
$ aliasPath = str_replace ('. ' .$ targetLang .'.md ' , '' , $ aliasPath );
254
+ $ aliasPath = str_replace ('_index ' , '' , $ aliasPath );
286
255
$ aliasPath = Path::join ($ targetLang , $ aliasPath );
287
256
$ data ['aliases ' ] = ['/ ' .$ aliasPath .'/ ' ];
288
257
@@ -311,4 +280,31 @@ private function processMeta(string $meta, string $sourceLang, string $targetLan
311
280
312
281
return "--- \n" .$ yaml ."--- \n" ;
313
282
}
283
+
284
/**
 * Swaps every "{{ ... }}" construct in the given text for a numeric
 * placeholder of the form "%%<index>%%".
 *
 * Each replaced construct is appended to $this->specialTagsIndex; the
 * placeholder carries the position at which it was stored, so that
 * restoreSpecialTags() can put the original text back later.
 *
 * @param string $buffer Text that may contain "{{ ... }}" constructs
 *
 * @return string The text with all such constructs replaced by placeholders
 */
private function replaceSpecialTags(string $buffer): string
{
    // Matches only constructs without any braces inside, i.e. the innermost ones
    $pattern = '/{{[^{}]+}}/';

    $stash = function (array $hit): string {
        // The next free slot of the index becomes the placeholder number
        $placeholder = '%%'.\count($this->specialTagsIndex).'%%';
        $this->specialTagsIndex[] = $hit[0];

        return $placeholder;
    };

    // Once an inner construct has been replaced, the construct enclosing it
    // no longer contains braces and is picked up by the following pass.
    while (1 === preg_match($pattern, $buffer)) {
        $buffer = preg_replace_callback($pattern, $stash, $buffer);
    }

    return $buffer;
}
297
+
298
/**
 * Replaces "%%<index>%%" placeholders with the original text stored under
 * that index in $this->specialTagsIndex.
 *
 * A restored entry may itself contain placeholders (nested constructs), so
 * the buffer is processed repeatedly until a pass restores nothing.
 *
 * Placeholders whose index is not present in $this->specialTagsIndex are
 * left untouched. Previously such a placeholder made the callback return
 * null from a ": string" closure, which raises a TypeError.
 *
 * @param string $buffer Text containing "%%<index>%%" placeholders
 *
 * @return string The text with all known placeholders replaced
 */
private function restoreSpecialTags(string $buffer): string
{
    do {
        $restored = 0;

        $result = preg_replace_callback(
            '/%%(\d+)%%/',
            function (array $matches) use (&$restored): string {
                $index = (int) $matches[1];

                // Unknown index: keep the text as it is instead of failing
                if (!isset($this->specialTagsIndex[$index])) {
                    return $matches[0];
                }

                ++$restored;

                return $this->specialTagsIndex[$index];
            },
            $buffer
        );

        // preg_replace_callback() yields null on a PCRE error; keep the last good buffer
        if (null === $result) {
            break;
        }

        $buffer = $result;

        // Stop as soon as a pass restores nothing: the remaining placeholders
        // (if any) are unknown and would otherwise be matched forever.
    } while ($restored > 0);

    return $buffer;
}
314
310
}
0 commit comments