diff --git a/.gitignore b/.gitignore index e1f9e47b924..2f61662a1ae 100644 --- a/.gitignore +++ b/.gitignore @@ -27,6 +27,7 @@ source/louis dlldata.c *.pdb .sconsign.dblite +user_docs/*/*.md.sub user_docs/*/*.html user_docs/*/*.css extras/controllerClient/x86/nvdaController.h diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 12ee6d84d81..bc511ad3a34 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,4 +1,4 @@ -exclude: ^user_docs/(?!en/).*$ +exclude: ^user_docs/(?!en/).+/.+\.md$ # https://pre-commit.ci/ # Configuration for Continuous Integration service diff --git a/appveyor/scripts/setSconsArgs.ps1 b/appveyor/scripts/setSconsArgs.ps1 index 7e0ad79b0b2..96e65e07701 100644 --- a/appveyor/scripts/setSconsArgs.ps1 +++ b/appveyor/scripts/setSconsArgs.ps1 @@ -1,5 +1,5 @@ $ErrorActionPreference = "Stop"; -$sconsOutTargets = "launcher developerGuide changes userGuide keyCommands client moduleList" +$sconsOutTargets = "launcher developerGuide changes userGuide keyCommands client moduleList nvdaL10nUtil" if(!$env:APPVEYOR_PULL_REQUEST_NUMBER -and $env:feature_buildAppx) { $sconsOutTargets += " appx" } diff --git a/projectDocs/dev/developerGuide/sconscript b/projectDocs/dev/developerGuide/sconscript index 590727221f8..fe27d69426b 100644 --- a/projectDocs/dev/developerGuide/sconscript +++ b/projectDocs/dev/developerGuide/sconscript @@ -12,7 +12,20 @@ env = env.Clone() devDocsOutputDir = outputDir.Dir("devDocs") # Build the developer guide and move it to the output directory -htmlFile = env.md2html("developerGuide.md") +mdFile = env.File("developerGuide.md") +# first substitute some variables such as NVDA version and URL into the markdown file +mdFileSub = env.Substfile( + target=mdFile.abspath.replace(".md", ".md.sub"), + source=mdFile, + SUBST_DICT={ + "NVDA_VERSION": env["version"], + }, +) +htmlFile = env.Command( + target=mdFile.abspath.replace(".md", ".html"), + source=mdFileSub, + action=[f'@{sys.executable} user_docs/md2html.py 
-t developerGuide "$SOURCE" "$TARGET"'], +) devGuide = env.Command( target=devDocsOutputDir.File("developerGuide.html"), source=htmlFile, action=Move("$TARGET", "$SOURCE") ) diff --git a/projectDocs/translating/crowdin.md b/projectDocs/translating/crowdin.md index b59899a358f..8aff66e1143 100644 --- a/projectDocs/translating/crowdin.md +++ b/projectDocs/translating/crowdin.md @@ -1,9 +1,9 @@ # Translating using Crowdin -Crowdin is used to translate the main NVDA interface. +Crowdin is used to translate the main NVDA interface and user documentation. NVDA's Crowdin project: . -This document covers setting up a Crowdin account, connecting it with PoEdit, and translating the main interface using Crowdin and PoEdit. +This document covers setting up a Crowdin account, connecting it with PoEdit, and translating the main interface and user documentation using Crowdin and PoEdit. ## Setup @@ -17,47 +17,29 @@ Please include your Crowdin username and the languages you wish to translate. It is recommended that you use the latest version of PoEdit and NVDA for translating. Alternatively, you can use the [Crowdin web interface](https://support.crowdin.com/online-editor/) directly. -As PoEdit only supports viewing approved strings, large translators team need to co-ordinate submitting unapproved strings to prevent conflicts. +As PoEdit only supports viewing approved strings, large translator teams need to co-ordinate submitting unapproved strings to prevent conflicts. Using Crowdin's interface avoids this problem. -PoEdit supports connecting with Crowdin directly. PoEdit's homepage is: 1. Download the latest Windows PoEdit version at 1. Install it by following the on-screen instructions, the default options should be sufficient. -1. When launching PoEdit: - 1. Choose "Translate cloud project" - 1. Connect your Crowdin account - 1. 
Select NVDA and the language you wish to translate ### Translation reviews - -Translated strings will need to be reviewed and approved by a proofreader before being included in NVDA. -A proofreader is required for each language. -Proofreader status is granted on a case-by-case basis by messaging the [translators mailing list](https://groups.io/g/nvda-translations) or - -Proofreaders approve strings using the [Crowdin web interface](https://support.crowdin.com/online-editor/). -PoEdit does not support viewing unapproved strings from other translators. -When manually uploading to Crowdin from PoEdit, proofreaders are able to auto-approve all submitted strings. +Due to accessibility issues, for now translation approvals have been disabled on Crowdin. +Any translation uploaded to Crowdin is automatically available in the project. +However, joining the project as a translator is by invitation only. ## Translation workflows -There are 3 common workflows for translating with Crowdin: +There are 2 common workflows for translating with Crowdin: -1. Only on Crowdin's web interface, either with: - - only one proofreader approving their own translations, - - or with many translators making suggestions and a proofreader approving them. -1. Multiple translators translating on PoEdit. - - Using Crowdin cloud synchronization. - - Proofreaders approve the translations on Crowdin's web interface. -1. Translating on PoEdit without cloud synchronization and performing manual uploads to Crowdin. - - Translators with proofreader status can upload strings manually with automatic approval. - As such, this may be a preference for single or small-team translators using PoEdit. - - Manual uploads without cloud synchronization means conflicts can occur, translator teams must be co-ordinated if following this approach. +1. Translating strings directly via Crowdin's interface. Or +1. Downloading from Crowdin, translating with Poedit and uploading again. 
## Translating using PoEdit -After opening a .po file you will be placed on a list with all of the strings to translate. +After opening a .po or .xliff file you will be placed on a list with all of the strings to translate. You can read the status bar to see how many strings have already been translated, the number of untranslated messages, and how many are fuzzy. A fuzzy string is a message which has been automatically translated, thus it may be wrong. @@ -77,12 +59,15 @@ Each time you press this key, PoEdit saves the po file, and if you check compile NVDA provides additional shortcuts for PoEdit which are described in [the User Guide](https://www.nvaccess.org/files/nvda/documentation/userGuide.html#Poedit). -If you are unsure of meaning of the original interface message, consult automatic comments (also called translator comments), by pressing `control+shift+a`. +If you are unsure of the meaning of the original interface message, consult automatic comments (also called translator comments), by pressing `control+shift+a`. Some comments provide an example output message to help you understand what NVDA will say when speaking or brailling such messages. -## Translating the interface +## Translating NVDA's interface + +* Download nvda.po from the Files section of your language on Crowdin. +* Open the po file in Poedit, translate, and save the file. +* Upload the po file back to Crowdin. -Open "nvda.po" for the language you want to translate in PoEdit. Alternatively, you can use the [Crowdin interface directly](https://support.crowdin.com/online-editor/). ### Messages with formatting strings @@ -160,7 +145,7 @@ In Crowdin, this information appears at the end of the context section. ### Testing the interface translation -1. To test the current interface messages, save the current nvda.po file, and copy the nvda.mo file to the following location: `nvdadir/locale/langcode/LC_MESSAGES` +1. 
To test the current interface messages, save the current nvda.po file in Poedit, and copy the nvda.mo file to the following location: `nvdadir/locale/langcode/LC_MESSAGES` - `nvdadir`: the directory where NVDA has been installed - `langcode`: the ISO 639-1 language code for your language (e.g. en for English, es for Spanish, etc.) 1. Restart NVDA, then go to the NVDA menu, go to Preferences and choose General Settings, or press `NVDA+control+g` to open General Settings. @@ -168,3 +153,48 @@ In Crowdin, this information appears at the end of the context section. 1. The messages you have translated should now be heard or brailled in your native language provided that the synthesizer you are using supports your language or a braille code for your language exists. Whenever you add or update your interface translations, repeat the steps above (copying the updated .mo file and restarting NVDA) to test your updated translation messages. + +## Translating User Documentation + +Documentation available for translation includes: + +* The NVDA user guide (userGuide.xliff) +* The NVDA What's New document (changes.xliff) + +To translate any of these files: + +* Download the xliff file from the Files section of your language on Crowdin. + * Make sure to choose "Download" not "Export xliff". +* Make a copy of this file. +* Open the xliff file in Poedit, translate, and save the file. +* Use the nvdaL10nUtil program to strip the xliff so that it only contains translations that were added / changed. E.g. +``` +nvdaL10nUtil stripXliff -o +``` +* Upload the xliff file back to Crowdin. If it is a stripped xliff file, it is safe to check the `allow target to match source` checkbox. + +Alternatively, you can use the [Crowdin interface directly](https://support.crowdin.com/online-editor/). + +### Translating markdown + +The English NVDA user documentation is written in markdown syntax. +The xliff file you are directly translating has been generated from that markdown file. 
+It contains the content of any line that requires translation, shown in the order it appears in the original markdown file. + +Structural lines that do not contain any translatable content (such as blank lines, hidden table header rows, table header body separator lines etc.) are not included here. + +Structural syntax from the beginning and end of lines (such as heading prefix like `###`, heading anchors like `{#Introduction}`, and initial and final vertical bars on table rows) has been removed from the content to translate, but is available to view in the translator notes for that line. +Content may still however contain inline markdown syntax such as links, inline code fences (``` `` ```), and table column separators (`|`). +This syntax must be kept intact when translating. + +All strings for translation contain translator notes which include: +* Line: the original line number in the markdown file. +* Prefix: any structural markdown on the line before this content. +* Suffix: any structural markdown on the line after this content. + +### Verifying your translation + +Whenever you have saved the xliff file with Poedit, you can use the nvdaL10nUtil program to generate the html version of the documentation file. E.g. +``` +nvdaL10nUtil xliff2html -t [userGuide|changes|keyCommands] +``` diff --git a/projectDocs/translating/readme.md b/projectDocs/translating/readme.md index 4c4f5cf77ae..da65c7498ed 100644 --- a/projectDocs/translating/readme.md +++ b/projectDocs/translating/readme.md @@ -23,19 +23,14 @@ For further information please see the [Release Process page](https://github.com You can view [Crowdin](https://crowdin.com/project/nvda) for an up to date report on the status of translating the NVDA interface. If you would like to improve or would like to work on a new language, please write to the [NVDA translations mailing list](https://groups.io/g/nvda-translations). 
-The translation status of user documentation (User Guide and Changes) can only be checked by translators. - ## New Localization Start by subscribing to the translation list above so that you can get help and advice. The current process for translation is split between multiple processes: -- Crowdin for the NVDA interface -- The legacy SVN translation system for the User Guide and Changes files. -This is planned to move to Crowdin. -- The legacy SVN translation system for Character Descriptions, Symbols and Gestures. -This is planned to move to GitHub. +- Crowdin for the NVDA interface and user documentation +- GitHub for Character Descriptions, Symbols and Gestures. Read [Files to be Localized](#files-to-be-localized) to learn the translation for process for these. @@ -57,6 +52,6 @@ Note that linked guides may be out of date, as the translation system is undergo - characterDescriptions.dic: names of characters in your language, see [Translating Character Descriptions](https://www.nvaccess.org/files/nvda/documentation/developerGuide.html#characterDescriptions) for more info. - symbols.dic: names of symbols and punctuation in your language, see [Translating Symbols](https://www.nvaccess.org/files/nvda/documentation/developerGuide.html#symbolPronunciation) for more information. - gestures.ini: remapping of gestures for your language, see [Translating Gestures](https://www.nvaccess.org/files/nvda/documentation/developerGuide.html#TranslatingGestures) for more information. -- userGuide.md: the User Guide, see [Translating the User Guide](https://github.com/nvaccess/nvda/wiki/TranslatingUserGuide) for more information. -- changes.md (optional): a list of changes between releases, see [Translating Changes](https://github.com/nvaccess/nvda/wiki/TranslatingChanges) for more information. +- userGuide.md: the User Guide, see [Translating using Crowdin](./crowdin.md) for more information. 
+- changes.md (optional): a list of changes between releases, see [Translating using Crowdin](./crowdin.md) for more information. - Add-ons (optional): a set of optional features that users can install, see [Translating Addons](https://github.com/nvaccess/nvda/wiki/TranslatingAddons) for more information. diff --git a/pyproject.toml b/pyproject.toml index de45a286b18..dd3054d9adf 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -73,6 +73,10 @@ ignore_packages = [ "importlib-metadata", "opencv-python", + # Incompatible Apache licenses. + # For building NVDA, but not included. + "nuitka", + # Incompatible Apache licenses. # System testing frameworks, not bundled with NVDA. "robotframework", diff --git a/requirements.txt b/requirements.txt index ab96d3cf03f..db493975438 100644 --- a/requirements.txt +++ b/requirements.txt @@ -21,6 +21,8 @@ pycaw==20240210 # Packaging NVDA py2exe==0.13.0.2 +# xliff2html is packaged with nuitka +nuitka==2.4.8 # Creating XML unit test reports unittest-xml-reporting==3.2.0 diff --git a/sconstruct b/sconstruct index 8bbf1708f6c..92f9608aa89 100755 --- a/sconstruct +++ b/sconstruct @@ -149,7 +149,6 @@ env = Environment( tools=[ "textfile", "gettextTool", - "md2html", "doxygen", "recursiveInstall", "m4", @@ -346,7 +345,23 @@ for xliffFile in env.Glob(os.path.join(userDocsDir.path, "*", "*.xliff")): ) # Allow all markdown files to be converted to html in user_docs for mdFile in env.Glob(os.path.join(userDocsDir.path, "*", "*.md")): - htmlFile = env.md2html(mdFile) + # first substitute some variables such as NVDA version and URL into the markdown file + mdFileSub = env.Substfile( + target=mdFile.abspath.replace(".md", ".md.sub"), + source=mdFile, + SUBST_DICT={ + "NVDA_VERSION": env["version"], + "NVDA_URL": versionInfo.url, + "NVDA_COPYRIGHT_YEARS": versionInfo.copyrightYears, + }, + ) + lang = os.path.split(os.path.dirname(mdFile.path))[-1] + docType = os.path.basename(mdFile.path).split(".")[0] + htmlFile = env.Command( + 
target=mdFile.abspath.replace(".md", ".html"), + source=mdFileSub, + action=[f'@{sys.executable} user_docs/md2html.py -l {lang} -t {docType} "$SOURCE" "$TARGET"'], + ) styleInstallPath = os.path.dirname(mdFile.abspath) installedStyle = env.Install(styleInstallPath, styles) installedHeadingsStyle = env.Install(styleInstallPath, numberedHeadingsStyle) @@ -362,11 +377,14 @@ for mdFile in env.Glob(os.path.join(userDocsDir.path, "*", "*.md")): env.Depends(htmlFile, mdFile) # Create key commands files -for userGuideFile in env.Glob(os.path.join(userDocsDir.path, "*", "userGuide.md")): - keyCommandsHtmlFile = env.md2html( - userGuideFile.abspath.replace("userGuide.md", "keyCommands.html"), userGuideFile +for userGuideFileSub in env.Glob(os.path.join(userDocsDir.path, "*", "userGuide.md.sub")): + lang = os.path.split(os.path.dirname(userGuideFileSub.path))[-1] + keyCommandsHtmlFile = env.Command( + target=userGuideFileSub.abspath.replace("userGuide.md.sub", "keyCommands.html"), + source=userGuideFileSub, + action=[f'@{sys.executable} user_docs/md2html.py -l {lang} -t keyCommands "$SOURCE" "$TARGET"'], ) - env.Depends(keyCommandsHtmlFile, userGuideFile) + env.Depends(keyCommandsHtmlFile, userGuideFileSub) # Build unicode CLDR dictionaries env.SConscript("cldrDict_sconscript", exports=["env", "sourceDir"]) @@ -713,3 +731,11 @@ source = env.Dir(os.path.join(os.getcwd(), "dist")) # Putting the target in the output dir automatically causes AppVeyor to package it as an artefact target = env.File(os.path.join(outputDir.abspath, "library_modules.txt")) env.Alias("moduleList", env.GenerateModuleList(target, source)) + +nvdaL10nUtil = env.Command( + target=outputDir.File("nvdaL10nUtil.exe"), + source="user_docs/nvdaL10nUtil.py", + ENV=os.environ, + action=f"nuitka --assume-yes-for-downloads --remove-output --standalone --onefile --output-dir={outputDir.abspath} --include-module=mdx_truly_sane_lists --include-module=markdown_link_attr_modifier --include-module=mdx_gh_links 
user_docs/nvdaL10nUtil.py", +) +env.Alias("nvdaL10nUtil", nvdaL10nUtil) diff --git a/user_docs/markdownTranslate.py b/user_docs/markdownTranslate.py index c70353a0e60..0705ce0f8df 100644 --- a/user_docs/markdownTranslate.py +++ b/user_docs/markdownTranslate.py @@ -13,7 +13,6 @@ import re from itertools import zip_longest from xml.sax.saxutils import escape as xmlEscape -from xml.sax.saxutils import unescape as xmlUnescape import difflib from dataclasses import dataclass import subprocess @@ -32,12 +31,17 @@ def prettyPathString(path: str) -> str: + cwd = os.getcwd() + if os.path.normcase(os.path.splitdrive(path)[0]) != os.path.normcase(os.path.splitdrive(cwd)[0]): + return path return os.path.relpath(path, os.getcwd()) @contextlib.contextmanager def createAndDeleteTempFilePath_contextManager( - dir: str | None = None, prefix: str | None = None, suffix: str | None = None + dir: str | None = None, + prefix: str | None = None, + suffix: str | None = None, ) -> Generator[str, None, None]: """A context manager that creates a temporary file and deletes it when the context is exited""" with tempfile.NamedTemporaryFile(dir=dir, prefix=prefix, suffix=suffix, delete=False) as tempFile: @@ -64,7 +68,10 @@ def getLastCommitID(filePath: str) -> str: def getGitDir() -> str: # Run the git rev-parse command to get the root of the git directory result = subprocess.run( - ["git", "rev-parse", "--show-toplevel"], capture_output=True, text=True, check=True + ["git", "rev-parse", "--show-toplevel"], + capture_output=True, + text=True, + check=True, ) gitDir = result.stdout.strip() if not os.path.isdir(gitDir): @@ -130,7 +137,7 @@ def generateSkeleton(mdPath: str, outputPath: str) -> Result_generateSkeleton: skelLine = mdLine outputFile.write(skelLine) print( - f"Generated skeleton file with {res.numTotalLines} total lines and {res.numTranslationPlaceholders} translation placeholders" + f"Generated skeleton file with {res.numTotalLines} total lines and {res.numTranslationPlaceholders} 
translation placeholders", ) return res @@ -158,15 +165,18 @@ def extractSkeleton(xliffPath: str, outputPath: str): if skeletonNode is None: raise ValueError("No skeleton found in xliff file") skeletonContent = skeletonNode.text.strip() - outputFile.write(xmlUnescape(skeletonContent)) + outputFile.write(skeletonContent) print(f"Extracted skeleton to {prettyPathString(outputPath)}") def updateSkeleton( - origMdPath: str, newMdPath: str, origSkelPath: str, outputPath: str + origMdPath: str, + newMdPath: str, + origSkelPath: str, + outputPath: str, ) -> Result_updateSkeleton: print( - f"Creating updated skeleton file {prettyPathString(outputPath)} from {prettyPathString(origSkelPath)} with changes from {prettyPathString(origMdPath)} to {prettyPathString(newMdPath)}..." + f"Creating updated skeleton file {prettyPathString(outputPath)} from {prettyPathString(origSkelPath)} with changes from {prettyPathString(origMdPath)} to {prettyPathString(newMdPath)}...", ) res = Result_updateSkeleton() with contextlib.ExitStack() as stack: @@ -204,7 +214,7 @@ def updateSkeleton( f"Updated skeleton file with {res.numAddedLines} added lines " f"({res.numAddedTranslationPlaceholders} translation placeholders), " f"{res.numRemovedLines} removed lines ({res.numRemovedTranslationPlaceholders} translation placeholders), " - f"and {res.numUnchangedLines} unchanged lines ({res.numUnchangedTranslationPlaceholders} translation placeholders)" + f"and {res.numUnchangedLines} unchanged lines ({res.numUnchangedTranslationPlaceholders} translation placeholders)", ) return res @@ -227,14 +237,14 @@ def generateXliff( dir=os.path.dirname(outputPath), prefix=os.path.basename(mdPath), suffix=".skel", - ) + ), ) generateSkeleton(mdPath=mdPath, outputPath=skelPath) with open(skelPath, "r", encoding="utf8") as skelFile: skelContent = skelFile.read() res = Result_generateXliff() print( - f"Generating xliff file {prettyPathString(outputPath)} from {prettyPathString(mdPath)} and 
{prettyPathString(skelPath)}..." + f"Generating xliff file {prettyPathString(outputPath)} from {prettyPathString(mdPath)} and {prettyPathString(skelPath)}...", ) with contextlib.ExitStack() as stack: mdFile = stack.enter_context(open(mdPath, "r", encoding="utf8")) @@ -245,12 +255,13 @@ def generateXliff( outputFile.write( '\n' f'\n' - f'\n' + f'\n', ) outputFile.write(f"\n{xmlEscape(skelContent)}\n\n") res.numTranslatableStrings = 0 for lineNo, (mdLine, skelLine) in enumerate( - zip_longest(mdFile.readlines(), skelContent.splitlines(keepends=True)), start=1 + zip_longest(mdFile.readlines(), skelContent.splitlines(keepends=True)), + start=1, ): mdLine = mdLine.rstrip() skelLine = skelLine.rstrip() @@ -263,7 +274,7 @@ def generateXliff( raise ValueError(f'Line {lineNo}: does not end with "{suffix}", {mdLine=}, {skelLine=}') source = mdLine[len(prefix) : len(mdLine) - len(suffix)] outputFile.write( - f'\n' "\n" f'line: {lineNo + 1}\n' + f'\n' "\n" f'line: {lineNo + 1}\n', ) if prefix: outputFile.write(f'prefix: {xmlEscape(prefix)}\n') @@ -274,7 +285,7 @@ def generateXliff( f"\n" f"{xmlEscape(source)}\n" "\n" - "\n" + "\n", ) else: if mdLine != skelLine: @@ -297,19 +308,19 @@ def updateXliff( # uses generateMarkdown, extractSkeleton, updateSkeleton, and generateXliff to generate an updated xliff file. outputDir = os.path.dirname(outputPath) print( - f"Generating updated xliff file {prettyPathString(outputPath)} from {prettyPathString(xliffPath)} and {prettyPathString(mdPath)}..." 
+ f"Generating updated xliff file {prettyPathString(outputPath)} from {prettyPathString(xliffPath)} and {prettyPathString(mdPath)}...", ) with contextlib.ExitStack() as stack: origMdPath = stack.enter_context( - createAndDeleteTempFilePath_contextManager(dir=outputDir, prefix="generated_", suffix=".md") + createAndDeleteTempFilePath_contextManager(dir=outputDir, prefix="generated_", suffix=".md"), ) generateMarkdown(xliffPath=xliffPath, outputPath=origMdPath, translated=False) origSkelPath = stack.enter_context( - createAndDeleteTempFilePath_contextManager(dir=outputDir, prefix="extracted_", suffix=".skel") + createAndDeleteTempFilePath_contextManager(dir=outputDir, prefix="extracted_", suffix=".skel"), ) extractSkeleton(xliffPath=xliffPath, outputPath=origSkelPath) updatedSkelPath = stack.enter_context( - createAndDeleteTempFilePath_contextManager(dir=outputDir, prefix="updated_", suffix=".skel") + createAndDeleteTempFilePath_contextManager(dir=outputDir, prefix="updated_", suffix=".skel"), ) updateSkeleton( origMdPath=origMdPath, @@ -333,7 +344,7 @@ def translateXliff( allowBadAnchors: bool = False, ) -> Result_translateXliff: print( - f"Creating {lang} translated xliff file {prettyPathString(outputPath)} from {prettyPathString(xliffPath)} using {prettyPathString(pretranslatedMdPath)}..." 
+ f"Creating {lang} translated xliff file {prettyPathString(outputPath)} from {prettyPathString(xliffPath)} using {prettyPathString(pretranslatedMdPath)}...", ) res = Result_translateXliff() with contextlib.ExitStack() as stack: @@ -347,7 +358,7 @@ def translateXliff( skeletonNode = xliffRoot.find("./xliff:file/xliff:skeleton", namespaces=namespace) if skeletonNode is None: raise ValueError("No skeleton found in xliff file") - skeletonContent = xmlUnescape(skeletonNode.text).strip() + skeletonContent = skeletonNode.text.strip() for lineNo, (skelLine, pretranslatedLine) in enumerate( zip_longest(skeletonContent.splitlines(), pretranslatedMdFile.readlines()), start=1, @@ -358,7 +369,7 @@ def translateXliff( prefix, ID, suffix = m.groups() if prefix and not pretranslatedLine.startswith(prefix): raise ValueError( - f'Line {lineNo} of translation does not start with "{prefix}", {pretranslatedLine=}, {skelLine=}' + f'Line {lineNo} of translation does not start with "{prefix}", {pretranslatedLine=}, {skelLine=}', ) if suffix and not pretranslatedLine.endswith(suffix): if allowBadAnchors and (m := re_heading.match(pretranslatedLine)): @@ -366,22 +377,30 @@ def translateXliff( suffix = m.group(3) if suffix and not pretranslatedLine.endswith(suffix): raise ValueError( - f'Line {lineNo} of translation: does not end with "{suffix}", {pretranslatedLine=}, {skelLine=}' + f'Line {lineNo} of translation: does not end with "{suffix}", {pretranslatedLine=}, {skelLine=}', ) translation = pretranslatedLine[len(prefix) : len(pretranslatedLine) - len(suffix)] - unit = xliffRoot.find(f'./xliff:file/xliff:unit[@id="{ID}"]', namespaces=namespace) - if unit is not None: - segment = unit.find("./xliff:segment", namespaces=namespace) - if segment is not None: - target = lxml.etree.Element("target") - target.text = xmlEscape(translation) - target.tail = "\n" - segment.append(target) - res.numTranslatedStrings += 1 + try: + unit = xliffRoot.find(f'./xliff:file/xliff:unit[@id="{ID}"]', 
namespaces=namespace) + if unit is not None: + segment = unit.find("./xliff:segment", namespaces=namespace) + if segment is not None: + target = lxml.etree.Element("target") + target.text = translation + target.tail = "\n" + segment.append(target) + res.numTranslatedStrings += 1 + else: + raise ValueError(f"No segment found for unit {ID}") else: - raise ValueError(f"No segment found for unit {ID}") - else: - raise ValueError(f"Cannot locate Unit {ID} in xliff file") + raise ValueError(f"Cannot locate Unit {ID} in xliff file") + except Exception as e: + e.add_note(f"Line {lineNo}: {pretranslatedLine=}, {skelLine=}") + raise + elif skelLine != pretranslatedLine: + raise ValueError( + f"Line {lineNo}: pretranslated line {pretranslatedLine!r}, does not match skeleton line {skelLine!r}", + ) xliff.write(outputPath, encoding="utf8", xml_declaration=True) print(f"Translated xliff file with {res.numTranslatedStrings} translated strings") return res @@ -392,6 +411,7 @@ class Result_generateMarkdown: numTotalLines = 0 numTranslatableStrings = 0 numTranslatedStrings = 0 + numBadTranslationStrings = 0 def generateMarkdown(xliffPath: str, outputPath: str, translated: bool = True) -> Result_generateMarkdown: @@ -407,37 +427,52 @@ def generateMarkdown(xliffPath: str, outputPath: str, translated: bool = True) - skeletonNode = xliffRoot.find("./xliff:file/xliff:skeleton", namespaces=namespace) if skeletonNode is None: raise ValueError("No skeleton found in xliff file") - skeletonContent = xmlUnescape(skeletonNode.text).strip() - for line in skeletonContent.splitlines(keepends=True): + skeletonContent = skeletonNode.text.strip() + for lineNum, line in enumerate(skeletonContent.splitlines(keepends=True), 1): res.numTotalLines += 1 if m := re_translationID.match(line): prefix, ID, suffix = m.groups() res.numTranslatableStrings += 1 unit = xliffRoot.find(f'./xliff:file/xliff:unit[@id="{ID}"]', namespaces=namespace) - if unit is not None: - segment = unit.find("./xliff:segment", 
namespaces=namespace) - if segment is not None: - source = segment.find("./xliff:source", namespaces=namespace) - if translated: - target = segment.find("./xliff:target", namespaces=namespace) - else: - target = None - if target is not None and target.text: - res.numTranslatedStrings += 1 - translation = xmlUnescape(target.text) - elif source is not None and source.text: - translation = xmlUnescape(source.text) - else: - raise ValueError(f"No source or target found for unit {ID}") - else: - raise ValueError(f"No segment found for unit {ID}") - else: + if unit is None: raise ValueError(f"Cannot locate Unit {ID} in xliff file") + segment = unit.find("./xliff:segment", namespaces=namespace) + if segment is None: + raise ValueError(f"No segment found for unit {ID}") + source = segment.find("./xliff:source", namespaces=namespace) + if source is None: + raise ValueError(f"No source found for unit {ID}") + translation = "" + if translated: + target = segment.find("./xliff:target", namespaces=namespace) + if target is not None: + targetText = target.text + if targetText: + translation = targetText + # Crowdin treats empty targets () as a literal translation. + # Filter out such strings and count them as bad translations. + if translation in ( + "", + "<target/>", + "", + "<target></target>", + ): + res.numBadTranslationStrings += 1 + print(f"Warning: line {lineNum} contained a corrupt empty translation. 
Using source") + translation = "" + else: + res.numTranslatedStrings += 1 + # If we have no translation, use the source text + if not translation: + sourceText = source.text + if sourceText is None: + raise ValueError(f"No source text found for unit {ID}") + translation = sourceText outputFile.write(f"{prefix}{translation}{suffix}\n") else: outputFile.write(line) print( - f"Generated markdown file with {res.numTotalLines} total lines, {res.numTranslatableStrings} translatable strings, and {res.numTranslatedStrings} translated strings" + f"Generated markdown file with {res.numTotalLines} total lines, {res.numTranslatableStrings} translatable strings, and {res.numTranslatedStrings} translated strings. Ignoring {res.numBadTranslationStrings} bad translated strings", ) return res @@ -457,7 +492,7 @@ def ensureMarkdownFilesMatch(path1: str, path2: str, allowBadAnchors: bool = Fal and line1.count("|") == line2.count("|") ): print( - f"Warning: ignoring cell padding of post table header line at line {lineNo}: {line1}, {line2}" + f"Warning: ignoring cell padding of post table header line at line {lineNo}: {line1}, {line2}", ) continue if ( @@ -466,7 +501,7 @@ def ensureMarkdownFilesMatch(path1: str, path2: str, allowBadAnchors: bool = Fal and line1.count("|") == line2.count("|") ): print( - f"Warning: ignoring cell padding of hidden header row at line {lineNo}: {line1}, {line2}" + f"Warning: ignoring cell padding of hidden header row at line {lineNo}: {line1}, {line2}", ) continue if allowBadAnchors and (m1 := re_heading.match(line1)) and (m2 := re_heading.match(line2)): @@ -554,14 +589,29 @@ def pretranslateAllPossibleLanguages(langsDir: str, mdBaseName: str): help="The markdown file to generate the xliff file for", ) generateXliffParser.add_argument( - "-o", "--output", dest="output", type=str, required=True, help="The file to output the xliff file to" + "-o", + "--output", + dest="output", + type=str, + required=True, + help="The file to output the xliff file to", ) 
updateXliffParser = commandParser.add_parser("updateXliff") updateXliffParser.add_argument( - "-x", "--xliff", dest="xliff", type=str, required=True, help="The original xliff file" + "-x", + "--xliff", + dest="xliff", + type=str, + required=True, + help="The original xliff file", ) updateXliffParser.add_argument( - "-m", "--newMarkdown", dest="md", type=str, required=True, help="The new markdown file" + "-m", + "--newMarkdown", + dest="md", + type=str, + required=True, + help="The new markdown file", ) updateXliffParser.add_argument( "-o", @@ -573,10 +623,20 @@ def pretranslateAllPossibleLanguages(langsDir: str, mdBaseName: str): ) translateXliffParser = commandParser.add_parser("translateXliff") translateXliffParser.add_argument( - "-x", "--xliff", dest="xliff", type=str, required=True, help="The xliff file to translate" + "-x", + "--xliff", + dest="xliff", + type=str, + required=True, + help="The xliff file to translate", ) translateXliffParser.add_argument( - "-l", "--lang", dest="lang", type=str, required=True, help="The language to translate to" + "-l", + "--lang", + dest="lang", + type=str, + required=True, + help="The language to translate to", ) translateXliffParser.add_argument( "-p", diff --git a/site_scons/site_tools/md2html.py b/user_docs/md2html.py similarity index 71% rename from site_scons/site_tools/md2html.py rename to user_docs/md2html.py index 929df794782..d5f2c9b3d1e 100644 --- a/site_scons/site_tools/md2html.py +++ b/user_docs/md2html.py @@ -3,16 +3,12 @@ # This file may be used under the terms of the GNU General Public License, version 2 or later. 
# For more details see: https://www.gnu.org/licenses/gpl-2.0.html +import argparse from copy import deepcopy -from importlib.util import find_spec import io -import pathlib import re import shutil -import SCons.Node.FS -import SCons.Environment - DEFAULT_EXTENSIONS = frozenset( { # Supports tables, HTML mixed with markdown, code blocks, custom attributes and more @@ -55,16 +51,6 @@ """.strip() -def _replaceNVDATags(md: str, env: SCons.Environment.Environment) -> str: - import versionInfo - - # Replace tags in source file - md = md.replace("NVDA_VERSION", env["version"]) - md = md.replace("NVDA_URL", versionInfo.url) - md = md.replace("NVDA_COPYRIGHT_YEARS", versionInfo.copyrightYears) - return md - - def _getTitle(mdBuffer: io.StringIO, isKeyCommands: bool = False) -> str: if isKeyCommands: TITLE_RE = re.compile(r"^$") @@ -122,7 +108,7 @@ def _generateSanitizedHTML(md: str, isKeyCommands: bool = False) -> str: extensions = set(DEFAULT_EXTENSIONS) if isKeyCommands: - from user_docs.keyCommandsDoc import KeyCommandsExtension + from keyCommandsDoc import KeyCommandsExtension extensions.add(KeyCommandsExtension()) @@ -145,36 +131,27 @@ def _generateSanitizedHTML(md: str, isKeyCommands: bool = False) -> str: return htmlOutput -def md2html_actionFunc( - target: list[SCons.Node.FS.File], - source: list[SCons.Node.FS.File], - env: SCons.Environment.Environment, -): - isKeyCommands = target[0].path.endswith("keyCommands.html") - isUserGuide = target[0].path.endswith("userGuide.html") - isDevGuide = target[0].path.endswith("developerGuide.html") - isChanges = target[0].path.endswith("changes.html") - - with open(source[0].path, "r", encoding="utf-8") as mdFile: +def main(source: str, dest: str, lang: str = "en", docType: str | None = None): + print(f"Converting {docType or 'document'} at {source} to {dest}, {lang=}") + isUserGuide = docType == "userGuide" + isDevGuide = docType == "developerGuide" + isChanges = docType == "changes" + isKeyCommands = docType == "keyCommands" 
+ if docType and not any([isUserGuide, isDevGuide, isChanges, isKeyCommands]): + raise ValueError(f"Unknown docType {docType}") + with open(source, "r", encoding="utf-8") as mdFile: mdStr = mdFile.read() - mdStr = _replaceNVDATags(mdStr, env) - with io.StringIO() as mdBuffer: mdBuffer.write(mdStr) title = _getTitle(mdBuffer, isKeyCommands) - lang = pathlib.Path(source[0].path).parent.name - if isDevGuide and lang == "developerGuide": - # Parent folder in this case is the developerGuide folder in project docs - lang = "en" - if isUserGuide or isDevGuide: extraStylesheet = '' elif isChanges or isKeyCommands: extraStylesheet = "" else: - raise ValueError(f"Unknown target type for {target[0].path}") + raise ValueError(f"Unknown target type for {dest}") htmlBuffer = io.StringIO() htmlBuffer.write( @@ -195,7 +172,7 @@ def md2html_actionFunc( htmlBuffer.seek(0, io.SEEK_END) htmlBuffer.write("\n\n\n") - with open(target[0].path, "w", encoding="utf-8") as targetFile: + with open(dest, "w", encoding="utf-8") as targetFile: # Make next read at start of buffer htmlBuffer.seek(0) shutil.copyfileobj(htmlBuffer, targetFile) @@ -203,23 +180,17 @@ def md2html_actionFunc( htmlBuffer.close() -def exists(env: SCons.Environment.Environment) -> bool: - for ext in [ - "markdown", - "markdown_link_attr_modifier", - "mdx_truly_sane_lists", - "mdx_gh_links", - "nh3", - "user_docs.keyCommandsDoc", - ]: - if find_spec(ext) is None: - return False - return True - - -def generate(env: SCons.Environment.Environment): - env["BUILDERS"]["md2html"] = env.Builder( - action=env.Action(md2html_actionFunc, lambda t, s, e: f"Converting {s[0].path} to {t[0].path}"), - suffix=".html", - src_suffix=".md", +if __name__ == "__main__": + args = argparse.ArgumentParser() + args.add_argument("-l", "--lang", help="Language code", action="store", default="en") + args.add_argument( + "-t", + "--docType", + help="Type of document", + action="store", + choices=["userGuide", "developerGuide", "changes", 
"keyCommands"], ) + args.add_argument("source", help="Path to the markdown file") + args.add_argument("dest", help="Path to the resulting html file") + args = args.parse_args() + main(source=args.source, dest=args.dest, lang=args.lang, docType=args.docType) diff --git a/user_docs/nvdaL10nUtil.py b/user_docs/nvdaL10nUtil.py new file mode 100644 index 00000000000..faef12f1996 --- /dev/null +++ b/user_docs/nvdaL10nUtil.py @@ -0,0 +1,203 @@ +# A part of NonVisual Desktop Access (NVDA) +# Copyright (C) 2024 NV Access Limited. +# This file is covered by the GNU General Public License. +# See the file COPYING for more details. + + +import tempfile +import lxml.etree +import os +import argparse +import markdownTranslate +import md2html + + +def fetchLanguageFromXliff(xliffPath: str, source: bool = False) -> str: + """ + Fetch the language from an xliff file. + This function also prints a message to the console stating the detected language if found, or a warning if not found. + :param xliffPath: Path to the xliff file + :param source: If True, fetch the source language, otherwise fetch the target language + :return: The language code + """ + xliff = lxml.etree.parse(xliffPath) + xliffRoot = xliff.getroot() + if xliffRoot.tag != "{urn:oasis:names:tc:xliff:document:2.0}xliff": + raise ValueError(f"Not an xliff file: {xliffPath}") + lang = xliffRoot.get("srcLang" if source else "trgLang") + if lang is None: + print(f"Could not detect language for xliff file {xliffPath}, {source=}") + else: + print(f"Detected language {lang} for xliff file {xliffPath}, {source=}") + return lang + + +def stripXliff(xliffPath: str, outputPath: str, oldXliffPath: str | None = None): + """ + Remove prefilled, empty or corrupt target tags from an xliff file before upload to Crowdin. + Optionally also remove translations that already exist in an old xliff file. 
+ This function also prints a message to the console stating the number of segments processed and the numbers of empty, corrupt, source and existing translations removed. + :param xliffPath: Path to the xliff file to be stripped + :param outputPath: Path to the resulting xliff file + :param oldXliffPath: Path to the old xliff file containing existing translations that should be also stripped. + """ + print(f"Creating stripped xliff at {outputPath} from {xliffPath}") + namespace = {"xliff": "urn:oasis:names:tc:xliff:document:2.0"} + xliff = lxml.etree.parse(xliffPath) + xliffRoot = xliff.getroot() + if xliffRoot.tag != "{urn:oasis:names:tc:xliff:document:2.0}xliff": + raise ValueError(f"Not an xliff file: {xliffPath}") + skeletonNode = xliffRoot.find("./xliff:file/xliff:skeleton", namespaces=namespace) + if skeletonNode is not None: + skeletonNode.getparent().remove(skeletonNode) + if oldXliffPath: + oldXliff = lxml.etree.parse(oldXliffPath) + oldXliffRoot = oldXliff.getroot() + if oldXliffRoot.tag != "{urn:oasis:names:tc:xliff:document:2.0}xliff": + raise ValueError(f"Not an xliff file: {oldXliffPath}") + else: + oldXliffRoot = None + file = xliffRoot.find("./xliff:file", namespaces=namespace) + units = file.findall("./xliff:unit", namespaces=namespace) + segmentCount = 0 + emptyTargetCount = 0 + corruptTargetcount = 0 + sourceTargetcount = 0 + existingTranslationCount = 0 + for unit in units: + notes = unit.find("./xliff:notes", namespaces=namespace) + if notes is not None: + unit.remove(notes) + segment = unit.find("./xliff:segment", namespaces=namespace) + if segment is None: + print("Warning: No segment element in unit") + continue + state = segment.get("state") + source = segment.find("./xliff:source", namespaces=namespace) + if source is None: + print("Warning: No source element in segment") + continue + sourceText = source.text + segmentCount += 1 + target = segment.find("./xliff:target", namespaces=namespace) + if target is None: + continue + targetText = 
target.text + # remove empty / self-closing target tags + if not targetText: + emptyTargetCount += 1 + file.remove(unit) + # remove corrupt target tags + elif targetText in ( + "", + "<target/>", + "", + "<target></target>", + ): + corruptTargetcount += 1 + file.remove(unit) + # remove target tags pre-filled with source text + elif (not state or state == "initial") and targetText == sourceText: + sourceTargetcount += 1 + file.remove(unit) + # remove translations that already exist in the old xliff file + elif oldXliffRoot is not None: + unitId = unit.get("id") + oldTarget = oldXliffRoot.find( + f'./xliff:file/xliff:unit[@id="{unitId}"]/xliff:segment/xliff:target', + namespaces=namespace, + ) + if oldTarget is not None and oldTarget.text == targetText: + existingTranslationCount += 1 + file.remove(unit) + xliff.write(outputPath) + keptTranslations = ( + segmentCount - emptyTargetCount - corruptTargetcount - sourceTargetcount - existingTranslationCount + ) + print( + f"Processed {segmentCount} segments, removing {emptyTargetCount} empty targets, {corruptTargetcount} corrupt targets, {sourceTargetcount} source targets, and {existingTranslationCount} existing translations, resulting in {keptTranslations} translations kept", + ) + + +if __name__ == "__main__": + args = argparse.ArgumentParser() + commands = args.add_subparsers(title="commands", dest="command", required=True) + command_xliff2md = commands.add_parser("xliff2md", help="Convert xliff to markdown") + command_xliff2md.add_argument( + "-u", + "--untranslated", + help="Produce the untranslated markdown file", + action="store_true", + default=False, + ) + command_xliff2md.add_argument("xliffPath", help="Path to the xliff file") + command_xliff2md.add_argument("mdPath", help="Path to the resulting markdown file") + command_md2html = commands.add_parser("md2html", help="Convert markdown to html") + command_md2html.add_argument("-l", "--lang", help="Language code", action="store", default="en") + 
command_md2html.add_argument( + "-t", + "--docType", + help="Type of document", + action="store", + choices=["userGuide", "developerGuide", "changes", "keyCommands"], + ) + command_md2html.add_argument("mdPath", help="Path to the markdown file") + command_md2html.add_argument("htmlPath", help="Path to the resulting html file") + command_xliff2html = commands.add_parser("xliff2html", help="Convert xliff to html") + command_xliff2html.add_argument("-l", "--lang", help="Language code", action="store", required=False) + command_xliff2html.add_argument( + "-t", + "--docType", + help="Type of document", + action="store", + choices=["userGuide", "developerGuide", "changes", "keyCommands"], + ) + command_xliff2html.add_argument( + "-u", + "--untranslated", + help="Produce the untranslated markdown file", + action="store_true", + default=False, + ) + command_xliff2html.add_argument("xliffPath", help="Path to the xliff file") + command_xliff2html.add_argument("htmlPath", help="Path to the resulting html file") + command_stripXliff = commands.add_parser( + "stripXliff", + help="Remove prefilled, empty or corrupt target tags from an xliff file before upload to Crowdin. 
Optionally also remove translations that already exist in an old xliff file", + ) + command_stripXliff.add_argument( + "-o", + "--oldXliffPath", + help="Path to the old xliff file containing existing translations that should be stripped", + action="store", + default=None, + ) + command_stripXliff.add_argument("xliffPath", help="Path to the xliff file") + command_stripXliff.add_argument("outputPath", help="Path to the resulting xliff file") + args = args.parse_args() + match args.command: + case "xliff2md": + markdownTranslate.generateMarkdown( + xliffPath=args.xliffPath, + outputPath=args.mdPath, + translated=not args.untranslated, + ) + case "md2html": + md2html.main(source=args.mdPath, dest=args.htmlPath, lang=args.lang, docType=args.docType) + case "xliff2html": + lang = args.lang or fetchLanguageFromXliff(args.xliffPath, source=args.untranslated) + temp_mdFile = tempfile.NamedTemporaryFile(suffix=".md", delete=False, mode="w", encoding="utf-8") + temp_mdFile.close() + try: + markdownTranslate.generateMarkdown( + xliffPath=args.xliffPath, + outputPath=temp_mdFile.name, + translated=not args.untranslated, + ) + md2html.main(source=temp_mdFile.name, dest=args.htmlPath, lang=lang, docType=args.docType) + finally: + os.remove(temp_mdFile.name) + case "stripXliff": + stripXliff(args.xliffPath, args.outputPath, args.oldXliffPath) + case _: + raise ValueError(f"Unknown command {args.command}")