Skip to content

Commit

Permalink
Update translator of OP doc building. (#554)
Browse files Browse the repository at this point in the history
* Use the translator from Alibaba to translate the doc, which is faster and more stable

* Update all beta to stable

* Typo Fix

* Typo Fix

* Fix typo

---------

Co-authored-by: Daoyuan Chen <[email protected]>
  • Loading branch information
HYLcool and yxdyc authored Jan 17, 2025
1 parent 0815c29 commit 80d0b27
Show file tree
Hide file tree
Showing 4 changed files with 165 additions and 170 deletions.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ repos:
language: python
require_serial: true
additional_dependencies:
- googletrans==4.0.2
- translators==5.9.3

exclude: |
(?x)^(
Expand Down
27 changes: 11 additions & 16 deletions .pre-commit-hooks/build_op_doc.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
import ast
import asyncio
import json
import os
import re
from typing import Any, List

from googletrans import Translator
import translators as ts

DOC_PATH = 'docs/Operators.md'

Expand Down Expand Up @@ -152,7 +151,7 @@ def analyze_resource_tag(code):
def analyze_model_tags(code):
"""
Analyze the model tag for the given code content string. SHOULD be one of
the "Modal Tags" in `tagging_mappings.json`. It makes the choice by finding
the "Model Tags" in `tagging_mappings.json`. It makes the choice by finding
the `model_type` arg in `prepare_model` method invocation.
"""
pattern = r'model_type=[\'|\"](.*?)[\'|\"]'
Expand Down Expand Up @@ -431,20 +430,16 @@ def generate_op_table_section(op_type, op_record_list):
return '\n\n'.join(doc)


async def translate_text(text, dest='zh'):
async with Translator() as translator:
res = await translator.translate(text, src='en', dest=dest)
return res


def get_op_desc_in_en_zh_batched(descs):
zhs = asyncio.run(translate_text(descs, dest='zh'))
return [desc + ' ' + zh.text for desc, zh in zip(descs, zhs)]


def get_op_desc_in_en_zh(desc):
zh = asyncio.run(translate_text(desc, dest='zh')).text
return desc + ' ' + zh
separator = '\n'
batch = separator.join(descs)
res = ts.translate_text(batch,
translator='alibaba',
from_language='en',
to_language='zh')
zhs = res.split(separator)
assert len(zhs) == len(descs)
return [desc + ' ' + zh.strip() for desc, zh in zip(descs, zhs)]


def parse_op_record_from_current_doc():
Expand Down
2 changes: 1 addition & 1 deletion .pre-commit-hooks/tag_mappings.json
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@
"desc": "stable version OP. Based on the beta version, OP optimizations related to DJ (e.g. model management, batched processing, OP fusion, ...) are added to this OP. 表示 stable 版本算子。基于 beta 版本,完善了DJ相关的算子优化项(如模型管理,批处理,算子融合等)。"
}
},
"Modal Tags": {
"Model Tags": {
"api": {
"icon": "🔗API",
"desc": "equipped with API-based models. (e.g. ChatGPT, GPT-4o). 支持基于 API 调用模型(如 ChatGPT,GPT-4o)。"
Expand Down
Loading

0 comments on commit 80d0b27

Please sign in to comment.