Skip to content

Commit 478a5c1

Browse files
committed
Merge remote-tracking branch 'upstream/develop' into develop
2 parents 5e3d538 + bdfccac commit 478a5c1

File tree

149 files changed

+18534
-466
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

149 files changed

+18534
-466
lines changed

.github/workflows/deploy_docs.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ name: Develop Docs
22
on:
33
push:
44
branches: #设置更新哪个分支会更新站点
5-
- release/3.2
5+
- release/3.3
66
permissions:
77
contents: write
88
jobs:
@@ -27,5 +27,5 @@ jobs:
2727
- run: pip install mike mkdocs-material jieba mkdocs-git-revision-date-localized-plugin mkdocs-git-committers-plugin-2 mkdocs-git-authors-plugin mkdocs-static-i18n mkdocs-minify-plugin
2828
- run: |
2929
git fetch origin gh-pages --depth=1
30-
mike deploy --push --update-aliases 3.2 latest
30+
mike deploy --push --update-aliases 3.3 latest
3131
mike set-default --push latest

.precommit/check_imports.py

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -24,13 +24,13 @@
2424
from stdlib_list import stdlib_list
2525

2626
sys.path.append(str(pathlib.Path(__file__).parent.parent))
27-
from setup import DEP_SPECS, REQUIRED_DEPS
27+
from setup import REQUIRED_DEPS
2828

2929
# NOTE: We do not use `importlib.metadata.packages_distributions` here because
3030
# 1. It is supported only in Python 3.10+.
3131
# 2. It requires the packages to be installed, but we are doing a static check.
3232
MOD_TO_DEP = {
33-
"aistudio_sdk": "aistudio_sdk",
33+
"aistudio_sdk": "aistudio-sdk",
3434
"aiohttp": "aiohttp",
3535
"baidubce": "bce-python-sdk",
3636
"bs4": "beautifulsoup4",
@@ -43,9 +43,10 @@
4343
"fastapi": "fastapi",
4444
"filelock": "filelock",
4545
"filetype": "filetype",
46+
"flash_attn": "flash-attn",
4647
"ftfy": "ftfy",
4748
"GPUtil": "GPUtil",
48-
"huggingface_hub": "huggingface_hub",
49+
"huggingface_hub": "huggingface-hub",
4950
"imagesize": "imagesize",
5051
"jinja2": "Jinja2",
5152
"joblib": "joblib",
@@ -61,6 +62,7 @@
6162
"cv2": "opencv-contrib-python",
6263
"openpyxl": "openpyxl",
6364
"packaging": "packaging",
65+
"paddle2onnx": "paddle2onnx",
6466
"pandas": "pandas",
6567
"PIL": "pillow",
6668
"premailer": "premailer",
@@ -74,22 +76,28 @@
7476
"regex": "regex",
7577
"requests": "requests",
7678
"ruamel.yaml": "ruamel.yaml",
79+
"safetensors": "safetensors",
7780
"skimage": "scikit-image",
7881
"sklearn": "scikit-learn",
82+
"sentencepiece": "sentencepiece",
83+
"sglang": "sglang",
7984
"shapely": "shapely",
8085
"soundfile": "soundfile",
8186
"starlette": "starlette",
8287
"tiktoken": "tiktoken",
8388
"tokenizers": "tokenizers",
89+
"torch": "torch",
8490
"tqdm": "tqdm",
91+
"transformers": "transformers",
8592
"typing_extensions": "typing-extensions",
8693
"ujson": "ujson",
8794
"uvicorn": "uvicorn",
95+
"uvloop": "uvloop",
96+
"vllm": "vllm",
97+
"xformers": "xformers",
8898
"yarl": "yarl",
99+
"bidi": "python-bidi",
89100
}
90-
assert (
91-
set(MOD_TO_DEP.values()) == DEP_SPECS.keys()
92-
), f"`MOD_TO_DEP` should be updated to match `DEP_SPECS`. Symmetric difference: {set(MOD_TO_DEP.values()) ^ DEP_SPECS.keys()}"
93101
MOD_PATTERN = re.compile(
94102
rf"^(?:{'|'.join([re.escape(mod) for mod in MOD_TO_DEP])})(?=\.|$)"
95103
)
@@ -107,7 +115,11 @@
107115
"paddle3d",
108116
"paddlevideo",
109117
}
110-
MANUALLY_MANAGED_OPTIONAL_HEAVY_MODS = {"paddle_custom_device", "ultra_infer"}
118+
MANUALLY_MANAGED_OPTIONAL_HEAVY_MODS = {
119+
"paddle_custom_device",
120+
"ultra_infer",
121+
"fastdeploy",
122+
}
111123

112124

113125
def check(file_path):

README.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,10 @@ PaddleX 3.0 是基于飞桨框架构建的低代码开发工具,它集成了
3535

3636
## 📣 近期更新
3737

38+
🔥🔥 **2025.10.16,发布 PaddleX v3.3.0**,新增能力如下:
39+
40+
- **支持PaddleOCR-VL、PP-OCRv5多语种模型的推理部署能力。**
41+
3842
🔥🔥 **2025.8.20,发布 PaddleX v3.2.0**,新增能力如下:
3943

4044
- **部署能力升级:**

README_en.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,10 @@ PaddleX 3.0 is a low-code development tool for AI models built on the PaddlePadd
3737

3838
## 📣 Recent Updates
3939

40+
🔥🔥 **2025.10.16, PaddleX v3.3.0 Released**
41+
42+
- **Added support for inference and deployment of PaddleOCR-VL and PP-OCRv5 multilingual models.**
43+
4044
🔥🔥 **2025.8.20, PaddleX v3.2.0 Released**
4145

4246
- **Deployment Capability Upgrades:**
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from paddlex import create_pipeline
16+
17+
# Instantiate the pipeline registered under the name "PaddleOCR-VL".
pipeline = create_pipeline(pipeline="PaddleOCR-VL")

# Keyword options forwarded to `predict`; both optional document
# preprocessing switches are turned off for this demo.
predict_options = {
    "use_doc_orientation_classify": False,
    "use_doc_unwarping": False,
}
output = pipeline.predict(
    "/paddle/project/PaddleX/demo_paper.png",
    **predict_options,
)

# Every export below targets the same directory.
save_dir = "./output"

# Print each result, then export it through each of its `save_to_*` methods.
for res in output:
    res.print()
    res.save_to_img(save_dir)
    res.save_to_json(save_dir)
    res.save_to_xlsx(save_dir)
    res.save_to_html(save_dir)
    res.save_to_markdown(save_dir, pretty=False)

deploy/hps/sdk/pipelines/OCR/server/model_repo/ocr/1/model.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,7 @@ def run(self, input, log_id):
113113
text_det_box_thresh=input.textDetBoxThresh,
114114
text_det_unclip_ratio=input.textDetUnclipRatio,
115115
text_rec_score_thresh=input.textRecScoreThresh,
116+
return_word_box=input.returnWordBox,
116117
)
117118
)
118119

deploy/hps/sdk/pipelines/PP-StructureV3/server/pipeline_config.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ use_table_recognition: True
99
use_formula_recognition: True
1010
use_chart_recognition: False
1111
use_region_detection: True
12+
format_block_content: False
1213

1314
SubModules:
1415
LayoutDetection:
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
#!/usr/bin/env python
2+
3+
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License");
6+
# you may not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
17+
import argparse
18+
import sys
19+
from pathlib import Path
20+
21+
from paddlex_hps_client import triton_request, utils
22+
from tritonclient import grpc as triton_grpc
23+
24+
25+
def main():
    """Send one file to the ``layout-parsing`` Triton model and save the results.

    Command-line arguments:
        --file: Path of the input file (required).
        --file-type: Optional integer, 0 or 1, forwarded as ``fileType``.
        --no-visualization: When given, ``visualize=False`` is sent.
        --url: Address of the Triton gRPC endpoint (default ``localhost:8001``).

    For each entry of ``layoutParsingResults`` the pruned result is printed,
    the Markdown text is written to ``markdown_<i>/doc.md`` together with the
    images it references, and each output image is saved as
    ``<name>_<i>.jpg``.

    Exits with status 1 after printing the error code and message to stderr
    when the response's ``errorCode`` is non-zero.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--file", type=str, required=True)
    parser.add_argument("--file-type", type=int, choices=[0, 1])
    parser.add_argument("--no-visualization", action="store_true")
    parser.add_argument("--url", type=str, default="localhost:8001")

    args = parser.parse_args()

    client = triton_grpc.InferenceServerClient(args.url)
    input_ = {"file": utils.prepare_input_file(args.file)}
    # Optional fields are only sent when the user set them explicitly.
    if args.file_type is not None:
        input_["fileType"] = args.file_type
    if args.no_visualization:
        input_["visualize"] = False
    output = triton_request(client, "layout-parsing", input_)
    if output["errorCode"] != 0:
        print(f"Error code: {output['errorCode']}", file=sys.stderr)
        print(f"Error message: {output['errorMsg']}", file=sys.stderr)
        sys.exit(1)
    result = output["result"]
    for i, res in enumerate(result["layoutParsingResults"]):
        print(res["prunedResult"])
        md_dir = Path(f"markdown_{i}")
        md_dir.mkdir(exist_ok=True)
        # Write as UTF-8 explicitly: the platform default encoding may be
        # unable to represent non-ASCII text in the recognized document.
        (md_dir / "doc.md").write_text(res["markdown"]["text"], encoding="utf-8")
        for img_path, img in res["markdown"]["images"].items():
            img_path = md_dir / img_path
            img_path.parent.mkdir(parents=True, exist_ok=True)
            utils.save_output_file(img, img_path)
        print(f"Markdown document saved at {md_dir / 'doc.md'}")
        # NOTE(review): presumably `outputImages` is absent or null when
        # visualization is disabled -- confirm against the server schema.
        # Treat either case as "no images" rather than crashing.
        output_images = res.get("outputImages") or {}
        for img_name, img in output_images.items():
            img_path = f"{img_name}_{i}.jpg"
            # `parents=True` matches the Markdown-image branch above, so an
            # image name containing sub-directories does not fail.
            Path(img_path).parent.mkdir(parents=True, exist_ok=True)
            utils.save_output_file(img, img_path)
            print(f"Output image saved at {img_path}")
61+
62+
63+
if __name__ == "__main__":
64+
main()
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# paddlex-hps-client
2+
protobuf == 3.19.6
3+
tritonclient [grpc] == 2.15

0 commit comments

Comments
 (0)