Commit 271f3c1

linjieccc and sijunhe authored
Fix uie predictor (PaddlePaddle#4081)
* fix input names for uie
* update predictor

Co-authored-by: Sijun He <[email protected]>
1 parent cc085b5 commit 271f3c1

File tree

4 files changed (+37, −17 lines):

- model_zoo/uie/README.md
- model_zoo/uie/deploy/python/infer_cpu.py
- model_zoo/uie/deploy/python/infer_gpu.py
- model_zoo/uie/deploy/python/uie_predictor.py

model_zoo/uie/README.md

Lines changed: 16 additions & 1 deletion
@@ -915,19 +915,33 @@ paddlenlp server server:app --host 0.0.0.0 --port 8989
 python deploy/python/infer_cpu.py --model_path_prefix ${finetuned_model}/model
 ```

+Deploy a UIE-M model:
+
+```shell
+python deploy/python/infer_cpu.py --model_path_prefix ${finetuned_model}/model --multilingual
+```
+
 Description of configurable parameters:

 - `model_path_prefix`: path of the Paddle model files used for inference, including the file-name prefix. For example, if the model file path is `./export/model.pdiparams`, pass `./export/model`.
 - `position_prob`: the probability, between 0 and 1, that the model assigns to a span's start/end position; results below this threshold are filtered out. Defaults to 0.5. A span's final probability is the product of its start-position and end-position probabilities.
 - `max_seq_len`: maximum text length; inputs longer than this are split automatically. Defaults to 512.
 - `batch_size`: batch size; adjust it to your machine. Defaults to 4.
+- `multilingual`: whether the model is multilingual. Models fine-tuned from "uie-m-base", "uie-m-large", etc. are multilingual and need this set to True. Defaults to False.

 - GPU inference example

 On GPU, deploy with the following command:

 ```shell
-python deploy/python/infer_gpu.py --model_path_prefix export/model --use_fp16 --device_id 0
+python deploy/python/infer_gpu.py --model_path_prefix ${finetuned_model}/model --use_fp16 --device_id 0
+```
+
+Deploy a UIE-M model:
+
+```shell
+python deploy/python/infer_gpu.py --model_path_prefix ${finetuned_model}/model --use_fp16 --device_id 0 --multilingual
 ```

 Description of configurable parameters:
@@ -938,6 +952,7 @@ paddlenlp server server:app --host 0.0.0.0 --port 8989
 - `max_seq_len`: maximum text length; inputs longer than this are split automatically. Defaults to 512.
 - `batch_size`: batch size; adjust it to your machine. Defaults to 4.
 - `device_id`: GPU device ID. Defaults to 0.
+- `multilingual`: whether the model is multilingual. Models fine-tuned from "uie-m-base", "uie-m-large", etc. are multilingual and need this set to True. Defaults to False.

 <a name="CCKS比赛"></a>
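The deploy scripts are thin wrappers around `UIEPredictor`. Below is a minimal sketch (not part of this commit) of driving the predictor directly with the new flag; the attribute names mirror those read in this diff (`multilingual`, `position_prob`, `max_seq_len`, `batch_size`, `device`, `schema`), while the model path, schema values, and the backend fields (`model_path_prefix`, `use_fp16`, `device_id`) are illustrative assumptions.

```python
# Minimal sketch, not from the PR: emulate the namespace that infer_cpu.py's
# argparse produces and hand it to UIEPredictor. Paths and schema are
# hypothetical placeholders.
from types import SimpleNamespace

from uie_predictor import UIEPredictor  # model_zoo/uie/deploy/python

args = SimpleNamespace(
    model_path_prefix="export/model",  # hypothetical export prefix
    multilingual=True,                 # set True for uie-m-* fine-tuned models
    position_prob=0.5,
    max_seq_len=512,
    batch_size=4,
    device="cpu",
    device_id=0,
    use_fp16=False,
    schema=["时间", "选手"],           # hypothetical extraction schema
)

predictor = UIEPredictor(args)
# How the predictor is invoked is not shown in this diff; see uie_predictor.py
# for the public entry point before calling it.
```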

model_zoo/uie/deploy/python/infer_cpu.py

Lines changed: 1 addition & 2 deletions
@@ -13,10 +13,8 @@
 # limitations under the License.

 import argparse
-import math
 from pprint import pprint

-import paddle
 from uie_predictor import UIEPredictor


@@ -35,6 +33,7 @@ def parse_args():
         type=float,
         help="Probability threshold for start/end index probability.",
     )
+    parser.add_argument("--multilingual", action="store_true", help="Whether the model is multilingual.")
     parser.add_argument(
         "--max_seq_len",
         default=512,
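A note on the new flag's semantics: `action="store_true"` gives `--multilingual` a default of `False`, so existing single-language deployments keep working unchanged. A self-contained illustration:

```python
# store_true flags default to False and flip to True only when passed.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--multilingual", action="store_true", help="Whether the model is multilingual.")

print(parser.parse_args([]).multilingual)                  # False
print(parser.parse_args(["--multilingual"]).multilingual)  # True
```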

model_zoo/uie/deploy/python/infer_gpu.py

Lines changed: 1 addition & 1 deletion
@@ -15,7 +15,6 @@
 import argparse
 from pprint import pprint

-import paddle
 from uie_predictor import UIEPredictor


@@ -39,6 +38,7 @@ def parse_args():
         action="store_true",
         help="Whether to use fp16 inference; only takes effect when deploying on GPU.",
     )
+    parser.add_argument("--multilingual", action="store_true", help="Whether the model is multilingual.")
     parser.add_argument(
         "--max_seq_len",
         default=512,

model_zoo/uie/deploy/python/uie_predictor.py

Lines changed: 19 additions & 13 deletions
@@ -12,14 +12,13 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-import six
-import os
 import math
-import numpy as np
-import onnxruntime as ort
+import os
+import re

-import paddle
+import onnxruntime as ort
 import paddle2onnx
+import six

 from paddlenlp.transformers import AutoTokenizer
 from paddlenlp.utils.tools import get_bool_ids_greater_than, get_span
@@ -45,8 +44,8 @@ def __init__(self, model_path_prefix, device="cpu", use_fp16=False, device_id=0):
             print(">>> [InferBackend] Use GPU to inference ...")
             if use_fp16:
                 print(">>> [InferBackend] Use FP16 to inference ...")
-                from onnxconverter_common import float16
                 import onnx
+                from onnxconverter_common import float16

                 fp16_model_file = os.path.join(infer_model_dir, "fp16_model.onnx")
                 onnx_model = onnx.load_model(float_onnx_file)
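The import reorder above sits in the FP16 conversion path. In isolation, that conversion looks roughly like the sketch below; the exact call in uie_predictor.py is not shown in this hunk, so treat the `convert_float_to_float16` usage and the file paths as assumptions based on typical onnxconverter-common usage.

```python
# Sketch: convert an exported FP32 ONNX model to FP16 with onnxconverter-common
# (the library imported above). Paths are hypothetical placeholders.
import onnx
from onnxconverter_common import float16

onnx_model = onnx.load_model("export/inference.onnx")  # FP32 model, hypothetical path
fp16_model = float16.convert_float_to_float16(onnx_model, keep_io_types=True)
onnx.save_model(fp16_model, "export/fp16_model.onnx")
```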
@@ -62,7 +61,7 @@ def __init__(self, model_path_prefix, device="cpu", use_fp16=False, device_id=0):
         self.predictor = ort.InferenceSession(onnx_model, sess_options=sess_options, providers=providers)
         if device == "gpu":
             assert "CUDAExecutionProvider" in self.predictor.get_providers(), (
-                f"The environment for GPU inference is not set properly. "
+                "The environment for GPU inference is not set properly. "
                 "A possible cause is that you have installed both onnxruntime and onnxruntime-gpu. "
                 "Please run the following commands to reinstall: \n "
                 "1) pip uninstall -y onnxruntime onnxruntime-gpu \n 2) pip install onnxruntime-gpu"
@@ -87,6 +86,7 @@ def __init__(self, args):
         self._position_prob = args.position_prob
         self._max_seq_len = args.max_seq_len
         self._batch_size = args.batch_size
+        self._multilingual = args.multilingual
         self._schema_tree = None
         self.set_schema(args.schema)
         if args.device == "cpu":
@@ -167,12 +167,18 @@ def _single_stage_predict(self, inputs):
         end_probs = []
         for idx in range(0, len(texts), self._batch_size):
             l, r = idx, idx + self._batch_size
-            input_dict = {
-                "input_ids": encoded_inputs["input_ids"][l:r].astype("int64"),
-                "token_type_ids": encoded_inputs["token_type_ids"][l:r].astype("int64"),
-                "pos_ids": encoded_inputs["position_ids"][l:r].astype("int64"),
-                "att_mask": encoded_inputs["attention_mask"][l:r].astype("int64"),
-            }
+            if self._multilingual:
+                input_dict = {
+                    "input_ids": encoded_inputs["input_ids"][l:r].astype("int64"),
+                    "position_ids": encoded_inputs["position_ids"][l:r].astype("int64"),
+                }
+            else:
+                input_dict = {
+                    "input_ids": encoded_inputs["input_ids"][l:r].astype("int64"),
+                    "token_type_ids": encoded_inputs["token_type_ids"][l:r].astype("int64"),
+                    "position_ids": encoded_inputs["position_ids"][l:r].astype("int64"),
+                    "attention_mask": encoded_inputs["attention_mask"][l:r].astype("int64"),
+                }
             start_prob, end_prob = self._infer(input_dict)
             start_prob = start_prob.tolist()
             end_prob = end_prob.tolist()
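This hunk is the substance of the fix: the feed keys must match the exported graph's declared input names exactly ("position_ids"/"attention_mask" rather than the old "pos_ids"/"att_mask"), and UIE-M graphs declare only `input_ids` and `position_ids`. A hedged sketch of checking this against any exported model (paths and shapes are hypothetical):

```python
# Sketch, not from the PR: onnxruntime rejects a feed whose keys don't match
# the graph's inputs, which is the failure the old key names triggered.
# Building the feed from get_inputs() sidesteps the mismatch.
import numpy as np
import onnxruntime as ort

sess = ort.InferenceSession("export/inference.onnx", providers=["CPUExecutionProvider"])
expected = [inp.name for inp in sess.get_inputs()]
print(expected)  # UIE: ['input_ids', 'token_type_ids', 'position_ids', 'attention_mask']
                 # UIE-M: ['input_ids', 'position_ids'] (per the branch above)

batch = {
    "input_ids": np.ones((1, 8), dtype="int64"),
    "token_type_ids": np.zeros((1, 8), dtype="int64"),
    "position_ids": np.arange(8, dtype="int64")[None, :],
    "attention_mask": np.ones((1, 8), dtype="int64"),
}
feed = {name: batch[name] for name in expected}  # feed only declared inputs
start_prob, end_prob = sess.run(None, feed)      # UIE exports two outputs
```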
