Commit 271f3c1

linjieccc and sijunhe authored
Fix uie predictor (PaddlePaddle#4081)
* fix input names for uie
* update predictor

Co-authored-by: Sijun He <[email protected]>
1 parent cc085b5 commit 271f3c1

File tree

4 files changed (+37, −17 lines):

- model_zoo/uie/README.md
- model_zoo/uie/deploy/python/infer_cpu.py
- model_zoo/uie/deploy/python/infer_gpu.py
- model_zoo/uie/deploy/python/uie_predictor.py

model_zoo/uie/README.md

Lines changed: 16 additions & 1 deletion
@@ -915,19 +915,33 @@ paddlenlp server server:app --host 0.0.0.0 --port 8989
 python deploy/python/infer_cpu.py --model_path_prefix ${finetuned_model}/model
 ```

+Deploy a UIE-M model:
+
+```shell
+python deploy/python/infer_cpu.py --model_path_prefix ${finetuned_model}/model --multilingual
+```
+
 Description of configurable parameters:

 - `model_path_prefix`: path of the Paddle model files used for inference, including the file-name prefix. For example, if the model file path is `./export/model.pdiparams`, pass `./export/model`.
 - `position_prob`: the probability, between 0 and 1, that the model assigns to a span's start/end position; results below this threshold are filtered out. Defaults to 0.5. A span's final probability is the product of its start-position and end-position probabilities.
 - `max_seq_len`: maximum text length; inputs longer than this are split automatically. Defaults to 512.
 - `batch_size`: batch size; adjust it to your machine. Defaults to 4.
+- `multilingual`: whether the model is multilingual. Models fine-tuned from "uie-m-base", "uie-m-large", etc. are multilingual and need this set to True. Defaults to False.

 - GPU inference example

 On GPU, deploy with the following command:

 ```shell
-python deploy/python/infer_gpu.py --model_path_prefix export/model --use_fp16 --device_id 0
+python deploy/python/infer_gpu.py --model_path_prefix ${finetuned_model}/model --use_fp16 --device_id 0
+```
+
+Deploy a UIE-M model:
+
+```shell
+python deploy/python/infer_gpu.py --model_path_prefix ${finetuned_model}/model --use_fp16 --device_id 0 --multilingual
 ```

 Description of configurable parameters:
@@ -938,6 +952,7 @@ paddlenlp server server:app --host 0.0.0.0 --port 8989
 - `max_seq_len`: maximum text length; inputs longer than this are split automatically. Defaults to 512.
 - `batch_size`: batch size; adjust it to your machine. Defaults to 4.
 - `device_id`: GPU device ID. Defaults to 0.
+- `multilingual`: whether the model is multilingual. Models fine-tuned from "uie-m-base", "uie-m-large", etc. are multilingual and need this set to True. Defaults to False.

 <a name="CCKS比赛"></a>
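The deploy scripts are thin wrappers around `UIEPredictor`. Below is a minimal sketch (not part of this commit) of driving the predictor directly with the new flag; the attribute names mirror those read in this diff (`multilingual`, `position_prob`, `max_seq_len`, `batch_size`, `device`, `schema`), while the model path, schema values, and the backend fields (`model_path_prefix`, `use_fp16`, `device_id`) are illustrative assumptions.

```python
# Minimal sketch, not from the PR: emulate the namespace that infer_cpu.py's
# argparse produces and hand it to UIEPredictor. Paths and schema are
# hypothetical placeholders.
from types import SimpleNamespace

from uie_predictor import UIEPredictor  # model_zoo/uie/deploy/python

args = SimpleNamespace(
    model_path_prefix="export/model",  # hypothetical export prefix
    multilingual=True,                 # set True for uie-m-* fine-tuned models
    position_prob=0.5,
    max_seq_len=512,
    batch_size=4,
    device="cpu",
    device_id=0,
    use_fp16=False,
    schema=["时间", "选手"],           # hypothetical extraction schema
)

predictor = UIEPredictor(args)
# How the predictor is invoked is not shown in this diff; see uie_predictor.py
# for the public entry point before calling it.
```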

model_zoo/uie/deploy/python/infer_cpu.py

Lines changed: 1 addition & 2 deletions
@@ -13,10 +13,8 @@
 # limitations under the License.

 import argparse
-import math
 from pprint import pprint

-import paddle
 from uie_predictor import UIEPredictor


@@ -35,6 +33,7 @@ def parse_args():
         type=float,
         help="Probability threshold for start/end index probability.",
     )
+    parser.add_argument("--multilingual", action="store_true", help="Whether the model is multilingual.")
     parser.add_argument(
         "--max_seq_len",
         default=512,
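A note on the new flag's semantics: `action="store_true"` gives `--multilingual` a default of `False`, so existing single-language deployments keep working unchanged. A self-contained illustration:

```python
# store_true flags default to False and flip to True only when passed.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--multilingual", action="store_true", help="Whether the model is multilingual.")

print(parser.parse_args([]).multilingual)                  # False
print(parser.parse_args(["--multilingual"]).multilingual)  # True
```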

model_zoo/uie/deploy/python/infer_gpu.py

Lines changed: 1 addition & 1 deletion
@@ -15,7 +15,6 @@
 import argparse
 from pprint import pprint

-import paddle
 from uie_predictor import UIEPredictor


@@ -39,6 +38,7 @@ def parse_args():
         action="store_true",
         help="Whether to use fp16 inference; only takes effect when deploying on GPU.",
     )
+    parser.add_argument("--multilingual", action="store_true", help="Whether the model is multilingual.")
     parser.add_argument(
         "--max_seq_len",
         default=512,

model_zoo/uie/deploy/python/uie_predictor.py

Lines changed: 19 additions & 13 deletions
@@ -12,14 +12,13 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-import six
-import os
 import math
-import numpy as np
-import onnxruntime as ort
+import os
+import re

-import paddle
+import onnxruntime as ort
 import paddle2onnx
+import six

 from paddlenlp.transformers import AutoTokenizer
 from paddlenlp.utils.tools import get_bool_ids_greater_than, get_span
@@ -45,8 +44,8 @@ def __init__(self, model_path_prefix, device="cpu", use_fp16=False, device_id=0):
             print(">>> [InferBackend] Use GPU to inference ...")
             if use_fp16:
                 print(">>> [InferBackend] Use FP16 to inference ...")
-                from onnxconverter_common import float16
                 import onnx
+                from onnxconverter_common import float16

                 fp16_model_file = os.path.join(infer_model_dir, "fp16_model.onnx")
                 onnx_model = onnx.load_model(float_onnx_file)
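The import reorder above sits in the FP16 conversion path. In isolation, that conversion looks roughly like the sketch below; the exact call in uie_predictor.py is not shown in this hunk, so treat the `convert_float_to_float16` usage and the file paths as assumptions based on typical onnxconverter-common usage.

```python
# Sketch: convert an exported FP32 ONNX model to FP16 with onnxconverter-common
# (the library imported above). Paths are hypothetical placeholders.
import onnx
from onnxconverter_common import float16

onnx_model = onnx.load_model("export/inference.onnx")  # FP32 model, hypothetical path
fp16_model = float16.convert_float_to_float16(onnx_model, keep_io_types=True)
onnx.save_model(fp16_model, "export/fp16_model.onnx")
```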
@@ -62,7 +61,7 @@ def __init__(self, model_path_prefix, device="cpu", use_fp16=False, device_id=0):
         self.predictor = ort.InferenceSession(onnx_model, sess_options=sess_options, providers=providers)
         if device == "gpu":
             assert "CUDAExecutionProvider" in self.predictor.get_providers(), (
-                f"The environment for GPU inference is not set properly. "
+                "The environment for GPU inference is not set properly. "
                 "A possible cause is that you have installed both onnxruntime and onnxruntime-gpu. "
                 "Please run the following commands to reinstall: \n "
                 "1) pip uninstall -y onnxruntime onnxruntime-gpu \n 2) pip install onnxruntime-gpu"
@@ -87,6 +86,7 @@ def __init__(self, args):
         self._position_prob = args.position_prob
         self._max_seq_len = args.max_seq_len
         self._batch_size = args.batch_size
+        self._multilingual = args.multilingual
         self._schema_tree = None
         self.set_schema(args.schema)
         if args.device == "cpu":
@@ -167,12 +167,18 @@ def _single_stage_predict(self, inputs):
         end_probs = []
         for idx in range(0, len(texts), self._batch_size):
             l, r = idx, idx + self._batch_size
-            input_dict = {
-                "input_ids": encoded_inputs["input_ids"][l:r].astype("int64"),
-                "token_type_ids": encoded_inputs["token_type_ids"][l:r].astype("int64"),
-                "pos_ids": encoded_inputs["position_ids"][l:r].astype("int64"),
-                "att_mask": encoded_inputs["attention_mask"][l:r].astype("int64"),
-            }
+            if self._multilingual:
+                input_dict = {
+                    "input_ids": encoded_inputs["input_ids"][l:r].astype("int64"),
+                    "position_ids": encoded_inputs["position_ids"][l:r].astype("int64"),
+                }
+            else:
+                input_dict = {
+                    "input_ids": encoded_inputs["input_ids"][l:r].astype("int64"),
+                    "token_type_ids": encoded_inputs["token_type_ids"][l:r].astype("int64"),
+                    "position_ids": encoded_inputs["position_ids"][l:r].astype("int64"),
+                    "attention_mask": encoded_inputs["attention_mask"][l:r].astype("int64"),
+                }
             start_prob, end_prob = self._infer(input_dict)
             start_prob = start_prob.tolist()
             end_prob = end_prob.tolist()
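This hunk is the substance of the fix: the feed keys must match the exported graph's declared input names exactly ("position_ids"/"attention_mask" rather than the old "pos_ids"/"att_mask"), and UIE-M graphs declare only `input_ids` and `position_ids`. A hedged sketch of checking this against any exported model (paths and shapes are hypothetical):

```python
# Sketch, not from the PR: onnxruntime rejects a feed whose keys don't match
# the graph's inputs, which is the failure the old key names triggered.
# Building the feed from get_inputs() sidesteps the mismatch.
import numpy as np
import onnxruntime as ort

sess = ort.InferenceSession("export/inference.onnx", providers=["CPUExecutionProvider"])
expected = [inp.name for inp in sess.get_inputs()]
print(expected)  # UIE: ['input_ids', 'token_type_ids', 'position_ids', 'attention_mask']
                 # UIE-M: ['input_ids', 'position_ids'] (per the branch above)

batch = {
    "input_ids": np.ones((1, 8), dtype="int64"),
    "token_type_ids": np.zeros((1, 8), dtype="int64"),
    "position_ids": np.arange(8, dtype="int64")[None, :],
    "attention_mask": np.ones((1, 8), dtype="int64"),
}
feed = {name: batch[name] for name in expected}  # feed only declared inputs
start_prob, end_prob = sess.run(None, feed)      # UIE exports two outputs
```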
