Commit: update

lvhan028 committed Nov 10, 2023
1 parent 619f543 · commit 4a11a23

Showing 3 changed files with 5 additions and 54 deletions.
14 changes: 3 additions & 11 deletions lmdeploy/serve/turbomind/triton_models/preprocessing/1/model.py

@@ -42,9 +42,7 @@ def initialize(self, args):
         self.model_config = model_config = json.loads(args['model_config'])

         # Parse model output configs and convert Triton types to numpy types
-        input_names = [
-            'INPUT_ID', 'REQUEST_INPUT_LEN', 'BAD_WORDS_IDS', 'STOP_WORDS_IDS'
-        ]
+        input_names = ['INPUT_ID', 'REQUEST_INPUT_LEN']
         for input_name in input_names:
             setattr(
                 self,
@@ -89,8 +87,6 @@ def execute(self, requests):
             # Get input tensors
             query = pb_utils.get_input_tensor_by_name(request,
                                                       'QUERY').as_numpy()
-            request_output_len = pb_utils.get_input_tensor_by_name(
-                request, 'REQUEST_OUTPUT_LEN').as_numpy()

             # Preprocessing input data.
             input_id, request_input_len = self._create_request(query)
@@ -104,8 +100,6 @@
                 'REQUEST_INPUT_LEN',
                 np.array(request_input_len).astype(
                     self.request_input_len_dtype))
-            request_output_len_tensor = pb_utils.Tensor(
-                'REQUEST_OUTPUT_LEN', request_output_len)

             # Create InferenceResponse. You can set an error here in case
             # there was a problem with handling this inference request.
@@ -114,10 +108,8 @@
             #
             # pb_utils.InferenceResponse(
             #     output_tensors=..., TritonError("An error occurred"))
-            inference_response = pb_utils.InferenceResponse(output_tensors=[
-                input_id_tensor, request_input_len_tensor,
-                request_output_len_tensor
-            ])
+            inference_response = pb_utils.InferenceResponse(
+                output_tensors=[input_id_tensor, request_input_len_tensor])
             responses.append(inference_response)

         # You should return a list of pb_utils.InferenceResponse. Length
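The initialize hunk above is truncated inside the setattr call. For context, here is a plausible completion of that loop, a minimal sketch rather than the verbatim repository code: it resolves each remaining tensor's Triton dtype from the model config into a numpy dtype attribute (e.g. self.request_input_len_dtype, which execute uses above) via the standard pb_utils helpers.

    import json

    import triton_python_backend_utils as pb_utils  # provided inside Triton's Python backend


    class TritonPythonModel:

        def initialize(self, args):
            self.model_config = model_config = json.loads(args['model_config'])

            # Map each remaining tensor name to a numpy dtype attribute,
            # e.g. self.input_id_dtype and self.request_input_len_dtype.
            for input_name in ['INPUT_ID', 'REQUEST_INPUT_LEN']:
                setattr(
                    self, input_name.lower() + '_dtype',
                    pb_utils.triton_string_to_numpy(
                        pb_utils.get_output_config_by_name(
                            model_config, input_name)['data_type']))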

39 changes: 1 addition & 38 deletions lmdeploy/serve/turbomind/triton_models/preprocessing/config.pbtxt

@@ -7,53 +7,16 @@ input [
name: "QUERY"
data_type: TYPE_STRING
dims: [ -1 ]
},
{
name: "BAD_WORDS_DICT"
data_type: TYPE_STRING
dims: [ -1 ]
optional: true
},
{
name: "STOP_WORDS_DICT"
data_type: TYPE_STRING
dims: [ -1 ]
optional: true
},
{
name: "REQUEST_OUTPUT_LEN"
data_type: TYPE_UINT32
dims: [ -1 ]
}
]
output [
{
name: "INPUT_ID"
data_type: TYPE_UINT32
dims: [ -1 ]
},
{
name: "REQUEST_INPUT_LEN"
data_type: TYPE_UINT32
dims: [ 1 ]
},
{
name: "BAD_WORDS_IDS"
data_type: TYPE_INT32
dims: [ 2, -1 ]
},
{
name: "STOP_WORDS_IDS"
data_type: TYPE_INT32
dims: [ 2, -1 ]
},
{
name: "REQUEST_OUTPUT_LEN"
data_type: TYPE_UINT32
dims: [ -1 ]
},
{
name: "PROMPT_LEARNING_TASK_NAME_IDS"
name: "REQUEST_INPUT_LEN"
data_type: TYPE_UINT32
dims: [ 1 ]
}
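After this change, the preprocessing model's contract is a single QUERY string tensor in, with INPUT_ID and REQUEST_INPUT_LEN out. A minimal client-side sketch against the new config follows; the localhost:8001 address and the preprocessing model name are illustrative assumptions, not values from this commit.

    import numpy as np
    import tritonclient.grpc as grpcclient

    # Address and model name are assumptions; adjust to the actual deployment.
    with grpcclient.InferenceServerClient('localhost:8001') as client:
        # TYPE_STRING tensors travel as BYTES; send one query in a batch of one.
        query = np.array([[b'Hello, world']], dtype=object)

        inp = grpcclient.InferInput('QUERY', list(query.shape), 'BYTES')
        inp.set_data_from_numpy(query)

        result = client.infer('preprocessing', [inp])
        input_id = result.as_numpy('INPUT_ID')                    # token ids, dims [ -1 ]
        request_input_len = result.as_numpy('REQUEST_INPUT_LEN')  # dims [ 1 ]
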
6 changes: 1 addition & 5 deletions lmdeploy/serve/turbomind/utils.py

@@ -48,11 +48,7 @@ def infer(self, prompts: Union[str, List[str]]) -> tuple:
             f'{type(prompts)}'

         input0_data = np.array(input0).astype(object)
-        output0_len = np.ones_like(input0).astype(np.uint32)
-        inputs = [
-            prepare_tensor('QUERY', input0_data),
-            prepare_tensor('REQUEST_OUTPUT_LEN', output0_len)
-        ]
+        inputs = [prepare_tensor('QUERY', input0_data)]

         with grpcclient.InferenceServerClient(self.tritonserver_addr) as \
                 client:
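prepare_tensor is defined earlier in utils.py and is not shown in this hunk; a typical definition following the common tritonclient pattern (an assumption, not the verbatim source) looks like this:

    import tritonclient.grpc as grpcclient
    from tritonclient.utils import np_to_triton_dtype


    def prepare_tensor(name, input_tensor):
        """Wrap a numpy array in a named Triton gRPC InferInput."""
        t = grpcclient.InferInput(name, list(input_tensor.shape),
                                  np_to_triton_dtype(input_tensor.dtype))
        t.set_data_from_numpy(input_tensor)
        return t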
