Skip to content

Commit

Permalink
update
Browse files Browse the repository at this point in the history
  • Loading branch information
lvhan028 committed Jul 22, 2023
1 parent 53f4559 commit 458d5e6
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 21 deletions.
9 changes: 4 additions & 5 deletions benchmark/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

We provide several profiling tools to benchmark our models.

## profiling with dataset
## Profile with a dataset

Download the dataset below or create your own dataset.

Expand All @@ -16,7 +16,6 @@ Profiling your model with `profile_throughput.py`
python profile_throughput.py \
ShareGPT_V3_unfiltered_cleaned_split.json \
/path/to/your/model \
${ModelType} \
--concurrency 64
```

Expand All @@ -27,7 +26,6 @@ python profile_throughput.py \
```bash
python profile_generation.py \
/path/to/your/model \
${ModelType} \
--concurrency 8 --input_seqlen 0 --output_seqlen 2048
```

Expand All @@ -36,10 +34,11 @@ python profile_generation.py \
The tools above profile models with the Python API. `profile_serving.py` is used to benchmark serving performance.

```bash
wget https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json

python profile_serving.py \
${TritonServerAddress} \
${ModelName} \
/path/to/tokenizer \
/path/to/dataset \
ShareGPT_V3_unfiltered_cleaned_split.json \
--concurrency 64
```
28 changes: 12 additions & 16 deletions lmdeploy/serve/turbomind/chatbot.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@ def stream_infer(self,
break
else:
yield status, res, tokens
if status.value >= 0:
if status.value == 0:
self._session.histories = \
self._session.histories + self._session.prompt + \
self._session.response
Expand Down Expand Up @@ -197,11 +197,11 @@ def end(self, session_id: int, *args, **kwargs):
request_output_len=0,
sequence_start=False,
sequence_end=True):
if status != StatusCode.TRITON_STREAM_END:
return status
if status.value < 0:
break

self.reset_session()
return StatusCode.TRITON_STREAM_END
return status

def cancel(self, session_id: int, *args, **kwargs):
"""Cancel the session during generating tokens.
Expand Down Expand Up @@ -244,7 +244,7 @@ def cancel(self, session_id: int, *args, **kwargs):
if status == StatusCode.TRITON_STREAM_END:
logger.info(f'cancel session {session_id} successfully')
if prev_session.histories:
logger.warn(f'TODO: start to recover session {session_id}')
logger.warning(f'TODO: start to recover session {session_id}')
else:
logger.info(f'cancel session {session_id} failed: {res}')
return status
Expand Down Expand Up @@ -285,7 +285,7 @@ def resume(self, session_id: int, *args, **kwargs):
sequence_start=True,
sequence_end=False):
if status.value < 0:
return status
break

self._session.histories = histories
return status
Expand Down Expand Up @@ -420,16 +420,12 @@ def _stream_infer(self,
request_output_len, sequence_start,
sequence_end, preseq_length, cancel))
producer.start()
for state, res, tokens in self.stream_consumer(self.postprocess, que,
session, input_tokens,
preseq_length, cancel,
logger, self.display,
self.profile_generation,
self.eos_id):
if state.value < 0:
yield state, res, 0
else:
yield state, res, tokens
for status, res, n_token in self.stream_consumer(
self.postprocess, que, session, input_tokens, preseq_length,
cancel, logger, self.display, self.profile_generation,
self.eos_id):
yield status, res, n_token

producer.join()
self._session = que.get()
curseq_length = self._session.sequence_length
Expand Down

0 comments on commit 458d5e6

Please sign in to comment.