Skip to content

Commit fc7b8d1

Browse files
authored
[Performance] e2e overheads reduction: Small followup diff (vllm-project#7364)
1 parent 67abdbb commit fc7b8d1

File tree

2 files changed

+5
-2
lines changed

2 files changed

+5
-2
lines changed

vllm/core/block_manager_v1.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -336,9 +336,9 @@ def allocate(self, seq_group: SequenceGroup) -> None:
 
         # Assign the self-attention block tables for each sequence.
         if len(wait_seqs) == 1:
-            self.block_tables[wait_seqs[0].seq_id] = block_table
+            self.block_tables[seq.seq_id] = block_table
         else:
-            for seq in seq_group.get_seqs(status=SequenceStatus.WAITING):
+            for seq in wait_seqs:
                 self.block_tables[seq.seq_id] = block_table.copy()
 
         # Allocate encoder sequence

vllm/sequence.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -655,6 +655,9 @@ def get_unfinished_seqs(self) -> List[Sequence]:
         return [seq for seq in self.seqs if not seq.is_finished()]
 
     def get_finished_seqs(self) -> List[Sequence]:
+        if self.is_single_seq:
+            return self.seqs if self.seqs[0].is_finished() else []
+
         return [seq for seq in self.seqs if seq.is_finished()]
 
     def update_num_computed_tokens(self, num_new_computed_tokens: int):

0 commit comments

Comments
 (0)