Skip to content

Commit

Permalink
[Fix] Prefix cache only enables sliding window on leaf sequence (#2615)
Browse files Browse the repository at this point in the history
This PR updates the prefix cache to make its logic for enabling sliding window attention consistent. Sliding window attention is now enabled only on leaf sequences, i.e., sequences that are not parents of other sequences.
  • Loading branch information
cyx-6 committed Jul 2, 2024
1 parent 2d32094 commit 0fb5609
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 4 deletions.
10 changes: 8 additions & 2 deletions cpp/serve/engine_actions/eagle_new_request_prefill.cc
Original file line number Diff line number Diff line change
Expand Up @@ -410,7 +410,10 @@ class EagleNewRequestPrefillActionObj : public BatchPrefillBaseActionObj {
CHECK_EQ(result.reused_seq_pop_last_tokens, 0);
for (int i = 0; i < models_.size(); ++i) {
models_[i]->AddNewSequence(rsentry->mstates[0]->internal_id);
models_[i]->EnableSlidingWindowForSeq(rsentry->mstates[0]->internal_id);
// Enable sliding window for the sequence if it is not a parent.
if (rsentry->child_indices.empty()) {
models_[i]->EnableSlidingWindowForSeq(rsentry->mstates[0]->internal_id);
}
}
} else {
if (result.forked_seq_id != -1) {
Expand All @@ -435,7 +438,10 @@ class EagleNewRequestPrefillActionObj : public BatchPrefillBaseActionObj {
for (int i = 0; i < models_.size(); ++i) {
models_[i]->ForkSequence(result.forked_seq_id, rsentry->mstates[0]->internal_id,
result.prefilled_offset - 1);
models_[i]->EnableSlidingWindowForSeq(rsentry->mstates[0]->internal_id);
// Enable sliding window for the sequence if it is not a parent.
if (rsentry->child_indices.empty()) {
models_[i]->EnableSlidingWindowForSeq(rsentry->mstates[0]->internal_id);
}
}
} else {
// Reuse recycling sequence
Expand Down
10 changes: 8 additions & 2 deletions cpp/serve/engine_actions/new_request_prefill.cc
Original file line number Diff line number Diff line change
Expand Up @@ -272,7 +272,10 @@ class NewRequestPrefillActionObj : public BatchPrefillBaseActionObj {
CHECK_EQ(result.reused_seq_pop_last_tokens, 0);
for (Model model : models_) {
model->AddNewSequence(rsentry->mstates[0]->internal_id);
model->EnableSlidingWindowForSeq(rsentry->mstates[0]->internal_id);
// Enable sliding window for the sequence if it is not a parent.
if (rsentry->child_indices.empty()) {
model->EnableSlidingWindowForSeq(rsentry->mstates[0]->internal_id);
}
}
} else {
if (result.forked_seq_id != -1) {
Expand All @@ -282,7 +285,10 @@ class NewRequestPrefillActionObj : public BatchPrefillBaseActionObj {
for (Model model : models_) {
model->ForkSequence(result.forked_seq_id, rsentry->mstates[0]->internal_id,
result.prefilled_offset);
model->EnableSlidingWindowForSeq(rsentry->mstates[0]->internal_id);
// Enable sliding window for the sequence if it is not a parent.
if (rsentry->child_indices.empty()) {
model->EnableSlidingWindowForSeq(rsentry->mstates[0]->internal_id);
}
}
} else {
// Reuse recycling sequence
Expand Down

0 comments on commit 0fb5609

Please sign in to comment.