Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[FIX] Update EOS from config #2475

Merged
merged 5 commits into from
Dec 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion python/sglang/srt/configs/model_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@
import json
import logging
from enum import IntEnum, auto
from typing import List, Optional, Union
from functools import lru_cache
from typing import List, Optional, Set, Union

import torch
from transformers import PretrainedConfig
Expand Down Expand Up @@ -264,6 +265,14 @@ def _verify_quantization(self) -> None:
self.quantization,
)

@lru_cache()
def get_hf_eos_token_id(self) -> Optional[Set[int]]:
    """Return the EOS token ids declared on ``self.hf_config`` as a set.

    ``hf_config.eos_token_id`` can be either a single int or a list of
    ints; both forms are normalised to a ``set`` so callers can do a plain
    membership test.

    Returns:
        ``None`` when the config has no ``eos_token_id`` attribute (or it
        is ``None``); otherwise a set holding every configured id.

    Note:
        The result is memoised by ``lru_cache`` keyed on ``self``, so the
        same set object is handed back on every call -- treat it as
        read-only.
    """
    eos_ids = getattr(self.hf_config, "eos_token_id", None)
    # Compare against None explicitly: token id 0 is a legitimate EOS id
    # but is falsy, so a bare truthiness test would hand back the raw int
    # and the caller would silently skip EOS matching.
    if eos_ids is None:
        return None
    # it can be either int or list of int
    if isinstance(eos_ids, int):
        return {eos_ids}
    return set(eos_ids)


def get_hf_text_config(config: PretrainedConfig):
"""Get the "sub" config relevant to llm for multi modal models.
Expand Down
33 changes: 20 additions & 13 deletions python/sglang/srt/managers/schedule_batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@

import dataclasses
import logging
from typing import List, Optional, Tuple, Union
from typing import List, Optional, Set, Tuple, Union

import numpy as np
import torch
Expand Down Expand Up @@ -209,6 +209,7 @@ def __init__(
lora_path: Optional[str] = None,
input_embeds: Optional[List[List[float]]] = None,
session_id: Optional[str] = None,
eos_token_ids: Optional[Set[int]] = None,
):
# Input and output info
self.rid = rid
Expand Down Expand Up @@ -236,6 +237,7 @@ def __init__(
self.finished_reason = None
self.to_abort = False
self.stream = stream
self.eos_token_ids = eos_token_ids

# For incremental decoding
# ----- | --------- read_ids -------|
Expand Down Expand Up @@ -395,18 +397,23 @@ def check_finished(self):

last_token_id = self.output_ids[-1]

matched_eos = False

# Check stop token ids
if self.sampling_params.stop_token_ids:
matched_eos = last_token_id in self.sampling_params.stop_token_ids
if self.tokenizer is not None:
matched_eos |= last_token_id == self.tokenizer.eos_token_id
if self.tokenizer.additional_stop_token_ids:
matched_eos |= last_token_id in self.tokenizer.additional_stop_token_ids
if matched_eos and not self.sampling_params.ignore_eos:
self.finished_reason = FINISH_MATCHED_TOKEN(matched=last_token_id)
return
if not self.sampling_params.ignore_eos:
matched_eos = False

# Check stop token ids
if self.sampling_params.stop_token_ids:
matched_eos = last_token_id in self.sampling_params.stop_token_ids
if self.eos_token_ids:
matched_eos |= last_token_id in self.eos_token_ids
if self.tokenizer is not None:
matched_eos |= last_token_id == self.tokenizer.eos_token_id
if self.tokenizer.additional_stop_token_ids:
matched_eos |= (
last_token_id in self.tokenizer.additional_stop_token_ids
)
if matched_eos:
self.finished_reason = FINISH_MATCHED_TOKEN(matched=last_token_id)
return

# Check stop strings
if len(self.sampling_params.stop_strs) > 0:
Expand Down
1 change: 1 addition & 0 deletions python/sglang/srt/managers/scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -520,6 +520,7 @@ def handle_generate_request(
stream=recv_req.stream,
lora_path=recv_req.lora_path,
input_embeds=recv_req.input_embeds,
eos_token_ids=self.model_config.get_hf_eos_token_id(),
)
req.tokenizer = self.tokenizer

Expand Down
Loading