diff --git a/vllm/model_executor/guided_decoding/xgrammar_decoding.py b/vllm/model_executor/guided_decoding/xgrammar_decoding.py index 80e88dd5b4b37..379fed799ab3c 100644 --- a/vllm/model_executor/guided_decoding/xgrammar_decoding.py +++ b/vllm/model_executor/guided_decoding/xgrammar_decoding.py @@ -229,6 +229,7 @@ def __call__(self, input_ids: list[int], scores: torch.Tensor) -> torch.Tensor: if self.ctx is None: self._ensure_ctx() + assert self.ctx is not None if len(self.matchers) == 0: self.matchers = [ @@ -243,6 +244,9 @@ def __call__(self, input_ids: list[int], else: for i, matcher in enumerate(self.matchers): if not matcher.is_terminated(): + if input_ids[ + -1] in self.ctx.tokenizer_info.special_token_ids: + continue sampled_token = input_ids[-1] assert self.matchers[i].accept_token(sampled_token)