diff --git a/tests/generation/test_utils.py b/tests/generation/test_utils.py index 1c8b0f1cadbaf5..613cf7bea4763a 100644 --- a/tests/generation/test_utils.py +++ b/tests/generation/test_utils.py @@ -2036,6 +2036,8 @@ def test_generate_with_dynamic_sliding_window_cache(self, left_padding: bool): config, _ = self.prepare_config_and_inputs_for_generate() if getattr(config, "sliding_window", None) is None: self.skipTest(reason="This model does not support sliding window.") + if "qwen2" in str(model_class).lower(): + self.skipTest(reason="Sliding window attention is not implemented for sdpa in Qwen2 models.") input_ids = ids_tensor((2, 7), vocab_size=config.vocab_size) if left_padding: @@ -2084,6 +2086,8 @@ def test_generate_continue_from_dynamic_sliding_window_cache(self, sliding_windo config, _ = self.prepare_config_and_inputs_for_generate() if getattr(config, "sliding_window", None) is None: self.skipTest(reason="This model does not support sliding window.") + if "qwen2" in str(model_class).lower(): + self.skipTest(reason="Sliding window attention is not implemented for sdpa in Qwen2 models.") # We need to be sure to always have shape (2, 7) for the different test assumptions to hold input_ids = ids_tensor((2, 7), vocab_size=config.vocab_size)