diff --git a/tests/test_modeling_common.py b/tests/test_modeling_common.py
index d55399a951c9c5..840bdac4668815 100755
--- a/tests/test_modeling_common.py
+++ b/tests/test_modeling_common.py
@@ -4711,6 +4711,7 @@ def test_custom_4d_attention_mask(self):
         normalized_1 = F.softmax(out_shared_prefix_last_tokens)
         torch.testing.assert_close(normalized_0, normalized_1, rtol=1e-3, atol=1e-4)
 
+    @is_flaky(max_attempts=10)  # TODO @raushan: this test is VERY flaky on some VLMs, like paligemma
     def test_static_cache_matches_dynamic(self):
         """
         Tests that generating with static cache give almost same results as with dynamic cache.
@@ -4747,7 +4748,7 @@ def test_static_cache_matches_dynamic(self):
                 output_logits=True,
                 return_dict_in_generate=True,
             )
-            self.assertTrue(torch.allclose(dynamic_out.logits[0], static_out.logits[0], rtol=1e-3, atol=1e-3))
+            self.assertTrue(torch.allclose(dynamic_out.logits[0], static_out.logits[0], rtol=1e-3, atol=1e-4))
 
     # For now, Let's focus only on GPU for `torch.compile`
     @slow
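
For context on the `@is_flaky` decorator used above: it comes from `transformers.testing_utils` and re-runs a failing test up to `max_attempts` times, only reporting a failure if every attempt fails. Below is a minimal sketch of that retry pattern, not the actual transformers implementation; the names `flaky_retry` and `wait_before_retry` are illustrative assumptions.

```python
import functools
import time


def flaky_retry(max_attempts=5, wait_before_retry=None):
    """Sketch of a flaky-test retry decorator: re-run the wrapped test
    until it passes, re-raising the last failure once attempts run out."""

    def decorator(test_func):
        @functools.wraps(test_func)
        def wrapper(*args, **kwargs):
            for attempt in range(1, max_attempts + 1):
                try:
                    return test_func(*args, **kwargs)
                except Exception:
                    if attempt == max_attempts:
                        raise  # all attempts failed: surface the real failure
                    if wait_before_retry is not None:
                        time.sleep(wait_before_retry)

        return wrapper

    return decorator
```

On the tolerance change in the second hunk: `torch.allclose(a, b, rtol=..., atol=...)` checks `|a - b| <= atol + rtol * |b|` elementwise, so tightening `atol` from `1e-3` to `1e-4` mainly strengthens the comparison for near-zero logits, where the absolute term dominates. Presumably the retry decorator makes this stricter bound affordable despite the flakiness noted in the TODO.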