
Commit 2afd90e

update
ydshieh committed Nov 4, 2024
1 parent c443d8d commit 2afd90e
Showing 19 changed files with 266 additions and 615 deletions.
@@ -208,7 +208,7 @@ def __init__(self, config):
mode = config.spatial_pool_mode
stride = config.spatial_pool_stride
out_channels = getattr(config, "spatial_pool_out_channels", config.vision_config.hidden_size)
self.image_size = config.vision_config.image_size // config.vision_config.patch_size**2
self.image_size = (config.vision_config.image_size // config.vision_config.patch_size) ** 2

if mode == "average":
self.pool = nn.AvgPool2d(kernel_size=stride, stride=stride)
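
Note: the fix above is an operator-precedence correction; `**` binds tighter than `//`. With illustrative values (a 336-pixel image and 14-pixel patches are assumed here, not taken from this diff):

image_size, patch_size = 336, 14              # assumed example values
image_size // patch_size**2                   # old expression: 336 // 196 == 1
(image_size // patch_size) ** 2               # new expression: (336 // 14) ** 2 == 576 patches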
2 changes: 1 addition & 1 deletion src/transformers/models/unispeech/modeling_unispeech.py
@@ -1577,7 +1577,7 @@ def forward(
quantized_features, codevector_perplexity = self.quantizer(extract_features)

# project quantized features twice
quantized_features = self.project_q(quantized_features)
quantized_features = self.project_q(quantized_features.to(self.project_q.weight.dtype))
quantized_features = self.project_hid(quantized_features)

prob_replace_matrix = torch.empty(transformer_features.size(0), transformer_features.size(1)).fill_(
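
Note: a minimal sketch of the dtype mismatch the added cast avoids, using a standalone toy layer rather than the UniSpeech module itself. When the model runs in half precision but the quantizer output is still float32, the projection would otherwise fail with a dtype error:

import torch
from torch import nn

project_q = nn.Linear(8, 4).to(torch.float16)               # projection weights in fp16
quantized_features = torch.randn(2, 8)                      # quantizer output left in fp32
# project_q(quantized_features) would raise a dtype mismatch error;
# casting the input to the weight dtype first keeps the matmul consistent.
out = project_q(quantized_features.to(project_q.weight.dtype))
assert out.dtype == torch.float16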
9 changes: 8 additions & 1 deletion tests/models/albert/test_modeling_albert.py
@@ -16,11 +16,12 @@

import unittest

from parameterized import parameterized
from packaging import version

from transformers import AlbertConfig, AutoTokenizer, is_torch_available
from transformers.models.auto import get_values
from transformers.testing_utils import require_torch, slow, torch_device
from transformers.testing_utils import require_torch, require_torch_sdpa, slow, torch_device

from ...test_configuration_common import ConfigTester
from ...test_modeling_common import ModelTesterMixin, ids_tensor, random_attention_mask
@@ -288,6 +289,12 @@ def setUp(self):
self.model_tester = AlbertModelTester(self)
self.config_tester = ConfigTester(self, config_class=AlbertConfig, hidden_size=37)

@parameterized.expand([("float16",), ("bfloat16",), ("float32",)])
@require_torch_sdpa
@unittest.skip("Albert requires `head_mask` which is currently not done in this test.")
def test_eager_matches_sdpa_inference(self):
pass

def test_config(self):
self.config_tester.run_common_tests()

297 changes: 0 additions & 297 deletions tests/models/glm/test_modeling_glm.py

Large diffs are not rendered by default.

9 changes: 0 additions & 9 deletions tests/models/granite/test_modeling_granite.py
@@ -445,15 +445,6 @@ def test_use_flash_attention_2_true(self):
if not has_flash:
raise ValueError("The flash model should have flash attention layers")

@parameterized.expand([("float16",), ("bfloat16",), ("float32",)])
@require_torch_sdpa
@slow
def test_eager_matches_sdpa_inference(self, torch_dtype: str):
"""
skipping the test since mup is very flaky and gets consistently different outputs
"""
self.skipTest("skipping the test since mup is very flaky and gets consistently different outputs")


@require_torch_gpu
class GraniteIntegrationTest(unittest.TestCase):
9 changes: 0 additions & 9 deletions tests/models/granitemoe/test_modeling_granitemoe.py
@@ -444,15 +444,6 @@ def test_use_flash_attention_2_true(self):
if not has_flash:
raise ValueError("The flash model should have flash attention layers")

@parameterized.expand([("float16",), ("bfloat16",), ("float32",)])
@require_torch_sdpa
@slow
def test_eager_matches_sdpa_inference(self, torch_dtype: str):
"""
skipping the test since mup is very flaky and gets consistently different outputs
"""
self.skipTest("skipping the test since mup is very flaky and gets consistently different outputs")


@require_torch_gpu
class GraniteMoeIntegrationTest(unittest.TestCase):
15 changes: 13 additions & 2 deletions tests/models/idefics/test_modeling_idefics.py
@@ -134,7 +134,7 @@ def __init__(
num_attention_heads=self.vision_num_attention_heads,
num_hidden_layers=self.vision_num_hidden_layers,
intermediate_size=self.vision_intermediate_size,
)
).to_dict()

self.perceiver_qk_layer_norms_perceiver = perceiver_qk_layer_norms_perceiver
self.perceiver_resampler_depth = perceiver_resampler_depth
@@ -316,7 +316,6 @@ def prepare_pixel_values(self):
return floats_tensor([self.batch_size, self.num_channels, self.image_size, self.image_size])

@require_torch_sdpa
@slow
@parameterized.expand([("float16",), ("bfloat16",), ("float32",)])
def test_eager_matches_sdpa_inference(self, torch_dtype: str):
self.skipTest(reason="Idefics has a hard requirement on SDPA, skipping this test")
@@ -349,6 +348,12 @@ def _prepare_for_class(self, inputs_dict, model_class, return_labels=False):

return inputs_dict

@parameterized.expand([("float16",), ("bfloat16",), ("float32",)])
@require_torch_sdpa
@unittest.skip("Idefics requires both text and image inputs which is currently not done in this test.")
def test_eager_matches_sdpa_inference(self):
pass

def test_model_outputs_equivalence(self):
try:
orig = self.all_model_classes
@@ -598,6 +603,12 @@ def setUp(self):
)
self.config_tester = ConfigTester(self, config_class=IdeficsConfig, hidden_size=37)

@parameterized.expand([("float16",), ("bfloat16",), ("float32",)])
@require_torch_sdpa
@unittest.skip("Idefics requires both text and image inputs which is currently not done in this test.")
def test_eager_matches_sdpa_inference(self, torch_dtype):
pass

@pytest.mark.generate
def test_left_padding_compatibility(self):
"""Overwrite because IDEFICS needs image attention mask to be also padded"""
8 changes: 4 additions & 4 deletions tests/models/llava_next/test_modeling_llava_next.py
@@ -90,7 +90,7 @@ def __init__(
},
is_training=True,
vision_config={
"image_size": 16,
"image_size": 8,
"patch_size": 4,
"num_channels": 3,
"is_training": True,
@@ -123,10 +123,10 @@ def __init__(
self.batch_size = 3
self.num_channels = 3
self.image_size = 30
self.encoder_seq_length = 95
self.image_grid_pinpoints = [[32, 32]]
self.num_image_tokens = 88
self.image_grid_pinpoints = [[16, 16]]
self.num_image_tokens = 24
self.seq_length = seq_length + self.num_image_tokens
self.encoder_seq_length = self.seq_length

def get_config(self):
return LlavaNextConfig(
11 changes: 6 additions & 5 deletions tests/models/llava_next_video/test_modeling_llava_next_video.py
@@ -91,7 +91,7 @@ def __init__(
},
is_training=True,
vision_config={
"image_size": 16,
"image_size": 8,
"patch_size": 4,
"num_channels": 3,
"is_training": True,
@@ -125,11 +125,12 @@ def __init__(
self.batch_size = 3
self.num_channels = 3
self.image_size = 30
self.encoder_seq_length = 127
self.image_grid_pinpoints = [[32, 32]]
self.num_image_tokens = 88
self.num_video_tokens = 32

self.image_grid_pinpoints = [[16, 16]]
self.num_image_tokens = 24
self.num_video_tokens = 8
self.seq_length = seq_length + self.num_image_tokens + self.num_video_tokens
self.encoder_seq_length = self.seq_length

def get_config(self):
return LlavaNextVideoConfig(
82 changes: 32 additions & 50 deletions tests/models/mimi/test_modeling_mimi.py
@@ -411,8 +411,10 @@ def test_identity_shortcut(self):

@parameterized.expand([("float16",), ("bfloat16",), ("float32",)])
@require_torch_sdpa
@slow
def test_eager_matches_sdpa_inference(self, torch_dtype: str):
if torch_dtype == "float16" and torch_device == "cpu":
self.skipTest("`replication_pad1d` not implemented for 'Half")

if not self.has_attentions:
self.skipTest(reason="Model architecture does not support attentions")

@@ -513,7 +515,7 @@ def get_mean_reldiff(failcase, x, ref, atol, rtol):
can_output_attn = "output_attentions" in inspect.signature(model_sdpa.forward).parameters
if not (self.has_attentions and can_output_attn) and output_attentions:
continue
for batch_size in [1, 5]:
for batch_size in [7]:
dummy_input = inputs_dict[model.main_input_name]

if dummy_input.dtype in [torch.float32, torch.bfloat16, torch.float16]:
@@ -564,11 +566,11 @@ def get_mean_reldiff(failcase, x, ref, atol, rtol):

dummy_attention_mask[:] = 1
if padding_side == "left":
dummy_attention_mask[-1, :-1] = 1
dummy_attention_mask[-1, -4:] = 0
dummy_attention_mask[-1, :2] = 0
dummy_attention_mask[-1, 2:] = 1
elif padding_side == "right":
dummy_attention_mask[-1, 1:] = 1
dummy_attention_mask[-1, :3] = 0
dummy_attention_mask[-1, -2:] = 0
dummy_attention_mask[-1, :-2] = 1

for enable_kernels in [False, True]:
failcase = f"padding_side={padding_side}, use_mask={use_mask}, batch_size={batch_size}, enable_kernels={enable_kernels}"
@@ -655,52 +657,32 @@ def get_mean_reldiff(failcase, x, ref, atol, rtol):

# Masked tokens output slightly deviates - we don't mind that.
if use_mask:
_logits_sdpa = torch.zeros_like(input=logits_sdpa)
_logits_eager = torch.zeros_like(input=logits_eager)

_logits_sdpa[:-1] = logits_sdpa[:-1]
_logits_eager[:-1] = logits_eager[:-1]

if padding_side == "left":
sub_sdpa = logits_sdpa[:-1]
sub_eager = logits_eager[:-1]
if not torch.allclose(sub_sdpa, sub_eager, atol=atol, rtol=rtol):
fail_cases.append(
get_mean_reldiff(failcase, sub_sdpa, sub_eager, atol, rtol)
)

sub_sdpa = logits_sdpa[-1, :-4]
sub_eager = logits_eager[-1, :-4]
if not torch.allclose(sub_sdpa, sub_eager, atol=atol, rtol=rtol):
fail_cases.append(
get_mean_reldiff(failcase, sub_sdpa, sub_eager, atol, rtol)
)

# Testing the padding tokens is not really meaningful but anyway
# sub_sdpa = logits_sdpa[-1, -4:]
# sub_eager = logits_eager[-1, -4:]
# if not torch.allclose(sub_sdpa, sub_eager, atol=atol, rtol=rtol):
# fail_cases.append(get_mean_reldiff(failcase, sub_sdpa, sub_eager, 4e-2, 4e-2))
elif padding_side == "right":
sub_sdpa = logits_sdpa[:-1]
sub_eager = logits_eager[:-1]
if not torch.allclose(sub_sdpa, sub_eager, atol=atol, rtol=rtol):
fail_cases.append(
get_mean_reldiff(failcase, sub_sdpa, sub_eager, atol, rtol)
)

sub_sdpa = logits_sdpa[-1, 3:]
sub_eager = logits_eager[-1, 3:]
if not torch.allclose(sub_sdpa, sub_eager, atol=atol, rtol=rtol):
fail_cases.append(
get_mean_reldiff(failcase, sub_sdpa, sub_eager, atol, rtol)
)

# Testing the padding tokens is not really meaningful but anyway
# sub_sdpa = logits_sdpa[-1, :3]
# sub_eager = logits_eager[-1, :3]
# if not torch.allclose(sub_sdpa, sub_eager, atol=atol, rtol=rtol):
# fail_cases.append(get_mean_reldiff(failcase, sub_sdpa, sub_eager, 4e-2, 4e-2))
_logits_sdpa[-1:, 2:] = logits_sdpa[-1:, 2:]
_logits_eager[-1:, 2:] = logits_eager[-1:, 2:]

else:
if not torch.allclose(logits_sdpa, logits_eager, atol=atol, rtol=rtol):
fail_cases.append(
get_mean_reldiff(failcase, logits_sdpa, logits_eager, atol, rtol)
)
elif padding_side == "right":
_logits_sdpa[-1:, 2:] = logits_sdpa[-1:, :-2]
_logits_eager[-1:, 2:] = logits_eager[-1:, :-2]

logits_sdpa = _logits_sdpa
logits_eager = _logits_eager

results = [
torch.allclose(_logits_sdpa, _logits_eager, atol=atol, rtol=rtol)
for (_logits_sdpa, _logits_eager) in zip(logits_sdpa, logits_eager)
]
# If 80% of the batch elements have matching results, it's fine
if np.mean(results) < 0.8:
fail_cases.append(
get_mean_reldiff(failcase, logits_sdpa, logits_eager, atol, rtol)
)

self.assertTrue(len(fail_cases) == 0, "\n".join(fail_cases))

7 changes: 0 additions & 7 deletions tests/models/mllama/test_modeling_mllama.py
@@ -353,13 +353,6 @@ def _check_attentions_for_generate(

self.assertListEqual([layer_attention.shape for layer_attention in iter_attentions], expected_shapes)

@require_torch_sdpa
@slow
@is_flaky()
def test_eager_matches_sdpa_inference_1_bfloat16(self):
# A workaround to override parametrized test with flaky decorator
super().test_eager_matches_sdpa_inference_1_bfloat16()

@unittest.skip("For some unknown reasons the tests fails in CrossAttention layer when doing torch.sdpa(). ")
def test_sdpa_can_compile_dynamic(self):
pass