From cb961ada70d1a7d3fab520635a41c1aa161680c1 Mon Sep 17 00:00:00 2001
From: yonigozlan
Date: Mon, 16 Sep 2024 20:14:30 +0000
Subject: [PATCH] fix failing tests

---
 .../llava_next/test_processor_llava_next.py | 144 +++++++++++++++++-
 .../pix2struct/test_processor_pix2struct.py |  49 +++++-
 2 files changed, 185 insertions(+), 8 deletions(-)

diff --git a/tests/models/llava_next/test_processor_llava_next.py b/tests/models/llava_next/test_processor_llava_next.py
index e10e7fd38fba2f..06c7ebda755d42 100644
--- a/tests/models/llava_next/test_processor_llava_next.py
+++ b/tests/models/llava_next/test_processor_llava_next.py
@@ -17,7 +17,10 @@
 
 import torch
 
-from transformers.testing_utils import require_vision
+from transformers.testing_utils import (
+    require_torch,
+    require_vision,
+)
 from transformers.utils import is_vision_available
 
 from ...test_processing_common import ProcessorTesterMixin
@@ -96,3 +99,142 @@ def test_image_token_filling(self):
         )
         image_tokens = (inputs["input_ids"] == image_token_index).sum().item()
         self.assertEqual(expected_image_tokens, image_tokens)
+
+    @require_torch
+    @require_vision
+    def test_image_processor_defaults_preserved_by_image_kwargs(self):
+        if "image_processor" not in self.processor_class.attributes:
+            self.skipTest(f"image_processor attribute not present in {self.processor_class}")
+        image_processor = self.get_component("image_processor", crop_size=(234, 234))
+        tokenizer = self.get_component("tokenizer", max_length=117, padding="max_length")
+
+        processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor)
+        self.skip_processor_without_typed_kwargs(processor)
+
+        input_str = "lower newer"
+        image_input = self.prepare_image_inputs()
+
+        inputs = processor(text=input_str, images=image_input)
+        self.assertEqual(len(inputs["pixel_values"][0][0][0]), 234)
+
+    @require_torch
+    @require_vision
+    def test_kwargs_overrides_default_image_processor_kwargs(self):
+        if "image_processor" not in self.processor_class.attributes:
+            self.skipTest(f"image_processor attribute not present in {self.processor_class}")
+        image_processor = self.get_component("image_processor", crop_size=(234, 234))
+        tokenizer = self.get_component("tokenizer", max_length=117, padding="max_length")
+
+        processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor)
+        self.skip_processor_without_typed_kwargs(processor)
+
+        input_str = "lower newer"
+        image_input = self.prepare_image_inputs()
+
+        inputs = processor(text=input_str, images=image_input, crop_size=[224, 224])
+        self.assertEqual(len(inputs["pixel_values"][0][0][0]), 224)
+
+    @require_torch
+    @require_vision
+    def test_structured_kwargs_nested(self):
+        if "image_processor" not in self.processor_class.attributes:
+            self.skipTest(f"image_processor attribute not present in {self.processor_class}")
+        image_processor = self.get_component("image_processor")
+        tokenizer = self.get_component("tokenizer")
+
+        processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor)
+        self.skip_processor_without_typed_kwargs(processor)
+
+        input_str = "lower newer"
+        image_input = self.prepare_image_inputs()
+
+        # Define the kwargs for each modality
+        all_kwargs = {
+            "common_kwargs": {"return_tensors": "pt"},
+            "images_kwargs": {"crop_size": {"height": 214, "width": 214}},
+            "text_kwargs": {"padding": "max_length", "max_length": 76},
+        }
+
+        inputs = processor(text=input_str, images=image_input, **all_kwargs)
+        self.skip_processor_without_typed_kwargs(processor)
+
self.assertEqual(inputs["pixel_values"].shape[-1], 214) + + self.assertEqual(len(inputs["input_ids"][0]), 76) + + @require_torch + @require_vision + def test_structured_kwargs_nested_from_dict(self): + if "image_processor" not in self.processor_class.attributes: + self.skipTest(f"image_processor attribute not present in {self.processor_class}") + + image_processor = self.get_component("image_processor") + tokenizer = self.get_component("tokenizer") + + processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor) + self.skip_processor_without_typed_kwargs(processor) + input_str = "lower newer" + image_input = self.prepare_image_inputs() + + # Define the kwargs for each modality + all_kwargs = { + "common_kwargs": {"return_tensors": "pt"}, + "images_kwargs": {"crop_size": {"height": 214, "width": 214}}, + "text_kwargs": {"padding": "max_length", "max_length": 76}, + } + + inputs = processor(text=input_str, images=image_input, **all_kwargs) + self.assertEqual(inputs["pixel_values"].shape[-1], 214) + + self.assertEqual(len(inputs["input_ids"][0]), 76) + + @require_torch + @require_vision + def test_unstructured_kwargs(self): + if "image_processor" not in self.processor_class.attributes: + self.skipTest(f"image_processor attribute not present in {self.processor_class}") + image_processor = self.get_component("image_processor") + tokenizer = self.get_component("tokenizer") + + processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor) + self.skip_processor_without_typed_kwargs(processor) + + input_str = "lower newer" + image_input = self.prepare_image_inputs() + inputs = processor( + text=input_str, + images=image_input, + return_tensors="pt", + crop_size={"height": 214, "width": 214}, + padding="max_length", + max_length=76, + ) + + self.assertEqual(inputs["pixel_values"].shape[-1], 214) + self.assertEqual(len(inputs["input_ids"][0]), 76) + + @require_torch + @require_vision + def test_unstructured_kwargs_batched(self): + if "image_processor" not in self.processor_class.attributes: + self.skipTest(f"image_processor attribute not present in {self.processor_class}") + image_processor = self.get_component("image_processor") + tokenizer = self.get_component("tokenizer") + + processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor) + self.skip_processor_without_typed_kwargs(processor) + + input_str = ["lower newer", "upper older longer string"] + image_input = self.prepare_image_inputs() * 2 + inputs = processor( + text=input_str, + images=image_input, + return_tensors="pt", + crop_size={"height": 214, "width": 214}, + padding="longest", + max_length=76, + ) + + self.assertEqual(inputs["pixel_values"].shape[-1], 214) + + self.assertEqual(len(inputs["input_ids"][0]), 5) diff --git a/tests/models/pix2struct/test_processor_pix2struct.py b/tests/models/pix2struct/test_processor_pix2struct.py index 7be23af9679430..a0cf25528ca7e3 100644 --- a/tests/models/pix2struct/test_processor_pix2struct.py +++ b/tests/models/pix2struct/test_processor_pix2struct.py @@ -37,7 +37,6 @@ @require_torch class Pix2StructProcessorTest(ProcessorTesterMixin, unittest.TestCase): processor_class = Pix2StructProcessor - text_data_arg_name = "decoder_input_ids" def setUp(self): self.tmpdirname = tempfile.mkdtemp() @@ -182,10 +181,27 @@ def test_model_input_names(self): # For now the processor supports only ["flattened_patches", "input_ids", "attention_mask", "decoder_attention_mask"] self.assertListEqual(list(inputs.keys()), ["input_ids", 
"attention_mask"]) - # Rewrite as pix2struct processor return "flattened_patches" and not "pixel_values" + @require_vision + @require_torch + def test_tokenizer_defaults_preserved_by_kwargs(self): + # Rewrite as pix2struct processor return "decoder_input_ids" and not "input_ids" + if "image_processor" not in self.processor_class.attributes: + self.skipTest(f"image_processor attribute not present in {self.processor_class}") + image_processor = self.get_component("image_processor") + tokenizer = self.get_component("tokenizer", max_length=117, padding="max_length") + + processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor) + self.skip_processor_without_typed_kwargs(processor) + input_str = "lower newer" + image_input = self.prepare_image_inputs() + + inputs = processor(text=input_str, images=image_input, return_tensors="pt") + self.assertEqual(len(inputs["decoder_input_ids"][0]), 117) + @require_torch @require_vision def test_image_processor_defaults_preserved_by_image_kwargs(self): + # Rewrite as pix2struct processor return "flattened_patches" and not "pixel_values" if "image_processor" not in self.processor_class.attributes: self.skipTest(f"image_processor attribute not present in {self.processor_class}") image_processor = self.get_component("image_processor", max_patches=1024, patch_size={"height": 8, "width": 8}) @@ -200,10 +216,29 @@ def test_image_processor_defaults_preserved_by_image_kwargs(self): inputs = processor(text=input_str, images=image_input) self.assertEqual(len(inputs["flattened_patches"][0][0]), 194) - # Rewrite as pix2struct processor return "flattened_patches" and not "pixel_values" + @require_vision + @require_torch + def test_kwargs_overrides_default_tokenizer_kwargs(self): + # Rewrite as pix2struct processor return "decoder_input_ids" and not "input_ids" + if "image_processor" not in self.processor_class.attributes: + self.skipTest(f"image_processor attribute not present in {self.processor_class}") + image_processor = self.get_component("image_processor") + tokenizer = self.get_component("tokenizer", padding="longest") + + processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor) + self.skip_processor_without_typed_kwargs(processor) + input_str = "lower newer" + image_input = self.prepare_image_inputs() + + inputs = processor( + text=input_str, images=image_input, return_tensors="pt", max_length=112, padding="max_length" + ) + self.assertEqual(len(inputs["decoder_input_ids"][0]), 112) + @require_torch @require_vision def test_kwargs_overrides_default_image_processor_kwargs(self): + # Rewrite as pix2struct processor return "flattened_patches" and not "pixel_values" if "image_processor" not in self.processor_class.attributes: self.skipTest(f"image_processor attribute not present in {self.processor_class}") image_processor = self.get_component("image_processor", max_patches=4096) @@ -218,10 +253,10 @@ def test_kwargs_overrides_default_image_processor_kwargs(self): inputs = processor(text=input_str, images=image_input, max_patches=1024) self.assertEqual(len(inputs["flattened_patches"][0]), 1024) - # Rewrite as pix2struct processor return "flattened_patches" and not "pixel_values" @require_torch @require_vision def test_unstructured_kwargs(self): + # Rewrite as pix2struct processor return "decoder_input_ids" and not "input_ids" if "image_processor" not in self.processor_class.attributes: self.skipTest(f"image_processor attribute not present in {self.processor_class}") image_processor = 
self.get_component("image_processor") @@ -244,10 +279,10 @@ def test_unstructured_kwargs(self): self.assertEqual(inputs["flattened_patches"].shape[1], 1024) self.assertEqual(len(inputs["decoder_input_ids"][0]), 76) - # Rewrite as pix2struct processor return "flattened_patches" and not "pixel_values" @require_torch @require_vision def test_unstructured_kwargs_batched(self): + # Rewrite as pix2struct processor return "decoder_input_ids" and not "input_ids" if "image_processor" not in self.processor_class.attributes: self.skipTest(f"image_processor attribute not present in {self.processor_class}") image_processor = self.get_component("image_processor") @@ -271,10 +306,10 @@ def test_unstructured_kwargs_batched(self): self.assertEqual(len(inputs["decoder_input_ids"][0]), 5) - # Rewrite as pix2struct processor return "flattened_patches" and not "pixel_values" @require_torch @require_vision def test_structured_kwargs_nested(self): + # Rewrite as pix2struct processor return "decoder_input_ids" and not "input_ids" if "image_processor" not in self.processor_class.attributes: self.skipTest(f"image_processor attribute not present in {self.processor_class}") image_processor = self.get_component("image_processor") @@ -300,10 +335,10 @@ def test_structured_kwargs_nested(self): self.assertEqual(len(inputs["decoder_input_ids"][0]), 76) - # Rewrite as pix2struct processor return "flattened_patches" and not "pixel_values" @require_torch @require_vision def test_structured_kwargs_nested_from_dict(self): + # Rewrite as pix2struct processor return "decoder_input_ids" and not "input_ids" if "image_processor" not in self.processor_class.attributes: self.skipTest(f"image_processor attribute not present in {self.processor_class}")