Skip to content

Commit

Permalink
Fix BC tests and cleanup
Browse files Browse the repository at this point in the history
  • Loading branch information
yonigozlan committed Aug 13, 2024
1 parent 76bb138 commit a72c3cd
Show file tree
Hide file tree
Showing 7 changed files with 59 additions and 50 deletions.
9 changes: 7 additions & 2 deletions src/transformers/models/align/processing_align.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,10 +118,15 @@ def __call__(
`None`).
- **pixel_values** -- Pixel values to be fed to a model. Returned when `images` is not `None`.
"""
# check if images and text inputs are reversed for BC
if text is None and images is None:
raise ValueError("You must specify either text or images.")
if text is not None and not isinstance(text[0], str) or images is not None and isinstance(images[0], str):
# check if images and text inputs are reversed for BC
if (
text is not None
and not isinstance(text[0], str)
or images is not None
and (isinstance(images, str) or (isinstance(images, (list, tuple)) and isinstance(images[0], str)))
):
warnings.warn(
"It looks like you are passing the inputs in the wrong order. You should pass the images input first and the text input second. "
"Images and text inputs will be swapped."
Expand Down
5 changes: 2 additions & 3 deletions src/transformers/models/kosmos2/processing_kosmos2.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,14 +173,13 @@ def __call__(
if images is None and text is None:
raise ValueError("You have to specify either images or text.")

# Temporary fix for "padding_side" in init_kwargs
_ = self.tokenizer.init_kwargs.pop("padding_side", None)

output_kwargs = self._merge_kwargs(
Kosmos2ProcessorKwargs,
tokenizer_init_kwargs=self.tokenizer.init_kwargs,
**kwargs,
)
# Temporary fix for "padding_side" in init_kwargs
_ = output_kwargs["text_kwargs"].pop("padding_side", None)

bboxes = output_kwargs["images_kwargs"].pop("bboxes", None)
num_image_tokens = output_kwargs["images_kwargs"].pop("num_image_tokens", 64)
Expand Down
7 changes: 6 additions & 1 deletion src/transformers/models/llava_next/processing_llava_next.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,12 @@ def __call__(
if images is None and text is None:
raise ValueError("You have to specify at least images or text.")
# check if images and text inputs are reversed for BC
if text is not None and not isinstance(text[0], str) or images is not None and isinstance(images[0], str):
if (
text is not None
and not isinstance(text[0], str)
or images is not None
and (isinstance(images, str) or (isinstance(images, (list, tuple)) and isinstance(images[0], str)))
):
warnings.warn(
"It looks like you are passing the inputs in the wrong order. You should pass the images input first and the text input second. "
"Images and text inputs will be swapped."
Expand Down
23 changes: 14 additions & 9 deletions tests/models/fuyu/test_processing_fuyu.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,15 @@
if is_vision_available():
from PIL import Image

if is_vision_available() and is_torch_available():
from transformers import AutoProcessor, FuyuImageProcessor, FuyuProcessor

if is_torch_available():
import torch

from transformers.models.fuyu.processing_fuyu import construct_full_unpacked_stream, full_unpacked_stream_to_tensor

if is_vision_available() and is_torch_available():
from transformers import AutoProcessor, FuyuImageProcessor, FuyuProcessor


@require_torch
@require_vision
Expand Down Expand Up @@ -177,7 +178,7 @@ def test_fuyu_processing_multiple_image_sample(self):
@require_vision
@require_torch
def test_kwargs_overrides_default_tokenizer_kwargs(self):
# rewrite as Fuyu supports tokenizer kwargs only when image is None.
# Rewrite as Fuyu supports tokenizer kwargs only when image is None.
if "image_processor" not in self.processor_class.attributes:
self.skipTest(f"image_processor attribute not present in {self.processor_class}")
image_processor = self.get_component("image_processor")
Expand All @@ -194,15 +195,18 @@ def test_kwargs_overrides_default_tokenizer_kwargs(self):
)
self.assertEqual(len(inputs["input_ids"][0]), 112)

@unittest.skip("Fuyu processor does not support image_processor kwargs")
def test_image_processor_defaults_preserved_by_image_kwargs(self):
self.skipTest(reason="Fuyu processor does not support image_processor kwargs")
pass

@unittest.skip("Fuyu processor does not support image_processor kwargs")
def test_kwargs_overrides_default_image_processor_kwargs(self):
self.skipTest(reason="Fuyu processor does not support image_processor kwargs")
pass

@require_vision
@require_torch
def test_tokenizer_defaults_preserved_by_kwargs(self):
# Rewrite as Fuyu supports tokenizer kwargs only when image is None.
if "image_processor" not in self.processor_class.attributes:
self.skipTest(f"image_processor attribute not present in {self.processor_class}")
image_processor = self.get_component("image_processor")
Expand All @@ -211,6 +215,7 @@ def test_tokenizer_defaults_preserved_by_kwargs(self):
processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor)
self.skip_processor_without_typed_kwargs(processor)
input_str = "lower newer"
# Fuyu uses tokenizer kwargs only when image is None.
image_input = None

inputs = processor(text=input_str, images=image_input, return_tensors="pt")
Expand All @@ -219,7 +224,7 @@ def test_tokenizer_defaults_preserved_by_kwargs(self):
@require_torch
@require_vision
def test_structured_kwargs_nested(self):
# rewrite as Fuyu image processor does not return pixel values
# Rewrite as Fuyu image processor does not return pixel values
if "image_processor" not in self.processor_class.attributes:
self.skipTest(f"image_processor attribute not present in {self.processor_class}")
image_processor = self.get_component("image_processor")
Expand All @@ -246,7 +251,7 @@ def test_structured_kwargs_nested(self):
@require_torch
@require_vision
def test_structured_kwargs_nested_from_dict(self):
# rewrite as Fuyu image processor does not return pixel values
# Rewrite as Fuyu image processor does not return pixel values
if "image_processor" not in self.processor_class.attributes:
self.skipTest(f"image_processor attribute not present in {self.processor_class}")

Expand All @@ -272,7 +277,7 @@ def test_structured_kwargs_nested_from_dict(self):
@require_torch
@require_vision
def test_unstructured_kwargs(self):
# rewrite as Fuyu supports tokenizer kwargs only when image is None.
# Rewrite as Fuyu supports tokenizer kwargs only when image is None.
if "image_processor" not in self.processor_class.attributes:
self.skipTest(f"image_processor attribute not present in {self.processor_class}")
image_processor = self.get_component("image_processor")
Expand All @@ -297,7 +302,7 @@ def test_unstructured_kwargs(self):
@require_torch
@require_vision
def test_unstructured_kwargs_batched(self):
# rewrite as Fuyu supports tokenizer kwargs only when image is None.
# Rewrite as Fuyu supports tokenizer kwargs only when image is None.
if "image_processor" not in self.processor_class.attributes:
self.skipTest(f"image_processor attribute not present in {self.processor_class}")
image_processor = self.get_component("image_processor")
Expand Down
30 changes: 13 additions & 17 deletions tests/models/instructblip/test_processor_instructblip.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@ def test_model_input_names(self):
@require_torch
@require_vision
def test_image_processor_defaults_preserved_by_image_kwargs(self):
# rewrite as instructblip needs a qformer_tokenizer
# Rewrite as InstructBlip needs a qformer_tokenizer
if "image_processor" not in self.processor_class.attributes:
self.skipTest(f"image_processor attribute not present in {self.processor_class}")
image_processor = self.get_component("image_processor", size=(234, 234))
Expand All @@ -220,14 +220,14 @@ def test_image_processor_defaults_preserved_by_image_kwargs(self):

input_str = "lower newer"
image_input = self.prepare_image_inputs()

inputs = processor(text=input_str, images=image_input)

self.assertEqual(len(inputs["pixel_values"][0][0]), 234)

@require_torch
@require_vision
def test_kwargs_overrides_default_image_processor_kwargs(self):
# rewrite as instructblip needs a qformer_tokenizer
# Rewrite as InstructBlip needs a qformer_tokenizer
if "image_processor" not in self.processor_class.attributes:
self.skipTest(f"image_processor attribute not present in {self.processor_class}")
image_processor = self.get_component("image_processor", size=(234, 234))
Expand All @@ -241,14 +241,14 @@ def test_kwargs_overrides_default_image_processor_kwargs(self):

input_str = "lower newer"
image_input = self.prepare_image_inputs()

inputs = processor(text=input_str, images=image_input, size=[224, 224])

self.assertEqual(len(inputs["pixel_values"][0][0]), 224)

@require_vision
@require_torch
def test_kwargs_overrides_default_tokenizer_kwargs(self):
# rewrite as instructblip needs a qformer_tokenizer
# Rewrite as InstructBlip needs a qformer_tokenizer
if "image_processor" not in self.processor_class.attributes:
self.skipTest(f"image_processor attribute not present in {self.processor_class}")
image_processor = self.get_component("image_processor")
Expand All @@ -263,16 +263,16 @@ def test_kwargs_overrides_default_tokenizer_kwargs(self):
self.skip_processor_without_typed_kwargs(processor)
input_str = "lower newer"
image_input = self.prepare_image_inputs()

inputs = processor(
text=input_str, images=image_input, return_tensors="pt", max_length=112, padding="max_length"
)

self.assertEqual(len(inputs["input_ids"][0]), 112)

@require_torch
@require_vision
def test_structured_kwargs_nested(self):
# rewrite as instructblip needs a qformer_tokenizer
# Rewrite as InstructBlip needs a qformer_tokenizer
if "image_processor" not in self.processor_class.attributes:
self.skipTest(f"image_processor attribute not present in {self.processor_class}")
image_processor = self.get_component("image_processor")
Expand All @@ -295,18 +295,16 @@ def test_structured_kwargs_nested(self):
"images_kwargs": {"size": {"height": 214, "width": 214}},
"text_kwargs": {"padding": "max_length", "max_length": 76},
}

inputs = processor(text=input_str, images=image_input, **all_kwargs)
self.skip_processor_without_typed_kwargs(processor)

self.assertEqual(inputs["pixel_values"].shape[2], 214)

self.assertEqual(len(inputs["input_ids"][0]), 76)

@require_torch
@require_vision
def test_structured_kwargs_nested_from_dict(self):
# rewrite as instructblip needs a qformer_tokenizer
# Rewrite as InstructBlip needs a qformer_tokenizer
if "image_processor" not in self.processor_class.attributes:
self.skipTest(f"image_processor attribute not present in {self.processor_class}")

Expand All @@ -329,16 +327,15 @@ def test_structured_kwargs_nested_from_dict(self):
"images_kwargs": {"size": {"height": 214, "width": 214}},
"text_kwargs": {"padding": "max_length", "max_length": 76},
}

inputs = processor(text=input_str, images=image_input, **all_kwargs)
self.assertEqual(inputs["pixel_values"].shape[2], 214)

self.assertEqual(inputs["pixel_values"].shape[2], 214)
self.assertEqual(len(inputs["input_ids"][0]), 76)

@require_vision
@require_torch
def test_tokenizer_defaults_preserved_by_kwargs(self):
# rewrite as instructblip needs a qformer_tokenizer
# Rewrite as InstructBlip needs a qformer_tokenizer
if "image_processor" not in self.processor_class.attributes:
self.skipTest(f"image_processor attribute not present in {self.processor_class}")
image_processor = self.get_component("image_processor")
Expand All @@ -353,14 +350,14 @@ def test_tokenizer_defaults_preserved_by_kwargs(self):
self.skip_processor_without_typed_kwargs(processor)
input_str = "lower newer"
image_input = self.prepare_image_inputs()

inputs = processor(text=input_str, images=image_input, return_tensors="pt")

self.assertEqual(len(inputs["input_ids"][0]), 117)

@require_torch
@require_vision
def test_unstructured_kwargs(self):
# rewrite as instructblip needs a qformer_tokenizer
# Rewrite as InstructBlip needs a qformer_tokenizer
if "image_processor" not in self.processor_class.attributes:
self.skipTest(f"image_processor attribute not present in {self.processor_class}")
image_processor = self.get_component("image_processor")
Expand Down Expand Up @@ -391,7 +388,7 @@ def test_unstructured_kwargs(self):
@require_torch
@require_vision
def test_unstructured_kwargs_batched(self):
# rewrite as instructblip needs a qformer_tokenizer
# Rewrite as InstructBlip needs a qformer_tokenizer
if "image_processor" not in self.processor_class.attributes:
self.skipTest(f"image_processor attribute not present in {self.processor_class}")
image_processor = self.get_component("image_processor")
Expand All @@ -417,5 +414,4 @@ def test_unstructured_kwargs_batched(self):
)

self.assertEqual(inputs["pixel_values"].shape[2], 214)

self.assertEqual(len(inputs["input_ids"][0]), 11)
23 changes: 11 additions & 12 deletions tests/models/kosmos2/test_processor_kosmos2.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ class Kosmos2ProcessorTest(ProcessorTesterMixin, unittest.TestCase):
def setUp(self):
self.tmpdirname = tempfile.mkdtemp()

image_processor = CLIPImageProcessor()
image_processor = CLIPImageProcessor(do_center_crop=False)

# We have a SentencePiece fixture for testing
slow_tokenizer = XLMRobertaTokenizer(SAMPLE_VOCAB)
Expand Down Expand Up @@ -487,7 +487,7 @@ def check(texts, bboxes, expected_input_ids):
@require_vision
@require_torch
def test_kwargs_overrides_default_tokenizer_kwargs(self):
# rewrite as Kosmos-2 supports custom padding only when image is None.
# Rewrite as Kosmos-2 supports custom padding only when image is None.
if "image_processor" not in self.processor_class.attributes:
self.skipTest(f"image_processor attribute not present in {self.processor_class}")
image_processor = self.get_component("image_processor")
Expand All @@ -506,12 +506,13 @@ def test_kwargs_overrides_default_tokenizer_kwargs(self):
max_length=112,
padding="max_length",
)

self.assertEqual(len(inputs["input_ids"][0]), 112)

@require_torch
@require_vision
def test_structured_kwargs_nested(self):
# rewrite to test only image_processor kwargs
# Rewrite to test only image_processor kwargs
if "image_processor" not in self.processor_class.attributes:
self.skipTest(f"image_processor attribute not present in {self.processor_class}")
image_processor = self.get_component("image_processor")
Expand All @@ -526,7 +527,7 @@ def test_structured_kwargs_nested(self):
# Define the kwargs for each modality
all_kwargs = {
"common_kwargs": {"return_tensors": "pt"},
"images_kwargs": {"crop_size": {"height": 214, "width": 214}},
"images_kwargs": {"size": {"height": 214, "width": 214}},
}

inputs = processor(text=input_str, images=image_input, **all_kwargs)
Expand All @@ -537,7 +538,7 @@ def test_structured_kwargs_nested(self):
@require_torch
@require_vision
def test_structured_kwargs_nested_from_dict(self):
# rewrite to test only image_processor kwargs
# Rewrite to test only image_processor kwargs
if "image_processor" not in self.processor_class.attributes:
self.skipTest(f"image_processor attribute not present in {self.processor_class}")

Expand All @@ -552,7 +553,7 @@ def test_structured_kwargs_nested_from_dict(self):
# Define the kwargs for each modality
all_kwargs = {
"common_kwargs": {"return_tensors": "pt"},
"images_kwargs": {"crop_size": {"height": 214, "width": 214}},
"images_kwargs": {"size": {"height": 214, "width": 214}},
}

inputs = processor(text=input_str, images=image_input, **all_kwargs)
Expand All @@ -561,7 +562,7 @@ def test_structured_kwargs_nested_from_dict(self):
@require_vision
@require_torch
def test_tokenizer_defaults_preserved_by_kwargs(self):
# rewrite as Kosmos-2 supports custom padding only when image is None.
# Rewrite as Kosmos-2 supports custom padding only when image is None.
if "image_processor" not in self.processor_class.attributes:
self.skipTest(f"image_processor attribute not present in {self.processor_class}")
image_processor = self.get_component("image_processor")
Expand All @@ -579,7 +580,7 @@ def test_tokenizer_defaults_preserved_by_kwargs(self):
@require_torch
@require_vision
def test_unstructured_kwargs(self):
# rewrite as Kosmos-2 supports custom padding only when image is None.
# Rewrite as Kosmos-2 supports custom padding only when image is None.
if "image_processor" not in self.processor_class.attributes:
self.skipTest(f"image_processor attribute not present in {self.processor_class}")
image_processor = self.get_component("image_processor")
Expand All @@ -604,7 +605,7 @@ def test_unstructured_kwargs(self):
@require_torch
@require_vision
def test_unstructured_kwargs_batched(self):
# rewrite as Kosmos-2 supports custom padding only when image is None.
# Rewrite as Kosmos-2 supports custom padding only when image is None.
if "image_processor" not in self.processor_class.attributes:
self.skipTest(f"image_processor attribute not present in {self.processor_class}")
image_processor = self.get_component("image_processor")
Expand All @@ -620,11 +621,9 @@ def test_unstructured_kwargs_batched(self):
text=input_str,
images=image_input,
return_tensors="pt",
crop_size={"height": 214, "width": 214},
size={"height": 214, "width": 214},
padding="longest",
max_length=76,
)

# self.assertEqual(inputs["pixel_values"].shape[2], 214)

self.assertEqual(len(inputs["input_ids"][0]), 10)
Loading

0 comments on commit a72c3cd

Please sign in to comment.