Skip to content

Commit

Permalink
Fix BC tests and cleanup
Browse files Browse the repository at this point in the history
  • Loading branch information
yonigozlan committed Aug 13, 2024
1 parent 76bb138 commit a72c3cd
Show file tree
Hide file tree
Showing 7 changed files with 59 additions and 50 deletions.
9 changes: 7 additions & 2 deletions src/transformers/models/align/processing_align.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,10 +118,15 @@ def __call__(
`None`).
- **pixel_values** -- Pixel values to be fed to a model. Returned when `images` is not `None`.
"""
# check if images and text inputs are reversed for BC
if text is None and images is None:
raise ValueError("You must specify either text or images.")
if text is not None and not isinstance(text[0], str) or images is not None and isinstance(images[0], str):
# check if images and text inputs are reversed for BC
if (
text is not None
and not isinstance(text[0], str)
or images is not None
and (isinstance(images, str) or (isinstance(images, (list, tuple)) and isinstance(images[0], str)))
):
warnings.warn(
"It looks like you are passing the inputs in the wrong order. You should pass the images input first and the text input second. "
"Images and text inputs will be swapped."
Expand Down
5 changes: 2 additions & 3 deletions src/transformers/models/kosmos2/processing_kosmos2.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,14 +173,13 @@ def __call__(
if images is None and text is None:
raise ValueError("You have to specify either images or text.")

# Temporary fix for "padding_side" in init_kwargs
_ = self.tokenizer.init_kwargs.pop("padding_side", None)

output_kwargs = self._merge_kwargs(
Kosmos2ProcessorKwargs,
tokenizer_init_kwargs=self.tokenizer.init_kwargs,
**kwargs,
)
# Temporary fix for "padding_side" in init_kwargs
_ = output_kwargs["text_kwargs"].pop("padding_side", None)

bboxes = output_kwargs["images_kwargs"].pop("bboxes", None)
num_image_tokens = output_kwargs["images_kwargs"].pop("num_image_tokens", 64)
Expand Down
7 changes: 6 additions & 1 deletion src/transformers/models/llava_next/processing_llava_next.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,12 @@ def __call__(
if images is None and text is None:
raise ValueError("You have to specify at least images or text.")
# check if images and text inputs are reversed for BC
if text is not None and not isinstance(text[0], str) or images is not None and isinstance(images[0], str):
if (
text is not None
and not isinstance(text[0], str)
or images is not None
and (isinstance(images, str) or (isinstance(images, (list, tuple)) and isinstance(images[0], str)))
):
warnings.warn(
"It looks like you are passing the inputs in the wrong order. You should pass the images input first and the text input second. "
"Images and text inputs will be swapped."
Expand Down
23 changes: 14 additions & 9 deletions tests/models/fuyu/test_processing_fuyu.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,15 @@
if is_vision_available():
from PIL import Image

if is_vision_available() and is_torch_available():
from transformers import AutoProcessor, FuyuImageProcessor, FuyuProcessor

if is_torch_available():
import torch

from transformers.models.fuyu.processing_fuyu import construct_full_unpacked_stream, full_unpacked_stream_to_tensor

if is_vision_available() and is_torch_available():
from transformers import AutoProcessor, FuyuImageProcessor, FuyuProcessor


@require_torch
@require_vision
Expand Down Expand Up @@ -177,7 +178,7 @@ def test_fuyu_processing_multiple_image_sample(self):
@require_vision
@require_torch
def test_kwargs_overrides_default_tokenizer_kwargs(self):
# rewrite as Fuyu supports tokenizer kwargs only when image is None.
# Rewrite as Fuyu supports tokenizer kwargs only when image is None.
if "image_processor" not in self.processor_class.attributes:
self.skipTest(f"image_processor attribute not present in {self.processor_class}")
image_processor = self.get_component("image_processor")
Expand All @@ -194,15 +195,18 @@ def test_kwargs_overrides_default_tokenizer_kwargs(self):
)
self.assertEqual(len(inputs["input_ids"][0]), 112)

@unittest.skip("Fuyu processor does not support image_processor kwargs")
def test_image_processor_defaults_preserved_by_image_kwargs(self):
self.skipTest(reason="Fuyu processor does not support image_processor kwargs")
pass

@unittest.skip("Fuyu processor does not support image_processor kwargs")
def test_kwargs_overrides_default_image_processor_kwargs(self):
self.skipTest(reason="Fuyu processor does not support image_processor kwargs")
pass

@require_vision
@require_torch
def test_tokenizer_defaults_preserved_by_kwargs(self):
# Rewrite as Fuyu supports tokenizer kwargs only when image is None.
if "image_processor" not in self.processor_class.attributes:
self.skipTest(f"image_processor attribute not present in {self.processor_class}")
image_processor = self.get_component("image_processor")
Expand All @@ -211,6 +215,7 @@ def test_tokenizer_defaults_preserved_by_kwargs(self):
processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor)
self.skip_processor_without_typed_kwargs(processor)
input_str = "lower newer"
# Fuyu uses tokenizer kwargs only when image is None.
image_input = None

inputs = processor(text=input_str, images=image_input, return_tensors="pt")
Expand All @@ -219,7 +224,7 @@ def test_tokenizer_defaults_preserved_by_kwargs(self):
@require_torch
@require_vision
def test_structured_kwargs_nested(self):
# rewrite as Fuyu image processor does not return pixel values
# Rewrite as Fuyu image processor does not return pixel values
if "image_processor" not in self.processor_class.attributes:
self.skipTest(f"image_processor attribute not present in {self.processor_class}")
image_processor = self.get_component("image_processor")
Expand All @@ -246,7 +251,7 @@ def test_structured_kwargs_nested(self):
@require_torch
@require_vision
def test_structured_kwargs_nested_from_dict(self):
# rewrite as Fuyu image processor does not return pixel values
# Rewrite as Fuyu image processor does not return pixel values
if "image_processor" not in self.processor_class.attributes:
self.skipTest(f"image_processor attribute not present in {self.processor_class}")

Expand All @@ -272,7 +277,7 @@ def test_structured_kwargs_nested_from_dict(self):
@require_torch
@require_vision
def test_unstructured_kwargs(self):
# rewrite as Fuyu supports tokenizer kwargs only when image is None.
# Rewrite as Fuyu supports tokenizer kwargs only when image is None.
if "image_processor" not in self.processor_class.attributes:
self.skipTest(f"image_processor attribute not present in {self.processor_class}")
image_processor = self.get_component("image_processor")
Expand All @@ -297,7 +302,7 @@ def test_unstructured_kwargs(self):
@require_torch
@require_vision
def test_unstructured_kwargs_batched(self):
# rewrite as Fuyu supports tokenizer kwargs only when image is None.
# Rewrite as Fuyu supports tokenizer kwargs only when image is None.
if "image_processor" not in self.processor_class.attributes:
self.skipTest(f"image_processor attribute not present in {self.processor_class}")
image_processor = self.get_component("image_processor")
Expand Down
30 changes: 13 additions & 17 deletions tests/models/instructblip/test_processor_instructblip.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@ def test_model_input_names(self):
@require_torch
@require_vision
def test_image_processor_defaults_preserved_by_image_kwargs(self):
# rewrite as instructblip needs a qformer_tokenizer
# Rewrite as InstructBlip needs a qformer_tokenizer
if "image_processor" not in self.processor_class.attributes:
self.skipTest(f"image_processor attribute not present in {self.processor_class}")
image_processor = self.get_component("image_processor", size=(234, 234))
Expand All @@ -220,14 +220,14 @@ def test_image_processor_defaults_preserved_by_image_kwargs(self):

input_str = "lower newer"
image_input = self.prepare_image_inputs()

inputs = processor(text=input_str, images=image_input)

self.assertEqual(len(inputs["pixel_values"][0][0]), 234)

@require_torch
@require_vision
def test_kwargs_overrides_default_image_processor_kwargs(self):
# rewrite as instructblip needs a qformer_tokenizer
# Rewrite as InstructBlip needs a qformer_tokenizer
if "image_processor" not in self.processor_class.attributes:
self.skipTest(f"image_processor attribute not present in {self.processor_class}")
image_processor = self.get_component("image_processor", size=(234, 234))
Expand All @@ -241,14 +241,14 @@ def test_kwargs_overrides_default_image_processor_kwargs(self):

input_str = "lower newer"
image_input = self.prepare_image_inputs()

inputs = processor(text=input_str, images=image_input, size=[224, 224])

self.assertEqual(len(inputs["pixel_values"][0][0]), 224)

@require_vision
@require_torch
def test_kwargs_overrides_default_tokenizer_kwargs(self):
# rewrite as instructblip needs a qformer_tokenizer
# Rewrite as InstructBlip needs a qformer_tokenizer
if "image_processor" not in self.processor_class.attributes:
self.skipTest(f"image_processor attribute not present in {self.processor_class}")
image_processor = self.get_component("image_processor")
Expand All @@ -263,16 +263,16 @@ def test_kwargs_overrides_default_tokenizer_kwargs(self):
self.skip_processor_without_typed_kwargs(processor)
input_str = "lower newer"
image_input = self.prepare_image_inputs()

inputs = processor(
text=input_str, images=image_input, return_tensors="pt", max_length=112, padding="max_length"
)

self.assertEqual(len(inputs["input_ids"][0]), 112)

@require_torch
@require_vision
def test_structured_kwargs_nested(self):
# rewrite as instructblip needs a qformer_tokenizer
# Rewrite as InstructBlip needs a qformer_tokenizer
if "image_processor" not in self.processor_class.attributes:
self.skipTest(f"image_processor attribute not present in {self.processor_class}")
image_processor = self.get_component("image_processor")
Expand All @@ -295,18 +295,16 @@ def test_structured_kwargs_nested(self):
"images_kwargs": {"size": {"height": 214, "width": 214}},
"text_kwargs": {"padding": "max_length", "max_length": 76},
}

inputs = processor(text=input_str, images=image_input, **all_kwargs)
self.skip_processor_without_typed_kwargs(processor)

self.assertEqual(inputs["pixel_values"].shape[2], 214)

self.assertEqual(len(inputs["input_ids"][0]), 76)

@require_torch
@require_vision
def test_structured_kwargs_nested_from_dict(self):
# rewrite as instructblip needs a qformer_tokenizer
# Rewrite as InstructBlip needs a qformer_tokenizer
if "image_processor" not in self.processor_class.attributes:
self.skipTest(f"image_processor attribute not present in {self.processor_class}")

Expand All @@ -329,16 +327,15 @@ def test_structured_kwargs_nested_from_dict(self):
"images_kwargs": {"size": {"height": 214, "width": 214}},
"text_kwargs": {"padding": "max_length", "max_length": 76},
}

inputs = processor(text=input_str, images=image_input, **all_kwargs)
self.assertEqual(inputs["pixel_values"].shape[2], 214)

self.assertEqual(inputs["pixel_values"].shape[2], 214)
self.assertEqual(len(inputs["input_ids"][0]), 76)

@require_vision
@require_torch
def test_tokenizer_defaults_preserved_by_kwargs(self):
# rewrite as instructblip needs a qformer_tokenizer
# Rewrite as InstructBlip needs a qformer_tokenizer
if "image_processor" not in self.processor_class.attributes:
self.skipTest(f"image_processor attribute not present in {self.processor_class}")
image_processor = self.get_component("image_processor")
Expand All @@ -353,14 +350,14 @@ def test_tokenizer_defaults_preserved_by_kwargs(self):
self.skip_processor_without_typed_kwargs(processor)
input_str = "lower newer"
image_input = self.prepare_image_inputs()

inputs = processor(text=input_str, images=image_input, return_tensors="pt")

self.assertEqual(len(inputs["input_ids"][0]), 117)

@require_torch
@require_vision
def test_unstructured_kwargs(self):
# rewrite as instructblip needs a qformer_tokenizer
# Rewrite as InstructBlip needs a qformer_tokenizer
if "image_processor" not in self.processor_class.attributes:
self.skipTest(f"image_processor attribute not present in {self.processor_class}")
image_processor = self.get_component("image_processor")
Expand Down Expand Up @@ -391,7 +388,7 @@ def test_unstructured_kwargs(self):
@require_torch
@require_vision
def test_unstructured_kwargs_batched(self):
# rewrite as instructblip needs a qformer_tokenizer
# Rewrite as InstructBlip needs a qformer_tokenizer
if "image_processor" not in self.processor_class.attributes:
self.skipTest(f"image_processor attribute not present in {self.processor_class}")
image_processor = self.get_component("image_processor")
Expand All @@ -417,5 +414,4 @@ def test_unstructured_kwargs_batched(self):
)

self.assertEqual(inputs["pixel_values"].shape[2], 214)

self.assertEqual(len(inputs["input_ids"][0]), 11)
23 changes: 11 additions & 12 deletions tests/models/kosmos2/test_processor_kosmos2.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ class Kosmos2ProcessorTest(ProcessorTesterMixin, unittest.TestCase):
def setUp(self):
self.tmpdirname = tempfile.mkdtemp()

image_processor = CLIPImageProcessor()
image_processor = CLIPImageProcessor(do_center_crop=False)

# We have a SentencePiece fixture for testing
slow_tokenizer = XLMRobertaTokenizer(SAMPLE_VOCAB)
Expand Down Expand Up @@ -487,7 +487,7 @@ def check(texts, bboxes, expected_input_ids):
@require_vision
@require_torch
def test_kwargs_overrides_default_tokenizer_kwargs(self):
# rewrite as Kosmos-2 supports custom padding only when image is None.
# Rewrite as Kosmos-2 supports custom padding only when image is None.
if "image_processor" not in self.processor_class.attributes:
self.skipTest(f"image_processor attribute not present in {self.processor_class}")
image_processor = self.get_component("image_processor")
Expand All @@ -506,12 +506,13 @@ def test_kwargs_overrides_default_tokenizer_kwargs(self):
max_length=112,
padding="max_length",
)

self.assertEqual(len(inputs["input_ids"][0]), 112)

@require_torch
@require_vision
def test_structured_kwargs_nested(self):
# rewrite to test only image_processor kwargs
# Rewrite to test only image_processor kwargs
if "image_processor" not in self.processor_class.attributes:
self.skipTest(f"image_processor attribute not present in {self.processor_class}")
image_processor = self.get_component("image_processor")
Expand All @@ -526,7 +527,7 @@ def test_structured_kwargs_nested(self):
# Define the kwargs for each modality
all_kwargs = {
"common_kwargs": {"return_tensors": "pt"},
"images_kwargs": {"crop_size": {"height": 214, "width": 214}},
"images_kwargs": {"size": {"height": 214, "width": 214}},
}

inputs = processor(text=input_str, images=image_input, **all_kwargs)
Expand All @@ -537,7 +538,7 @@ def test_structured_kwargs_nested(self):
@require_torch
@require_vision
def test_structured_kwargs_nested_from_dict(self):
# rewrite to test only image_processor kwargs
# Rewrite to test only image_processor kwargs
if "image_processor" not in self.processor_class.attributes:
self.skipTest(f"image_processor attribute not present in {self.processor_class}")

Expand All @@ -552,7 +553,7 @@ def test_structured_kwargs_nested_from_dict(self):
# Define the kwargs for each modality
all_kwargs = {
"common_kwargs": {"return_tensors": "pt"},
"images_kwargs": {"crop_size": {"height": 214, "width": 214}},
"images_kwargs": {"size": {"height": 214, "width": 214}},
}

inputs = processor(text=input_str, images=image_input, **all_kwargs)
Expand All @@ -561,7 +562,7 @@ def test_structured_kwargs_nested_from_dict(self):
@require_vision
@require_torch
def test_tokenizer_defaults_preserved_by_kwargs(self):
# rewrite as Kosmos-2 supports custom padding only when image is None.
# Rewrite as Kosmos-2 supports custom padding only when image is None.
if "image_processor" not in self.processor_class.attributes:
self.skipTest(f"image_processor attribute not present in {self.processor_class}")
image_processor = self.get_component("image_processor")
Expand All @@ -579,7 +580,7 @@ def test_tokenizer_defaults_preserved_by_kwargs(self):
@require_torch
@require_vision
def test_unstructured_kwargs(self):
# rewrite as Kosmos-2 supports custom padding only when image is None.
# Rewrite as Kosmos-2 supports custom padding only when image is None.
if "image_processor" not in self.processor_class.attributes:
self.skipTest(f"image_processor attribute not present in {self.processor_class}")
image_processor = self.get_component("image_processor")
Expand All @@ -604,7 +605,7 @@ def test_unstructured_kwargs(self):
@require_torch
@require_vision
def test_unstructured_kwargs_batched(self):
# rewrite as Kosmos-2 supports custom padding only when image is None.
# Rewrite as Kosmos-2 supports custom padding only when image is None.
if "image_processor" not in self.processor_class.attributes:
self.skipTest(f"image_processor attribute not present in {self.processor_class}")
image_processor = self.get_component("image_processor")
Expand All @@ -620,11 +621,9 @@ def test_unstructured_kwargs_batched(self):
text=input_str,
images=image_input,
return_tensors="pt",
crop_size={"height": 214, "width": 214},
size={"height": 214, "width": 214},
padding="longest",
max_length=76,
)

# self.assertEqual(inputs["pixel_values"].shape[2], 214)

self.assertEqual(len(inputs["input_ids"][0]), 10)
Loading

0 comments on commit a72c3cd

Please sign in to comment.