Modify ProcessorTesterMixin for better generalization #32637

Merged
tests/models/align/test_processor_align.py (2 changes: 0 additions & 2 deletions)

@@ -66,8 +66,6 @@ def setUp(self):
         image_processor_map = {
             "do_resize": True,
             "size": 20,
-            "do_center_crop": True,
-            "crop_size": 18,
             "do_normalize": True,
             "image_mean": [0.48145466, 0.4578275, 0.40821073],
             "image_std": [0.26862954, 0.26130258, 0.27577711],
tests/test_processing_common.py (42 changes: 28 additions & 14 deletions)

@@ -126,7 +126,9 @@ def test_tokenizer_defaults_preserved_by_kwargs(self):
         if "image_processor" not in self.processor_class.attributes:
             self.skipTest(f"image_processor attribute not present in {self.processor_class}")
         image_processor = self.get_component("image_processor")
-        tokenizer = self.get_component("tokenizer", max_length=117)
+        tokenizer = self.get_component("tokenizer", max_length=117, padding="max_length")
+        if not tokenizer.pad_token:
+            tokenizer.pad_token = "[TEST_PAD]"

         processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor)
         self.skip_processor_without_typed_kwargs(processor)
@@ -141,8 +143,10 @@ def test_tokenizer_defaults_preserved_by_kwargs(self):
     def test_image_processor_defaults_preserved_by_image_kwargs(self):
         if "image_processor" not in self.processor_class.attributes:
             self.skipTest(f"image_processor attribute not present in {self.processor_class}")
-        image_processor = self.get_component("image_processor", crop_size=(234, 234))
-        tokenizer = self.get_component("tokenizer", max_length=117)
+        image_processor = self.get_component("image_processor", size=(234, 234))
+        tokenizer = self.get_component("tokenizer", max_length=117, padding="max_length")
+        if not tokenizer.pad_token:
+            tokenizer.pad_token = "[TEST_PAD]"

         processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor)
         self.skip_processor_without_typed_kwargs(processor)
@@ -159,31 +163,37 @@ def test_kwargs_overrides_default_tokenizer_kwargs(self):
         if "image_processor" not in self.processor_class.attributes:
             self.skipTest(f"image_processor attribute not present in {self.processor_class}")
         image_processor = self.get_component("image_processor")
-        tokenizer = self.get_component("tokenizer", max_length=117)
+        tokenizer = self.get_component("tokenizer", max_length=117, padding="max_length")
+        if not tokenizer.pad_token:
+            tokenizer.pad_token = "[TEST_PAD]"

         processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor)
         self.skip_processor_without_typed_kwargs(processor)
         input_str = "lower newer"
         image_input = self.prepare_image_inputs()

-        inputs = processor(text=input_str, images=image_input, return_tensors="pt", max_length=112)
+        inputs = processor(
+            text=input_str, images=image_input, return_tensors="pt", max_length=112, padding="max_length"
+        )
Comment on lines +171 to +173

Member: nit: it's a bit redundant to specify max_length both when initializing the tokenizer and when calling the processor. IMO we can remove it from the init, because we're testing the kwargs at call time.

Or we could even explicitly init with padding="longest" and test whether the call kwargs override the defaults, as the test name suggests.

Collaborator: +1 on removing it from the init.

Checking that the kwargs override the defaults is also a good idea - we should add a new test to isolate and check this behaviour.
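
A minimal sketch of what that isolated test could look like, reusing the mixin helpers visible in this diff (get_component, prepare_image_inputs, skip_processor_without_typed_kwargs); the test name and the padding="longest" init are the reviewer's suggestion, not code from this PR:

```python
@require_torch
@require_vision
def test_tokenizer_call_kwargs_override_init_defaults(self):
    # Hypothetical test per the review suggestion: init with padding="longest",
    # then verify that call-time kwargs win over the init-time defaults.
    if "image_processor" not in self.processor_class.attributes:
        self.skipTest(f"image_processor attribute not present in {self.processor_class}")
    image_processor = self.get_component("image_processor")
    tokenizer = self.get_component("tokenizer", padding="longest")
    if not tokenizer.pad_token:
        tokenizer.pad_token = "[TEST_PAD]"

    processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor)
    self.skip_processor_without_typed_kwargs(processor)

    inputs = processor(
        text="lower newer",
        images=self.prepare_image_inputs(),
        return_tensors="pt",
        padding="max_length",
        max_length=112,
    )
    # If the call kwargs override the init defaults, the text is padded to 112.
    self.assertEqual(len(inputs["input_ids"][0]), 112)
```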

         self.assertEqual(len(inputs["input_ids"][0]), 112)

     @require_torch
     @require_vision
     def test_kwargs_overrides_default_image_processor_kwargs(self):
         if "image_processor" not in self.processor_class.attributes:
             self.skipTest(f"image_processor attribute not present in {self.processor_class}")
-        image_processor = self.get_component("image_processor", crop_size=(234, 234))
-        tokenizer = self.get_component("tokenizer", max_length=117)
+        image_processor = self.get_component("image_processor", size=(234, 234))
+        tokenizer = self.get_component("tokenizer", max_length=117, padding="max_length")
+        if not tokenizer.pad_token:
+            tokenizer.pad_token = "[TEST_PAD]"
Member: I see this recurring in all the tests - maybe we should add a pad token when saving the dummy tokenizer, so it's always loaded with a pad id.

Collaborator: +1
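
A rough sketch of the pad-token-at-save idea in a model test's setUp; the checkpoint name is a placeholder for whatever dummy tokenizer a given test file actually builds:

```python
import tempfile

from transformers import AutoTokenizer


def setUp(self):
    self.tmpdirname = tempfile.mkdtemp()
    # Placeholder tokenizer; each model test constructs its own dummy one.
    tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
    # Persist a pad token once here, so every mixin test loads the
    # tokenizer with a valid pad id instead of patching it inline.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = "[TEST_PAD]"
    tokenizer.save_pretrained(self.tmpdirname)
```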


         processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor)
         self.skip_processor_without_typed_kwargs(processor)

         input_str = "lower newer"
         image_input = self.prepare_image_inputs()

-        inputs = processor(text=input_str, images=image_input, crop_size=[224, 224])
+        inputs = processor(text=input_str, images=image_input, size=[224, 224])
         self.assertEqual(len(inputs["pixel_values"][0][0]), 224)

     @require_torch
@@ -193,6 +203,8 @@ def test_unstructured_kwargs(self):
             self.skipTest(f"image_processor attribute not present in {self.processor_class}")
         image_processor = self.get_component("image_processor")
         tokenizer = self.get_component("tokenizer")
+        if not tokenizer.pad_token:
+            tokenizer.pad_token = "[TEST_PAD]"

         processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor)
         self.skip_processor_without_typed_kwargs(processor)
@@ -203,7 +215,7 @@ def test_unstructured_kwargs(self):
             text=input_str,
             images=image_input,
             return_tensors="pt",
-            crop_size={"height": 214, "width": 214},
+            size={"height": 214, "width": 214},
             padding="max_length",
             max_length=76,
         )
@@ -218,6 +230,8 @@ def test_unstructured_kwargs_batched(self):
             self.skipTest(f"image_processor attribute not present in {self.processor_class}")
         image_processor = self.get_component("image_processor")
         tokenizer = self.get_component("tokenizer")
+        if not tokenizer.pad_token:
+            tokenizer.pad_token = "[TEST_PAD]"

         processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor)
         self.skip_processor_without_typed_kwargs(processor)
@@ -228,7 +242,7 @@ def test_unstructured_kwargs_batched(self):
             text=input_str,
             images=image_input,
             return_tensors="pt",
-            crop_size={"height": 214, "width": 214},
+            size={"height": 214, "width": 214},
             padding="longest",
             max_length=76,
         )
@@ -254,8 +268,8 @@ def test_doubly_passed_kwargs(self):
         _ = processor(
             text=input_str,
             images=image_input,
-            images_kwargs={"crop_size": {"height": 222, "width": 222}},
-            crop_size={"height": 214, "width": 214},
+            images_kwargs={"size": {"height": 222, "width": 222}},
+            size={"height": 214, "width": 214},
         )

     @require_torch
@@ -275,7 +289,7 @@ def test_structured_kwargs_nested(self):
         # Define the kwargs for each modality
         all_kwargs = {
             "common_kwargs": {"return_tensors": "pt"},
-            "images_kwargs": {"crop_size": {"height": 214, "width": 214}},
+            "images_kwargs": {"size": {"height": 214, "width": 214}},
             "text_kwargs": {"padding": "max_length", "max_length": 76},
         }

@@ -303,7 +317,7 @@ def test_structured_kwargs_nested_from_dict(self):
         # Define the kwargs for each modality
         all_kwargs = {
             "common_kwargs": {"return_tensors": "pt"},
-            "images_kwargs": {"crop_size": {"height": 214, "width": 214}},
+            "images_kwargs": {"size": {"height": 214, "width": 214}},
             "text_kwargs": {"padding": "max_length", "max_length": 76},
         }
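
For context, this is roughly how the nested dict above is consumed in the structured-kwargs tests (inferred from the surrounding test pattern, not shown in this diff):

```python
inputs = processor(text=input_str, images=image_input, **all_kwargs)
# images_kwargs resize the image to 214x214; text_kwargs pad the text to 76 tokens.
self.assertEqual(inputs["pixel_values"].shape[-1], 214)
self.assertEqual(len(inputs["input_ids"][0]), 76)
```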
