From e71711d60984c5621b33c41e50eea9795bd0b2df Mon Sep 17 00:00:00 2001 From: Andres Marafioti Date: Thu, 15 Aug 2024 10:10:18 +0000 Subject: [PATCH] fixes tests --- .../models/idefics3/processing_idefics3.py | 5 ++--- tests/models/idefics3/test_modeling_idefics3.py | 4 ++-- tests/models/idefics3/test_processing_idefics3.py | 10 +++++----- 3 files changed, 9 insertions(+), 10 deletions(-) diff --git a/src/transformers/models/idefics3/processing_idefics3.py b/src/transformers/models/idefics3/processing_idefics3.py index 2472a4543601ae..eb812a7f7a1664 100644 --- a/src/transformers/models/idefics3/processing_idefics3.py +++ b/src/transformers/models/idefics3/processing_idefics3.py @@ -16,9 +16,9 @@ Processor class for Idefics3. """ +import re import sys from typing import TYPE_CHECKING, List, Optional, Union -import re from ...feature_extraction_utils import BatchFeature from ...image_utils import ImageInput, is_valid_image, load_image @@ -147,7 +147,7 @@ def __init__(self, image_processor, tokenizer=None, image_seq_len: int = 169, ch self.global_img_token = "" self.image_seq_len = image_seq_len - self._regex_to_remove_extra_special_tokens = re.compile(r'(\n?\n?|\n?)+') + self._regex_to_remove_extra_special_tokens = re.compile(r"(\n?\n?|\n?)+") tokens_to_add = { "additional_special_tokens": [ @@ -356,7 +356,6 @@ def decode(self, *args, **kwargs): decode_output = self.tokenizer.decode(*args, **kwargs) return self._regex_to_remove_extra_special_tokens.sub("", decode_output) - @property def model_input_names(self): tokenizer_input_names = self.tokenizer.model_input_names diff --git a/tests/models/idefics3/test_modeling_idefics3.py b/tests/models/idefics3/test_modeling_idefics3.py index 43c01ba2f0aa30..947434b3f12637 100644 --- a/tests/models/idefics3/test_modeling_idefics3.py +++ b/tests/models/idefics3/test_modeling_idefics3.py @@ -501,7 +501,7 @@ def test_integration_test(self): generated_texts = self.processor.batch_decode(generated_ids, skip_special_tokens=True) # Batch affects generated text. Single batch output: ['In this image, we see the Statue of Liberty in the foreground and'] - expected_generated_text = "In this image, we see the Statue of Liberty, the New York City" + expected_generated_text = "In this image, we see the Statue of Liberty, which is located on Liberty" self.assertEqual(generated_texts[0], expected_generated_text) @slow @@ -520,5 +520,5 @@ def test_integration_test_4bit(self): generated_ids = model.generate(**inputs, max_new_tokens=10) generated_texts = self.processor.batch_decode(generated_ids, skip_special_tokens=True) - expected_generated_text = "In this image, we see the Statue of Liberty, the Hudson River," + expected_generated_text = "In this image, we see the Statue of Liberty, which is located on Liberty" self.assertEqual(generated_texts[0], expected_generated_text) diff --git a/tests/models/idefics3/test_processing_idefics3.py b/tests/models/idefics3/test_processing_idefics3.py index d50d2d4eb91ef8..88b7704d5a8f75 100644 --- a/tests/models/idefics3/test_processing_idefics3.py +++ b/tests/models/idefics3/test_processing_idefics3.py @@ -67,7 +67,7 @@ def setUp(self): self.bos_token_id = processor.tokenizer.convert_tokens_to_ids(self.bos_token) self.image_token_id = processor.tokenizer.convert_tokens_to_ids(self.image_token) self.fake_image_token_id = processor.tokenizer.convert_tokens_to_ids(self.fake_image_token) - self.global_img_token_id = processor.global_img_token_id + self.global_img_tokens_id = processor.tokenizer(self.global_img_token, add_special_tokens=False)["input_ids"] self.padding_token_id = processor.tokenizer.pad_token_id self.image_seq_len = processor.image_seq_len @@ -96,7 +96,7 @@ def get_splitted_image_expected_tokens(self, processor, image_rows, image_cols): ] # add double newline, as it gets its own token text_split_images += ( [self.fake_image_token_id] - + [self.global_img_token_id] + + self.global_img_tokens_id + [self.image_token_id] * self.image_seq_len + [self.fake_image_token_id] ) @@ -124,7 +124,7 @@ def test_process_interleaved_images_prompts_no_image_splitting(self): # fmt: off tokenized_sentence = processor.tokenizer(text_str, add_special_tokens=False) - expected_input_ids = [[self.bos_token_id] + [self.fake_image_token_id] + [self.global_img_token_id] + [self.image_token_id] * self.image_seq_len + [self.fake_image_token_id] + tokenized_sentence["input_ids"]] + expected_input_ids = [[self.bos_token_id] + [self.fake_image_token_id] + self.global_img_tokens_id + [self.image_token_id] * self.image_seq_len + [self.fake_image_token_id] + tokenized_sentence["input_ids"]] self.assertEqual(inputs["input_ids"], expected_input_ids) self.assertEqual(inputs["attention_mask"], [[1] * len(expected_input_ids[0])]) self.assertEqual(inputs["pixel_values"].shape, (1, 1, 3, 1092, 1456)) @@ -147,7 +147,7 @@ def test_process_interleaved_images_prompts_no_image_splitting(self): # fmt: off tokenized_sentence_1 = processor.tokenizer(text_str_1, add_special_tokens=False) tokenized_sentence_2 = processor.tokenizer(text_str_2, add_special_tokens=False) - image_tokens = [self.fake_image_token_id] + [self.global_img_token_id] + [self.image_token_id] * self.image_seq_len + [self.fake_image_token_id] + image_tokens = [self.fake_image_token_id] + self.global_img_tokens_id + [self.image_token_id] * self.image_seq_len + [self.fake_image_token_id] expected_input_ids_1 = [self.bos_token_id] + image_tokens + tokenized_sentence_1["input_ids"] expected_input_ids_2 = [self.bos_token_id] + 2 * image_tokens + tokenized_sentence_2["input_ids"] # Pad the first input to match the second input @@ -436,7 +436,7 @@ def test_unstructured_kwargs_batched(self): self.assertEqual(inputs["pixel_values"].shape[2], 3) self.assertEqual(inputs["pixel_values"].shape[3], 214) - self.assertEqual(len(inputs["input_ids"][0]), 88) + self.assertEqual(len(inputs["input_ids"][0]), 91) # We need to overwrite this test to adapt it to our processor. @require_torch