Skip to content

Commit

Permalink
fixes tests
Browse files — Browse the repository at this point in the history
  • Loading branch information
andimarafioti committed Aug 15, 2024
1 parent 1f11f95 commit f67ed1e
Show file tree
Hide file tree
Showing 3 changed files with 9 additions and 10 deletions.
5 changes: 2 additions & 3 deletions src/transformers/models/idefics3/processing_idefics3.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,9 @@
Processor class for Idefics3.
"""

import re
import sys
from typing import TYPE_CHECKING, List, Optional, Union
import re

from ...feature_extraction_utils import BatchFeature
from ...image_utils import ImageInput, is_valid_image, load_image
Expand Down Expand Up @@ -147,7 +147,7 @@ def __init__(self, image_processor, tokenizer=None, image_seq_len: int = 169, ch
self.global_img_token = "<global-img>"
self.image_seq_len = image_seq_len

self._regex_to_remove_extra_special_tokens = re.compile(r'(\n?<global-img>\n?|<row_\d+_col_\d+>\n?)+')
self._regex_to_remove_extra_special_tokens = re.compile(r"(\n?<global-img>\n?|<row_\d+_col_\d+>\n?)+")

tokens_to_add = {
"additional_special_tokens": [
Expand Down Expand Up @@ -356,7 +356,6 @@ def decode(self, *args, **kwargs):
decode_output = self.tokenizer.decode(*args, **kwargs)
return self._regex_to_remove_extra_special_tokens.sub("<image>", decode_output)


@property
def model_input_names(self):
tokenizer_input_names = self.tokenizer.model_input_names
Expand Down
4 changes: 2 additions & 2 deletions tests/models/idefics3/test_modeling_idefics3.py
Original file line number Diff line number Diff line change
Expand Up @@ -501,7 +501,7 @@ def test_integration_test(self):
generated_texts = self.processor.batch_decode(generated_ids, skip_special_tokens=True)

# Batch affects generated text. Single batch output: ['In this image, we see the Statue of Liberty in the foreground and']
expected_generated_text = "In this image, we see the Statue of Liberty, the New York City"
expected_generated_text = "<image>In this image, we see the Statue of Liberty, which is located on Liberty"
self.assertEqual(generated_texts[0], expected_generated_text)

@slow
Expand All @@ -520,5 +520,5 @@ def test_integration_test_4bit(self):
generated_ids = model.generate(**inputs, max_new_tokens=10)
generated_texts = self.processor.batch_decode(generated_ids, skip_special_tokens=True)

expected_generated_text = "In this image, we see the Statue of Liberty, the Hudson River,"
expected_generated_text = "<image>In this image, we see the Statue of Liberty, which is located on Liberty"
self.assertEqual(generated_texts[0], expected_generated_text)
10 changes: 5 additions & 5 deletions tests/models/idefics3/test_processing_idefics3.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def setUp(self):
self.bos_token_id = processor.tokenizer.convert_tokens_to_ids(self.bos_token)
self.image_token_id = processor.tokenizer.convert_tokens_to_ids(self.image_token)
self.fake_image_token_id = processor.tokenizer.convert_tokens_to_ids(self.fake_image_token)
self.global_img_token_id = processor.global_img_token_id
self.global_img_tokens_id = processor.tokenizer(self.global_img_token, add_special_tokens=False)["input_ids"]
self.padding_token_id = processor.tokenizer.pad_token_id
self.image_seq_len = processor.image_seq_len

Expand Down Expand Up @@ -96,7 +96,7 @@ def get_splitted_image_expected_tokens(self, processor, image_rows, image_cols):
] # add double newline, as it gets its own token
text_split_images += (
[self.fake_image_token_id]
+ [self.global_img_token_id]
+ self.global_img_tokens_id
+ [self.image_token_id] * self.image_seq_len
+ [self.fake_image_token_id]
)
Expand Down Expand Up @@ -124,7 +124,7 @@ def test_process_interleaved_images_prompts_no_image_splitting(self):

# fmt: off
tokenized_sentence = processor.tokenizer(text_str, add_special_tokens=False)
expected_input_ids = [[self.bos_token_id] + [self.fake_image_token_id] + [self.global_img_token_id] + [self.image_token_id] * self.image_seq_len + [self.fake_image_token_id] + tokenized_sentence["input_ids"]]
expected_input_ids = [[self.bos_token_id] + [self.fake_image_token_id] + self.global_img_tokens_id + [self.image_token_id] * self.image_seq_len + [self.fake_image_token_id] + tokenized_sentence["input_ids"]]
self.assertEqual(inputs["input_ids"], expected_input_ids)
self.assertEqual(inputs["attention_mask"], [[1] * len(expected_input_ids[0])])
self.assertEqual(inputs["pixel_values"].shape, (1, 1, 3, 1092, 1456))
Expand All @@ -147,7 +147,7 @@ def test_process_interleaved_images_prompts_no_image_splitting(self):
# fmt: off
tokenized_sentence_1 = processor.tokenizer(text_str_1, add_special_tokens=False)
tokenized_sentence_2 = processor.tokenizer(text_str_2, add_special_tokens=False)
image_tokens = [self.fake_image_token_id] + [self.global_img_token_id] + [self.image_token_id] * self.image_seq_len + [self.fake_image_token_id]
image_tokens = [self.fake_image_token_id] + self.global_img_tokens_id + [self.image_token_id] * self.image_seq_len + [self.fake_image_token_id]
expected_input_ids_1 = [self.bos_token_id] + image_tokens + tokenized_sentence_1["input_ids"]
expected_input_ids_2 = [self.bos_token_id] + 2 * image_tokens + tokenized_sentence_2["input_ids"]
# Pad the first input to match the second input
Expand Down Expand Up @@ -436,7 +436,7 @@ def test_unstructured_kwargs_batched(self):

self.assertEqual(inputs["pixel_values"].shape[2], 3)
self.assertEqual(inputs["pixel_values"].shape[3], 214)
self.assertEqual(len(inputs["input_ids"][0]), 88)
self.assertEqual(len(inputs["input_ids"][0]), 91)

# We need to overwrite this test to adapt it to our processor.
@require_torch
Expand Down

0 comments on commit f67ed1e

Please sign in to comment.