Commit 39d88b2

fixes peft tests and ruff

andimarafioti committed Sep 20, 2024
1 parent f1ae8ae

Showing 4 changed files with 28 additions and 26 deletions.
src/transformers/models/idefics3/image_processing_idefics3.py (16 changes: 3 additions & 13 deletions)
@@ -18,8 +18,6 @@

 import numpy as np

-from transformers.utils.import_utils import is_flax_available, is_tf_available, is_torch_available
-
 from ...image_processing_utils import BaseImageProcessor, BatchFeature
 from ...image_transforms import PaddingMode, pad, to_channel_dimension_format
 from ...image_utils import (
@@ -39,15 +37,6 @@
 from ...utils import TensorType, is_vision_available, logging


-if is_torch_available():
-    pass
-
-if is_tf_available():
-    pass
-
-if is_flax_available():
-    pass
-
 logger = logging.get_logger(__name__)


@@ -235,7 +224,7 @@ def make_pixel_mask(

 # Custom to_pil_image function to support image_mode
 def to_pil_image(
-    image: Union[np.ndarray, "PIL.Image.Image", "torch.Tensor", "tf.Tensor", "jnp.ndarray"],
+    image: Union[np.ndarray, "PIL.Image.Image", TensorType],
     image_mode: Optional[str] = None,
 ) -> "PIL.Image.Image":
     """
@@ -266,6 +255,7 @@ def to_pil_image(
     image = image.astype(np.uint8)
     return PIL.Image.fromarray(image, mode=image_mode)

+
 def convert_to_rgb(
     image: np.ndarray,
     palette: Optional[PIL.ImagePalette.ImagePalette] = None,
@@ -932,4 +922,4 @@ def preprocess(
     encoding["rows"] = images_list_rows
     encoding["cols"] = images_list_cols

-    return encoding
+    return encoding
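A note on the to_pil_image hunks: the signature now advertises the TensorType hint from ...utils instead of naming every framework's tensor class, which is what made the torch/tf/flax availability guards above dead code (presumably the ruff half of the commit message). The image_mode argument exists because palettised arrays must be rebuilt with mode "P"; left to guess, PIL treats a 2-D uint8 array as grayscale. A minimal sketch of that difference (the array is made up for illustration):

    import numpy as np
    import PIL.Image

    # A 2-D uint8 array of palette indices. With no explicit mode, PIL infers
    # "L" (grayscale); mode="P" keeps the values as palette lookups, which is
    # what to_pil_image(image, image_mode="P") does via PIL.Image.fromarray.
    indices = np.zeros((32, 32), dtype=np.uint8)
    as_gray = PIL.Image.fromarray(indices)               # mode inferred: "L"
    as_palette = PIL.Image.fromarray(indices, mode="P")  # palette image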
src/transformers/models/idefics3/modeling_idefics3.py (12 changes: 11 additions & 1 deletion)
@@ -264,7 +264,7 @@ def forward(
     return attn_output, attn_weights


-# copied from transformers.models.idefics2.modeling_idefics2.Idefics2VisionFlashAttention2
+# Copied from transformers.models.idefics2.modeling_idefics2.Idefics2VisionFlashAttention2 with Idefics2->Idefics3
 class Idefics3VisionFlashAttention2(Idefics3VisionAttention):
     """
     Idefics3Vision flash attention module. This module inherits from `Idefics3VisionAttention` as the weights of the module stays
@@ -865,6 +865,11 @@ def make_inputs_require_grads(module, input, output):
             make_inputs_require_grads
         )

+    # Copied from transformers.models.idefics2.modeling_idefics2.Idefics2Model.disable_input_require_grads
+    def disable_input_require_grads(self):
+        self._text_require_grads_hook.remove()
+        self._vision_require_grads_hook.remove()
+
     # Copied from transformers.models.idefics2.modeling_idefics2.Idefics2Model.get_input_embeddings
     def get_input_embeddings(self):
         return self.text_model.get_input_embeddings()
@@ -1065,6 +1070,11 @@ def make_inputs_require_grads(module, input, output):
             make_inputs_require_grads
         )

+    # Copied from transformers.models.idefics2.modeling_idefics2.Idefics2ForConditionalGeneration.disable_input_require_grads
+    def disable_input_require_grads(self):
+        self._text_require_grads_hook.remove()
+        self._vision_require_grads_hook.remove()
+
     # Copied from transformers.models.idefics2.modeling_idefics2.Idefics2ForConditionalGeneration.get_input_embeddings
     def get_input_embeddings(self):
         return self.model.text_model.get_input_embeddings()
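These disable_input_require_grads overrides look like the peft half of the fix: enable_input_require_grads on these classes registers two forward hooks, stored as _text_require_grads_hook and _vision_require_grads_hook, whereas the base PreTrainedModel.disable_input_require_grads removes a single _require_grads_hook that Idefics3 never sets, so calling it would fail. A rough usage sketch, with the checkpoint name assumed for illustration:

    from transformers import AutoModelForVision2Seq

    # Checkpoint id is illustrative; any Idefics3 checkpoint behaves the same.
    model = AutoModelForVision2Seq.from_pretrained("HuggingFaceM4/Idefics3-8B-Llama3")

    # Typical PEFT setup: base weights are frozen, so gradient checkpointing
    # needs the embedding *outputs* to require grads. The enable call registers
    # hooks on both the text and vision towers; the overrides added in this
    # commit remove both hooks again instead of the base class's single one.
    model.enable_input_require_grads()
    # ... attach adapters and train ...
    model.disable_input_require_grads()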
tests/models/idefics3/test_image_processing_idefics3.py (4 changes: 3 additions & 1 deletion)
@@ -103,7 +103,9 @@ def get_expected_values(self, image_inputs, batched=False):

     def expected_output_image_shape(self, images):
         height, width = self.get_expected_values(images, batched=True)
-        effective_nb_images = self.num_images * 5 if self.do_image_splitting else 1
+        effective_nb_images = (
+            self.num_images * 5 if self.do_image_splitting else 1
+        )  # 5 is a squared image divided into 4 + global image resized
         return effective_nb_images, self.num_channels, height, width

     def prepare_image_inputs(
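The parenthesised rewrite is a line-length cleanup; the new trailing comment carries the substance: with do_image_splitting enabled, a square test image is cut into a 2 by 2 grid of crops and a resized copy of the full image is appended. The arithmetic behind the 5, spelled out with the grid size taken from that comment:

    # Patches produced per image when splitting is on, per the comment above:
    rows, cols = 2, 2        # a square image at the test resolution splits 2x2
    global_image = 1         # plus the resized copy of the whole image
    patches_per_image = rows * cols + global_image  # == 5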
tests/models/idefics3/test_processing_idefics3.py (22 changes: 11 additions & 11 deletions)
@@ -81,7 +81,7 @@ def get_image_processor(self, **kwargs):
     def get_processor(self, **kwargs):
         return AutoProcessor.from_pretrained(self.tmpdirname, **kwargs)

-    def get_splitted_image_expected_tokens(self, processor, image_rows, image_cols):
+    def get_split_image_expected_tokens(self, processor, image_rows, image_cols):
         text_split_images = []
         for n_h in range(image_rows):
             for n_w in range(image_cols):
@@ -185,8 +185,8 @@ def test_process_interleaved_images_prompts_image_splitting(self):

         # fmt: off
         tokenized_sentence = processor.tokenizer(text_str, add_special_tokens=False)
-        splitted_image1_tokens = self.get_splitted_image_expected_tokens(processor, 3, 4)
-        expected_input_ids_1 = [[self.bos_token_id] + splitted_image1_tokens + tokenized_sentence["input_ids"]]
+        split_image1_tokens = self.get_split_image_expected_tokens(processor, 3, 4)
+        expected_input_ids_1 = [[self.bos_token_id] + split_image1_tokens + tokenized_sentence["input_ids"]]
         self.assertEqual(inputs["input_ids"], expected_input_ids_1)
         self.assertEqual(inputs["attention_mask"], [[1] * len(expected_input_ids_1[0])])
         self.assertEqual(np.array(inputs["pixel_values"]).shape, (1, 13, 3, 364, 364))
@@ -210,11 +210,11 @@ def test_process_interleaved_images_prompts_image_splitting(self):
         tokenized_sentence_1 = processor.tokenizer(text_str_1, add_special_tokens=False)
         tokenized_sentence_2 = processor.tokenizer(text_str_2, add_special_tokens=False)

-        splitted_image1_tokens = self.get_splitted_image_expected_tokens(processor, 3, 4)
-        splitted_image2_tokens = self.get_splitted_image_expected_tokens(processor, 4, 4)
-        splitted_image3_tokens = self.get_splitted_image_expected_tokens(processor, 3, 4)
-        expected_input_ids_1 = [self.bos_token_id] + splitted_image1_tokens + tokenized_sentence_1["input_ids"]
-        expected_input_ids_2 = [self.bos_token_id] + tokenized_sentence_2["input_ids"] + splitted_image2_tokens + splitted_image3_tokens
+        split_image1_tokens = self.get_split_image_expected_tokens(processor, 3, 4)
+        split_image2_tokens = self.get_split_image_expected_tokens(processor, 4, 4)
+        split_image3_tokens = self.get_split_image_expected_tokens(processor, 3, 4)
+        expected_input_ids_1 = [self.bos_token_id] + split_image1_tokens + tokenized_sentence_1["input_ids"]
+        expected_input_ids_2 = [self.bos_token_id] + tokenized_sentence_2["input_ids"] + split_image2_tokens + split_image3_tokens
         # Pad the first input to match the second input
         pad_len = len(expected_input_ids_2) - len(expected_input_ids_1)
         padded_expected_input_ids_1 = [self.padding_token_id] * pad_len + expected_input_ids_1
@@ -240,12 +240,12 @@ def test_add_special_tokens_processor(self):
         # fmt: off
         inputs = processor(text=text, images=self.image1, add_special_tokens=False)
         tokenized_sentence = processor.tokenizer(text_str, add_special_tokens=False)
-        splitted_image1_tokens = self.get_splitted_image_expected_tokens(processor, 3, 4)
-        expected_input_ids = [tokenized_sentence["input_ids"] + splitted_image1_tokens]
+        split_image1_tokens = self.get_split_image_expected_tokens(processor, 3, 4)
+        expected_input_ids = [tokenized_sentence["input_ids"] + split_image1_tokens]
         self.assertEqual(inputs["input_ids"], expected_input_ids)

         inputs = processor(text=text, images=self.image1)
-        expected_input_ids = [[self.bos_token_id] + tokenized_sentence["input_ids"] + splitted_image1_tokens]
+        expected_input_ids = [[self.bos_token_id] + tokenized_sentence["input_ids"] + split_image1_tokens]
         self.assertEqual(inputs["input_ids"], expected_input_ids)
         # fmt: on
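Beyond the splitted-to-split rename, the pixel_values assertion earlier in this file follows the same rule as the token layouts these helpers build: an image split into image_rows by image_cols crops contributes rows * cols local patches plus one global image. For image1, split 3 by 4, that yields the 13 in the asserted shape:

    # Patch count behind the asserted shape (1, 13, 3, 364, 364):
    image_rows, image_cols = 3, 4              # grid used for image1 in the tests
    num_patches = image_rows * image_cols + 1  # 12 crops + 1 global image == 13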
