From 7d93bb5b4bacf8bc5e66233a50d837ca953f8c71 Mon Sep 17 00:00:00 2001 From: Niels Date: Wed, 28 Aug 2024 18:28:30 +0200 Subject: [PATCH] First draft --- .../blip_2/convert_blip_2_original_to_pytorch.py | 2 ++ .../pipelines/zero_shot_image_classification.py | 12 ++---------- .../test_pipelines_zero_shot_image_classification.py | 2 -- 3 files changed, 4 insertions(+), 12 deletions(-) diff --git a/src/transformers/models/blip_2/convert_blip_2_original_to_pytorch.py b/src/transformers/models/blip_2/convert_blip_2_original_to_pytorch.py index 5f972353c4f41e..f571be20e13577 100644 --- a/src/transformers/models/blip_2/convert_blip_2_original_to_pytorch.py +++ b/src/transformers/models/blip_2/convert_blip_2_original_to_pytorch.py @@ -155,6 +155,8 @@ def convert_blip2_checkpoint( else: tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-xl") + tokenizer.model_input_names = ["input_ids", "attention_mask"] + if "itm" in model_name: eos_token_id = None else: diff --git a/src/transformers/pipelines/zero_shot_image_classification.py b/src/transformers/pipelines/zero_shot_image_classification.py index b7e13e782e78cc..210ef4a7e67827 100644 --- a/src/transformers/pipelines/zero_shot_image_classification.py +++ b/src/transformers/pipelines/zero_shot_image_classification.py @@ -97,9 +97,6 @@ def __call__(self, images: Union[str, List[str], "Image", List["Image"]], **kwar The maximum time in seconds to wait for fetching images from the web. If None, no timeout is set and the call may block forever. - tokenizer_kwargs (`dict`, *optional*): - Additional dictionary of keyword arguments passed along to the tokenizer. - Return: A list of dictionaries containing one entry per proposed label. Each dictionary contains the following keys: @@ -109,7 +106,7 @@ def __call__(self, images: Union[str, List[str], "Image", List["Image"]], **kwar """ return super().__call__(images, **kwargs) - def _sanitize_parameters(self, tokenizer_kwargs=None, **kwargs): + def _sanitize_parameters(self, **kwargs): preprocess_params = {} if "candidate_labels" in kwargs: preprocess_params["candidate_labels"] = kwargs["candidate_labels"] @@ -117,8 +114,6 @@ def _sanitize_parameters(self, tokenizer_kwargs=None, **kwargs): preprocess_params["timeout"] = kwargs["timeout"] if "hypothesis_template" in kwargs: preprocess_params["hypothesis_template"] = kwargs["hypothesis_template"] - if tokenizer_kwargs is not None: - preprocess_params["tokenizer_kwargs"] = tokenizer_kwargs return preprocess_params, {}, {} @@ -128,10 +123,7 @@ def preprocess( candidate_labels=None, hypothesis_template="This is a photo of {}.", timeout=None, - tokenizer_kwargs=None, ): - if tokenizer_kwargs is None: - tokenizer_kwargs = {} image = load_image(image, timeout=timeout) inputs = self.image_processor(images=[image], return_tensors=self.framework) if self.framework == "pt": @@ -139,7 +131,7 @@ def preprocess( inputs["candidate_labels"] = candidate_labels sequences = [hypothesis_template.format(x) for x in candidate_labels] padding = "max_length" if self.model.config.model_type == "siglip" else True - text_inputs = self.tokenizer(sequences, return_tensors=self.framework, padding=padding, **tokenizer_kwargs) + text_inputs = self.tokenizer(sequences, return_tensors=self.framework, padding=padding) inputs["text_inputs"] = [text_inputs] return inputs diff --git a/tests/pipelines/test_pipelines_zero_shot_image_classification.py b/tests/pipelines/test_pipelines_zero_shot_image_classification.py index b57adf609d1e09..660d089c4b8180 100644 --- a/tests/pipelines/test_pipelines_zero_shot_image_classification.py +++ b/tests/pipelines/test_pipelines_zero_shot_image_classification.py @@ -292,7 +292,6 @@ def test_blip2_model_pt(self): output = image_classifier( image, candidate_labels=["2 cats", "a plane", "a remote"], - tokenizer_kwargs={"return_token_type_ids": False}, ) self.assertEqual( @@ -308,7 +307,6 @@ def test_blip2_model_pt(self): [image] * 5, candidate_labels=["2 cats", "a plane", "a remote"], batch_size=2, - tokenizer_kwargs={"return_token_type_ids": False}, ) self.assertEqual(