Skip to content

Commit

Permalink
Refactor _validate_images_text_input_order
Browse files: browse the repository at this point in the history
  • Loading branch information
yonigozlan committed Sep 13, 2024
1 parent 04918e7 commit 3d8ec3d
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 5 deletions.
13 changes: 10 additions & 3 deletions src/transformers/models/llava/processing_llava.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@

from ...feature_extraction_utils import BatchFeature
from ...image_utils import ImageInput, get_image_size, to_numpy_array
from ...processing_utils import ProcessingKwargs, ProcessorMixin, _check_reversed_images_text_for_vlms
from ...processing_utils import ProcessingKwargs, ProcessorMixin, _validate_images_text_input_order
from ...tokenization_utils_base import PreTokenizedInput, TextInput
from ...utils import logging

Expand Down Expand Up @@ -109,6 +109,12 @@ def __call__(
The sequence or batch of sequences to be encoded. Each sequence can be a string or a list of strings
(pretokenized string). If the sequences are provided as list of strings (pretokenized), you must set
`is_split_into_words=True` (to lift the ambiguity with a batch of sequences).
return_tensors (`str` or [`~utils.TensorType`], *optional*):
If set, will return tensors of a particular framework. Acceptable values are:
- `'tf'`: Return TensorFlow `tf.constant` objects.
- `'pt'`: Return PyTorch `torch.Tensor` objects.
- `'np'`: Return NumPy `np.ndarray` objects.
- `'jax'`: Return JAX `jnp.ndarray` objects.
Returns:
[`BatchFeature`]: A [`BatchFeature`] with the following fields:
Expand All @@ -120,10 +126,11 @@ def __call__(
- **pixel_values** -- Pixel values to be fed to a model. Returned when `images` is not `None`.
"""
if images is None and text is None:
raise ValueError("You have to specify at least images or text.")
raise ValueError("You have to specify at least one of `images` or `text`.")

# check if images and text inputs are reversed, for backward compatibility (BC)
images, text = _check_reversed_images_text_for_vlms(images, text)
text, images = images, text
images, text = _validate_images_text_input_order(images, text)

output_kwargs = self._merge_kwargs(
LlavaProcessorKwargs,
Expand Down
5 changes: 3 additions & 2 deletions tests/models/llava/test_processor_llava.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,15 @@
import tempfile
import unittest

from transformers import AutoProcessor, AutoTokenizer, LlamaTokenizerFast, LlavaProcessor
from transformers.testing_utils import require_torch, require_vision
from transformers.utils import is_vision_available

from ...test_processing_common import ProcessorTesterMixin


if is_vision_available():
from transformers import AutoProcessor, AutoTokenizer, CLIPImageProcessor, LlamaTokenizerFast, LlavaProcessor
from transformers import CLIPImageProcessor


@require_vision
Expand All @@ -34,7 +35,7 @@ def setUp(self):
image_processor = CLIPImageProcessor(do_center_crop=False)
tokenizer = LlamaTokenizerFast.from_pretrained("huggyllama/llama-7b")

processor = LlavaProcessor(image_processor, tokenizer)
processor = LlavaProcessor(image_processor=image_processor, tokenizer=tokenizer)

processor.save_pretrained(self.tmpdirname)

Expand Down

0 comments on commit 3d8ec3d

Please sign in to comment.