diff --git a/src/transformers/models/llava_onevision/processing_llava_onevision.py b/src/transformers/models/llava_onevision/processing_llava_onevision.py index e050ec3f31deea..87e716b5855fe9 100644 --- a/src/transformers/models/llava_onevision/processing_llava_onevision.py +++ b/src/transformers/models/llava_onevision/processing_llava_onevision.py @@ -51,11 +51,11 @@ class LlavaOnevisionProcessor(ProcessorMixin): r""" Constructs a LLaVa-Onevision processor which wraps a LLaVa-Onevision video processor, LLaVa-NeXT image processor and a LLaMa tokenizer into a single processor. - [`LlavaNextProcessor`] offers all the functionalities of [`LlavaOnevisionVideoProcessor`], [`LlavaNextImageProcessor`] and [`LlamaTokenizerFast`]. See the + [`LlavaNextProcessor`] offers all the functionalities of [`LlavaOnevisionVideoProcessor`], [`LlavaOnevisionImageProcessor`] and [`LlamaTokenizerFast`]. See the [`~LlavaOnevisionVideoProcessor.__call__`], [`~LlavaNextProcessor.__call__`] and [`~LlavaNextProcessor.decode`] for more information. Args: - image_processor ([`LlavaNextImageProcessor`], *optional*): + image_processor ([`LlavaOnevisionImageProcessor`], *optional*): The image processor is a required input. tokenizer ([`LlamaTokenizerFast`], *optional*): The tokenizer is a required input. @@ -82,7 +82,7 @@ class LlavaOnevisionProcessor(ProcessorMixin): "image_token", "video_token", ] - image_processor_class = "AutoImageProcessor" + image_processor_class = "LlavaOnevisionImageProcessor" tokenizer_class = "AutoTokenizer" video_processor_class = "LlavaOnevisionVideoProcessor" @@ -96,7 +96,6 @@ def __init__( chat_template=None, image_token="", video_token="