Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Add equivalence testing between transformers pipelines + huggingface.js tasks #33452

Closed
wants to merge 35 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
7a6090b
Initial commit
Rocketknight1 Sep 12, 2024
5561454
Move to gpt2 file so it gets tested
Rocketknight1 Sep 12, 2024
9b3d877
Stop doing the git clone
Rocketknight1 Sep 13, 2024
aa4d9c4
Cleanup the test
Rocketknight1 Sep 13, 2024
f7e5087
First pass at testing all the pipelines
Rocketknight1 Sep 13, 2024
d951519
First pass at testing all the pipelines
Rocketknight1 Sep 13, 2024
71569fd
Cleanup error display
Rocketknight1 Sep 13, 2024
80a578d
Try patching up audio-classification
Rocketknight1 Sep 13, 2024
8d2a10b
Bring image-classification pipeline into sync
Rocketknight1 Sep 17, 2024
3983fcc
Bring visual-question-answering pipeline into sync
Rocketknight1 Sep 17, 2024
298fb37
Bring object-detection pipeline into sync
Rocketknight1 Sep 17, 2024
046f436
Bring zero-shot-classification pipeline into sync
Rocketknight1 Sep 17, 2024
6c7de21
make fixup
Rocketknight1 Sep 17, 2024
c4b5c1f
only recurse one level
Rocketknight1 Sep 18, 2024
04fe7ad
Update test
Rocketknight1 Sep 18, 2024
021ae20
Bring ASR pipeline into sync
Rocketknight1 Sep 18, 2024
c6069d7
Bring text-to-audio pipeline into sync
Rocketknight1 Sep 18, 2024
bd84dc2
make fixup
Rocketknight1 Sep 18, 2024
3fd294d
Bring fill-mask pipeline into sync
Rocketknight1 Sep 18, 2024
98b5dd4
Bring image-to-text and depth-estimation (mostly) into sync
Rocketknight1 Sep 18, 2024
9ce2faa
depth-estimation, image-to-text, video-classification
Rocketknight1 Sep 18, 2024
12fcfcf
audio-classification fixup
Rocketknight1 Sep 18, 2024
6db83ab
zero-shot-object-detection
Rocketknight1 Sep 18, 2024
3d2c477
zero-shot-object-detection
Rocketknight1 Sep 18, 2024
ddeb679
zero-shot-image-classification
Rocketknight1 Sep 18, 2024
b15cf4f
zero-shot-image-classification
Rocketknight1 Sep 18, 2024
2221cef
Bring table-question-answering into sync
Rocketknight1 Sep 19, 2024
8f7050f
token-classification
Rocketknight1 Sep 19, 2024
bdfde76
Update handling of generate args in test
Rocketknight1 Sep 19, 2024
35d0592
question-answering
Rocketknight1 Sep 19, 2024
5e040e6
text2text, summarization and translation
Rocketknight1 Sep 19, 2024
a0bcb23
last patch to depth-estimation
Rocketknight1 Sep 19, 2024
123a9e6
Small pipeline test fix
Rocketknight1 Sep 19, 2024
4c3fb8f
feature-extraction (partially)
Rocketknight1 Sep 20, 2024
1f93b12
Typo in test output
Rocketknight1 Sep 20, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 22 additions & 3 deletions src/transformers/pipelines/audio_classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,11 @@ def __call__(
The number of top labels that will be returned by the pipeline. If the provided number is `None` or
higher than the number of labels available in the model configuration, it will default to the number of
labels.
function_to_apply(`str`, *optional*, defaults to "softmax"):
The function to apply to the model output. By default, the pipeline will apply the softmax function to
the output of the model. Valid options: ["softmax", "sigmoid", "none"]. Note that passing Python's
built-in `None` will default to "softmax", so you need to pass the string "none" to disable any
post-processing.

Return:
A list of `dict` with the following keys:
Expand All @@ -135,13 +140,22 @@ def __call__(
"""
return super().__call__(inputs, **kwargs)

def _sanitize_parameters(self, top_k=None, **kwargs):
def _sanitize_parameters(self, top_k=None, function_to_apply=None, **kwargs):
# No parameters on this pipeline right now
postprocess_params = {}
if top_k is not None:
if top_k > self.model.config.num_labels:
top_k = self.model.config.num_labels
postprocess_params["top_k"] = top_k
if function_to_apply is not None:
if function_to_apply not in ["softmax", "sigmoid", "none"]:
raise ValueError(
f"Invalid value for `function_to_apply`: {function_to_apply}. "
"Valid options are ['softmax', 'sigmoid', 'none']"
)
postprocess_params["function_to_apply"] = function_to_apply
else:
postprocess_params["function_to_apply"] = "softmax"
return {}, {}, postprocess_params

def preprocess(self, inputs):
Expand Down Expand Up @@ -203,8 +217,13 @@ def _forward(self, model_inputs):
model_outputs = self.model(**model_inputs)
return model_outputs

def postprocess(self, model_outputs, top_k=5):
probs = model_outputs.logits[0].softmax(-1)
def postprocess(self, model_outputs, top_k=5, function_to_apply="softmax"):
if function_to_apply == "softmax":
probs = model_outputs.logits[0].softmax(-1)
elif function_to_apply == "sigmoid":
probs = model_outputs.logits[0].sigmoid()
else:
probs = model_outputs.logits[0]
scores, ids = probs.topk(top_k)

scores = scores.tolist()
Expand Down
7 changes: 5 additions & 2 deletions src/transformers/pipelines/automatic_speech_recognition.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import warnings
from collections import defaultdict
from typing import TYPE_CHECKING, Dict, Optional, Union

Expand Down Expand Up @@ -269,8 +270,6 @@ def __call__(
The dictionary of ad-hoc parametrization of `generate_config` to be used for the generation call. For a
complete overview of generate, check the [following
guide](https://huggingface.co/docs/transformers/en/main_classes/text_generation).
max_new_tokens (`int`, *optional*):
The maximum numbers of tokens to generate, ignoring the number of tokens in the prompt.

Return:
`Dict`: A dictionary with the following keys:
Expand Down Expand Up @@ -310,6 +309,10 @@ def _sanitize_parameters(

forward_params = defaultdict(dict)
if max_new_tokens is not None:
warnings.warn(
"`max_new_tokens` is deprecated and will be removed in version 5 of Transformers. To remove this warning, pass `max_new_tokens` a keyword argument inside `generate_kwargs` instead.",
FutureWarning,
)
forward_params["max_new_tokens"] = max_new_tokens
if generate_kwargs is not None:
if max_new_tokens is not None and "max_new_tokens" in generate_kwargs:
Expand Down
32 changes: 24 additions & 8 deletions src/transformers/pipelines/depth_estimation.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import warnings
from typing import List, Union

import numpy as np
Expand Down Expand Up @@ -50,12 +51,12 @@ def __init__(self, *args, **kwargs):
requires_backends(self, "vision")
self.check_model_type(MODEL_FOR_DEPTH_ESTIMATION_MAPPING_NAMES)

def __call__(self, images: Union[str, List[str], "Image.Image", List["Image.Image"]], **kwargs):
def __call__(self, inputs: Union[str, List[str], "Image.Image", List["Image.Image"]] = None, **kwargs):
"""
Predict the depth(s) of the image(s) passed as inputs.

Args:
images (`str`, `List[str]`, `PIL.Image` or `List[PIL.Image]`):
inputs (`str`, `List[str]`, `PIL.Image` or `List[PIL.Image]`):
The pipeline handles three types of images:

- A string containing a http link pointing to an image
Expand All @@ -65,9 +66,10 @@ def __call__(self, images: Union[str, List[str], "Image.Image", List["Image.Imag
The pipeline accepts either a single image or a batch of images, which must then be passed as a string.
Images in a batch must all be in the same format: all as http links, all as local paths, or all as PIL
images.
timeout (`float`, *optional*, defaults to None):
The maximum time in seconds to wait for fetching images from the web. If None, no timeout is set and
the call may block forever.
parameters (`Dict`, *optional*):
A dictionary of argument names to parameter values, to control pipeline behaviour.
The only parameter available right now is `timeout`, which is the length of time, in seconds,
that the pipeline should wait before giving up on trying to download an image.

Return:
A dictionary or a list of dictionaries containing result. If the input is a single image, will return a
Expand All @@ -79,12 +81,26 @@ def __call__(self, images: Union[str, List[str], "Image.Image", List["Image.Imag
- **predicted_depth** (`torch.Tensor`) -- The predicted depth by the model as a `torch.Tensor`.
- **depth** (`PIL.Image`) -- The predicted depth by the model as a `PIL.Image`.
"""
return super().__call__(images, **kwargs)

def _sanitize_parameters(self, timeout=None, **kwargs):
# After deprecation of this is completed, remove the default `None` value for `images`
if "images" in kwargs:
warnings.warn(
"The `images` argument has been renamed to `inputs`. In version 5 of Transformers, `images` will no longer be accepted",
FutureWarning,
)
inputs = kwargs.pop("images")
if inputs is None:
raise ValueError("Cannot call the depth-estimation pipeline without an inputs argument!")
return super().__call__(inputs, **kwargs)

def _sanitize_parameters(self, timeout=None, parameters=None, **kwargs):
preprocess_params = {}
if timeout is not None:
warnings.warn(
"The `timeout` argument is deprecated and will be removed in version 5 of Transformers", FutureWarning
)
preprocess_params["timeout"] = timeout
if isinstance(parameters, dict) and "timeout" in parameters:
preprocess_params["timeout"] = parameters["timeout"]
return preprocess_params, {}, {}

def preprocess(self, image, timeout=None):
Expand Down
16 changes: 11 additions & 5 deletions src/transformers/pipelines/document_question_answering.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
# limitations under the License.

import re
import warnings
from typing import List, Optional, Tuple, Union

import numpy as np
Expand Down Expand Up @@ -44,6 +45,7 @@
TESSERACT_LOADED = True
import pytesseract


logger = logging.get_logger(__name__)


Expand Down Expand Up @@ -245,11 +247,6 @@ def __call__(
Whether or not we accept impossible as an answer.
lang (`str`, *optional*):
Language to use while running OCR. Defaults to english.
tesseract_config (`str`, *optional*):
Additional flags to pass to tesseract while running OCR.
timeout (`float`, *optional*, defaults to None):
The maximum time in seconds to wait for fetching images from the web. If None, no timeout is set and
the call may block forever.

Return:
A `dict` or a list of `dict`: Each result comes as a dictionary with the following keys:
Expand Down Expand Up @@ -291,6 +288,15 @@ def preprocess(

image = None
image_features = {}
if timeout is not None:
warnings.warn(
"The `timeout` argument is deprecated and will be removed in version 5 of Transformers", FutureWarning
)
if tesseract_config:
warnings.warn(
"The `tesseract_config` argument is deprecated and will be removed in version 5 of Transformers",
FutureWarning,
)
if input.get("image", None) is not None:
image = load_image(input["image"], timeout=timeout)
if self.image_processor is not None:
Expand Down
22 changes: 18 additions & 4 deletions src/transformers/pipelines/feature_extraction.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,9 @@ class FeatureExtractionPipeline(Pipeline):
[huggingface.co/models](https://huggingface.co/models).
"""

def _sanitize_parameters(self, truncation=None, tokenize_kwargs=None, return_tensors=None, **kwargs):
def _sanitize_parameters(
self, truncation=None, truncation_direction=None, tokenize_kwargs=None, return_tensors=None, **kwargs
):
if tokenize_kwargs is None:
tokenize_kwargs = {}

Expand All @@ -47,6 +49,13 @@ def _sanitize_parameters(self, truncation=None, tokenize_kwargs=None, return_ten
"truncation parameter defined twice (given as keyword argument as well as in tokenize_kwargs)"
)
tokenize_kwargs["truncation"] = truncation
if truncation_direction is not None:
if "truncation_side" in tokenize_kwargs:
raise ValueError(
"truncation_side parameter defined twice (given as keyword argument as well as in tokenize_kwargs)"
)
# The JS spec uses title-case, transformers uses lower, so we normalize
tokenize_kwargs["truncation_side"] = truncation_direction.lower()

preprocess_params = tokenize_kwargs

Expand All @@ -73,14 +82,19 @@ def postprocess(self, model_outputs, return_tensors=False):
elif self.framework == "tf":
return model_outputs[0].numpy().tolist()

def __call__(self, *inputs, **kwargs):
    """
    Extract the features of the input(s).

    Args:
        inputs (`str` or `List[str]`): One or several texts (or one list of texts) to get the features of.
        truncation (`bool`, *optional*, defaults to `None`):
            Whether to truncate the input to max length or not. Overrides the value passed when initializing the
            pipeline.
        truncation_direction (`str`, *optional*, defaults to `None`): The side to truncate from the input sequence
            if truncation is enabled. Can be 'left' or 'right'.

    Return:
        A nested list of `float`: The features computed by the model.
    """
    # All real work happens in the base Pipeline; this wrapper exists for the docstring.
    return super().__call__(*inputs, **kwargs)
4 changes: 2 additions & 2 deletions src/transformers/pipelines/fill_mask.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,12 +245,12 @@ def _sanitize_parameters(self, top_k=None, targets=None, tokenizer_kwargs=None):
)
return preprocess_params, {}, postprocess_params

def __call__(self, inputs, *args, **kwargs):
def __call__(self, *inputs, **kwargs):
"""
Fill the masked token in the text(s) given as inputs.

Args:
args (`str` or `List[str]`):
inputs (`str` or `List[str]`):
One or several texts (or one list of prompts) with masked tokens.
targets (`str` or `List[str]`, *optional*):
When passed, the model will limit the scores to the passed targets instead of looking up in the whole
Expand Down
22 changes: 16 additions & 6 deletions src/transformers/pipelines/image_classification.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import warnings
from typing import List, Union

import numpy as np
Expand Down Expand Up @@ -99,6 +100,9 @@ def __init__(self, *args, **kwargs):
def _sanitize_parameters(self, top_k=None, function_to_apply=None, timeout=None):
preprocess_params = {}
if timeout is not None:
warnings.warn(
"The `timeout` argument is deprecated and will be removed in version 5 of Transformers", FutureWarning
)
preprocess_params["timeout"] = timeout
postprocess_params = {}
if top_k is not None:
Expand All @@ -109,12 +113,12 @@ def _sanitize_parameters(self, top_k=None, function_to_apply=None, timeout=None)
postprocess_params["function_to_apply"] = function_to_apply
return preprocess_params, {}, postprocess_params

def __call__(self, images: Union[str, List[str], "Image.Image", List["Image.Image"]], **kwargs):
def __call__(self, inputs: Union[str, List[str], "Image.Image", List["Image.Image"]] = None, **kwargs):
"""
Assign labels to the image(s) passed as inputs.

Args:
images (`str`, `List[str]`, `PIL.Image` or `List[PIL.Image]`):
inputs (`str`, `List[str]`, `PIL.Image` or `List[PIL.Image]`):
The pipeline handles three types of images:

- A string containing a http link pointing to an image
Expand Down Expand Up @@ -142,9 +146,6 @@ def __call__(self, images: Union[str, List[str], "Image.Image", List["Image.Imag
top_k (`int`, *optional*, defaults to 5):
The number of top labels that will be returned by the pipeline. If the provided number is higher than
the number of labels available in the model configuration, it will default to the number of labels.
timeout (`float`, *optional*, defaults to None):
The maximum time in seconds to wait for fetching images from the web. If None, no timeout is set and
the call may block forever.

Return:
A dictionary or a list of dictionaries containing result. If the input is a single image, will return a
Expand All @@ -156,7 +157,16 @@ def __call__(self, images: Union[str, List[str], "Image.Image", List["Image.Imag
- **label** (`str`) -- The label identified by the model.
- **score** (`int`) -- The score attributed by the model for that label.
"""
return super().__call__(images, **kwargs)
# After deprecation of this is completed, remove the default `None` value for `images`
if "images" in kwargs:
warnings.warn(
"The `images` argument has been renamed to `inputs`. In version 5 of Transformers, `images` will no longer be accepted",
FutureWarning,
)
inputs = kwargs.pop("images")
if inputs is None:
raise ValueError("Cannot call the image-classification pipeline without an inputs argument!")
return super().__call__(inputs, **kwargs)

def preprocess(self, image, timeout=None):
image = load_image(image, timeout=timeout)
Expand Down
22 changes: 16 additions & 6 deletions src/transformers/pipelines/image_segmentation.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import warnings
from typing import Any, Dict, List, Union

import numpy as np
Expand Down Expand Up @@ -90,16 +91,19 @@ def _sanitize_parameters(self, **kwargs):
if "overlap_mask_area_threshold" in kwargs:
postprocess_kwargs["overlap_mask_area_threshold"] = kwargs["overlap_mask_area_threshold"]
if "timeout" in kwargs:
warnings.warn(
"The `timeout` argument is deprecated and will be removed in version 5 of Transformers", FutureWarning
)
preprocess_kwargs["timeout"] = kwargs["timeout"]

return preprocess_kwargs, {}, postprocess_kwargs

def __call__(self, inputs=None, **kwargs) -> Union[Predictions, List[Prediction]]:
    """
    Perform segmentation (detect masks & classes) in the image(s) passed as inputs.

    Args:
        inputs (`str`, `List[str]`, `PIL.Image` or `List[PIL.Image]`):
            The pipeline handles three types of images:

            - A string containing an HTTP(S) link pointing to an image
            - A string containing a local path to an image
            - An image loaded in PIL directly

        subtask (`str`, *optional*):
            Segmentation subtask to run, depending on model capabilities.
        mask_threshold (`float`, *optional*, defaults to 0.5):
            Threshold to use when turning the predicted masks into binary values.
        overlap_mask_area_threshold (`float`, *optional*, defaults to 0.5):
            Mask overlap threshold to eliminate small, disconnected segments.

    Return:
        A dictionary or a list of dictionaries containing the result. If the input is a single image, will return a
        list of dictionaries; if the input is a list of several images, will return a list of lists of dictionaries,
        each corresponding to one image. Each dictionary contains:

        - **label** (`str`) -- The class label identified by the model.
        - **mask** (`PIL.Image`) -- A binary mask of the detected object.
        - **score** (*optional* `float`) -- Optionally, when the model is capable of estimating a confidence of the
          "object" described by the label and the mask.
    """
    # After deprecation of this is completed, remove the default `None` value for `images`
    if "images" in kwargs:
        warnings.warn(
            "The `images` argument has been renamed to `inputs`. In version 5 of Transformers, `images` will no longer be accepted",
            FutureWarning,
        )
        inputs = kwargs.pop("images")
    if inputs is None:
        # Fixed copy-paste error: message previously said "image-classification".
        raise ValueError("Cannot call the image-segmentation pipeline without an inputs argument!")
    return super().__call__(inputs, **kwargs)

def preprocess(self, image, subtask=None, timeout=None):
image = load_image(image, timeout=timeout)
Expand Down
Loading
Loading