Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Add equivalence testing between transformers pipelines + huggingface.js tasks #33452

Closed
wants to merge 35 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
7a6090b
Initial commit
Rocketknight1 Sep 12, 2024
5561454
Move to gpt2 file so it gets tested
Rocketknight1 Sep 12, 2024
9b3d877
Stop doing the git clone
Rocketknight1 Sep 13, 2024
aa4d9c4
Cleanup the test
Rocketknight1 Sep 13, 2024
f7e5087
First pass at testing all the pipelines
Rocketknight1 Sep 13, 2024
d951519
First pass at testing all the pipelines
Rocketknight1 Sep 13, 2024
71569fd
Cleanup error display
Rocketknight1 Sep 13, 2024
80a578d
Try patching up audio-classification
Rocketknight1 Sep 13, 2024
8d2a10b
Bring image-classification pipeline into sync
Rocketknight1 Sep 17, 2024
3983fcc
Bring visual-question-answering pipeline into sync
Rocketknight1 Sep 17, 2024
298fb37
Bring object-detection pipeline into sync
Rocketknight1 Sep 17, 2024
046f436
Bring zero-shot-classification pipeline into sync
Rocketknight1 Sep 17, 2024
6c7de21
make fixup
Rocketknight1 Sep 17, 2024
c4b5c1f
only recurse one level
Rocketknight1 Sep 18, 2024
04fe7ad
Update test
Rocketknight1 Sep 18, 2024
021ae20
Bring ASR pipeline into sync
Rocketknight1 Sep 18, 2024
c6069d7
Bring text-to-audio pipeline into sync
Rocketknight1 Sep 18, 2024
bd84dc2
make fixup
Rocketknight1 Sep 18, 2024
3fd294d
Bring fill-mask pipeline into sync
Rocketknight1 Sep 18, 2024
98b5dd4
Bring image-to-text and depth-estimation (mostly) into sync
Rocketknight1 Sep 18, 2024
9ce2faa
depth-estimation, image-to-text, video-classification
Rocketknight1 Sep 18, 2024
12fcfcf
audio-classification fixup
Rocketknight1 Sep 18, 2024
6db83ab
zero-shot-object-detection
Rocketknight1 Sep 18, 2024
3d2c477
zero-shot-object-detection
Rocketknight1 Sep 18, 2024
ddeb679
zero-shot-image-classification
Rocketknight1 Sep 18, 2024
b15cf4f
zero-shot-image-classification
Rocketknight1 Sep 18, 2024
2221cef
Bring table-question-answering into sync
Rocketknight1 Sep 19, 2024
8f7050f
token-classification
Rocketknight1 Sep 19, 2024
bdfde76
Update handling of generate args in test
Rocketknight1 Sep 19, 2024
35d0592
question-answering
Rocketknight1 Sep 19, 2024
5e040e6
text2text, summarization and translation
Rocketknight1 Sep 19, 2024
a0bcb23
last patch to depth-estimation
Rocketknight1 Sep 19, 2024
123a9e6
Small pipeline test fix
Rocketknight1 Sep 19, 2024
4c3fb8f
feature-extraction (partially)
Rocketknight1 Sep 20, 2024
1f93b12
Typo in test output
Rocketknight1 Sep 20, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 22 additions & 3 deletions src/transformers/pipelines/audio_classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,11 @@ def __call__(
The number of top labels that will be returned by the pipeline. If the provided number is `None` or
higher than the number of labels available in the model configuration, it will default to the number of
labels.
function_to_apply(`str`, *optional*, defaults to "softmax"):
The function to apply to the model output. By default, the pipeline will apply the softmax function to
the output of the model. Valid options: ["softmax", "sigmoid", "none"]. Note that passing Python's
built-in `None` will default to "softmax", so you need to pass the string "none" to disable any
post-processing.

Return:
A list of `dict` with the following keys:
Expand All @@ -135,13 +140,22 @@ def __call__(
"""
return super().__call__(inputs, **kwargs)

def _sanitize_parameters(self, top_k=None, **kwargs):
def _sanitize_parameters(self, top_k=None, function_to_apply=None, **kwargs):
# No parameters on this pipeline right now
postprocess_params = {}
if top_k is not None:
if top_k > self.model.config.num_labels:
top_k = self.model.config.num_labels
postprocess_params["top_k"] = top_k
if function_to_apply is not None:
if function_to_apply not in ["softmax", "sigmoid", "none"]:
raise ValueError(
f"Invalid value for `function_to_apply`: {function_to_apply}. "
"Valid options are ['softmax', 'sigmoid', 'none']"
)
postprocess_params["function_to_apply"] = function_to_apply
else:
postprocess_params["function_to_apply"] = "softmax"
return {}, {}, postprocess_params

def preprocess(self, inputs):
Expand Down Expand Up @@ -203,8 +217,13 @@ def _forward(self, model_inputs):
model_outputs = self.model(**model_inputs)
return model_outputs

def postprocess(self, model_outputs, top_k=5):
probs = model_outputs.logits[0].softmax(-1)
def postprocess(self, model_outputs, top_k=5, function_to_apply="softmax"):
if function_to_apply == "softmax":
probs = model_outputs.logits[0].softmax(-1)
elif function_to_apply == "sigmoid":
probs = model_outputs.logits[0].sigmoid()
else:
probs = model_outputs.logits[0]
scores, ids = probs.topk(top_k)

scores = scores.tolist()
Expand Down
7 changes: 5 additions & 2 deletions src/transformers/pipelines/automatic_speech_recognition.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import warnings
from collections import defaultdict
from typing import TYPE_CHECKING, Dict, Optional, Union

Expand Down Expand Up @@ -269,8 +270,6 @@ def __call__(
The dictionary of ad-hoc parametrization of `generate_config` to be used for the generation call. For a
complete overview of generate, check the [following
guide](https://huggingface.co/docs/transformers/en/main_classes/text_generation).
max_new_tokens (`int`, *optional*):
The maximum numbers of tokens to generate, ignoring the number of tokens in the prompt.

Return:
`Dict`: A dictionary with the following keys:
Expand Down Expand Up @@ -310,6 +309,10 @@ def _sanitize_parameters(

forward_params = defaultdict(dict)
if max_new_tokens is not None:
warnings.warn(
"`max_new_tokens` is deprecated and will be removed in version 5 of Transformers. To remove this warning, pass `max_new_tokens` a keyword argument inside `generate_kwargs` instead.",
FutureWarning,
)
forward_params["max_new_tokens"] = max_new_tokens
if generate_kwargs is not None:
if max_new_tokens is not None and "max_new_tokens" in generate_kwargs:
Expand Down
32 changes: 24 additions & 8 deletions src/transformers/pipelines/depth_estimation.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import warnings
from typing import List, Union

import numpy as np
Expand Down Expand Up @@ -50,12 +51,12 @@ def __init__(self, *args, **kwargs):
requires_backends(self, "vision")
self.check_model_type(MODEL_FOR_DEPTH_ESTIMATION_MAPPING_NAMES)

def __call__(self, images: Union[str, List[str], "Image.Image", List["Image.Image"]], **kwargs):
def __call__(self, inputs: Union[str, List[str], "Image.Image", List["Image.Image"]] = None, **kwargs):
"""
Predict the depth(s) of the image(s) passed as inputs.

Args:
images (`str`, `List[str]`, `PIL.Image` or `List[PIL.Image]`):
inputs (`str`, `List[str]`, `PIL.Image` or `List[PIL.Image]`):
The pipeline handles three types of images:

- A string containing a http link pointing to an image
Expand All @@ -65,9 +66,10 @@ def __call__(self, images: Union[str, List[str], "Image.Image", List["Image.Imag
The pipeline accepts either a single image or a batch of images, which must then be passed as a string.
Images in a batch must all be in the same format: all as http links, all as local paths, or all as PIL
images.
timeout (`float`, *optional*, defaults to None):
The maximum time in seconds to wait for fetching images from the web. If None, no timeout is set and
the call may block forever.
parameters (`Dict`, *optional*):
A dictionary of argument names to parameter values, to control pipeline behaviour.
The only parameter available right now is `timeout`, which is the length of time, in seconds,
that the pipeline should wait before giving up on trying to download an image.

Return:
A dictionary or a list of dictionaries containing result. If the input is a single image, will return a
Expand All @@ -79,12 +81,26 @@ def __call__(self, images: Union[str, List[str], "Image.Image", List["Image.Imag
- **predicted_depth** (`torch.Tensor`) -- The predicted depth by the model as a `torch.Tensor`.
- **depth** (`PIL.Image`) -- The predicted depth by the model as a `PIL.Image`.
"""
return super().__call__(images, **kwargs)

def _sanitize_parameters(self, timeout=None, **kwargs):
# After deprecation of this is completed, remove the default `None` value for `images`
if "images" in kwargs:
warnings.warn(
"The `images` argument has been renamed to `inputs`. In version 5 of Transformers, `images` will no longer be accepted",
FutureWarning,
)
inputs = kwargs.pop("images")
if inputs is None:
raise ValueError("Cannot call the depth-estimation pipeline without an inputs argument!")
return super().__call__(inputs, **kwargs)

def _sanitize_parameters(self, timeout=None, parameters=None, **kwargs):
preprocess_params = {}
if timeout is not None:
warnings.warn(
"The `timeout` argument is deprecated and will be removed in version 5 of Transformers", FutureWarning
)
preprocess_params["timeout"] = timeout
if isinstance(parameters, dict) and "timeout" in parameters:
preprocess_params["timeout"] = parameters["timeout"]
return preprocess_params, {}, {}

def preprocess(self, image, timeout=None):
Expand Down
16 changes: 11 additions & 5 deletions src/transformers/pipelines/document_question_answering.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
# limitations under the License.

import re
import warnings
from typing import List, Optional, Tuple, Union

import numpy as np
Expand Down Expand Up @@ -44,6 +45,7 @@
TESSERACT_LOADED = True
import pytesseract


logger = logging.get_logger(__name__)


Expand Down Expand Up @@ -245,11 +247,6 @@ def __call__(
Whether or not we accept impossible as an answer.
lang (`str`, *optional*):
Language to use while running OCR. Defaults to english.
tesseract_config (`str`, *optional*):
Additional flags to pass to tesseract while running OCR.
timeout (`float`, *optional*, defaults to None):
The maximum time in seconds to wait for fetching images from the web. If None, no timeout is set and
the call may block forever.

Return:
A `dict` or a list of `dict`: Each result comes as a dictionary with the following keys:
Expand Down Expand Up @@ -291,6 +288,15 @@ def preprocess(

image = None
image_features = {}
if timeout is not None:
warnings.warn(
"The `timeout` argument is deprecated and will be removed in version 5 of Transformers", FutureWarning
)
if tesseract_config:
warnings.warn(
"The `tesseract_config` argument is deprecated and will be removed in version 5 of Transformers",
FutureWarning,
)
if input.get("image", None) is not None:
image = load_image(input["image"], timeout=timeout)
if self.image_processor is not None:
Expand Down
22 changes: 18 additions & 4 deletions src/transformers/pipelines/feature_extraction.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,9 @@ class FeatureExtractionPipeline(Pipeline):
[huggingface.co/models](https://huggingface.co/models).
"""

def _sanitize_parameters(self, truncation=None, tokenize_kwargs=None, return_tensors=None, **kwargs):
def _sanitize_parameters(
self, truncation=None, truncation_direction=None, tokenize_kwargs=None, return_tensors=None, **kwargs
):
if tokenize_kwargs is None:
tokenize_kwargs = {}

Expand All @@ -47,6 +49,13 @@ def _sanitize_parameters(self, truncation=None, tokenize_kwargs=None, return_ten
"truncation parameter defined twice (given as keyword argument as well as in tokenize_kwargs)"
)
tokenize_kwargs["truncation"] = truncation
if truncation_direction is not None:
if "truncation_side" in tokenize_kwargs:
raise ValueError(
"truncation_side parameter defined twice (given as keyword argument as well as in tokenize_kwargs)"
)
# The JS spec uses title-case, transformers uses lower, so we normalize
tokenize_kwargs["truncation_side"] = truncation_direction.lower()

preprocess_params = tokenize_kwargs

Expand All @@ -73,14 +82,19 @@ def postprocess(self, model_outputs, return_tensors=False):
elif self.framework == "tf":
return model_outputs[0].numpy().tolist()

def __call__(self, *inputs, **kwargs):
    """
    Extract the features of the input(s).

    Args:
        inputs (`str` or `List[str]`): One or several texts (or one list of texts) to get the features of.
        truncation (`bool`, *optional*, defaults to `None`):
            Whether to truncate the input to max length or not. Overrides the value passed when initializing the
            pipeline.
        truncation_direction (`str`, *optional*, defaults to `None`): The side to truncate from the input sequence
            if truncation is enabled. Can be 'left' or 'right'.

    Return:
        A nested list of `float`: The features computed by the model.
    """
    # All real work happens in the base Pipeline; this wrapper exists for the docstring.
    return super().__call__(*inputs, **kwargs)
4 changes: 2 additions & 2 deletions src/transformers/pipelines/fill_mask.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,12 +245,12 @@ def _sanitize_parameters(self, top_k=None, targets=None, tokenizer_kwargs=None):
)
return preprocess_params, {}, postprocess_params

def __call__(self, inputs, *args, **kwargs):
def __call__(self, *inputs, **kwargs):
"""
Fill the masked token in the text(s) given as inputs.

Args:
args (`str` or `List[str]`):
inputs (`str` or `List[str]`):
One or several texts (or one list of prompts) with masked tokens.
targets (`str` or `List[str]`, *optional*):
When passed, the model will limit the scores to the passed targets instead of looking up in the whole
Expand Down
22 changes: 16 additions & 6 deletions src/transformers/pipelines/image_classification.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import warnings
from typing import List, Union

import numpy as np
Expand Down Expand Up @@ -99,6 +100,9 @@ def __init__(self, *args, **kwargs):
def _sanitize_parameters(self, top_k=None, function_to_apply=None, timeout=None):
preprocess_params = {}
if timeout is not None:
warnings.warn(
"The `timeout` argument is deprecated and will be removed in version 5 of Transformers", FutureWarning
)
preprocess_params["timeout"] = timeout
postprocess_params = {}
if top_k is not None:
Expand All @@ -109,12 +113,12 @@ def _sanitize_parameters(self, top_k=None, function_to_apply=None, timeout=None)
postprocess_params["function_to_apply"] = function_to_apply
return preprocess_params, {}, postprocess_params

def __call__(self, images: Union[str, List[str], "Image.Image", List["Image.Image"]], **kwargs):
def __call__(self, inputs: Union[str, List[str], "Image.Image", List["Image.Image"]] = None, **kwargs):
"""
Assign labels to the image(s) passed as inputs.

Args:
images (`str`, `List[str]`, `PIL.Image` or `List[PIL.Image]`):
inputs (`str`, `List[str]`, `PIL.Image` or `List[PIL.Image]`):
The pipeline handles three types of images:

- A string containing a http link pointing to an image
Expand Down Expand Up @@ -142,9 +146,6 @@ def __call__(self, images: Union[str, List[str], "Image.Image", List["Image.Imag
top_k (`int`, *optional*, defaults to 5):
The number of top labels that will be returned by the pipeline. If the provided number is higher than
the number of labels available in the model configuration, it will default to the number of labels.
timeout (`float`, *optional*, defaults to None):
The maximum time in seconds to wait for fetching images from the web. If None, no timeout is set and
the call may block forever.

Return:
A dictionary or a list of dictionaries containing result. If the input is a single image, will return a
Expand All @@ -156,7 +157,16 @@ def __call__(self, images: Union[str, List[str], "Image.Image", List["Image.Imag
- **label** (`str`) -- The label identified by the model.
- **score** (`int`) -- The score attributed by the model for that label.
"""
return super().__call__(images, **kwargs)
# After deprecation of this is completed, remove the default `None` value for `images`
if "images" in kwargs:
warnings.warn(
"The `images` argument has been renamed to `inputs`. In version 5 of Transformers, `images` will no longer be accepted",
FutureWarning,
)
inputs = kwargs.pop("images")
if inputs is None:
raise ValueError("Cannot call the image-classification pipeline without an inputs argument!")
return super().__call__(inputs, **kwargs)

def preprocess(self, image, timeout=None):
image = load_image(image, timeout=timeout)
Expand Down
22 changes: 16 additions & 6 deletions src/transformers/pipelines/image_segmentation.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import warnings
from typing import Any, Dict, List, Union

import numpy as np
Expand Down Expand Up @@ -90,16 +91,19 @@ def _sanitize_parameters(self, **kwargs):
if "overlap_mask_area_threshold" in kwargs:
postprocess_kwargs["overlap_mask_area_threshold"] = kwargs["overlap_mask_area_threshold"]
if "timeout" in kwargs:
warnings.warn(
"The `timeout` argument is deprecated and will be removed in version 5 of Transformers", FutureWarning
)
preprocess_kwargs["timeout"] = kwargs["timeout"]

return preprocess_kwargs, {}, postprocess_kwargs

def __call__(self, inputs=None, **kwargs) -> Union[Predictions, List[Prediction]]:
    """
    Perform segmentation (detect masks & classes) in the image(s) passed as inputs.

    Args:
        inputs (`str`, `List[str]`, `PIL.Image` or `List[PIL.Image]`):
            The pipeline handles three types of images:

            - A string containing an HTTP(S) link pointing to an image
            - A string containing a local path to an image
            - An image loaded in PIL directly

        subtask (`str`, *optional*):
            Segmentation subtask to run, depending on model capabilities.
        mask_threshold (`float`, *optional*, defaults to 0.5):
            Threshold to use when turning the predicted masks into binary values.
        overlap_mask_area_threshold (`float`, *optional*, defaults to 0.5):
            Mask overlap threshold to eliminate small, disconnected segments.

    Return:
        A dictionary or a list of dictionaries containing the result. If the input is a single image, will return a
        list of dictionaries; if the input is a list of several images, will return a list of lists of dictionaries,
        each corresponding to one image. Each dictionary contains:

        - **label** (`str`) -- The class label identified by the model.
        - **mask** (`PIL.Image`) -- A binary mask of the detected object.
        - **score** (*optional* `float`) -- Optionally, when the model is capable of estimating a confidence of the
          "object" described by the label and the mask.
    """
    # After deprecation of this is completed, remove the default `None` value for `images`
    if "images" in kwargs:
        warnings.warn(
            "The `images` argument has been renamed to `inputs`. In version 5 of Transformers, `images` will no longer be accepted",
            FutureWarning,
        )
        inputs = kwargs.pop("images")
    if inputs is None:
        # Fixed copy-paste error: message previously said "image-classification".
        raise ValueError("Cannot call the image-segmentation pipeline without an inputs argument!")
    return super().__call__(inputs, **kwargs)

def preprocess(self, image, subtask=None, timeout=None):
image = load_image(image, timeout=timeout)
Expand Down
Loading
Loading