diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 6b5587825..53c99dc38 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -61,7 +61,7 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - python -m pip install '.[train, onnx, openvino, dev]' + python -m pip install '.[train, onnx, openvino, ipex, dev]' - name: Install model2vec run: python -m pip install model2vec diff --git a/docs/installation.md b/docs/installation.md index 77efb1568..116cd2c92 100644 --- a/docs/installation.md +++ b/docs/installation.md @@ -4,6 +4,7 @@ We recommend **Python 3.9+**, **[PyTorch 1.11.0+](https://pytorch.org/get-starte * **Default:** This allows for loading, saving, and inference (i.e., getting embeddings) of models. * **ONNX:** This allows for loading, saving, inference, optimizing, and quantizing of models using the ONNX backend. * **OpenVINO:** This allows for loading, saving, and inference of models using the OpenVINO backend. +* **IPEX:** This allows for loading, saving, and inference of models using the IPEX backend. * **Default and Training**: Like **Default**, plus training. * **Development**: All of the above plus some dependencies for developing Sentence Transformers, see [Editable Install](#editable-install). @@ -37,6 +38,12 @@ Note that you can mix and match the various extras, e.g. ``pip install -U "sente pip install -U "sentence-transformers[openvino]" +.. tab:: IPEX + + :: + + pip install -U "sentence-transformers[ipex]" + .. tab:: Default and Training :: @@ -87,6 +94,12 @@ Note that you can mix and match the various extras, e.g. ``pip install -U "sente pip install -U "sentence-transformers[openvino]" +.. tab:: IPEX + + :: + + pip install -U "sentence-transformers[ipex]" + .. 
tab:: Default and Training :: @@ -139,6 +152,12 @@ You can install ``sentence-transformers`` directly from source to take advantage pip install -U "sentence-transformers[openvino] @ git+https://github.com/UKPLab/sentence-transformers.git" +.. tab:: IPEX + + :: + + pip install -U "sentence-transformers[ipex] @ git+https://github.com/UKPLab/sentence-transformers.git" + .. tab:: Default and Training :: diff --git a/pyproject.toml b/pyproject.toml index f6e2cf867..54d140283 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -51,6 +51,7 @@ train = ["datasets", "accelerate>=0.20.3"] onnx = ["optimum[onnxruntime]>=1.23.1"] onnx-gpu = ["optimum[onnxruntime-gpu]>=1.23.1"] openvino = ["optimum-intel[openvino]>=1.20.0"] +ipex = ["optimum-intel[ipex]>=1.21.0"] dev = ["datasets", "accelerate>=0.20.3", "pre-commit", "pytest", "pytest-cov", "peft"] [build-system] diff --git a/sentence_transformers/SentenceTransformer.py b/sentence_transformers/SentenceTransformer.py index ba02f8bfa..7b215665f 100644 --- a/sentence_transformers/SentenceTransformer.py +++ b/sentence_transformers/SentenceTransformer.py @@ -128,7 +128,7 @@ class SentenceTransformer(nn.Sequential, FitMixin, PeftAdapterMixin): model_card_data (:class:`~sentence_transformers.model_card.SentenceTransformerModelCardData`, optional): A model card data object that contains information about the model. This is used to generate a model card when saving the model. If not set, a default model card data object is created. - backend (str): The backend to use for inference. Can be one of "torch" (default), "onnx", or "openvino". + backend (str): The backend to use for inference. Can be one of "torch" (default), "onnx", "openvino", or "ipex". See https://sbert.net/docs/sentence_transformer/usage/efficiency.html for benchmarking information on the different backends. 
@@ -177,7 +177,7 @@ def __init__( tokenizer_kwargs: dict[str, Any] | None = None, config_kwargs: dict[str, Any] | None = None, model_card_data: SentenceTransformerModelCardData | None = None, - backend: Literal["torch", "onnx", "openvino"] = "torch", + backend: Literal["torch", "onnx", "openvino", "ipex"] = "torch", ) -> None: # Note: self._load_sbert_model can also update `self.prompts` and `self.default_prompt_name` self.prompts = prompts or {} @@ -382,8 +382,8 @@ def __init__( # Pass the model to the model card data for later use in generating a model card upon saving this model self.model_card_data.register_model(self) - def get_backend(self) -> Literal["torch", "onnx", "openvino"]: - """Return the backend used for inference, which can be one of "torch", "onnx", or "openvino". + def get_backend(self) -> Literal["torch", "onnx", "openvino", "ipex"]: + """Return the backend used for inference, which can be one of "torch", "onnx", "openvino", or "ipex". Returns: str: The backend used for inference. diff --git a/sentence_transformers/models/Transformer.py b/sentence_transformers/models/Transformer.py index 61c14e36c..99ccaf9d3 100644 --- a/sentence_transformers/models/Transformer.py +++ b/sentence_transformers/models/Transformer.py @@ -49,7 +49,7 @@ class Transformer(nn.Module): tokenizer_name_or_path: Name or path of the tokenizer. When None, then model_name_or_path is used backend: Backend used for model inference. Can be `torch`, `onnx`, - or `openvino`. Default is `torch`. + `openvino`, or `ipex`. Default is `torch`. """ save_in_root: bool = True @@ -187,8 +187,12 @@ def _load_model( self._load_onnx_model(model_name_or_path, config, cache_dir, **model_args) elif backend == "openvino": self._load_openvino_model(model_name_or_path, config, cache_dir, **model_args) + elif backend == "ipex": + self._load_ipex_model(model_name_or_path, config, cache_dir, **model_args) else: - raise ValueError(f"Unsupported backend '{backend}'. 
`backend` should be `torch`, `onnx`, or `openvino`.") + raise ValueError( + f"Unsupported backend '{backend}'. `backend` should be `torch`, `onnx`, `openvino`, or `ipex`." + ) def _load_peft_model(self, model_name_or_path: str, config: PeftConfig, cache_dir: str, **model_args) -> None: from peft import PeftModel @@ -254,6 +258,24 @@ def _load_openvino_model( if export: self._backend_warn_to_save(model_name_or_path, is_local, backend_name) + def _load_ipex_model(self, model_name_or_path, config, cache_dir, **model_args) -> None: + try: + from optimum.intel import IPEXModel + except ModuleNotFoundError: + raise Exception( + "Using the IPEX backend requires installing Optimum and IPEX. " + "You can install them with pip: `pip install optimum-intel[ipex]`." + ) + + self.auto_model: IPEXModel = IPEXModel.from_pretrained( + model_name_or_path, + config=config, + cache_dir=cache_dir, + **model_args, + ) + # Wrap the save_pretrained method to save the model in the correct subfolder + self.auto_model._save_pretrained = _save_pretrained_wrapper(self.auto_model._save_pretrained, self.backend) + def _load_onnx_model( self, model_name_or_path: str, config: PretrainedConfig, cache_dir: str, **model_args ) -> None: diff --git a/tests/test_backends.py b/tests/test_backends.py index 887de96e1..0e3bd13c4 100644 --- a/tests/test_backends.py +++ b/tests/test_backends.py @@ -15,16 +15,18 @@ except ImportError: pytest.skip("OpenVINO and ONNX backends are not available", allow_module_level=True) +try: + from optimum.intel import IPEXModel +except ImportError: + pytest.skip("IPEX backend is not available", allow_module_level=True) + from sentence_transformers import SentenceTransformer ## Testing exporting: @pytest.mark.parametrize( ["backend", "expected_auto_model_class"], - [ - ("onnx", ORTModelForFeatureExtraction), - ("openvino", OVModelForFeatureExtraction), - ], + [("onnx", ORTModelForFeatureExtraction), ("openvino", OVModelForFeatureExtraction), ("ipex", IPEXModel)], ) 
@pytest.mark.parametrize( "model_kwargs", [{}, {"file_name": "wrong_file_name"}]