From 1ee86b5041713193d927f089632ef2efa082285f Mon Sep 17 00:00:00 2001 From: Sebastian Husch Lee Date: Tue, 4 Feb 2025 05:51:06 -0800 Subject: [PATCH 1/5] fix: Fix filters to handle date times with timezones (loading and comparison) (#8800) * Fix on date time parsing with timezones. And comparing naive and aware date times. * Add release note * Add more filter tests --- .../components/routers/metadata_router.py | 10 +- haystack/utils/filters.py | 43 ++++- .../fix-date-comparison-ced1d6ef64534951.yaml | 6 + .../routers/test_metadata_router.py | 27 +++ test/utils/test_filters.py | 168 ++---------------- 5 files changed, 87 insertions(+), 167 deletions(-) create mode 100644 releasenotes/notes/fix-date-comparison-ced1d6ef64534951.yaml diff --git a/haystack/components/routers/metadata_router.py b/haystack/components/routers/metadata_router.py index 7c6c007d16..f0604d912d 100644 --- a/haystack/components/routers/metadata_router.py +++ b/haystack/components/routers/metadata_router.py @@ -76,6 +76,11 @@ def __init__(self, rules: Dict[str, Dict]): ``` """ self.rules = rules + for rule in self.rules.values(): + if "operator" not in rule: + raise ValueError( + "Invalid filter syntax. See https://docs.haystack.deepset.ai/docs/metadata-filtering for details." + ) component.set_output_types(self, unmatched=List[Document], **{edge: List[Document] for edge in rules}) def run(self, documents: List[Document]): @@ -95,11 +100,6 @@ def run(self, documents: List[Document]): for document in documents: cur_document_matched = False for edge, rule in self.rules.items(): - if "operator" not in rule: - raise ValueError( - "Invalid filter syntax. " - "See https://docs.haystack.deepset.ai/docs/metadata-filtering for details." - ) if document_matches_filter(rule, document): output[edge].append(document) cur_document_matched = True diff --git a/haystack/utils/filters.py b/haystack/utils/filters.py index bddb422efe..d9178b62d7 100644 --- a/haystack/utils/filters.py +++ b/haystack/utils/filters.py @@ -6,6 +6,7 @@ from datetime import datetime from typing import Any, Dict, List, Optional +import dateutil.parser import pandas as pd from haystack.dataclasses import Document @@ -69,18 +70,48 @@ def _greater_than(document_value: Any, filter_value: Any) -> bool: if isinstance(document_value, str) or isinstance(filter_value, str): try: - document_value = datetime.fromisoformat(document_value) - filter_value = datetime.fromisoformat(filter_value) + document_value = _parse_date(document_value) + filter_value = _parse_date(filter_value) + document_value, filter_value = _ensure_both_dates_naive_or_aware(document_value, filter_value) + except FilterError as exc: + raise exc + if type(filter_value) in [list, pd.DataFrame]: + msg = f"Filter value can't be of type {type(filter_value)} using operators '>', '>=', '<', '<='" + raise FilterError(msg) + return document_value > filter_value + + +def _parse_date(value): + """Try parsing the value as an ISO format date, then fall back to dateutil.parser.""" + try: + return datetime.fromisoformat(value) + except (ValueError, TypeError): + try: + return dateutil.parser.parse(value) except (ValueError, TypeError) as exc: msg = ( "Can't compare strings using operators '>', '>=', '<', '<='. " "Strings are only comparable if they are ISO formatted dates." 
) raise FilterError(msg) from exc - if type(filter_value) in [list, pd.DataFrame]: - msg = f"Filter value can't be of type {type(filter_value)} using operators '>', '>=', '<', '<='" - raise FilterError(msg) - return document_value > filter_value + + +def _ensure_both_dates_naive_or_aware(date1: datetime, date2: datetime): + """Ensure that both dates are either naive or aware.""" + # Both naive + if date1.tzinfo is None and date2.tzinfo is None: + return date1, date2 + + # Both aware + if date1.tzinfo is not None and date2.tzinfo is not None: + return date1, date2 + + # One naive, one aware + if date1.tzinfo is None: + date1 = date1.replace(tzinfo=date2.tzinfo) + else: + date2 = date2.replace(tzinfo=date1.tzinfo) + return date1, date2 def _greater_than_equal(document_value: Any, filter_value: Any) -> bool: diff --git a/releasenotes/notes/fix-date-comparison-ced1d6ef64534951.yaml b/releasenotes/notes/fix-date-comparison-ced1d6ef64534951.yaml new file mode 100644 index 0000000000..b6e071ecf6 --- /dev/null +++ b/releasenotes/notes/fix-date-comparison-ced1d6ef64534951.yaml @@ -0,0 +1,6 @@ +--- +enhancements: + - | + Enhancements to Date Filtering in MetadataRouter + - Improved date parsing in filter utilities by introducing `_parse_date`, which first attempts `datetime.fromisoformat(value)` for backward compatibility and then falls back to dateutil.parser.parse() for broader ISO 8601 support. + - Resolved a common issue where comparing naive and timezone-aware datetimes resulted in TypeError. Added `_ensure_both_dates_naive_or_aware`, which ensures both datetimes are either naive or aware. If one is missing a timezone, it is assigned the timezone of the other for consistency. diff --git a/test/components/routers/test_metadata_router.py b/test/components/routers/test_metadata_router.py index d9e8ce9875..2b7e964705 100644 --- a/test/components/routers/test_metadata_router.py +++ b/test/components/routers/test_metadata_router.py @@ -35,3 +35,30 @@ def test_run(self): assert output["edge_1"][0].meta["created_at"] == "2023-02-01" assert output["edge_2"][0].meta["created_at"] == "2023-05-01" assert output["unmatched"][0].meta["created_at"] == "2023-08-01" + + def test_run_wrong_filter(self): + rules = { + "edge_1": {"field": "meta.created_at", "operator": ">=", "value": "2023-01-01"}, + "wrong_filter": {"wrong_value": "meta.created_at == 2023-04-01"}, + } + with pytest.raises(ValueError): + MetadataRouter(rules=rules) + + def test_run_datetime_with_timezone(self): + rules = { + "edge_1": { + "operator": "AND", + "conditions": [{"field": "meta.created_at", "operator": ">=", "value": "2025-02-01"}], + } + } + router = MetadataRouter(rules=rules) + documents = [ + Document(meta={"created_at": "2025-02-03T12:45:46.435816Z"}), + Document(meta={"created_at": "2025-02-01T12:45:46.435816Z"}), + Document(meta={"created_at": "2025-01-03T12:45:46.435816Z"}), + ] + output = router.run(documents=documents) + assert len(output["edge_1"]) == 2 + assert output["edge_1"][0].meta["created_at"] == "2025-02-03T12:45:46.435816Z" + assert output["edge_1"][1].meta["created_at"] == "2025-02-01T12:45:46.435816Z" + assert output["unmatched"][0].meta["created_at"] == "2025-01-03T12:45:46.435816Z" diff --git a/test/utils/test_filters.py b/test/utils/test_filters.py index 708e50cb7b..7c3da7cc4e 100644 --- a/test/utils/test_filters.py +++ b/test/utils/test_filters.py @@ -485,6 +485,18 @@ True, id="NOT operator with Document matching no condition", ), + pytest.param( + {"field": "meta.date", "operator": "==", "value": 
"2025-02-03T12:45:46.435816Z"}, + Document(meta={"date": "2025-02-03T12:45:46.435816Z"}), + True, + id="== operator with ISO 8601 datetime Document value", + ), + pytest.param( + {"field": "meta.date", "operator": ">=", "value": "2025-02-01"}, + Document(meta={"date": "2025-02-03T12:45:46.435816Z"}), + True, + id=">= operator with naive and aware ISO 8601 datetime Document value", + ), ] @@ -552,159 +564,3 @@ def test_document_matches_filter_raises_error(filter): with pytest.raises(FilterError): document = Document(meta={"page": 10}) document_matches_filter(filter, document) - - -filters_data = [ - pytest.param( - { - "$and": { - "type": {"$eq": "article"}, - "date": {"$gte": "2015-01-01", "$lt": "2021-01-01"}, - "rating": {"$gte": 3}, - "$or": {"genre": {"$in": ["economy", "politics"]}, "publisher": {"$eq": "nytimes"}}, - } - }, - { - "operator": "AND", - "conditions": [ - {"field": "type", "operator": "==", "value": "article"}, - {"field": "date", "operator": ">=", "value": "2015-01-01"}, - {"field": "date", "operator": "<", "value": "2021-01-01"}, - {"field": "rating", "operator": ">=", "value": 3}, - { - "operator": "OR", - "conditions": [ - {"field": "genre", "operator": "in", "value": ["economy", "politics"]}, - {"field": "publisher", "operator": "==", "value": "nytimes"}, - ], - }, - ], - }, - id="All operators explicit", - ), - pytest.param( - { - "type": "article", - "date": {"$gte": "2015-01-01", "$lt": "2021-01-01"}, - "rating": {"$gte": 3}, - "$or": {"genre": ["economy", "politics"], "publisher": "nytimes"}, - }, - { - "operator": "AND", - "conditions": [ - {"field": "type", "operator": "==", "value": "article"}, - {"field": "date", "operator": ">=", "value": "2015-01-01"}, - {"field": "date", "operator": "<", "value": "2021-01-01"}, - {"field": "rating", "operator": ">=", "value": 3}, - { - "operator": "OR", - "conditions": [ - {"field": "genre", "operator": "in", "value": ["economy", "politics"]}, - {"field": "publisher", "operator": "==", "value": "nytimes"}, - ], - }, - ], - }, - id="Root $and implicit", - ), - pytest.param( - { - "$or": [ - {"Type": "News Paper", "Date": {"$lt": "2019-01-01"}}, - {"Type": "Blog Post", "Date": {"$gte": "2019-01-01"}}, - ] - }, - { - "operator": "OR", - "conditions": [ - { - "operator": "AND", - "conditions": [ - {"field": "Type", "operator": "==", "value": "News Paper"}, - {"field": "Date", "operator": "<", "value": "2019-01-01"}, - ], - }, - { - "operator": "AND", - "conditions": [ - {"field": "Type", "operator": "==", "value": "Blog Post"}, - {"field": "Date", "operator": ">=", "value": "2019-01-01"}, - ], - }, - ], - }, - id="Root $or with list and multiple comparisons", - ), - pytest.param( - {"text": "A Foo Document 1"}, - {"operator": "AND", "conditions": [{"field": "text", "operator": "==", "value": "A Foo Document 1"}]}, - id="Implicit root $and and field $eq", - ), - pytest.param( - {"$or": {"name": {"$or": [{"$eq": "name_0"}, {"$eq": "name_1"}]}, "number": {"$lt": 1.0}}}, - { - "operator": "OR", - "conditions": [ - { - "operator": "OR", - "conditions": [ - {"field": "name", "operator": "==", "value": "name_0"}, - {"field": "name", "operator": "==", "value": "name_1"}, - ], - }, - {"field": "number", "operator": "<", "value": 1.0}, - ], - }, - id="Root $or with dict and field $or with list", - ), - pytest.param( - {"number": {"$lte": 2, "$gte": 0}, "name": ["name_0", "name_1"]}, - { - "operator": "AND", - "conditions": [ - {"field": "number", "operator": "<=", "value": 2}, - {"field": "number", "operator": ">=", "value": 0}, - 
{"field": "name", "operator": "in", "value": ["name_0", "name_1"]}, - ], - }, - id="Implicit $and and field $in", - ), - pytest.param( - {"number": {"$and": [{"$lte": 2}, {"$gte": 0}]}}, - { - "operator": "AND", - "conditions": [ - {"field": "number", "operator": "<=", "value": 2}, - {"field": "number", "operator": ">=", "value": 0}, - ], - }, - id="Implicit root $and and field $and with list", - ), - pytest.param( - { - "$not": { - "number": {"$lt": 1.0}, - "$and": {"name": {"$in": ["name_0", "name_1"]}, "$not": {"chapter": {"$eq": "intro"}}}, - } - }, - { - "operator": "NOT", - "conditions": [ - {"field": "number", "operator": "<", "value": 1.0}, - { - "operator": "AND", - "conditions": [ - {"field": "name", "operator": "in", "value": ["name_0", "name_1"]}, - {"operator": "NOT", "conditions": [{"field": "chapter", "operator": "==", "value": "intro"}]}, - ], - }, - ], - }, - id="Root explicit $not", - ), - pytest.param( - {"page": {"$not": 102}}, - {"operator": "NOT", "conditions": [{"field": "page", "operator": "==", "value": 102}]}, - id="Explicit $not with implicit $eq", - ), -] From f1679f1dca11355769bfec0c4acb56286155c3e5 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 4 Feb 2025 15:01:54 +0100 Subject: [PATCH 2/5] build(deps): bump fossas/fossa-action from 1.4.0 to 1.5.0 (#8771) Bumps [fossas/fossa-action](https://github.com/fossas/fossa-action) from 1.4.0 to 1.5.0. - [Release notes](https://github.com/fossas/fossa-action/releases) - [Commits](https://github.com/fossas/fossa-action/compare/v1.4.0...v1.5.0) --- updated-dependencies: - dependency-name: fossas/fossa-action dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/license_compliance.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/license_compliance.yml b/.github/workflows/license_compliance.yml index 1bf55ee536..969b2ecbd0 100644 --- a/.github/workflows/license_compliance.yml +++ b/.github/workflows/license_compliance.yml @@ -47,7 +47,7 @@ jobs: # We keep the license inventory on FOSSA - name: Send license report to Fossa - uses: fossas/fossa-action@v1.4.0 + uses: fossas/fossa-action@v1.5.0 continue-on-error: true # not critical with: api-key: ${{ secrets.FOSSA_LICENSE_SCAN_TOKEN }} From 5ae94886b2b937a13c648c6525b17422ddbaf731 Mon Sep 17 00:00:00 2001 From: Stefano Fiorucci Date: Tue, 4 Feb 2025 19:08:37 +0100 Subject: [PATCH 3/5] fix: fix test failures with Transformers models in PRs from forks (#8809) * trigger * try pinning sentence transformers * make integr tests run right away * pin transformers instead * older transformers version * rm transformers pin * try ignoring cache * change ubuntu version * try removing token * try again * more HF_API_TOKEN local deletions * restore test priority * rm leftover * more deletions * moreee * more * deletions * restore jobs order --- .../test_zero_shot_document_classifier.py | 3 ++- .../test_sentence_transformers_text_embedder.py | 3 ++- test/components/evaluators/test_sas_evaluator.py | 15 ++++++++++----- .../generators/chat/test_hugging_face_local.py | 3 ++- .../test_hugging_face_local_generator.py | 6 ++++-- .../test_sentence_transformers_diversity.py | 9 ++++++--- test/components/readers/test_extractive.py | 9 ++++++--- .../routers/test_transformers_text_router.py | 6 ++++-- 
.../routers/test_zero_shot_text_router.py | 3 ++- 9 files changed, 38 insertions(+), 19 deletions(-) diff --git a/test/components/classifiers/test_zero_shot_document_classifier.py b/test/components/classifiers/test_zero_shot_document_classifier.py index be4d04a9fe..35948e8dda 100644 --- a/test/components/classifiers/test_zero_shot_document_classifier.py +++ b/test/components/classifiers/test_zero_shot_document_classifier.py @@ -137,7 +137,8 @@ def test_run_unit(self, hf_pipeline_mock): assert result["documents"][1].to_dict()["classification"]["label"] == "negative" @pytest.mark.integration - def test_run(self): + def test_run(self, monkeypatch): + monkeypatch.delenv("HF_API_TOKEN", raising=False) # https://github.com/deepset-ai/haystack/issues/8811 component = TransformersZeroShotDocumentClassifier( model="cross-encoder/nli-deberta-v3-xsmall", labels=["positive", "negative"] ) diff --git a/test/components/embedders/test_sentence_transformers_text_embedder.py b/test/components/embedders/test_sentence_transformers_text_embedder.py index 195ee8efd1..293b07aada 100644 --- a/test/components/embedders/test_sentence_transformers_text_embedder.py +++ b/test/components/embedders/test_sentence_transformers_text_embedder.py @@ -261,10 +261,11 @@ def test_run_wrong_input_format(self): embedder.run(text=list_integers_input) @pytest.mark.integration - def test_run_trunc(self): + def test_run_trunc(self, monkeypatch): """ sentence-transformers/paraphrase-albert-small-v2 maps sentences & paragraphs to a 768 dimensional dense vector space """ + monkeypatch.delenv("HF_API_TOKEN", raising=False) # https://github.com/deepset-ai/haystack/issues/8811 checkpoint = "sentence-transformers/paraphrase-albert-small-v2" text = "a nice text to embed" diff --git a/test/components/evaluators/test_sas_evaluator.py b/test/components/evaluators/test_sas_evaluator.py index d1dad15151..cab8581ad7 100644 --- a/test/components/evaluators/test_sas_evaluator.py +++ b/test/components/evaluators/test_sas_evaluator.py @@ -104,7 +104,8 @@ def test_run_not_warmed_up(self): evaluator.run(ground_truth_answers=ground_truths, predicted_answers=predictions) @pytest.mark.integration - def test_run_with_matching_predictions(self): + def test_run_with_matching_predictions(self, monkeypatch): + monkeypatch.delenv("HF_API_TOKEN", raising=False) # https://github.com/deepset-ai/haystack/issues/8811 evaluator = SASEvaluator() ground_truths = [ "A construction budget of US $2.3 billion", @@ -124,7 +125,8 @@ def test_run_with_matching_predictions(self): assert result["individual_scores"] == pytest.approx([1.0, 1.0, 1.0]) @pytest.mark.integration - def test_run_with_single_prediction(self): + def test_run_with_single_prediction(self, monkeypatch): + monkeypatch.delenv("HF_API_TOKEN", raising=False) # https://github.com/deepset-ai/haystack/issues/8811 evaluator = SASEvaluator() ground_truths = ["US $2.3 billion"] @@ -137,7 +139,8 @@ def test_run_with_single_prediction(self): assert result["individual_scores"] == pytest.approx([0.689089], abs=1e-5) @pytest.mark.integration - def test_run_with_mismatched_predictions(self): + def test_run_with_mismatched_predictions(self, monkeypatch): + monkeypatch.delenv("HF_API_TOKEN", raising=False) # https://github.com/deepset-ai/haystack/issues/8811 evaluator = SASEvaluator() ground_truths = [ "US $2.3 billion", @@ -156,7 +159,8 @@ def test_run_with_mismatched_predictions(self): assert result["individual_scores"] == pytest.approx([0.689089, 0.870389, 0.908679], abs=1e-5) @pytest.mark.integration - def 
test_run_with_bi_encoder_model(self): + def test_run_with_bi_encoder_model(self, monkeypatch): + monkeypatch.delenv("HF_API_TOKEN", raising=False) # https://github.com/deepset-ai/haystack/issues/8811 evaluator = SASEvaluator(model="sentence-transformers/all-mpnet-base-v2") ground_truths = [ "A construction budget of US $2.3 billion", @@ -175,7 +179,8 @@ def test_run_with_bi_encoder_model(self): assert result["individual_scores"] == pytest.approx([1.0, 1.0, 1.0]) @pytest.mark.integration - def test_run_with_cross_encoder_model(self): + def test_run_with_cross_encoder_model(self, monkeypatch): + monkeypatch.delenv("HF_API_TOKEN", raising=False) # https://github.com/deepset-ai/haystack/issues/8811 evaluator = SASEvaluator(model="cross-encoder/ms-marco-MiniLM-L-6-v2") ground_truths = [ "A construction budget of US $2.3 billion", diff --git a/test/components/generators/chat/test_hugging_face_local.py b/test/components/generators/chat/test_hugging_face_local.py index 7c2e05df3a..1f6b478370 100644 --- a/test/components/generators/chat/test_hugging_face_local.py +++ b/test/components/generators/chat/test_hugging_face_local.py @@ -293,7 +293,8 @@ def test_messages_conversion_is_called(self, mock_convert, model_info_mock): @pytest.mark.integration @pytest.mark.flaky(reruns=3, reruns_delay=10) - def test_live_run(self): + def test_live_run(self, monkeypatch): + monkeypatch.delenv("HF_API_TOKEN", raising=False) # https://github.com/deepset-ai/haystack/issues/8811 messages = [ChatMessage.from_user("Please create a summary about the following topic: Climate change")] llm = HuggingFaceLocalChatGenerator( diff --git a/test/components/generators/test_hugging_face_local_generator.py b/test/components/generators/test_hugging_face_local_generator.py index 8d64bc44fc..d6432a360b 100644 --- a/test/components/generators/test_hugging_face_local_generator.py +++ b/test/components/generators/test_hugging_face_local_generator.py @@ -454,8 +454,9 @@ def test_stop_words_criteria_using_hf_tokenizer(self): assert criteria(generated_text_ids, scores=None) is True @pytest.mark.integration - def test_hf_pipeline_runs_with_our_criteria(self): + def test_hf_pipeline_runs_with_our_criteria(self, monkeypatch): """Test that creating our own StopWordsCriteria and passing it to a Huggingface pipeline works.""" + monkeypatch.delenv("HF_API_TOKEN", raising=False) # https://github.com/deepset-ai/haystack/issues/8811 generator = HuggingFaceLocalGenerator( model="google/flan-t5-small", task="text2text-generation", stop_words=["unambiguously"] ) @@ -466,7 +467,8 @@ def test_hf_pipeline_runs_with_our_criteria(self): @pytest.mark.integration @pytest.mark.flaky(reruns=3, reruns_delay=10) - def test_live_run(self): + def test_live_run(self, monkeypatch): + monkeypatch.delenv("HF_API_TOKEN", raising=False) # https://github.com/deepset-ai/haystack/issues/8811 llm = HuggingFaceLocalGenerator(model="Qwen/Qwen2.5-0.5B-Instruct", generation_kwargs={"max_new_tokens": 50}) llm.warm_up() diff --git a/test/components/rankers/test_sentence_transformers_diversity.py b/test/components/rankers/test_sentence_transformers_diversity.py index 018b443987..a4dcd7e89f 100644 --- a/test/components/rankers/test_sentence_transformers_diversity.py +++ b/test/components/rankers/test_sentence_transformers_diversity.py @@ -574,10 +574,11 @@ def test_pipeline_serialise_deserialise(self): @pytest.mark.integration @pytest.mark.parametrize("similarity", ["dot_product", "cosine"]) - def test_run(self, similarity): + def test_run(self, similarity, monkeypatch): """ Tests 
that run method returns documents in the correct order """ + monkeypatch.delenv("HF_API_TOKEN", raising=False) # https://github.com/deepset-ai/haystack/issues/8811 ranker = SentenceTransformersDiversityRanker( model="sentence-transformers/all-MiniLM-L6-v2", similarity=similarity ) @@ -601,7 +602,8 @@ def test_run(self, similarity): @pytest.mark.integration @pytest.mark.parametrize("similarity", ["dot_product", "cosine"]) - def test_run_real_world_use_case(self, similarity): + def test_run_real_world_use_case(self, similarity, monkeypatch): + monkeypatch.delenv("HF_API_TOKEN", raising=False) # https://github.com/deepset-ai/haystack/issues/8811 ranker = SentenceTransformersDiversityRanker( model="sentence-transformers/all-MiniLM-L6-v2", similarity=similarity ) @@ -673,7 +675,8 @@ def test_run_real_world_use_case(self, similarity): @pytest.mark.integration @pytest.mark.parametrize("similarity", ["dot_product", "cosine"]) - def test_run_with_maximum_margin_relevance_strategy(self, similarity): + def test_run_with_maximum_margin_relevance_strategy(self, similarity, monkeypatch): + monkeypatch.delenv("HF_API_TOKEN", raising=False) # https://github.com/deepset-ai/haystack/issues/8811 query = "renewable energy sources" docs = [ Document(content="18th-century French literature"), diff --git a/test/components/readers/test_extractive.py b/test/components/readers/test_extractive.py index a2f658b79b..2c33e51f9c 100644 --- a/test/components/readers/test_extractive.py +++ b/test/components/readers/test_extractive.py @@ -776,7 +776,8 @@ def test_deduplicate_by_overlap( @pytest.mark.integration -def test_t5(): +def test_t5(monkeypatch): + monkeypatch.delenv("HF_API_TOKEN", raising=False) # https://github.com/deepset-ai/haystack/issues/8811 reader = ExtractiveReader("sjrhuschlee/flan-t5-base-squad2") reader.warm_up() answers = reader.run(example_queries[0], example_documents[0], top_k=2)[ @@ -800,7 +801,8 @@ def test_t5(): @pytest.mark.integration -def test_roberta(): +def test_roberta(monkeypatch): + monkeypatch.delenv("HF_API_TOKEN", raising=False) # https://github.com/deepset-ai/haystack/issues/8811 reader = ExtractiveReader("deepset/tinyroberta-squad2") reader.warm_up() answers = reader.run(example_queries[0], example_documents[0], top_k=2)[ @@ -829,7 +831,8 @@ def test_roberta(): @pytest.mark.integration -def test_matches_hf_pipeline(): +def test_matches_hf_pipeline(monkeypatch): + monkeypatch.delenv("HF_API_TOKEN", raising=False) # https://github.com/deepset-ai/haystack/issues/8811 reader = ExtractiveReader( "deepset/tinyroberta-squad2", device=ComponentDevice.from_str("cpu"), overlap_threshold=None ) diff --git a/test/components/routers/test_transformers_text_router.py b/test/components/routers/test_transformers_text_router.py index 67ec163524..a539e0d3c3 100644 --- a/test/components/routers/test_transformers_text_router.py +++ b/test/components/routers/test_transformers_text_router.py @@ -172,7 +172,8 @@ def test_run_unit(self, hf_pipeline_mock, mock_auto_config_from_pretrained): assert out == {"en": "What is the color of the sky?"} @pytest.mark.integration - def test_run(self): + def test_run(self, monkeypatch): + monkeypatch.delenv("HF_API_TOKEN", raising=False) # https://github.com/deepset-ai/haystack/issues/8811 router = TransformersTextRouter(model="papluca/xlm-roberta-base-language-detection") router.warm_up() out = router.run("What is the color of the sky?") @@ -202,7 +203,8 @@ def test_run(self): assert out == {"en": "What is the color of the sky?"} @pytest.mark.integration - def 
test_wrong_labels(self): + def test_wrong_labels(self, monkeypatch): + monkeypatch.delenv("HF_API_TOKEN", raising=False) # https://github.com/deepset-ai/haystack/issues/8811 router = TransformersTextRouter(model="papluca/xlm-roberta-base-language-detection", labels=["en", "de"]) with pytest.raises(ValueError): router.warm_up() diff --git a/test/components/routers/test_zero_shot_text_router.py b/test/components/routers/test_zero_shot_text_router.py index 3b931c39bb..874ede64d5 100644 --- a/test/components/routers/test_zero_shot_text_router.py +++ b/test/components/routers/test_zero_shot_text_router.py @@ -106,7 +106,8 @@ def test_run_unit(self, hf_pipeline_mock): assert out == {"query": "What is the color of the sky?"} @pytest.mark.integration - def test_run(self): + def test_run(self, monkeypatch): + monkeypatch.delenv("HF_API_TOKEN", raising=False) # https://github.com/deepset-ai/haystack/issues/8811 router = TransformersZeroShotTextRouter(labels=["query", "passage"]) router.warm_up() out = router.run("What is the color of the sky?") From 2828d9e4aeeef1f6d95928a8cfc9e47fa29a5866 Mon Sep 17 00:00:00 2001 From: Stefano Fiorucci Date: Wed, 5 Feb 2025 14:43:19 +0100 Subject: [PATCH 4/5] refactor!: `DOCXToDocument` converter - store DOCX metadata as a dict (#8804) * DOCXToDocument - store DOCX metadata as a dict * do not export DOCXMetadata to converters package --- haystack/components/converters/__init__.py | 3 +- haystack/components/converters/docx.py | 4 +- ...docxmetadata-as-dict-20cf2ef0abf7af8a.yaml | 6 + .../converters/test_docx_file_to_document.py | 136 +++++++++--------- 4 files changed, 77 insertions(+), 72 deletions(-) create mode 100644 releasenotes/notes/docxmetadata-as-dict-20cf2ef0abf7af8a.yaml diff --git a/haystack/components/converters/__init__.py b/haystack/components/converters/__init__.py index 2c7ed33505..d0057ea33d 100644 --- a/haystack/components/converters/__init__.py +++ b/haystack/components/converters/__init__.py @@ -4,7 +4,7 @@ from haystack.components.converters.azure import AzureOCRDocumentConverter from haystack.components.converters.csv import CSVToDocument -from haystack.components.converters.docx import DOCXMetadata, DOCXToDocument +from haystack.components.converters.docx import DOCXToDocument from haystack.components.converters.html import HTMLToDocument from haystack.components.converters.json import JSONConverter from haystack.components.converters.markdown import MarkdownToDocument @@ -28,7 +28,6 @@ "OpenAPIServiceToFunctions", "OutputAdapter", "DOCXToDocument", - "DOCXMetadata", "PPTXToDocument", "CSVToDocument", "JSONConverter", diff --git a/haystack/components/converters/docx.py b/haystack/components/converters/docx.py index 8f9a58004d..3607ae4544 100644 --- a/haystack/components/converters/docx.py +++ b/haystack/components/converters/docx.py @@ -5,7 +5,7 @@ import csv import io import os -from dataclasses import dataclass +from dataclasses import asdict, dataclass from enum import Enum from io import StringIO from pathlib import Path @@ -189,7 +189,7 @@ def run( ) continue - docx_metadata = self._get_docx_metadata(document=docx_document) + docx_metadata = asdict(self._get_docx_metadata(document=docx_document)) merged_metadata = {**bytestream.meta, **metadata, "docx": docx_metadata} if not self.store_full_path and "file_path" in bytestream.meta: diff --git a/releasenotes/notes/docxmetadata-as-dict-20cf2ef0abf7af8a.yaml b/releasenotes/notes/docxmetadata-as-dict-20cf2ef0abf7af8a.yaml new file mode 100644 index 0000000000..60f4c17ce3 --- /dev/null +++ 
b/releasenotes/notes/docxmetadata-as-dict-20cf2ef0abf7af8a.yaml @@ -0,0 +1,6 @@ +--- +upgrade: + - | + The `DOCXToDocument` converter now returns a `Document` object with DOCX metadata stored in the `meta` field as a + dictionary under the key `docx`. Previously, the metadata was represented as a `DOCXMetadata` dataclass. + This change does not impact reading from or writing to a Document Store. diff --git a/test/components/converters/test_docx_file_to_document.py b/test/components/converters/test_docx_file_to_document.py index c013759938..2de6bfefbe 100644 --- a/test/components/converters/test_docx_file_to_document.py +++ b/test/components/converters/test_docx_file_to_document.py @@ -121,23 +121,23 @@ def test_run(self, test_files_path, docx_converter): assert docs[0].meta.keys() == {"file_path", "docx"} assert docs[0].meta == { "file_path": os.path.basename(paths[0]), - "docx": DOCXMetadata( - author="Microsoft Office User", - category="", - comments="", - content_status="", - created="2024-06-09T21:17:00+00:00", - identifier="", - keywords="", - language="", - last_modified_by="Carlos Fernández Lorán", - last_printed=None, - modified="2024-06-09T21:27:00+00:00", - revision=2, - subject="", - title="", - version="", - ), + "docx": { + "author": "Microsoft Office User", + "category": "", + "comments": "", + "content_status": "", + "created": "2024-06-09T21:17:00+00:00", + "identifier": "", + "keywords": "", + "language": "", + "last_modified_by": "Carlos Fernández Lorán", + "last_printed": None, + "modified": "2024-06-09T21:27:00+00:00", + "revision": 2, + "subject": "", + "title": "", + "version": "", + }, } def test_run_with_table(self, test_files_path): @@ -153,23 +153,23 @@ def test_run_with_table(self, test_files_path): assert docs[0].meta.keys() == {"file_path", "docx"} assert docs[0].meta == { "file_path": os.path.basename(paths[0]), - "docx": DOCXMetadata( - author="Saha, Anirban", - category="", - comments="", - content_status="", - created="2020-07-14T08:14:00+00:00", - identifier="", - keywords="", - language="", - last_modified_by="Saha, Anirban", - last_printed=None, - modified="2020-07-14T08:16:00+00:00", - revision=1, - subject="", - title="", - version="", - ), + "docx": { + "author": "Saha, Anirban", + "category": "", + "comments": "", + "content_status": "", + "created": "2020-07-14T08:14:00+00:00", + "identifier": "", + "keywords": "", + "language": "", + "last_modified_by": "Saha, Anirban", + "last_printed": None, + "modified": "2020-07-14T08:16:00+00:00", + "revision": 1, + "subject": "", + "title": "", + "version": "", + }, } # let's now detect that the table markdown is correctly added and that order of elements is correct content_parts = docs[0].content.split("\n\n") @@ -193,23 +193,23 @@ def test_run_with_store_full_path_false(self, test_files_path): assert docs[0].meta.keys() == {"file_path", "docx"} assert docs[0].meta == { "file_path": "sample_docx_1.docx", - "docx": DOCXMetadata( - author="Microsoft Office User", - category="", - comments="", - content_status="", - created="2024-06-09T21:17:00+00:00", - identifier="", - keywords="", - language="", - last_modified_by="Carlos Fernández Lorán", - last_printed=None, - modified="2024-06-09T21:27:00+00:00", - revision=2, - subject="", - title="", - version="", - ), + "docx": { + "author": "Microsoft Office User", + "category": "", + "comments": "", + "content_status": "", + "created": "2024-06-09T21:17:00+00:00", + "identifier": "", + "keywords": "", + "language": "", + "last_modified_by": "Carlos Fernández Lorán", 
+ "last_printed": None, + "modified": "2024-06-09T21:27:00+00:00", + "revision": 2, + "subject": "", + "title": "", + "version": "", + }, } @pytest.mark.parametrize("table_format", ["markdown", "csv"]) @@ -285,23 +285,23 @@ def test_run_with_additional_meta(self, test_files_path, docx_converter): doc = output["documents"][0] assert doc.meta == { "file_path": os.path.basename(paths[0]), - "docx": DOCXMetadata( - author="Microsoft Office User", - category="", - comments="", - content_status="", - created="2024-06-09T21:17:00+00:00", - identifier="", - keywords="", - language="", - last_modified_by="Carlos Fernández Lorán", - last_printed=None, - modified="2024-06-09T21:27:00+00:00", - revision=2, - subject="", - title="", - version="", - ), + "docx": { + "author": "Microsoft Office User", + "category": "", + "comments": "", + "content_status": "", + "created": "2024-06-09T21:17:00+00:00", + "identifier": "", + "keywords": "", + "language": "", + "last_modified_by": "Carlos Fernández Lorán", + "last_printed": None, + "modified": "2024-06-09T21:27:00+00:00", + "revision": 2, + "subject": "", + "title": "", + "version": "", + }, "language": "it", "author": "test_author", } From d2348ad46285dc435b4f7904f2e3dc5e606b7f56 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gy=C3=B6rgy=20Orosz?= Date: Wed, 5 Feb 2025 17:09:35 +0100 Subject: [PATCH 5/5] feat: SentenceTransformersDocumentEmbedder and SentenceTransformersTextEmbedder can accept and pass any arguments to SentenceTransformer.encode (#8806) * feat: SentenceTransformersDocumentEmbedder and SentenceTransformersTextEmbedder can accept and pass any arguments to SentenceTransformer.encode * refactor: encode_kwargs parameter of SentenceTransformersDocumentEmbedder and SentenceTransformersTextEmbedder mae to be the last positional parameter for backward compatibility reasons * docs: added explanation for encode_kwargs in SentenceTransformersTextEmbedder and SentenceTransformersDocumentEmbedder * test: added tests for encode_kwargs in SentenceTransformersTextEmbedder and SentenceTransformersDocumentEmbedder * doc: removed empty lines from docstrings of SentenceTransformersTextEmbedder and SentenceTransformersDocumentEmbedder * refactor: encode_kwargs parameter of SentenceTransformersDocumentEmbedder and SentenceTransformersTextEmbedder mae to be the last positional parameter for backward compatibility (part II.) 
--- ...sentence_transformers_document_embedder.py | 8 +++++++ .../sentence_transformers_text_embedder.py | 8 +++++++ ...entence-transformers-f4d885f6c5b1706f.yaml | 6 ++++++ ...sentence_transformers_document_embedder.py | 19 ++++++++++++++++- ...est_sentence_transformers_text_embedder.py | 21 +++++++++++++++++-- 5 files changed, 59 insertions(+), 3 deletions(-) create mode 100644 releasenotes/notes/add-encode-kwargs-sentence-transformers-f4d885f6c5b1706f.yaml diff --git a/haystack/components/embedders/sentence_transformers_document_embedder.py b/haystack/components/embedders/sentence_transformers_document_embedder.py index a5eaa9ae8b..0125be3f5c 100644 --- a/haystack/components/embedders/sentence_transformers_document_embedder.py +++ b/haystack/components/embedders/sentence_transformers_document_embedder.py @@ -56,6 +56,7 @@ def __init__( # noqa: PLR0913 # pylint: disable=too-many-positional-arguments tokenizer_kwargs: Optional[Dict[str, Any]] = None, config_kwargs: Optional[Dict[str, Any]] = None, precision: Literal["float32", "int8", "uint8", "binary", "ubinary"] = "float32", + encode_kwargs: Optional[Dict[str, Any]] = None, ): """ Creates a SentenceTransformersDocumentEmbedder component. @@ -104,6 +105,10 @@ def __init__( # noqa: PLR0913 # pylint: disable=too-many-positional-arguments All non-float32 precisions are quantized embeddings. Quantized embeddings are smaller and faster to compute, but may have a lower accuracy. They are useful for reducing the size of the embeddings of a corpus for semantic search, among other tasks. + :param encode_kwargs: + Additional keyword arguments for `SentenceTransformer.encode` when embedding documents. + This parameter is provided for fine customization. Be careful not to clash with already set parameters and + avoid passing parameters that change the output type. """ self.model = model @@ -121,6 +126,7 @@ def __init__( # noqa: PLR0913 # pylint: disable=too-many-positional-arguments self.model_kwargs = model_kwargs self.tokenizer_kwargs = tokenizer_kwargs self.config_kwargs = config_kwargs + self.encode_kwargs = encode_kwargs self.embedding_backend = None self.precision = precision @@ -155,6 +161,7 @@ def to_dict(self) -> Dict[str, Any]: tokenizer_kwargs=self.tokenizer_kwargs, config_kwargs=self.config_kwargs, precision=self.precision, + encode_kwargs=self.encode_kwargs, ) if serialization_dict["init_parameters"].get("model_kwargs") is not None: serialize_hf_model_kwargs(serialization_dict["init_parameters"]["model_kwargs"]) @@ -232,6 +239,7 @@ def run(self, documents: List[Document]): show_progress_bar=self.progress_bar, normalize_embeddings=self.normalize_embeddings, precision=self.precision, + **(self.encode_kwargs if self.encode_kwargs else {}), ) for doc, emb in zip(documents, embeddings): diff --git a/haystack/components/embedders/sentence_transformers_text_embedder.py b/haystack/components/embedders/sentence_transformers_text_embedder.py index 0785caddbd..2cb1622e97 100644 --- a/haystack/components/embedders/sentence_transformers_text_embedder.py +++ b/haystack/components/embedders/sentence_transformers_text_embedder.py @@ -50,6 +50,7 @@ def __init__( # noqa: PLR0913 # pylint: disable=too-many-positional-arguments tokenizer_kwargs: Optional[Dict[str, Any]] = None, config_kwargs: Optional[Dict[str, Any]] = None, precision: Literal["float32", "int8", "uint8", "binary", "ubinary"] = "float32", + encode_kwargs: Optional[Dict[str, Any]] = None, ): """ Create a SentenceTransformersTextEmbedder component. 
@@ -94,6 +95,10 @@ def __init__( # noqa: PLR0913 # pylint: disable=too-many-positional-arguments All non-float32 precisions are quantized embeddings. Quantized embeddings are smaller in size and faster to compute, but may have a lower accuracy. They are useful for reducing the size of the embeddings of a corpus for semantic search, among other tasks. + :param encode_kwargs: + Additional keyword arguments for `SentenceTransformer.encode` when embedding texts. + This parameter is provided for fine customization. Be careful not to clash with already set parameters and + avoid passing parameters that change the output type. """ self.model = model @@ -109,6 +114,7 @@ def __init__( # noqa: PLR0913 # pylint: disable=too-many-positional-arguments self.model_kwargs = model_kwargs self.tokenizer_kwargs = tokenizer_kwargs self.config_kwargs = config_kwargs + self.encode_kwargs = encode_kwargs self.embedding_backend = None self.precision = precision @@ -141,6 +147,7 @@ def to_dict(self) -> Dict[str, Any]: tokenizer_kwargs=self.tokenizer_kwargs, config_kwargs=self.config_kwargs, precision=self.precision, + encode_kwargs=self.encode_kwargs, ) if serialization_dict["init_parameters"].get("model_kwargs") is not None: serialize_hf_model_kwargs(serialization_dict["init_parameters"]["model_kwargs"]) @@ -209,5 +216,6 @@ def run(self, text: str): show_progress_bar=self.progress_bar, normalize_embeddings=self.normalize_embeddings, precision=self.precision, + **(self.encode_kwargs if self.encode_kwargs else {}), )[0] return {"embedding": embedding} diff --git a/releasenotes/notes/add-encode-kwargs-sentence-transformers-f4d885f6c5b1706f.yaml b/releasenotes/notes/add-encode-kwargs-sentence-transformers-f4d885f6c5b1706f.yaml new file mode 100644 index 0000000000..407a1b7ae7 --- /dev/null +++ b/releasenotes/notes/add-encode-kwargs-sentence-transformers-f4d885f6c5b1706f.yaml @@ -0,0 +1,6 @@ +--- +enhancements: + - | + Enhanced `SentenceTransformersDocumentEmbedder` and `SentenceTransformersTextEmbedder` to accept + an additional parameter, which is passed directly to the underlying `SentenceTransformer.encode` method + for greater flexibility in embedding customization. 
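For quick reference, a minimal usage sketch of the new parameter (illustrative only, not part of this patch series): the model name is a placeholder, the encode kwargs mirror the values used in the tests below, and whatever keys are passed must be keyword arguments the chosen model's SentenceTransformer.encode call actually supports.

    from haystack.components.embedders.sentence_transformers_text_embedder import SentenceTransformersTextEmbedder

    embedder = SentenceTransformersTextEmbedder(
        model="my-org/my-sentence-transformer",         # placeholder model name
        encode_kwargs={"task": "retrieval.query"},      # forwarded verbatim to SentenceTransformer.encode at run time
    )
    embedder.warm_up()                                   # loads the underlying SentenceTransformer model
    result = embedder.run(text="a nice text to embed")   # returns {"embedding": [...]}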
diff --git a/test/components/embedders/test_sentence_transformers_document_embedder.py b/test/components/embedders/test_sentence_transformers_document_embedder.py index d8813f36f4..205feac10f 100644 --- a/test/components/embedders/test_sentence_transformers_document_embedder.py +++ b/test/components/embedders/test_sentence_transformers_document_embedder.py @@ -1,9 +1,9 @@ # SPDX-FileCopyrightText: 2022-present deepset GmbH # # SPDX-License-Identifier: Apache-2.0 +import random from unittest.mock import MagicMock, patch -import random import pytest import torch @@ -79,6 +79,7 @@ def test_to_dict(self): "truncate_dim": None, "model_kwargs": None, "tokenizer_kwargs": None, + "encode_kwargs": None, "config_kwargs": None, "precision": "float32", }, @@ -102,6 +103,7 @@ def test_to_dict_with_custom_init_parameters(self): tokenizer_kwargs={"model_max_length": 512}, config_kwargs={"use_memory_efficient_attention": True}, precision="int8", + encode_kwargs={"task": "clustering"}, ) data = component.to_dict() @@ -124,6 +126,7 @@ def test_to_dict_with_custom_init_parameters(self): "tokenizer_kwargs": {"model_max_length": 512}, "config_kwargs": {"use_memory_efficient_attention": True}, "precision": "int8", + "encode_kwargs": {"task": "clustering"}, }, } @@ -316,6 +319,20 @@ def test_embed_metadata(self): precision="float32", ) + def test_embed_encode_kwargs(self): + embedder = SentenceTransformersDocumentEmbedder(model="model", encode_kwargs={"task": "retrieval.passage"}) + embedder.embedding_backend = MagicMock() + documents = [Document(content=f"document number {i}") for i in range(5)] + embedder.run(documents=documents) + embedder.embedding_backend.embed.assert_called_once_with( + ["document number 0", "document number 1", "document number 2", "document number 3", "document number 4"], + batch_size=32, + show_progress_bar=True, + normalize_embeddings=False, + precision="float32", + task="retrieval.passage", + ) + def test_prefix_suffix(self): embedder = SentenceTransformersDocumentEmbedder( model="model", diff --git a/test/components/embedders/test_sentence_transformers_text_embedder.py b/test/components/embedders/test_sentence_transformers_text_embedder.py index 293b07aada..6d0e239a15 100644 --- a/test/components/embedders/test_sentence_transformers_text_embedder.py +++ b/test/components/embedders/test_sentence_transformers_text_embedder.py @@ -1,11 +1,11 @@ # SPDX-FileCopyrightText: 2022-present deepset GmbH # # SPDX-License-Identifier: Apache-2.0 +import random from unittest.mock import MagicMock, patch -import torch -import random import pytest +import torch from haystack.components.embedders.sentence_transformers_text_embedder import SentenceTransformersTextEmbedder from haystack.utils import ComponentDevice, Secret @@ -70,6 +70,7 @@ def test_to_dict(self): "truncate_dim": None, "model_kwargs": None, "tokenizer_kwargs": None, + "encode_kwargs": None, "config_kwargs": None, "precision": "float32", }, @@ -91,6 +92,7 @@ def test_to_dict_with_custom_init_parameters(self): tokenizer_kwargs={"model_max_length": 512}, config_kwargs={"use_memory_efficient_attention": False}, precision="int8", + encode_kwargs={"task": "clustering"}, ) data = component.to_dict() assert data == { @@ -110,6 +112,7 @@ def test_to_dict_with_custom_init_parameters(self): "tokenizer_kwargs": {"model_max_length": 512}, "config_kwargs": {"use_memory_efficient_attention": False}, "precision": "int8", + "encode_kwargs": {"task": "clustering"}, }, } @@ -297,3 +300,17 @@ def test_run_quantization(self): assert len(embedding_def) == 
768
         assert all(isinstance(el, int) for el in embedding_def)
+
+    def test_embed_encode_kwargs(self):
+        embedder = SentenceTransformersTextEmbedder(model="model", encode_kwargs={"task": "retrieval.query"})
+        embedder.embedding_backend = MagicMock()
+        text = "a nice text to embed"
+        embedder.run(text=text)
+        embedder.embedding_backend.embed.assert_called_once_with(
+            [text],
+            batch_size=32,
+            show_progress_bar=True,
+            normalize_embeddings=False,
+            precision="float32",
+            task="retrieval.query",
+        )
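For reference, a standalone sketch of the comparison behaviour that patch 1 introduces in haystack/utils/filters.py (illustrative only, helper logic inlined and names shortened; requires python-dateutil). It shows why a naive filter value such as "2025-02-01" can now be compared against a timezone-aware document value.

    from datetime import datetime
    import dateutil.parser

    def parse_date(value):
        # mirrors _parse_date: try ISO 8601 parsing first, fall back to dateutil
        try:
            return datetime.fromisoformat(value)
        except (ValueError, TypeError):
            return dateutil.parser.parse(value)

    document_value = parse_date("2025-02-03T12:45:46.435816Z")  # timezone-aware
    filter_value = parse_date("2025-02-01")                      # naive

    # mirrors _ensure_both_dates_naive_or_aware: give the naive datetime the other one's tzinfo
    if (document_value.tzinfo is None) != (filter_value.tzinfo is None):
        if filter_value.tzinfo is None:
            filter_value = filter_value.replace(tzinfo=document_value.tzinfo)
        else:
            document_value = document_value.replace(tzinfo=filter_value.tzinfo)

    print(document_value >= filter_value)  # True; without the tzinfo fix this comparison raises TypeError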