From 6ee9af381f48b192886049787740095affd9c6c4 Mon Sep 17 00:00:00 2001
From: alvinttang <alvintang@pm.me>
Date: Sat, 6 Jun 2026 20:44:57 +0800
Subject: [PATCH] fix(HuggingFaceLocalGenerator): remove stop_words
 cross-product in reply post-processing

With N replies and M stop_words, the previous nested comprehension
produced N*M replies instead of N. Each of those replies had only one
stop word stripped, so it could still contain the other stop words.

Switching to a sequential loop (already what the chat sibling at
chat/hugging_face_local.py:660 does) keeps the count at N and removes
every stop word from every reply.

Refs #11409
---
 .../generators/hugging_face_local.py          |  7 +++--
 .../test_hugging_face_local_generator.py      | 28 +++++++++++++++++++
 2 files changed, 33 insertions(+), 2 deletions(-)

diff --git a/haystack/components/generators/hugging_face_local.py b/haystack/components/generators/hugging_face_local.py
index 290bc1a11b..e86946fd56 100644
--- a/haystack/components/generators/hugging_face_local.py
+++ b/haystack/components/generators/hugging_face_local.py
@@ -259,7 +259,10 @@ def run(
         replies = [o["generated_text"] for o in output if "generated_text" in o]
 
         if self.stop_words:
-            # the output of the pipeline includes the stop word
-            replies = [reply.replace(stop_word, "").rstrip() for reply in replies for stop_word in self.stop_words]
+            # The output of the pipeline includes the stop word. Strip the stop words one after another
+            # so that each reply stays a single entry with every stop word removed; a comprehension over
+            # both replies and stop words would return N*M partially stripped replies. See #11409.
+            for stop_word in self.stop_words:
+                replies = [reply.replace(stop_word, "").rstrip() for reply in replies]
 
         return {"replies": replies}
diff --git a/test/components/generators/test_hugging_face_local_generator.py b/test/components/generators/test_hugging_face_local_generator.py
index 62779e855b..ebc1233192 100644
--- a/test/components/generators/test_hugging_face_local_generator.py
+++ b/test/components/generators/test_hugging_face_local_generator.py
@@ -420,6 +420,34 @@ def test_run_stop_words_removal(self):
         results = generator.run(prompt="irrelevant")
         assert results == {"replies": ["Hello"]}
 
+    def test_run_stop_words_removal_with_multiple_stop_words(self):
+        """Regression for #11409: with N replies and M stop words, the cross-product comprehension
+        produced N*M replies, each stripped of only one stop word. The result must stay at N replies,
+        each with every stop word removed."""
+        generator = HuggingFaceLocalGenerator(
+            model="Qwen/Qwen3-0.6B", task="text-generation", stop_words=["STOP", "END"]
+        )
+        generator.pipeline = Mock(
+            return_value=[
+                {"generated_text": "Paris is the capital. STOP"},
+                {"generated_text": "France is in Europe. END"},
+            ]
+        )
+        generator.stopping_criteria_list = Mock()
+        results = generator.run(prompt="irrelevant")
+        assert results == {"replies": ["Paris is the capital.", "France is in Europe."]}
+
+    def test_run_stop_words_removal_all_stop_words_removed_from_each_reply(self):
+        """Every stop word is removed from every reply, not just the first matching one."""
+        generator = HuggingFaceLocalGenerator(
+            model="Qwen/Qwen3-0.6B", task="text-generation", stop_words=["STOP", "END"]
+        )
+        # Reply contains BOTH stop words; only trailing whitespace is stripped, so the inner double space remains
+        generator.pipeline = Mock(return_value=[{"generated_text": "Hello STOP world END"}])
+        generator.stopping_criteria_list = Mock()
+        results = generator.run(prompt="irrelevant")
+        assert results == {"replies": ["Hello  world"]}
+
     @pytest.mark.integration
     def test_stop_words_criteria_using_hf_tokenizer(self):
         """