From 6ee9af381f48b192886049787740095affd9c6c4 Mon Sep 17 00:00:00 2001 From: alvinttang Date: Sat, 6 Jun 2026 20:44:57 +0800 Subject: [PATCH] fix(HuggingFaceLocalGenerator): remove stop_words cross-product in reply post-processing With N replies and M stop_words, the previous nested comprehension produced N*M replies instead of N: each reply was emitted M times, and each copy had only one stop word stripped, so a copy could still contain any of the other stop words. Switching to a sequential loop (the approach the chat sibling at chat/hugging_face_local.py:660 already uses) keeps the count at N and removes every stop word from every reply. Refs #11409 --- .../generators/hugging_face_local.py | 7 +++-- .../test_hugging_face_local_generator.py | 28 +++++++++++++++++++ 2 files changed, 33 insertions(+), 2 deletions(-) diff --git a/haystack/components/generators/hugging_face_local.py b/haystack/components/generators/hugging_face_local.py index 290bc1a11b..e86946fd56 100644 --- a/haystack/components/generators/hugging_face_local.py +++ b/haystack/components/generators/hugging_face_local.py @@ -259,7 +259,10 @@ def run( replies = [o["generated_text"] for o in output if "generated_text" in o] if self.stop_words: - # the output of the pipeline includes the stop word - replies = [reply.replace(stop_word, "").rstrip() for reply in replies for stop_word in self.stop_words] + # The output of the pipeline includes the stop word. Strip each stop word from each + # reply in sequence — the previous double-loop comprehension was a cross-product that + # produced N*M replies (half still containing a stop word) instead of N. See #11409. 
+ for stop_word in self.stop_words: + replies = [reply.replace(stop_word, "").rstrip() for reply in replies] return {"replies": replies} diff --git a/test/components/generators/test_hugging_face_local_generator.py b/test/components/generators/test_hugging_face_local_generator.py index 62779e855b..ebc1233192 100644 --- a/test/components/generators/test_hugging_face_local_generator.py +++ b/test/components/generators/test_hugging_face_local_generator.py @@ -420,6 +420,34 @@ def test_run_stop_words_removal(self): results = generator.run(prompt="irrelevant") assert results == {"replies": ["Hello"]} + def test_run_stop_words_removal_with_multiple_stop_words(self): + """Regression for #11409: with N replies and M stop words, the cross-product comprehension + produced N*M replies (half still containing a stop word). The result must stay at N replies, + each with every stop word removed.""" + generator = HuggingFaceLocalGenerator( + model="Qwen/Qwen3-0.6B", task="text-generation", stop_words=["STOP", "END"] + ) + generator.pipeline = Mock( + return_value=[ + {"generated_text": "Paris is the capital. STOP"}, + {"generated_text": "France is in Europe. END"}, + ] + ) + generator.stopping_criteria_list = Mock() + results = generator.run(prompt="irrelevant") + assert results == {"replies": ["Paris is the capital.", "France is in Europe."]} + + def test_run_stop_words_removal_all_stop_words_removed_from_each_reply(self): + """Every stop word is removed from every reply, not just the first matching one.""" + generator = HuggingFaceLocalGenerator( + model="Qwen/Qwen3-0.6B", task="text-generation", stop_words=["STOP", "END"] + ) + # Reply contains BOTH stop words + generator.pipeline = Mock(return_value=[{"generated_text": "Hello STOP world END"}]) + generator.stopping_criteria_list = Mock() + results = generator.run(prompt="irrelevant") + assert results == {"replies": ["Hello world"]} + @pytest.mark.integration def test_stop_words_criteria_using_hf_tokenizer(self): """