From 5c69f0fd2830ec250b1e801e0e2120dd53cfd966 Mon Sep 17 00:00:00 2001
From: ryker
Date: Wed, 27 May 2026 11:23:31 +0800
Subject: [PATCH] Fix HuggingFaceLocalGenerator stop word deduplication

---
 .../components/generators/hugging_face_local.py |  6 ++++--
 ...op-words-cross-product-b9f320441e4714a5.yaml |  4 ++++
 .../test_hugging_face_local_generator.py        | 17 +++++++++++++++++
 3 files changed, 25 insertions(+), 2 deletions(-)
 create mode 100644 releasenotes/notes/fix-hf-local-generator-stop-words-cross-product-b9f320441e4714a5.yaml

diff --git a/haystack/components/generators/hugging_face_local.py b/haystack/components/generators/hugging_face_local.py
index 290bc1a11b..5d3078e4d7 100644
--- a/haystack/components/generators/hugging_face_local.py
+++ b/haystack/components/generators/hugging_face_local.py
@@ -259,7 +259,9 @@ def run(
         replies = [o["generated_text"] for o in output if "generated_text" in o]
 
         if self.stop_words:
-            # the output of the pipeline includes the stop word
-            replies = [reply.replace(stop_word, "").rstrip() for reply in replies for stop_word in self.stop_words]
+            # The output of the pipeline includes the stop word, so strip each stop word
+            # from each reply without duplicating replies when multiple stop words are set.
+            for stop_word in self.stop_words:
+                replies = [reply.replace(stop_word, "").rstrip() for reply in replies]
 
         return {"replies": replies}
diff --git a/releasenotes/notes/fix-hf-local-generator-stop-words-cross-product-b9f320441e4714a5.yaml b/releasenotes/notes/fix-hf-local-generator-stop-words-cross-product-b9f320441e4714a5.yaml
new file mode 100644
index 0000000000..ad7f99aa53
--- /dev/null
+++ b/releasenotes/notes/fix-hf-local-generator-stop-words-cross-product-b9f320441e4714a5.yaml
@@ -0,0 +1,4 @@
+---
+fixes:
+  - |
+    Fixes `HuggingFaceLocalGenerator` so using multiple `stop_words` no longer duplicates replies while stripping stop words from generated text.
diff --git a/test/components/generators/test_hugging_face_local_generator.py b/test/components/generators/test_hugging_face_local_generator.py
index 62779e855b..64fedcd90c 100644
--- a/test/components/generators/test_hugging_face_local_generator.py
+++ b/test/components/generators/test_hugging_face_local_generator.py
@@ -420,6 +420,23 @@ def test_run_stop_words_removal(self):
         results = generator.run(prompt="irrelevant")
         assert results == {"replies": ["Hello"]}
 
+    def test_run_stop_words_removal_multiple_entries(self):
+        """Test that with two stop words each reply is stripped of its stop word and no reply is duplicated."""
+        generator = HuggingFaceLocalGenerator(
+            model="Qwen/Qwen3-0.6B", task="text-generation", stop_words=[" STOP", " END"]
+        )
+        generator.pipeline = Mock(
+            return_value=[
+                {"generated_text": "Paris is the capital. STOP"},
+                {"generated_text": "France is in Europe. END"},
+            ]
+        )
+        generator.stopping_criteria_list = Mock()
+
+        results = generator.run(prompt="irrelevant")
+
+        assert results == {"replies": ["Paris is the capital.", "France is in Europe."]}
+
     @pytest.mark.integration
     def test_stop_words_criteria_using_hf_tokenizer(self):
         """