Skip to content

Commit

Permalink
fix: fix test failures with Transformers models in PRs from forks (#8809)
Browse files Browse the repository at this point in the history

* trigger

* try pinning sentence transformers

* make integr tests run right away

* pin transformers instead

* older transformers version

* rm transformers pin

* try ignoring cache

* change ubuntu version

* try removing token

* try again

* more HF_API_TOKEN local deletions

* restore test priority

* rm leftover

* more deletions

* moreee

* more

* deletions

* restore jobs order
  • Loading branch information
anakin87 authored Feb 4, 2025
1 parent f1679f1 commit 5ae9488
Show file tree
Hide file tree
Showing 9 changed files with 38 additions and 19 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,8 @@ def test_run_unit(self, hf_pipeline_mock):
assert result["documents"][1].to_dict()["classification"]["label"] == "negative"

@pytest.mark.integration
def test_run(self):
def test_run(self, monkeypatch):
monkeypatch.delenv("HF_API_TOKEN", raising=False) # https://github.com/deepset-ai/haystack/issues/8811
component = TransformersZeroShotDocumentClassifier(
model="cross-encoder/nli-deberta-v3-xsmall", labels=["positive", "negative"]
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -261,10 +261,11 @@ def test_run_wrong_input_format(self):
embedder.run(text=list_integers_input)

@pytest.mark.integration
def test_run_trunc(self):
def test_run_trunc(self, monkeypatch):
"""
sentence-transformers/paraphrase-albert-small-v2 maps sentences & paragraphs to a 768 dimensional dense vector space
"""
monkeypatch.delenv("HF_API_TOKEN", raising=False) # https://github.com/deepset-ai/haystack/issues/8811
checkpoint = "sentence-transformers/paraphrase-albert-small-v2"
text = "a nice text to embed"

Expand Down
15 changes: 10 additions & 5 deletions test/components/evaluators/test_sas_evaluator.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,8 @@ def test_run_not_warmed_up(self):
evaluator.run(ground_truth_answers=ground_truths, predicted_answers=predictions)

@pytest.mark.integration
def test_run_with_matching_predictions(self):
def test_run_with_matching_predictions(self, monkeypatch):
monkeypatch.delenv("HF_API_TOKEN", raising=False) # https://github.com/deepset-ai/haystack/issues/8811
evaluator = SASEvaluator()
ground_truths = [
"A construction budget of US $2.3 billion",
Expand All @@ -124,7 +125,8 @@ def test_run_with_matching_predictions(self):
assert result["individual_scores"] == pytest.approx([1.0, 1.0, 1.0])

@pytest.mark.integration
def test_run_with_single_prediction(self):
def test_run_with_single_prediction(self, monkeypatch):
monkeypatch.delenv("HF_API_TOKEN", raising=False) # https://github.com/deepset-ai/haystack/issues/8811
evaluator = SASEvaluator()

ground_truths = ["US $2.3 billion"]
Expand All @@ -137,7 +139,8 @@ def test_run_with_single_prediction(self):
assert result["individual_scores"] == pytest.approx([0.689089], abs=1e-5)

@pytest.mark.integration
def test_run_with_mismatched_predictions(self):
def test_run_with_mismatched_predictions(self, monkeypatch):
monkeypatch.delenv("HF_API_TOKEN", raising=False) # https://github.com/deepset-ai/haystack/issues/8811
evaluator = SASEvaluator()
ground_truths = [
"US $2.3 billion",
Expand All @@ -156,7 +159,8 @@ def test_run_with_mismatched_predictions(self):
assert result["individual_scores"] == pytest.approx([0.689089, 0.870389, 0.908679], abs=1e-5)

@pytest.mark.integration
def test_run_with_bi_encoder_model(self):
def test_run_with_bi_encoder_model(self, monkeypatch):
monkeypatch.delenv("HF_API_TOKEN", raising=False) # https://github.com/deepset-ai/haystack/issues/8811
evaluator = SASEvaluator(model="sentence-transformers/all-mpnet-base-v2")
ground_truths = [
"A construction budget of US $2.3 billion",
Expand All @@ -175,7 +179,8 @@ def test_run_with_bi_encoder_model(self):
assert result["individual_scores"] == pytest.approx([1.0, 1.0, 1.0])

@pytest.mark.integration
def test_run_with_cross_encoder_model(self):
def test_run_with_cross_encoder_model(self, monkeypatch):
monkeypatch.delenv("HF_API_TOKEN", raising=False) # https://github.com/deepset-ai/haystack/issues/8811
evaluator = SASEvaluator(model="cross-encoder/ms-marco-MiniLM-L-6-v2")
ground_truths = [
"A construction budget of US $2.3 billion",
Expand Down
3 changes: 2 additions & 1 deletion test/components/generators/chat/test_hugging_face_local.py
Original file line number Diff line number Diff line change
Expand Up @@ -293,7 +293,8 @@ def test_messages_conversion_is_called(self, mock_convert, model_info_mock):

@pytest.mark.integration
@pytest.mark.flaky(reruns=3, reruns_delay=10)
def test_live_run(self):
def test_live_run(self, monkeypatch):
monkeypatch.delenv("HF_API_TOKEN", raising=False) # https://github.com/deepset-ai/haystack/issues/8811
messages = [ChatMessage.from_user("Please create a summary about the following topic: Climate change")]

llm = HuggingFaceLocalChatGenerator(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -454,8 +454,9 @@ def test_stop_words_criteria_using_hf_tokenizer(self):
assert criteria(generated_text_ids, scores=None) is True

@pytest.mark.integration
def test_hf_pipeline_runs_with_our_criteria(self):
def test_hf_pipeline_runs_with_our_criteria(self, monkeypatch):
"""Test that creating our own StopWordsCriteria and passing it to a Huggingface pipeline works."""
monkeypatch.delenv("HF_API_TOKEN", raising=False) # https://github.com/deepset-ai/haystack/issues/8811
generator = HuggingFaceLocalGenerator(
model="google/flan-t5-small", task="text2text-generation", stop_words=["unambiguously"]
)
Expand All @@ -466,7 +467,8 @@ def test_hf_pipeline_runs_with_our_criteria(self):

@pytest.mark.integration
@pytest.mark.flaky(reruns=3, reruns_delay=10)
def test_live_run(self):
def test_live_run(self, monkeypatch):
monkeypatch.delenv("HF_API_TOKEN", raising=False) # https://github.com/deepset-ai/haystack/issues/8811
llm = HuggingFaceLocalGenerator(model="Qwen/Qwen2.5-0.5B-Instruct", generation_kwargs={"max_new_tokens": 50})
llm.warm_up()

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -574,10 +574,11 @@ def test_pipeline_serialise_deserialise(self):

@pytest.mark.integration
@pytest.mark.parametrize("similarity", ["dot_product", "cosine"])
def test_run(self, similarity):
def test_run(self, similarity, monkeypatch):
"""
Tests that run method returns documents in the correct order
"""
monkeypatch.delenv("HF_API_TOKEN", raising=False) # https://github.com/deepset-ai/haystack/issues/8811
ranker = SentenceTransformersDiversityRanker(
model="sentence-transformers/all-MiniLM-L6-v2", similarity=similarity
)
Expand All @@ -601,7 +602,8 @@ def test_run(self, similarity):

@pytest.mark.integration
@pytest.mark.parametrize("similarity", ["dot_product", "cosine"])
def test_run_real_world_use_case(self, similarity):
def test_run_real_world_use_case(self, similarity, monkeypatch):
monkeypatch.delenv("HF_API_TOKEN", raising=False) # https://github.com/deepset-ai/haystack/issues/8811
ranker = SentenceTransformersDiversityRanker(
model="sentence-transformers/all-MiniLM-L6-v2", similarity=similarity
)
Expand Down Expand Up @@ -673,7 +675,8 @@ def test_run_real_world_use_case(self, similarity):

@pytest.mark.integration
@pytest.mark.parametrize("similarity", ["dot_product", "cosine"])
def test_run_with_maximum_margin_relevance_strategy(self, similarity):
def test_run_with_maximum_margin_relevance_strategy(self, similarity, monkeypatch):
monkeypatch.delenv("HF_API_TOKEN", raising=False) # https://github.com/deepset-ai/haystack/issues/8811
query = "renewable energy sources"
docs = [
Document(content="18th-century French literature"),
Expand Down
9 changes: 6 additions & 3 deletions test/components/readers/test_extractive.py
Original file line number Diff line number Diff line change
Expand Up @@ -776,7 +776,8 @@ def test_deduplicate_by_overlap(


@pytest.mark.integration
def test_t5():
def test_t5(monkeypatch):
monkeypatch.delenv("HF_API_TOKEN", raising=False) # https://github.com/deepset-ai/haystack/issues/8811
reader = ExtractiveReader("sjrhuschlee/flan-t5-base-squad2")
reader.warm_up()
answers = reader.run(example_queries[0], example_documents[0], top_k=2)[
Expand All @@ -800,7 +801,8 @@ def test_t5():


@pytest.mark.integration
def test_roberta():
def test_roberta(monkeypatch):
monkeypatch.delenv("HF_API_TOKEN", raising=False) # https://github.com/deepset-ai/haystack/issues/8811
reader = ExtractiveReader("deepset/tinyroberta-squad2")
reader.warm_up()
answers = reader.run(example_queries[0], example_documents[0], top_k=2)[
Expand Down Expand Up @@ -829,7 +831,8 @@ def test_roberta():


@pytest.mark.integration
def test_matches_hf_pipeline():
def test_matches_hf_pipeline(monkeypatch):
monkeypatch.delenv("HF_API_TOKEN", raising=False) # https://github.com/deepset-ai/haystack/issues/8811
reader = ExtractiveReader(
"deepset/tinyroberta-squad2", device=ComponentDevice.from_str("cpu"), overlap_threshold=None
)
Expand Down
6 changes: 4 additions & 2 deletions test/components/routers/test_transformers_text_router.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,8 @@ def test_run_unit(self, hf_pipeline_mock, mock_auto_config_from_pretrained):
assert out == {"en": "What is the color of the sky?"}

@pytest.mark.integration
def test_run(self):
def test_run(self, monkeypatch):
monkeypatch.delenv("HF_API_TOKEN", raising=False) # https://github.com/deepset-ai/haystack/issues/8811
router = TransformersTextRouter(model="papluca/xlm-roberta-base-language-detection")
router.warm_up()
out = router.run("What is the color of the sky?")
Expand Down Expand Up @@ -202,7 +203,8 @@ def test_run(self):
assert out == {"en": "What is the color of the sky?"}

@pytest.mark.integration
def test_wrong_labels(self):
def test_wrong_labels(self, monkeypatch):
monkeypatch.delenv("HF_API_TOKEN", raising=False) # https://github.com/deepset-ai/haystack/issues/8811
router = TransformersTextRouter(model="papluca/xlm-roberta-base-language-detection", labels=["en", "de"])
with pytest.raises(ValueError):
router.warm_up()
3 changes: 2 additions & 1 deletion test/components/routers/test_zero_shot_text_router.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,8 @@ def test_run_unit(self, hf_pipeline_mock):
assert out == {"query": "What is the color of the sky?"}

@pytest.mark.integration
def test_run(self):
def test_run(self, monkeypatch):
monkeypatch.delenv("HF_API_TOKEN", raising=False) # https://github.com/deepset-ai/haystack/issues/8811
router = TransformersZeroShotTextRouter(labels=["query", "passage"])
router.warm_up()
out = router.run("What is the color of the sky?")
Expand Down

0 comments on commit 5ae9488

Please sign in to comment.