Skip to content

Commit

Permalink
fix: EvaluationRunResult.score_report() is missing the metrics column (#7817)
Browse files Browse the repository at this point in the history

* fixing the DataFrame with the aggregated scores

* fixing tests
  • Loading branch information
davidsbatista authored and silvanocerza committed Jun 13, 2024
1 parent d748f47 commit 407d092
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 4 deletions.
4 changes: 3 additions & 1 deletion haystack/evaluation/eval_run_result.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,9 @@ def __init__(self, run_name: str, inputs: Dict[str, List[Any]], results: Dict[st

def score_report(self) -> DataFrame: # noqa: D102
results = {k: v["score"] for k, v in self.results.items()}
- return DataFrame.from_dict(results, orient="index", columns=["score"])
+ df = DataFrame.from_dict(results, orient="index", columns=["score"]).reset_index()
+ df.columns = ["metrics", "score"]
+ return df

def to_pandas(self) -> DataFrame: # noqa: D102
inputs_columns = list(self.inputs.keys())
Expand Down
7 changes: 4 additions & 3 deletions test/evaluation/test_eval_run_result.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,10 +88,11 @@ def test_score_report():

result = EvaluationRunResult("testing_pipeline_1", inputs=data["inputs"], results=data["metrics"])
report = result.score_report().to_json()

assert report == (
- '{"score":{"reciprocal_rank":0.476932,"single_hit":0.75,"multi_hit":0.46428375,'
- '"context_relevance":0.58177975,"faithfulness":0.40585375,'
- '"semantic_answer_similarity":0.53757075}}'
+ '{"metrics":{"0":"reciprocal_rank","1":"single_hit","2":"multi_hit","3":"context_relevance",'
+ '"4":"faithfulness","5":"semantic_answer_similarity"},'
+ '"score":{"0":0.476932,"1":0.75,"2":0.46428375,"3":0.58177975,"4":0.40585375,"5":0.53757075}}'
)


Expand Down

0 comments on commit 407d092

Please sign in to comment.