Merge pull request #467 from parea-ai/feat-add-levenshtein
feat: add levenshtein
joschkabraun committed Feb 15, 2024
2 parents 4e6f2e2 + 43f2e30 commit 33068ea
Showing 5 changed files with 167 additions and 38 deletions.
60 changes: 28 additions & 32 deletions parea/cookbook/run_experiment.py
@@ -3,48 +3,44 @@
from dotenv import load_dotenv

from parea import Parea, trace
from parea.evals import call_openai
from parea.schemas import Log
from parea.evals.general import levenshtein

load_dotenv()

p = Parea(api_key=os.getenv("PAREA_API_KEY"))


# Evaluation function(s)
def is_between_1_and_n(log: Log) -> float:
"""Evaluates if the number is between 1 and n"""
n = log.inputs["n"]
try:
return 1.0 if 1.0 <= float(log.output) <= float(n) else 0.0
except ValueError:
return 0.0


# annotate the function with the trace decorator and pass the evaluation function(s)
@trace(eval_funcs=[is_between_1_and_n])
def generate_random_number(n: str) -> str:
    return call_openai(
        [
            {"role": "user", "content": f"Generate a number between 1 and {n}."},
        ],
        model="gpt-3.5-turbo",
    )


# Define the experiment
# You can use the CLI command "parea experiment parea/cookbook/run_experiment.py" to execute this experiment
# or call `.run()`
# p.experiment(
# data=[{"n": "11"}], # Data to run the experiment on (list of dicts)
# func=generate_random_number, # Function to run (callable)
# n_trials=1, # Number of times to run the experiment on the same data
# )
@trace(eval_funcs=[levenshtein])
def greeting(name: str) -> str:
return f"Hello {name}"


data = [
    {
        "name": "Foo",
        "target": "Hi Foo",
    },
    {
        "name": "Bar",
        "target": "Hello Bar",
    },
] # test data to run the experiment on (list of dicts)


# # Define the experiment
# # You can use the CLI command "parea experiment parea/cookbook/run_experiment.py" to execute this experiment
# # or call `.run()`
# # p.experiment(
# # data=data, # Data to run the experiment on (list of dicts)
# # func=greeting, # Function to run (callable)
# # n_trials=1, # Number of times to run the experiment on the same data
# # )

# You can optionally run the experiment manually by calling `.run()`
if __name__ == "__main__":
    p.experiment(
        data=[{"n": "10"}],
        func=generate_random_number,
        data=data,
        func=greeting,
        n_trials=3,
    ).run()
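
As a quick sanity check of the new eval against the sample data above, the two rows should score roughly 0.56 ("Hello Foo" vs. target "Hi Foo") and 1.0 ("Hello Bar" is an exact match). A minimal sketch, assuming parea's Log schema can be constructed with just `output` and `target` keyword arguments (an assumption, not shown in this commit):

from parea.evals.general import levenshtein
from parea.schemas import Log

# Sketch only: assumes Log(output=..., target=...) is a valid construction.
print(levenshtein(Log(output="Hello Foo", target="Hi Foo")))     # ~0.56
print(levenshtein(Log(output="Hello Bar", target="Hello Bar")))  # 1.0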
1 change: 1 addition & 0 deletions parea/evals/general/__init__.py
@@ -1,6 +1,7 @@
from .answer_matches_target_llm_grader import answer_matches_target_llm_grader_factory
from .answer_matches_target_recall import answer_matches_target_recall
from .answer_relevancy import answer_relevancy_factory
from .levenshtein import levenshtein
from .llm_grader import llm_grader_factory, llm_grader_gpt3t, llm_grader_gpt4
from .lm_vs_lm import lm_vs_lm_factuality_factory, lm_vs_lm_factuality_gpt3t, lm_vs_lm_factuality_gpt4
from .self_check import self_check
19 changes: 19 additions & 0 deletions parea/evals/general/levenshtein.py
@@ -0,0 +1,19 @@
from Levenshtein import distance

from parea.schemas import Log


def levenshtein(log: Log) -> float:
    output = log.output
    target = log.target
    if target is None:
        raise ValueError("LevenshteinScorer requires a target value")

    output, target = str(output), str(target)
    max_len = max(len(x) for x in [output, target])

    score = 1.0
    if max_len > 0:
        score = 1 - (distance(output, target) / max_len)

    return score
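
The score is a normalized similarity: 1 minus the edit distance divided by the longer string's length, so identical strings score 1.0, completely different strings approach 0.0, and two empty strings score 1.0 (max_len is 0, so no penalty applies). A short sketch of the arithmetic for the cookbook's sample data, using the same python-Levenshtein package the eval imports:

from Levenshtein import distance

# "Hello Foo" vs. "Hi Foo": 4 edits over a max length of 9 -> 1 - 4/9 ~ 0.56
print(1 - distance("Hello Foo", "Hi Foo") / len("Hello Foo"))
# Exact match: 0 edits -> 1.0
print(1 - distance("Hello Bar", "Hello Bar") / len("Hello Bar"))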
