Skip to content

Commit

Permalink
Merge pull request #448 from parea-ai/PAI-663-run-experiment-n-times
Browse files Browse the repository at this point in the history
feat(ntrails): run experiment on same data n times
  • Loading branch information
jalexanderII authored Feb 13, 2024
2 parents 2bdad25 + 865d238 commit a185421
Show file tree
Hide file tree
Showing 7 changed files with 336 additions and 289 deletions.
8 changes: 4 additions & 4 deletions parea/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -307,17 +307,17 @@ def add_test_cases(self, data: list[dict[str, Any]], name: str) -> None:
def project_uuid(self) -> str:
return self._project.uuid

def experiment(self, data: Union[str, Iterable[dict]], func: Callable):
def experiment(self, data: Union[str, Iterable[dict]], func: Callable, n_trials: int = 1):
"""
:param data: If your dataset is defined locally it should be an iterable of k/v
pairs matching the expected inputs of your function. To reference a test collection you
pairs matching the expected inputs of your function. To reference a dataset you
have saved on Parea, use the dataset name as a string.
:param func: The function to run. This function should accept inputs that match the keys of the data field.
:param n_trials: The number of times to run the experiment on the same data.
"""
from parea import Experiment

return Experiment(data=data, func=func, p=self)
return Experiment(data=data, func=func, p=self, n_trials=n_trials)


_initialized_parea_wrapper = False
Expand Down
22 changes: 22 additions & 0 deletions parea/cookbook/enpoints_for_datasets.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import os

from dotenv import load_dotenv

from parea import Parea

load_dotenv()

p = Parea(api_key=os.getenv("PAREA_API_KEY"))


# Initial rows for the dataset: one input column ("problem") plus the
# reserved columns "target" and "tags".
data = [
    {"problem": "1+2", "target": 3, "tags": ["easy"]},
    {
        "problem": "Solve the differential equation dy/dx = 3y.",
        "target": "y = c * e^(3x)",
        "tags": ["hard"],
    },
]

# Create a new dataset on Parea named "Math problems".
# Because "problem" is the only non-reserved column, a prompt template used
# with this dataset is expected to have a placeholder for the variable
# "problem".
p.create_test_collection(data, name="Math problems")

# Append further test cases to the existing "Math problems" dataset.
# New test cases must have the same columns as the existing dataset.
new_data = [
    {"problem": "Evaluate the integral ∫x^2 dx from 0 to 3.", "target": 9, "tags": ["hard"]},
]
p.add_test_cases(new_data, name="Math problems")
6 changes: 4 additions & 2 deletions parea/cookbook/run_experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,13 +36,15 @@ def generate_random_number(n: str) -> str:
# You can use the CLI command "parea experiment parea/cookbook/run_experiment.py" to execute this experiment
# or call `.run()`
# p.experiment(
# data=[{"n": "10"}], # Data to run the experiment on (list of dicts)
# data=[{"n": "11"}], # Data to run the experiment on (list of dicts)
# func=generate_random_number, # Function to run (callable)
# n_trials=1, # Number of times to run the experiment on the same data
# )

# You can optionally run the experiment manually by calling `.run()`
if __name__ == "__main__":
p.experiment(
data=[{"n": "10"}, {"n": "11"}, {"n": "12"}],
data=[{"n": "12"}],
func=generate_random_number,
n_trials=3,
).run()
26 changes: 20 additions & 6 deletions parea/experiment/experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from parea import Parea
from parea.constants import PAREA_OS_ENV_EXPERIMENT_UUID
from parea.experiment.dvc import save_results_to_dvc_if_init
from parea.helpers import gen_random_name
from parea.helpers import duplicate_dicts, gen_random_name
from parea.schemas.models import CreateExperimentRequest, ExperimentSchema, ExperimentStatsSchema
from parea.utils.trace_utils import thread_ids_running_evals
from parea.utils.universal_encoder import json_dumps
Expand Down Expand Up @@ -52,8 +52,14 @@ def async_wrapper(fn, **kwargs):
return asyncio.run(fn(**kwargs))


async def experiment(name: str, data: Union[str, Iterable[dict]], func: Callable, p: Parea) -> ExperimentStatsSchema:
"""Creates an experiment and runs the function on the data iterator."""
async def experiment(name: str, data: Union[str, Iterable[dict]], func: Callable, p: Parea, n_trials: int = 1) -> ExperimentStatsSchema:
"""Creates an experiment and runs the function on the data iterator.
:param name: The name of the experiment. This name must be unique across experiment runs.
:param data: The data to run the experiment on. This can be a list of dictionaries or a string representing the name of a dataset on Parea.
:param func: The function to run. This function should accept inputs that match the keys of the data field.
:param p: The Parea instance to use for running the experiment.
:param n_trials: The number of times to run the experiment on the same data.
"""
if isinstance(data, str):
print(f"Fetching test collection: {data}")
test_collection = await p.aget_collection(data)
Expand All @@ -65,6 +71,12 @@ async def experiment(name: str, data: Union[str, Iterable[dict]], func: Callable
targets = [None] * len(data)
len_test_cases = len(data) if isinstance(data, list) else 0

if n_trials > 1:
data = duplicate_dicts(data, n_trials)
targets = targets * n_trials
len_test_cases = len(data) if isinstance(data, list) else 0
print(f"Running {n_trials} trials of the experiment \n")

experiment_schema: ExperimentSchema = p.create_experiment(CreateExperimentRequest(name=name))
experiment_uuid = experiment_schema.uuid
os.environ[PAREA_OS_ENV_EXPERIMENT_UUID] = experiment_uuid
Expand Down Expand Up @@ -107,14 +119,16 @@ async def limit_concurrency(data_input, target):
@define
class Experiment:
# If your dataset is defined locally it should be an iterable of k/v
# pairs matching the expected inputs of your function. To reference a test collection you
# have saved on Parea, use the collection name as a string.
# pairs matching the expected inputs of your function. To reference a dataset you
# have saved on Parea, use the dataset name as a string.
data: Union[str, Iterable[dict]]
# The function to run. This function should accept inputs that match the keys of the data field.
func: Callable = field()
experiment_stats: ExperimentStatsSchema = field(init=False, default=None)
p: Parea = field(default=None)
name: str = field(init=False)
# The number of times to run the experiment on the same data.
n_trials: int = field(default=1)

def __attrs_post_init__(self):
global _experiments
Expand All @@ -133,4 +147,4 @@ def run(self, name: Optional[str] = None) -> None:
If no name is provided a memorable name will be generated automatically.
"""
self._gen_name_if_none(name)
self.experiment_stats = asyncio.run(experiment(self.name, self.data, self.func, self.p))
self.experiment_stats = asyncio.run(experiment(self.name, self.data, self.func, self.p, self.n_trials))
6 changes: 6 additions & 0 deletions parea/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
import random
import time
import uuid
from collections.abc import Iterable
from copy import deepcopy

from attr import asdict, fields_dict

Expand Down Expand Up @@ -48,3 +50,7 @@ def calculate_avg_as_string(values: list[Optional[float]]) -> str:
values = [x for x in values if x is not None]
avg = sum(values) / len(values)
return f"{avg:.2f}"


def duplicate_dicts(data: Iterable[dict], n: int) -> Iterable[dict]:
    """Return ``n`` independent deep copies of every dict in ``data``, one full pass per trial.

    The result is ordered trial by trial -- ``[d1, d2, ..., d1, d2, ...]`` -- so it
    lines up index-for-index with a parallel list repeated via ``seq * n``, which is
    how ``experiment`` repeats its targets. Grouping the copies per item instead
    (``[d1, d1, d2, d2]``) would pair inputs with the wrong targets.

    :param data: The dicts to repeat. Any iterable is accepted; it is consumed once.
    :param n: How many times to repeat the data. Values below 1 yield an empty list.
    :return: A new list holding ``n`` deep copies of each input dict.
    """
    # Materialize first: a one-shot iterable (e.g. a generator) would be
    # exhausted after the first pass and later trials would come out empty.
    items = list(data)
    # Deep-copy every occurrence so mutating one trial's inputs cannot leak
    # into another trial or back into the caller's data.
    return [deepcopy(item) for _ in range(n) for item in items]
Loading

0 comments on commit a185421

Please sign in to comment.