Merge pull request #448 from parea-ai/PAI-663-run-experiment-n-times

feat(ntrails): run experiment on same data n times
parea-ai · Feb 13, 2024 · a185421 · a185421
2 parents 2bdad25 + 865d238
commit a185421
Show file tree

Hide file tree

Showing 7 changed files with 336 additions and 289 deletions.
diff --git a/parea/client.py b/parea/client.py
@@ -307,17 +307,17 @@ def add_test_cases(self, data: list[dict[str, Any]], name: str) -> None:
     def project_uuid(self) -> str:
         return self._project.uuid
 
-    def experiment(self, data: Union[str, Iterable[dict]], func: Callable):
+    def experiment(self, data: Union[str, Iterable[dict]], func: Callable, n_trials: int = 1):
         """
         :param data: If your dataset is defined locally it should be an iterable of k/v
-        pairs matching the expected inputs of your function. To reference a test collection you
+        pairs matching the expected inputs of your function. To reference a dataset you
         have saved on Parea, use the collection name as a string.
-
         :param func: The function to run. This function should accept inputs that match the keys of the data field.
+        :param n_trials: The number of times to run the experiment on the same data.
         """
         from parea import Experiment
 
-        return Experiment(data=data, func=func, p=self)
+        return Experiment(data=data, func=func, p=self, n_trials=n_trials)
 
 
 _initialized_parea_wrapper = False

diff --git a/parea/cookbook/enpoints_for_datasets.py b/parea/cookbook/enpoints_for_datasets.py
@@ -0,0 +1,22 @@
+import os
+
+from dotenv import load_dotenv
+
+from parea import Parea
+
+# Cookbook example: create a dataset on Parea from local data, then append more test cases to it.
+# Load environment variables first (presumably PAREA_API_KEY from a local .env file -- confirm for your setup).
+load_dotenv()
+
+p = Parea(api_key=os.getenv("PAREA_API_KEY"))
+
+
+data = [{"problem": "1+2", "target": 3, "tags": ["easy"]}, {"problem": "Solve the differential equation dy/dx = 3y.", "target": "y = c * e^(3x)", "tags": ["hard"]}]
+
+# This will create a new dataset on Parea named "Math problems".
+# The dataset will have one column named "problem", and two columns using the reserved names "target" and "tags".
+# When using this dataset, the expected prompt template should have a placeholder for the variable "problem".
+p.create_test_collection(data, name="Math problems")
+
+new_data = [{"problem": "Evaluate the integral ∫x^2 dx from 0 to 3.", "target": 9, "tags": ["hard"]}]
+# This will add the new test cases to the existing "Math problems" dataset.
+# New test cases must have the same columns as the existing dataset.
+p.add_test_cases(new_data, name="Math problems")
diff --git a/parea/cookbook/run_experiment.py b/parea/cookbook/run_experiment.py
@@ -36,13 +36,15 @@ def generate_random_number(n: str) -> str:
 # You can use the CLI command "parea experiment parea/cookbook/run_experiment.py" to execute this experiment
 # or call `.run()`
 # p.experiment(
-#     data=[{"n": "10"}],  # Data to run the experiment on (list of dicts)
+#     data=[{"n": "11"}],  # Data to run the experiment on (list of dicts)
 #     func=generate_random_number,  # Function to run (callable)
+#     n_trials=1, # Number of times to run the experiment on the same data
 # )
 
 # You can optionally run the experiment manually by calling `.run()`
 if __name__ == "__main__":
     p.experiment(
-        data=[{"n": "10"}, {"n": "11"}, {"n": "12"}],
+        data=[{"n": "12"}],
         func=generate_random_number,
+        n_trials=3,
     ).run()
diff --git a/parea/experiment/experiment.py b/parea/experiment/experiment.py
@@ -14,7 +14,7 @@
 from parea import Parea
 from parea.constants import PAREA_OS_ENV_EXPERIMENT_UUID
 from parea.experiment.dvc import save_results_to_dvc_if_init
-from parea.helpers import gen_random_name
+from parea.helpers import duplicate_dicts, gen_random_name
 from parea.schemas.models import CreateExperimentRequest, ExperimentSchema, ExperimentStatsSchema
 from parea.utils.trace_utils import thread_ids_running_evals
 from parea.utils.universal_encoder import json_dumps
@@ -52,8 +52,14 @@ def async_wrapper(fn, **kwargs):
     return asyncio.run(fn(**kwargs))
 
 
-async def experiment(name: str, data: Union[str, Iterable[dict]], func: Callable, p: Parea) -> ExperimentStatsSchema:
-    """Creates an experiment and runs the function on the data iterator."""
+async def experiment(name: str, data: Union[str, Iterable[dict]], func: Callable, p: Parea, n_trials: int = 1) -> ExperimentStatsSchema:
+    """Creates an experiment and runs the function on the data iterator.
+    param name: The name of the experiment. This name must be unique across experiment runs.
+    param data: The data to run the experiment on. This can be a list of dictionaries or a string representing the name of a dataset on Parea.
+    param func: The function to run. This function should accept inputs that match the keys of the data field.
+    param p: The Parea instance to use for running the experiment.
+    param n_trials: The number of times to run the experiment on the same data.
+    """
     if isinstance(data, str):
         print(f"Fetching test collection: {data}")
         test_collection = await p.aget_collection(data)
@@ -65,6 +71,12 @@ async def experiment(name: str, data: Union[str, Iterable[dict]], func: Callable
         targets = [None] * len(data)
         len_test_cases = len(data) if isinstance(data, list) else 0
 
+    if n_trials > 1:
+        data = duplicate_dicts(data, n_trials)
+        targets = targets * n_trials
+        len_test_cases = len(data) if isinstance(data, list) else 0
+        print(f"Running {n_trials} trials of the experiment \n")
+
     experiment_schema: ExperimentSchema = p.create_experiment(CreateExperimentRequest(name=name))
     experiment_uuid = experiment_schema.uuid
     os.environ[PAREA_OS_ENV_EXPERIMENT_UUID] = experiment_uuid
@@ -107,14 +119,16 @@ async def limit_concurrency(data_input, target):
 @define
 class Experiment:
     # If your dataset is defined locally it should be an iterable of k/v
-    # pairs matching the expected inputs of your function. To reference a test collection you
-    # have saved on Parea, use the collection name as a string.
+    # pairs matching the expected inputs of your function. To reference a dataset you
+    # have saved on Parea, use the dataset name as a string.
     data: Union[str, Iterable[dict]]
     # The function to run. This function should accept inputs that match the keys of the data field.
     func: Callable = field()
     experiment_stats: ExperimentStatsSchema = field(init=False, default=None)
     p: Parea = field(default=None)
     name: str = field(init=False)
+    # The number of times to run the experiment on the same data.
+    n_trials: int = field(default=1)
 
     def __attrs_post_init__(self):
         global _experiments
@@ -133,4 +147,4 @@ def run(self, name: Optional[str] = None) -> None:
         If no name is provided a memorable name will be generated automatically.
         """
         self._gen_name_if_none(name)
-        self.experiment_stats = asyncio.run(experiment(self.name, self.data, self.func, self.p))
+        self.experiment_stats = asyncio.run(experiment(self.name, self.data, self.func, self.p, self.n_trials))
diff --git a/parea/helpers.py b/parea/helpers.py
@@ -4,6 +4,8 @@
 import random
 import time
 import uuid
+from collections.abc import Iterable
+from copy import deepcopy
 
 from attr import asdict, fields_dict
 
@@ -48,3 +50,7 @@ def calculate_avg_as_string(values: list[Optional[float]]) -> str:
     values = [x for x in values if x is not None]
     avg = sum(values) / len(values)
     return f"{avg:.2f}"
+
+
+def duplicate_dicts(data: Iterable[dict], n: int) -> list[dict]:
+    """Return ``n`` independent deep copies of every dict in ``data``.
+
+    The result is ordered trial by trial: one full pass over ``data``, then the
+    next full pass, and so on (``[a, b]`` with ``n=2`` gives ``[a, b, a, b]``).
+    This matches how the experiment runner repeats its targets
+    (``targets * n_trials``), so a positional pairing of the two keeps every
+    input next to its own target. The previous item-by-item order
+    (``[a, a, b, b]``) paired inputs with the wrong targets whenever there was
+    more than one row and more than one trial.
+
+    :param data: The dictionaries to repeat. Any iterable is accepted,
+        including one-shot iterables such as generators.
+    :param n: How many copies of each dictionary to produce. Values below 1
+        yield an empty list.
+    :return: A new list of ``len(data) * n`` deep copies; no element shares
+        mutable state with the input or with another copy.
+    """
+    # Materialize once so a generator is not exhausted after the first pass.
+    items = list(data)
+    return [deepcopy(item) for _ in range(n) for item in items]