Skip to content

Commit

Permalink
Merge pull request #483 from parea-ai/PAI-672-get-or-use-datasets-by-…
Browse files Browse the repository at this point in the history
…id-or-name-in-sdks

Pai 672 get or use datasets by id or name in sdks
  • Loading branch information
jalexanderII authored Feb 19, 2024
2 parents 3f758cf + 2e99dda commit ec1d7ec
Show file tree
Hide file tree
Showing 6 changed files with 38 additions and 21 deletions.
26 changes: 13 additions & 13 deletions parea/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@
EXPERIMENT_STATS_ENDPOINT = "/experiment/{experiment_uuid}/stats"
EXPERIMENT_FINISHED_ENDPOINT = "/experiment/{experiment_uuid}/finished"
PROJECT_ENDPOINT = "/project"
GET_COLLECTION_ENDPOINT = "/collection/{test_collection_name}"
GET_COLLECTION_ENDPOINT = "/collection/{test_collection_identifier}"
CREATE_COLLECTION_ENDPOINT = "/collection"
ADD_TEST_CASES_ENDPOINT = "/testcases"

Expand Down Expand Up @@ -83,6 +83,10 @@ def _add_project_uuid_to_data(self, data) -> dict:
data_dict["project_uuid"] = self._project.uuid
return data_dict

@property
def project_uuid(self) -> str:
return self._project.uuid

def completion(self, data: Completion) -> CompletionResponse:
data = self._update_data_and_trace(data)
r = self._client.request(
Expand Down Expand Up @@ -226,17 +230,17 @@ def _create_or_get_project(self, name: str) -> CreateGetProjectResponseSchema:
)
return structure(r.json(), CreateGetProjectResponseSchema)

def get_collection(self, test_collection_name: str) -> TestCaseCollection:
def get_collection(self, test_collection_identifier: Union[str, int]) -> TestCaseCollection:
r = self._client.request(
"GET",
GET_COLLECTION_ENDPOINT.format(test_collection_name=test_collection_name),
GET_COLLECTION_ENDPOINT.format(test_collection_identifier=test_collection_identifier),
)
return structure(r.json(), TestCaseCollection)

async def aget_collection(self, test_collection_name: str) -> TestCaseCollection:
async def aget_collection(self, test_collection_identifier: Union[str, int]) -> TestCaseCollection:
r = await self._client.request_async(
"GET",
GET_COLLECTION_ENDPOINT.format(test_collection_name=test_collection_name),
GET_COLLECTION_ENDPOINT.format(test_collection_identifier=test_collection_identifier),
)
return structure(r.json(), TestCaseCollection)

Expand All @@ -248,23 +252,19 @@ def create_test_collection(self, data: list[dict[str, Any]], name: Optional[str]
data=asdict(request),
)

def add_test_cases(self, data: list[dict[str, Any]], name: str) -> None:
request = CreateTestCases(name=name, test_cases=create_test_cases(data))
def add_test_cases(self, data: list[dict[str, Any]], name: Optional[str] = None, dataset_id: Optional[int] = None) -> None:
request = CreateTestCases(id=dataset_id, name=name, test_cases=create_test_cases(data))
self._client.request(
"POST",
ADD_TEST_CASES_ENDPOINT,
data=asdict(request),
)

@property
def project_uuid(self) -> str:
return self._project.uuid

def experiment(self, data: Union[str, Iterable[dict]], func: Callable, n_trials: int = 1, metadata: dict = None):
def experiment(self, data: Union[str, int, Iterable[dict]], func: Callable, n_trials: int = 1, metadata: Optional[dict[str, str]] = None):
"""
:param data: If your dataset is defined locally it should be an iterable of k/v
pairs matching the expected inputs of your function. To reference a dataset you
have saved on Parea, use the collection name as a string.
have saved on Parea, use the dataset name as a string or the dataset id as an int.
:param func: The function to run. This function should accept inputs that match the keys of the data field.
:param n_trials: The number of times to run the experiment on the same data.
:param metadata: Optional metadata to attach to the experiment.
Expand Down
2 changes: 2 additions & 0 deletions parea/cookbook/enpoints_for_datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,5 @@
# this will add the new test cases to the existing "Math problems" dataset.
# New test cases must have the same columns as the existing dataset.
p.add_test_cases(new_data, name="Math problems")
# Or you can use the dataset ID instead of the name
p.add_test_cases(new_data, dataset_id=121)
8 changes: 7 additions & 1 deletion parea/cookbook/run_experiment_using_saved_test_collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,12 @@ def func(lang: str, framework: str) -> str:

if __name__ == "__main__":
p.experiment(
data="Hello World Example", # this is the name of my Test Collection in Parea (TestHub page)
data="Hello World Example", # this is the name of your Dataset in Parea (Dataset page)
func=func,
).run(name="hello-world-example")

# Or reference the dataset by its ID instead of its name
# p.experiment(
# data=121, # this is the id of your Dataset in Parea (Dataset page)
# func=func,
# ).run(name="hello-world-example")
13 changes: 8 additions & 5 deletions parea/experiment/experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,17 +52,20 @@ def async_wrapper(fn, **kwargs):
return asyncio.run(fn(**kwargs))


async def experiment(name: str, data: Union[str, Iterable[dict]], func: Callable, p: Parea, n_trials: int = 1, metadata: dict = None) -> ExperimentStatsSchema:
async def experiment(
name: str, data: Union[str, int, Iterable[dict]], func: Callable, p: Parea, n_trials: int = 1, metadata: Optional[dict[str, str]] = None
) -> ExperimentStatsSchema:
"""Creates an experiment and runs the function on the data iterator.
param name: The name of the experiment. This name must be unique across experiment runs.
param data: The data to run the experiment on. This can be a list of dictionaries or a string representing the name of a dataset on Parea.
param data: The data to run the experiment on. This can be a list of dictionaries,
a string representing the name of a dataset on Parea or an int representing the id of a dataset on Parea.
If it is a list of dictionaries, the key "target" is reserved for the target/expected output of that sample.
param func: The function to run. This function should accept inputs that match the keys of the data field.
param p: The Parea instance to use for running the experiment.
param n_trials: The number of times to run the experiment on the same data.
param metadata: A dictionary of metadata to attach to the experiment.
"""
if isinstance(data, str):
if isinstance(data, (str, int)):
print(f"Fetching test collection: {data}")
test_collection = await p.aget_collection(data)
len_test_cases = test_collection.num_test_cases()
Expand Down Expand Up @@ -125,8 +128,8 @@ async def limit_concurrency(sample):
class Experiment:
# If your dataset is defined locally it should be an iterable of k/v
# pairs matching the expected inputs of your function. To reference a dataset you
# have saved on Parea, use the dataset name as a string.
data: Union[str, Iterable[dict]]
# have saved on Parea, use the dataset name as a string or the id as an int.
data: Union[str, int, Iterable[dict]]
# The function to run. This function should accept inputs that match the keys of the data field.
func: Callable = field()
experiment_stats: ExperimentStatsSchema = field(init=False, default=None)
Expand Down
8 changes: 7 additions & 1 deletion parea/schemas/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,9 +245,15 @@ class CreateTestCase:

@define
class CreateTestCases:
    """Request payload for adding test cases to a dataset.

    The target dataset is identified either by ``id`` or by ``name``; at
    least one of the two must be provided.

    Raises:
        ValueError: if neither ``id`` nor ``name`` is set (both falsy).
    """

    # Identifier of the dataset; may be omitted when `name` is given.
    id: Optional[int] = None
    # Name of the dataset; may be omitted when `id` is given.
    name: Optional[str] = None
    # Test cases to send; defaults to a fresh empty list per instance.
    test_cases: list[CreateTestCase] = field(factory=list)

    def __attrs_post_init__(self) -> None:
        # The previous `@validators.optional`-decorated method was never
        # invoked: `validators.optional` wraps an existing validator, it does
        # not register one, so the check was dead code. attrs calls this hook
        # at the end of the generated `__init__`, which makes the check run.
        # NOTE(review): subclasses inherit this hook, so they must also be
        # constructed with an id or a name — confirm against callers.
        if not (self.id or self.name):
            raise ValueError("One of id or name must be set.")


@define
class CreateTestCaseCollection(CreateTestCases):
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ build-backend = "poetry.core.masonry.api"
[tool.poetry]
name = "parea-ai"
packages = [{ include = "parea" }]
version = "0.2.77"
version = "0.2.78"
description = "Parea python sdk"
readme = "README.md"
authors = ["joel-parea-ai <[email protected]>"]
Expand Down

0 comments on commit ec1d7ec

Please sign in to comment.