diff --git a/parea/experiment/experiment.py b/parea/experiment/experiment.py
index 48299330..3bc9b729 100644
--- a/parea/experiment/experiment.py
+++ b/parea/experiment/experiment.py
@@ -2,6 +2,7 @@
 
 import asyncio
 import inspect
+import logging
 import os
 from collections import defaultdict
 from concurrent.futures import ThreadPoolExecutor
@@ -23,6 +24,7 @@
 from parea.utils.universal_encoder import json_dumps
 
 STAT_ATTRS = ["latency", "input_tokens", "output_tokens", "total_tokens", "cost"]
+logger = logging.getLogger()
 
 
 def calculate_avg_std_for_experiment(experiment_stats: ExperimentStatsSchema) -> Dict[str, str]:
@@ -63,7 +65,12 @@ def apply_dataset_eval(dataset_level_evals: List[Callable]) -> List[EvaluationRe
 
     results = []
     for dataset_level_eval in dataset_level_evals:
-        result = dataset_level_eval(root_traces)
+        try:
+            result = dataset_level_eval(root_traces)
+        except Exception as e:
+            logger.exception(f"Error occurred calling dataset level eval function '{dataset_level_eval.__name__}': {e}", exc_info=e)
+            continue
+
         if isinstance(result, EvaluationResult):
             results.append(result)
         elif isinstance(result, list):
diff --git a/pyproject.toml b/pyproject.toml
index e482d8ad..a2d503ba 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -6,7 +6,7 @@ build-backend = "poetry.core.masonry.api"
 [tool.poetry]
 name = "parea-ai"
 packages = [{ include = "parea" }]
-version = "0.2.161"
+version = "0.2.162"
 description = "Parea python sdk"
 readme = "README.md"
 authors = ["joel-parea-ai "]
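
For reviewers, a minimal, self-contained sketch of the behavior this patch introduces in `apply_dataset_eval`: a dataset-level eval function that raises is now logged via `logger.exception` and skipped, instead of aborting the whole experiment run. `failing_eval`, `passing_eval`, and the dict-based `root_traces` below are illustrative stand-ins, not part of the SDK.

```python
import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger()


def failing_eval(root_traces):
    # Stand-in for a buggy dataset-level eval.
    raise ValueError("boom")


def passing_eval(root_traces):
    # Returns a plain dict as a stand-in for an EvaluationResult.
    return {"name": "num_traces", "score": float(len(root_traces))}


root_traces = [{"trace_id": "t1"}, {"trace_id": "t2"}]  # stand-in trace data
results = []
for dataset_level_eval in [failing_eval, passing_eval]:
    try:
        result = dataset_level_eval(root_traces)
    except Exception as e:
        # Mirrors the patched loop: log the failure (with traceback) and move on.
        logger.exception(f"Error occurred calling dataset level eval function '{dataset_level_eval.__name__}': {e}", exc_info=e)
        continue

    results.append(result)

print(results)  # -> [{'name': 'num_traces', 'score': 2.0}]; the failure was only logged
```

The `continue` in the patch matters: without it, a failing eval would fall through to the `isinstance` checks with either an undefined `result` (on the first iteration) or a stale one left over from the previous loop pass.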