
Commit f9cb604

Move and rename evaluation-related files
1 parent 05f1f47

4 files changed: 12 additions & 4 deletions

server/api/services/README.md renamed to evaluation/README.md

Lines changed: 5 additions & 3 deletions
@@ -1,11 +1,13 @@
 
-# LLM Output Evaluator
+# Evaluations
 
-This script evaluates the outputs of Large Language Models (LLMs) and estimates the associated token usage and cost.
+## LLM Output Evaluator
+
+The `evals` script evaluates the outputs of Large Language Models (LLMs) and estimates the associated token usage and cost.
 
 It supports batch evaluation via a configuration CSV and produces a detailed metrics report in CSV format.
 
-## Usage
+### Usage
 
 This script evaluates LLM outputs using the `lighteval` library: https://huggingface.co/docs/lighteval/en/metric-list#automatic-metrics-for-generative-tasks
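The renamed README describes a batch flow: a configuration CSV drives the runs, and a metrics report comes out as CSV. A rough sketch of that flow follows; the file and column names ("eval_config.csv", "model", "outputs_path", "metrics_report.csv") are illustrative assumptions, not the script's actual schema.

```python
import pandas as pd

# Hypothetical config CSV, one row per evaluation run. The column names
# here are invented for illustration, not the evals script's real schema.
config = pd.read_csv("eval_config.csv")

report_rows = []
for _, run in config.iterrows():
    outputs = pd.read_csv(run["outputs_path"])  # LLM outputs to score
    report_rows.append(
        {
            "model": run["model"],
            "num_outputs": len(outputs),
            # per-run metrics (scores, token usage, cost) would go here
        }
    )

# Detailed metrics report in CSV format, one row per configured run
pd.DataFrame(report_rows).to_csv("metrics_report.csv", index=False)
```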

server/api/services/evals.py renamed to evaluation/evals.py

Lines changed: 7 additions & 1 deletion
@@ -4,14 +4,20 @@
 
 # TODO: Add tests on a small dummy dataset to confirm it handles errors gracefully and produces expected outputs
 
+import sys
+import os
+
+# Ensure the parent directory is in the path to import ModelFactory
+sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
+
 import argparse
 import logging
 
 import pandas as pd
 from lighteval.tasks.requests import Doc
 from lighteval.metrics.metrics_sample import Extractiveness
 
-from services import ModelFactory
+from server.api.services.llm_services import ModelFactory
 
 logging.basicConfig(
     level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
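For context on the two `lighteval` imports kept by this change, below is a minimal sketch of how the `Extractiveness` sample metric is typically invoked. It assumes the lighteval interface in which `compute` takes a formatted `Doc` (with the source text under `specific["text"]`) and a list of prediction strings; the exact signature and the returned keys vary by lighteval version and are not shown in this commit.

```python
from lighteval.tasks.requests import Doc
from lighteval.metrics.metrics_sample import Extractiveness

# A source document and a model-produced summary to score.
source_text = "The quick brown fox jumps over the lazy dog. Foxes are agile."
summary = "A fox jumps over a dog."

# Assumption: Extractiveness reads the source from Doc.specific["text"];
# query/choices/gold_index are required Doc fields but unused by this metric.
doc = Doc(
    query="Summarize the text.",
    choices=[""],
    gold_index=0,
    specific={"text": source_text},
)

scores = Extractiveness().compute(formatted_doc=doc, predictions=[summary])
# Version-dependent keys, e.g. summarization_coverage,
# summarization_density, summarization_compression
print(scores)
```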
2 files renamed without changes.
