Skip to content

Commit

Permalink
standardizing results folder structure
Browse files Browse the repository at this point in the history
  • Loading branch information
AndreFCruz committed Jun 27, 2024
1 parent 620c0bc commit c081a31
Show file tree
Hide file tree
Showing 4 changed files with 41 additions and 28 deletions.
14 changes: 14 additions & 0 deletions folktexts/cli/_utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,20 @@
"""Utils for the folktexts cmd-line interface.
"""
from __future__ import annotations
import logging

from pathlib import Path


def get_or_create_results_dir(model_name: str, task_name: str, results_root_dir: str | Path) -> Path:
    """Build the per-experiment results directory path, creating it on first use.

    The directory is named ``model-<model_name>_task-<task_name>`` and placed
    under *results_root_dir* (after user/home expansion and path resolution).

    Parameters
    ----------
    model_name : str
        Name of the model this experiment uses.
    task_name : str
        Name of the task this experiment runs on.
    results_root_dir : str | Path
        Root folder under which all results directories are kept.

    Returns
    -------
    Path
        Absolute path to the (now existing) results directory.
    """
    target = Path(results_root_dir).expanduser().resolve()
    target = target / f"model-{model_name}_task-{task_name}"

    # Only log (and create) when the directory is not already present;
    # exist_ok guards against a concurrent creation race.
    if not target.exists():
        logging.info(f"Creating results directory '{target}'.")
        target.mkdir(parents=True, exist_ok=True)

    return target


def cmd_line_args_to_kwargs(cmdline_args: list) -> dict:
Expand Down
13 changes: 9 additions & 4 deletions folktexts/cli/eval_feature_importance.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

DEFAULT_MODELS_DIR = DEFAULT_ROOT_DIR / "huggingface-models"
DEFAULT_DATA_DIR = DEFAULT_ROOT_DIR / "data"
DEFAULT_RESULTS_DIR = Path("folktexts-results")
DEFAULT_RESULTS_DIR = Path(".")

DEFAULT_TASK_NAME = "ACSIncome"

Expand All @@ -35,7 +35,7 @@ def setup_arg_parser() -> ArgumentParser:
# List of command-line arguments, with type and helper string
cli_args = [
("--model", str, "[str] Model name or path to model saved on disk"),
("--task", str, "[str] Name of the ACS task to run the experiment on", False, DEFAULT_TASK_NAME),
("--task", str, "[str] Name of the ACS task to run the experiment on", False, DEFAULT_TASK_NAME),
("--results-dir", str, "[str] Directory under which this experiment's results will be saved", False, DEFAULT_RESULTS_DIR),
("--data-dir", str, "[str] Root folder to find datasets on", False, DEFAULT_DATA_DIR),
("--models-dir", str, "[str] Root folder to find huggingface models on", False, DEFAULT_MODELS_DIR),
Expand Down Expand Up @@ -138,8 +138,13 @@ def main():
model, tokenizer = load_model_tokenizer(model_folder_path)

# Create results directory if needed
results_dir = Path(args.results_dir).expanduser().resolve()
results_dir.mkdir(parents=False, exist_ok=True)
# Set-up results directory
from ._utils import get_or_create_results_dir
results_dir = get_or_create_results_dir(
model_name=Path(args.model).name,
task_name=args.task,
results_root_dir=args.results_dir,
)
logging.info(f"Saving results to {results_dir.as_posix()}")

# Load Task and Dataset
Expand Down
27 changes: 8 additions & 19 deletions folktexts/cli/launch_experiments_htcondor.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,6 @@
# Models save directory
MODELS_DIR = ROOT_DIR / "huggingface-models"

# Path to the executable script to run
# EXECUTABLE_PATH = Path(__file__).parent.resolve() / "run_acs_benchmark.py"
# EXECUTABLE_PATH = Path(__file__).parent.resolve() / "eval_feature_importance.py"

##################
# Global configs #
Expand Down Expand Up @@ -78,7 +75,7 @@ def make_llm_clf_experiment(
executable_path: str,
model_name: str,
task: str,
results_root_dir: str,
results_dir: str,
**kwargs,
) -> Experiment:
"""Create an experiment object to run.
Expand Down Expand Up @@ -114,33 +111,22 @@ def make_llm_clf_experiment(
kwargs=dict(
model=model_path,
task=task,
results_dir=results_dir,
**experiment_kwargs,
),
**job_kwargs,
)

# Create LLM results directory
exp_results_dir = Path(results_root_dir) / get_llm_results_folder(exp)
exp_results_dir.mkdir(exist_ok=True, parents=True)
exp.kwargs["results_dir"] = exp_results_dir.as_posix()
save_json(
obj=exp.to_dict(),
path=exp_results_dir / "experiment.json",
path=Path(results_dir) / f"experiment.{exp.hash()}.json",
overwrite=True,
)

return exp


def get_llm_results_folder(exp: Experiment) -> str:
    """Return a unique folder name for *exp*, derived from its model and task.

    The name has the form ``model-<basename>.dataset-<task>`` where
    ``<basename>`` is the final path component of ``exp.model``.
    """
    model_stub = Path(exp.model).name
    return f"model-{model_stub}.dataset-{exp.task}"


def setup_arg_parser() -> argparse.ArgumentParser:
# Init parser
parser = argparse.ArgumentParser(description="Launch experiments to evaluate LLMs as classifiers.")
Expand All @@ -153,7 +139,7 @@ def setup_arg_parser() -> argparse.ArgumentParser:
)

parser.add_argument(
"--results-root-dir",
"--results-dir",
type=str,
help="[string] Directory under which results will be saved.",
required=True,
Expand Down Expand Up @@ -208,6 +194,9 @@ def main():
if not executable_path.exists() or not executable_path.is_file():
raise FileNotFoundError(f"Executable script not found at '{executable_path}'.")

# Set-up results directory
Path(args.results_dir).mkdir(parents=True, exist_ok=True)

# Load experiment from JSON file if provided
if args.experiment_json:
print(f"Launching job for experiment at '{args.experiment_json}'...")
Expand All @@ -221,7 +210,7 @@ def main():
executable_path=executable_path.as_posix(),
model_name=model,
task=task,
results_root_dir=args.results_root_dir,
results_dir=args.results_dir,
**extra_kwargs,
)
for model in models
Expand Down
15 changes: 10 additions & 5 deletions folktexts/cli/run_acs_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def list_of_strings(arg):
# List of command-line arguments, with type and helper string
cli_args = [
("--model", str, "[str] Model name or path to model saved on disk"),
("--task-name", str, "[str] Name of the ACS task to run the experiment on"),
("--task", str, "[str] Name of the ACS task to run the experiment on"),
("--results-dir", str, "[str] Directory under which this experiment's results will be saved"),
("--data-dir", str, "[str] Root folder to find datasets on"),
("--few-shot", int, "[int] Use few-shot prompting with the given number of shots", False),
Expand Down Expand Up @@ -144,15 +144,20 @@ def main():
bench = CalibrationBenchmark.make_acs_benchmark(
model=model,
tokenizer=tokenizer,
task_name=args.task_name,
task_name=args.task,
data_dir=args.data_dir,
config=config,
subsampling=args.subsampling,
)

# Create results directory if needed
results_dir = Path(args.results_dir).expanduser().resolve()
results_dir.mkdir(parents=False, exist_ok=True)
# Set-up results directory
from ._utils import get_or_create_results_dir
results_dir = get_or_create_results_dir(
model_name=Path(args.model).name,
task_name=args.task,
results_root_dir=args.results_dir,
)
logging.info(f"Saving results to {results_dir.as_posix()}")

# Run benchmark
bench.run(results_root_dir=results_dir, fit_threshold=args.fit_threshold)
Expand Down

0 comments on commit c081a31

Please sign in to comment.