diff --git a/folktexts/cli/_utils.py b/folktexts/cli/_utils.py
index d135583..33ebda9 100644
--- a/folktexts/cli/_utils.py
+++ b/folktexts/cli/_utils.py
@@ -1,6 +1,20 @@
 """Utils for the folktexts cmd-line interface.
 """
 from __future__ import annotations
+import logging
+
+from pathlib import Path
+
+
+def get_or_create_results_dir(model_name: str, task_name: str, results_root_dir: str | Path) -> Path:
+    """Constructs a results directory path from model and task names."""
+    results_dir = Path(results_root_dir).expanduser().resolve()
+    results_dir /= f"model-{model_name}_task-{task_name}"
+    if not results_dir.exists():
+        logging.info(f"Creating results directory '{results_dir}'.")
+        results_dir.mkdir(parents=True, exist_ok=True)
+
+    return results_dir
 
 
 def cmd_line_args_to_kwargs(cmdline_args: list) -> dict:
diff --git a/folktexts/cli/eval_feature_importance.py b/folktexts/cli/eval_feature_importance.py
index 0f8cfa2..883c3d5 100755
--- a/folktexts/cli/eval_feature_importance.py
+++ b/folktexts/cli/eval_feature_importance.py
@@ -16,7 +16,7 @@
 DEFAULT_MODELS_DIR = DEFAULT_ROOT_DIR / "huggingface-models"
 DEFAULT_DATA_DIR = DEFAULT_ROOT_DIR / "data"
 
-DEFAULT_RESULTS_DIR = Path("folktexts-results")
+DEFAULT_RESULTS_DIR = Path(".")
 
 DEFAULT_TASK_NAME = "ACSIncome"
 
@@ -35,7 +35,7 @@ def setup_arg_parser() -> ArgumentParser:
     # List of command-line arguments, with type and helper string
     cli_args = [
         ("--model", str, "[str] Model name or path to model saved on disk"),
-        ("--task", str, "[str] Name of the ACS task to run the experiment on", False, DEFAULT_TASK_NAME),
+        ("--task", str, "[str] Name of the ACS task to run the experiment on", False, DEFAULT_TASK_NAME),
         ("--results-dir", str, "[str] Directory under which this experiment's results will be saved", False, DEFAULT_RESULTS_DIR),
         ("--data-dir", str, "[str] Root folder to find datasets on", False, DEFAULT_DATA_DIR),
         ("--models-dir", str, "[str] Root folder to find huggingface models on", False, DEFAULT_MODELS_DIR),
@@ -138,8 +138,13 @@ def main():
     model, tokenizer = load_model_tokenizer(model_folder_path)
 
     # Create results directory if needed
-    results_dir = Path(args.results_dir).expanduser().resolve()
-    results_dir.mkdir(parents=False, exist_ok=True)
+    # Set-up results directory
+    from ._utils import get_or_create_results_dir
+    results_dir = get_or_create_results_dir(
+        model_name=Path(args.model).name,
+        task_name=args.task,
+        results_root_dir=args.results_dir,
+    )
     logging.info(f"Saving results to {results_dir.as_posix()}")
 
     # Load Task and Dataset
diff --git a/folktexts/cli/launch_experiments_htcondor.py b/folktexts/cli/launch_experiments_htcondor.py
index d6e08dc..fece4bd 100755
--- a/folktexts/cli/launch_experiments_htcondor.py
+++ b/folktexts/cli/launch_experiments_htcondor.py
@@ -33,9 +33,6 @@
 # Models save directory
 MODELS_DIR = ROOT_DIR / "huggingface-models"
 
-# Path to the executable script to run
-# EXECUTABLE_PATH = Path(__file__).parent.resolve() / "run_acs_benchmark.py"
-# EXECUTABLE_PATH = Path(__file__).parent.resolve() / "eval_feature_importance.py"
 
 ##################
 # Global configs #
@@ -78,7 +75,7 @@ def make_llm_clf_experiment(
     executable_path: str,
     model_name: str,
     task: str,
-    results_root_dir: str,
+    results_dir: str,
     **kwargs,
 ) -> Experiment:
     """Create an experiment object to run.
@@ -114,33 +111,22 @@ def make_llm_clf_experiment(
         kwargs=dict(
             model=model_path,
             task=task,
+            results_dir=results_dir,
             **experiment_kwargs,
         ),
         **job_kwargs,
     )
 
     # Create LLM results directory
-    exp_results_dir = Path(results_root_dir) / get_llm_results_folder(exp)
-    exp_results_dir.mkdir(exist_ok=True, parents=True)
-    exp.kwargs["results_dir"] = exp_results_dir.as_posix()
     save_json(
         obj=exp.to_dict(),
-        path=exp_results_dir / "experiment.json",
+        path=Path(results_dir) / f"experiment.{exp.hash()}.json",
         overwrite=True,
     )
 
     return exp
 
 
-def get_llm_results_folder(exp: Experiment) -> str:
-    """Create a unique experiment name.
-    """
-    return (
-        f"model-{Path(exp.model).name}."
-        f"dataset-{exp.task}"
-    )
-
-
 def setup_arg_parser() -> argparse.ArgumentParser:
     # Init parser
     parser = argparse.ArgumentParser(description="Launch experiments to evaluate LLMs as classifiers.")
@@ -153,7 +139,7 @@ def setup_arg_parser() -> argparse.ArgumentParser:
     )
     parser.add_argument(
-        "--results-root-dir",
+        "--results-dir",
         type=str,
         help="[string] Directory under which results will be saved.",
         required=True,
     )
@@ -208,6 +194,9 @@ def main():
     if not executable_path.exists() or not executable_path.is_file():
         raise FileNotFoundError(f"Executable script not found at '{executable_path}'.")
 
+    # Set-up results directory
+    Path(args.results_dir).mkdir(parents=True, exist_ok=True)
+
     # Load experiment from JSON file if provided
     if args.experiment_json:
         print(f"Launching job for experiment at '{args.experiment_json}'...")
@@ -221,7 +210,7 @@ def main():
             executable_path=executable_path.as_posix(),
             model_name=model,
             task=task,
-            results_root_dir=args.results_root_dir,
+            results_dir=args.results_dir,
             **extra_kwargs,
         )
         for model in models
diff --git a/folktexts/cli/run_acs_benchmark.py b/folktexts/cli/run_acs_benchmark.py
index be3b9b7..b365ae3 100755
--- a/folktexts/cli/run_acs_benchmark.py
+++ b/folktexts/cli/run_acs_benchmark.py
@@ -24,7 +24,7 @@ def list_of_strings(arg):
 # List of command-line arguments, with type and helper string
 cli_args = [
     ("--model", str, "[str] Model name or path to model saved on disk"),
-    ("--task-name", str, "[str] Name of the ACS task to run the experiment on"),
+    ("--task", str, "[str] Name of the ACS task to run the experiment on"),
     ("--results-dir", str, "[str] Directory under which this experiment's results will be saved"),
     ("--data-dir", str, "[str] Root folder to find datasets on"),
     ("--few-shot", int, "[int] Use few-shot prompting with the given number of shots", False),
@@ -144,15 +144,20 @@ def main():
     bench = CalibrationBenchmark.make_acs_benchmark(
         model=model,
         tokenizer=tokenizer,
-        task_name=args.task_name,
+        task_name=args.task,
         data_dir=args.data_dir,
         config=config,
         subsampling=args.subsampling,
     )
 
-    # Create results directory if needed
-    results_dir = Path(args.results_dir).expanduser().resolve()
-    results_dir.mkdir(parents=False, exist_ok=True)
+    # Set-up results directory
+    from ._utils import get_or_create_results_dir
+    results_dir = get_or_create_results_dir(
+        model_name=Path(args.model).name,
+        task_name=args.task,
+        results_root_dir=args.results_dir,
+    )
+    logging.info(f"Saving results to {results_dir.as_posix()}")
 
     # Run benchmark
     bench.run(results_root_dir=results_dir, fit_threshold=args.fit_threshold)
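
For reference, a minimal usage sketch of the `get_or_create_results_dir` helper introduced in `folktexts/cli/_utils.py`. The import path follows the file location in this patch; the model path, task name, and results root below are illustrative placeholders, not values taken from the change itself.

# Minimal sketch, assuming the package is installed so the new module is
# importable as folktexts.cli._utils; "gpt2" and "~/folktexts-results" are
# illustrative values only.
from pathlib import Path

from folktexts.cli._utils import get_or_create_results_dir

results_dir = get_or_create_results_dir(
    model_name=Path("huggingface-models/gpt2").name,  # -> "gpt2"
    task_name="ACSIncome",
    results_root_dir="~/folktexts-results",
)
# The directory is created if missing and resolves to something like:
#   /home/<user>/folktexts-results/model-gpt2_task-ACSIncome
print(results_dir)

This mirrors how both `run_acs_benchmark.py` and `eval_feature_importance.py` now derive their per-experiment results directory from `--model`, `--task`, and `--results-dir`.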