Skip to content

Commit

Permalink
standardizing results folder structure
Browse files Browse the repository at this point in the history
  • Loading branch information
AndreFCruz committed Jun 27, 2024
1 parent 620c0bc commit c081a31
Show file tree
Hide file tree
Showing 4 changed files with 41 additions and 28 deletions.
14 changes: 14 additions & 0 deletions folktexts/cli/_utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,20 @@
"""Utils for the folktexts cmd-line interface.
"""
from __future__ import annotations
import logging

from pathlib import Path


def get_or_create_results_dir(model_name: str, task_name: str, results_root_dir: str | Path) -> Path:
    """Build the per-experiment results directory path, creating it on first use.

    The directory is named ``model-<model_name>_task-<task_name>`` and placed
    under *results_root_dir* (after user/home expansion and path resolution).

    Parameters
    ----------
    model_name : str
        Name of the model this experiment uses.
    task_name : str
        Name of the task this experiment runs on.
    results_root_dir : str | Path
        Root folder under which all results directories are kept.

    Returns
    -------
    Path
        Absolute path to the (now existing) results directory.
    """
    target = Path(results_root_dir).expanduser().resolve()
    target = target / f"model-{model_name}_task-{task_name}"

    # Only log (and create) when the directory is not already present;
    # exist_ok guards against a concurrent creation race.
    if not target.exists():
        logging.info(f"Creating results directory '{target}'.")
        target.mkdir(parents=True, exist_ok=True)

    return target


def cmd_line_args_to_kwargs(cmdline_args: list) -> dict:
Expand Down
13 changes: 9 additions & 4 deletions folktexts/cli/eval_feature_importance.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

DEFAULT_MODELS_DIR = DEFAULT_ROOT_DIR / "huggingface-models"
DEFAULT_DATA_DIR = DEFAULT_ROOT_DIR / "data"
DEFAULT_RESULTS_DIR = Path("folktexts-results")
DEFAULT_RESULTS_DIR = Path(".")

DEFAULT_TASK_NAME = "ACSIncome"

Expand All @@ -35,7 +35,7 @@ def setup_arg_parser() -> ArgumentParser:
# List of command-line arguments, with type and helper string
cli_args = [
("--model", str, "[str] Model name or path to model saved on disk"),
("--task", str, "[str] Name of the ACS task to run the experiment on", False, DEFAULT_TASK_NAME),
("--task", str, "[str] Name of the ACS task to run the experiment on", False, DEFAULT_TASK_NAME),
("--results-dir", str, "[str] Directory under which this experiment's results will be saved", False, DEFAULT_RESULTS_DIR),
("--data-dir", str, "[str] Root folder to find datasets on", False, DEFAULT_DATA_DIR),
("--models-dir", str, "[str] Root folder to find huggingface models on", False, DEFAULT_MODELS_DIR),
Expand Down Expand Up @@ -138,8 +138,13 @@ def main():
model, tokenizer = load_model_tokenizer(model_folder_path)

# Create results directory if needed
results_dir = Path(args.results_dir).expanduser().resolve()
results_dir.mkdir(parents=False, exist_ok=True)
# Set-up results directory
from ._utils import get_or_create_results_dir
results_dir = get_or_create_results_dir(
model_name=Path(args.model).name,
task_name=args.task,
results_root_dir=args.results_dir,
)
logging.info(f"Saving results to {results_dir.as_posix()}")

# Load Task and Dataset
Expand Down
27 changes: 8 additions & 19 deletions folktexts/cli/launch_experiments_htcondor.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,6 @@
# Models save directory
MODELS_DIR = ROOT_DIR / "huggingface-models"

# Path to the executable script to run
# EXECUTABLE_PATH = Path(__file__).parent.resolve() / "run_acs_benchmark.py"
# EXECUTABLE_PATH = Path(__file__).parent.resolve() / "eval_feature_importance.py"

##################
# Global configs #
Expand Down Expand Up @@ -78,7 +75,7 @@ def make_llm_clf_experiment(
executable_path: str,
model_name: str,
task: str,
results_root_dir: str,
results_dir: str,
**kwargs,
) -> Experiment:
"""Create an experiment object to run.
Expand Down Expand Up @@ -114,33 +111,22 @@ def make_llm_clf_experiment(
kwargs=dict(
model=model_path,
task=task,
results_dir=results_dir,
**experiment_kwargs,
),
**job_kwargs,
)

# Create LLM results directory
exp_results_dir = Path(results_root_dir) / get_llm_results_folder(exp)
exp_results_dir.mkdir(exist_ok=True, parents=True)
exp.kwargs["results_dir"] = exp_results_dir.as_posix()
save_json(
obj=exp.to_dict(),
path=exp_results_dir / "experiment.json",
path=Path(results_dir) / f"experiment.{exp.hash()}.json",
overwrite=True,
)

return exp


def get_llm_results_folder(exp: Experiment) -> str:
    """Return a unique folder name for *exp*, derived from its model and task.

    The name has the form ``model-<basename>.dataset-<task>`` where
    ``<basename>`` is the final path component of ``exp.model``.
    """
    model_stub = Path(exp.model).name
    return f"model-{model_stub}.dataset-{exp.task}"


def setup_arg_parser() -> argparse.ArgumentParser:
# Init parser
parser = argparse.ArgumentParser(description="Launch experiments to evaluate LLMs as classifiers.")
Expand All @@ -153,7 +139,7 @@ def setup_arg_parser() -> argparse.ArgumentParser:
)

parser.add_argument(
"--results-root-dir",
"--results-dir",
type=str,
help="[string] Directory under which results will be saved.",
required=True,
Expand Down Expand Up @@ -208,6 +194,9 @@ def main():
if not executable_path.exists() or not executable_path.is_file():
raise FileNotFoundError(f"Executable script not found at '{executable_path}'.")

# Set-up results directory
Path(args.results_dir).mkdir(parents=True, exist_ok=True)

# Load experiment from JSON file if provided
if args.experiment_json:
print(f"Launching job for experiment at '{args.experiment_json}'...")
Expand All @@ -221,7 +210,7 @@ def main():
executable_path=executable_path.as_posix(),
model_name=model,
task=task,
results_root_dir=args.results_root_dir,
results_dir=args.results_dir,
**extra_kwargs,
)
for model in models
Expand Down
15 changes: 10 additions & 5 deletions folktexts/cli/run_acs_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def list_of_strings(arg):
# List of command-line arguments, with type and helper string
cli_args = [
("--model", str, "[str] Model name or path to model saved on disk"),
("--task-name", str, "[str] Name of the ACS task to run the experiment on"),
("--task", str, "[str] Name of the ACS task to run the experiment on"),
("--results-dir", str, "[str] Directory under which this experiment's results will be saved"),
("--data-dir", str, "[str] Root folder to find datasets on"),
("--few-shot", int, "[int] Use few-shot prompting with the given number of shots", False),
Expand Down Expand Up @@ -144,15 +144,20 @@ def main():
bench = CalibrationBenchmark.make_acs_benchmark(
model=model,
tokenizer=tokenizer,
task_name=args.task_name,
task_name=args.task,
data_dir=args.data_dir,
config=config,
subsampling=args.subsampling,
)

# Create results directory if needed
results_dir = Path(args.results_dir).expanduser().resolve()
results_dir.mkdir(parents=False, exist_ok=True)
# Set-up results directory
from ._utils import get_or_create_results_dir
results_dir = get_or_create_results_dir(
model_name=Path(args.model).name,
task_name=args.task,
results_root_dir=args.results_dir,
)
logging.info(f"Saving results to {results_dir.as_posix()}")

# Run benchmark
bench.run(results_root_dir=results_dir, fit_threshold=args.fit_threshold)
Expand Down

0 comments on commit c081a31

Please sign in to comment.