From d8042233fc903e59149a9413fce180d6e8686963 Mon Sep 17 00:00:00 2001
From: AndreFCruz
Date: Thu, 27 Jun 2024 11:33:14 +0200
Subject: [PATCH] CLI script for evaluating LLM feature importance

---
 .../cli/evaluate_llm_feature_importance.py | 94 ++++++++++++-------
 1 file changed, 59 insertions(+), 35 deletions(-)

diff --git a/folktexts/cli/evaluate_llm_feature_importance.py b/folktexts/cli/evaluate_llm_feature_importance.py
index 12cf4b3..8792053 100755
--- a/folktexts/cli/evaluate_llm_feature_importance.py
+++ b/folktexts/cli/evaluate_llm_feature_importance.py
@@ -6,28 +6,28 @@
 from lightgbm import LGBMClassifier
 from sklearn.inspection import permutation_importance
 
+from folktexts.classifier import LLMClassifier
+from folktexts.dataset import Dataset
 from folktexts.llm_utils import load_model_tokenizer, get_model_folder_path
 from folktexts._io import save_pickle
 
 # Local paths
-ROOT_DIR = Path("/fast/groups/sf")  # CLUSTER dir
-# ROOT_DIR = Path("~").expanduser().resolve()  # LOCAL dir
+# DEFAULT_ROOT_DIR = Path("/fast/groups/sf")  # CLUSTER dir
+DEFAULT_ROOT_DIR = Path("~").expanduser().resolve()  # LOCAL dir
 
-MODELS_DIR = ROOT_DIR / "huggingface-models"
-DATA_DIR = ROOT_DIR / "data"
-RESULTS_ROOT_DIR = ROOT_DIR / "folktexts-results"
+DEFAULT_MODELS_DIR = DEFAULT_ROOT_DIR / "huggingface-models"
+DEFAULT_DATA_DIR = DEFAULT_ROOT_DIR / "data"
+DEFAULT_RESULTS_DIR = Path("folktexts-results")
 
-# MODEL_NAME = "meta-llama/Meta-Llama-3-8B"
-MODEL_NAME = "meta-llama/Meta-Llama-3-8B-Instruct"
-# MODEL_NAME = "google/gemma-2b"  # NOTE: this is among the smallest models
-
-TASK_NAME = "ACSIncome"
+DEFAULT_TASK_NAME = "ACSIncome"
 
 DEFAULT_CONTEXT_SIZE = 500
 DEFAULT_BATCH_SIZE = 30
 DEFAULT_SEED = 42
 
+DEFAULT_PERMUTATION_REPEATS = 5
+
 
 def setup_arg_parser() -> ArgumentParser:
@@ -37,12 +37,15 @@ def setup_arg_parser() -> ArgumentParser:
 
     # List of command-line arguments, with type and helper string
     cli_args = [
         ("--model", str, "[str] Model name or path to model saved on disk"),
-        ("--task-name", str, "[str] Name of the ACS task to run the experiment on"),
-        ("--results-dir", str, "[str] Directory under which this experiment's results will be saved"),
-        ("--data-dir", str, "[str] Root folder to find datasets on"),
+        ("--task-name", str, "[str] Name of the ACS task to run the experiment on", False, DEFAULT_TASK_NAME),
+        ("--results-dir", str, "[str] Directory under which this experiment's results will be saved", False, DEFAULT_RESULTS_DIR),
+        ("--data-dir", str, "[str] Root folder to find datasets on", False, DEFAULT_DATA_DIR),
+        ("--models-dir", str, "[str] Root folder to find HuggingFace models on", False, DEFAULT_MODELS_DIR),
+        ("--scorer", str, "[str] Name of the scorer to use for evaluation", False, "roc_auc"),
         ("--batch-size", int, "[int] The batch size to use for inference", False, DEFAULT_BATCH_SIZE),
         ("--context-size", int, "[int] The maximum context size when prompting the LLM", False, DEFAULT_CONTEXT_SIZE),
         ("--subsampling", float, "[float] Which fraction of the dataset to use (if omitted, will use all data)", False),
+        ("--fit-threshold", int, "[int] Fit the prediction threshold on this many samples (omit to skip fitting)", False),
         ("--seed", int, "[int] Random seed -- to set for reproducibility", False, DEFAULT_SEED),
     ]
 
@@ -57,10 +60,18 @@
     return parser
 
 
-def compute_feature_importance(llm_clf, dataset):
+def compute_feature_importance(
+    llm_clf: LLMClassifier,
+    dataset: Dataset,
+    scorer: str,
+    results_dir: Path,
+    fit_threshold=None,
+    seed=DEFAULT_SEED,
+) -> dict:
 
-    # # Optionally, fit the LLM classifier's threshold on a few data samples.
-    # llm_clf.fit(*dataset[:1000])
+    # Optionally, fit the LLM classifier's threshold on a few data samples.
+    if fit_threshold and isinstance(fit_threshold, int):
+        llm_clf.fit(*dataset[:fit_threshold])
 
     # Get train and test data
     X_train, y_train = dataset.get_train()
@@ -68,9 +79,9 @@
     permutation_kwargs = dict(
         X=X_test, y=y_test,
-        scoring="roc_auc",
-        n_repeats=5,
-        random_state=SEED,
+        scoring=scorer,
+        n_repeats=DEFAULT_PERMUTATION_REPEATS,
+        random_state=seed,
     )
 
     # Baseline: GBM feature importance
@@ -78,13 +89,14 @@
     gbm_clf.fit(X_train, y_train)
 
     r = permutation_importance(gbm_clf, **permutation_kwargs)
-
-    save_pickle(obj=r, path=f"permutation-importance.{TASK_NAME}.GBM.pkl")
+    save_pickle(
+        obj=r,
+        path=results_dir / f"permutation-importance.{llm_clf.task.name}.GBM.pkl",
+    )
 
     # Print results:
     print("GBM feature importance:")
     for i in r.importances_mean.argsort()[::-1]:
-        # if r.importances_mean[i] - 2 * r.importances_std[i] > 0:
         print(
             f"{X_test.columns[i]:<8}"
             f"{r.importances_mean[i]:.3f}"
@@ -92,11 +104,13 @@
 
     # LLM feature importance
     r = permutation_importance(llm_clf, **permutation_kwargs)
-    save_pickle(obj=r, path=f"permutation-importance.{TASK_NAME}.{llm_clf.model_name}.pkl")
+    save_pickle(
+        obj=r,
+        path=results_dir / f"permutation-importance.{llm_clf.task.name}.{llm_clf.model_name}.pkl",
+    )
 
     print("LLM feature importance:")
     for i in r.importances_mean.argsort()[::-1]:
-        # if r.importances_mean[i] - 2 * r.importances_std[i] > 0:
         print(
             f"{X_test.columns[i]:<8}"
             f"{r.importances_mean[i]:.3f}"
@@ -107,29 +121,31 @@
 
 def main():
     # Parse arguments from command line
-    args = setup_arg_parser().parse_args()  # TODO: use args to set up the experiment
+    args = setup_arg_parser().parse_args()
 
     # Set logging level
     logging.getLogger().setLevel(logging.INFO)
 
     # Load model and tokenizer
-    model_folder_path = get_model_folder_path(model_name=MODEL_NAME, root_dir=MODELS_DIR)
+    model_folder_path = get_model_folder_path(model_name=args.model, root_dir=args.models_dir)
     model, tokenizer = load_model_tokenizer(model_folder_path)
 
-    results_dir = RESULTS_ROOT_DIR / Path(model_folder_path).name
+    # Set up results directory
+    results_dir = Path(args.results_dir) / Path(model_folder_path).name
     results_dir.mkdir(exist_ok=True, parents=True)
-    results_dir
+    logging.info(f"Saving results to {results_dir.as_posix()}")
 
     # Load Task and Dataset
     from folktexts.acs import ACSTaskMetadata
-    task = ACSTaskMetadata.get_task(TASK_NAME)
+    task = ACSTaskMetadata.get_task(args.task_name)
 
     from folktexts.acs import ACSDataset
-    dataset = ACSDataset.make_from_task(task=task, cache_dir=DATA_DIR)
+    dataset = ACSDataset.make_from_task(task=task, cache_dir=args.data_dir)
 
-    # Optionally, subsample dataset  # TODO: use command line argument
-    # dataset.subsample(0.1)
-    # print(f"{dataset.subsampling=}")
+    # Optionally, subsample dataset
+    if args.subsampling:
+        dataset.subsample(args.subsampling)  # subsample in-place
+        logging.info(f"{dataset.subsampling=}")
 
     # Construct LLM Classifier
     from folktexts.classifier import LLMClassifier
     llm_clf = LLMClassifier(
         model=model,
         tokenizer=tokenizer,
         task=task,
-        batch_size=32,
+        batch_size=args.batch_size,
+        context_size=args.context_size,
     )
 
     # Compute feature importance
-    compute_feature_importance(llm_clf, tokenizer, dataset)
+    compute_feature_importance(
+        llm_clf,
+        dataset=dataset,
+        scorer=args.scorer,
+        results_dir=results_dir,
+        fit_threshold=args.fit_threshold,
+        seed=args.seed,
+    )
 
 
 if __name__ == "__main__":
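
Note for reviewers: the core of this patch is the permutation_kwargs dict handed to scikit-learn's permutation_importance, once for the LGBM baseline and once for the LLMClassifier. The snippet below is a minimal, self-contained sketch of that same flow, so the call shape can be sanity-checked without loading an LLM. It is illustrative only: the breast-cancer dataset and LogisticRegression pipeline are stand-ins chosen for this note, not part of folktexts.

    # Minimal sketch of the permutation-importance flow exercised above.
    # Assumption: a binary-classification dataset with predict_proba
    # support, so that the scoring="roc_auc" default of this patch works.
    from sklearn.datasets import load_breast_cancer
    from sklearn.inspection import permutation_importance
    from sklearn.linear_model import LogisticRegression
    from sklearn.model_selection import train_test_split
    from sklearn.pipeline import make_pipeline
    from sklearn.preprocessing import StandardScaler

    # as_frame=True yields a DataFrame, so X_test.columns works as in the script
    X, y = load_breast_cancer(return_X_y=True, as_frame=True)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=42)

    clf = make_pipeline(StandardScaler(), LogisticRegression())
    clf.fit(X_train, y_train)

    # Same call shape as compute_feature_importance: mean drop in the
    # ROC AUC score when each column is independently shuffled.
    r = permutation_importance(
        clf, X=X_test, y=y_test,
        scoring="roc_auc", n_repeats=5, random_state=42,
    )

    # Features sorted from most to least important, printed as the script does
    for i in r.importances_mean.argsort()[::-1]:
        print(
            f"{X_test.columns[i]:<30}"
            f"{r.importances_mean[i]:.3f}"
            f" +/- {r.importances_std[i]:.3f}"
        )

Assuming the defaults introduced in this patch, the actual script would then be run along the lines of:

    python folktexts/cli/evaluate_llm_feature_importance.py --model meta-llama/Meta-Llama-3-8B-Instruct --subsampling 0.1 --fit-threshold 1000

with --scorer, --task-name, and --seed left at their defaults (the 0.1 subsampling fraction and the 1000-sample threshold fit simply echo the values that were hard-coded in the comments this patch removes); the two permutation-importance pickles are then saved under a per-model subfolder of --results-dir.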