-
Notifications
You must be signed in to change notification settings - Fork 32
/
Copy pathfinetuning_script.sh
executable file
·13 lines (12 loc) · 1.45 KB
/
finetuning_script.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
# Plot results for each fine-tuned behavior.
#
# For every behavior name in the list below, plot_results.py is invoked twice:
# once with --type open_ended and once with --type ab. Each invocation is given
# the positive and negative fine-tuned checkpoint paths for that behavior
# (finetuned_models/<behavior>_{pos,neg}_finetune_all.pt) via --override_weights.
#
# The commented-out commands are the remaining pipeline steps (fine-tuning,
# prompting with steering, scoring) and are currently disabled.
# NOTE(review): presumably they produce the checkpoints and results that the
# plotting step reads -- confirm before re-enabling or reordering them.
#
# A failing command does not stop the loop; the remaining behaviors are still
# processed and the script's exit status is that of the last command run.
for behavior in \
  "hallucination" \
  "myopic-reward" \
  "sycophancy" \
  "survival-instinct" \
  "refusal" \
  "corrigible-neutral-HHH" \
  "coordinate-other-ais"; do
  # Build the checkpoint paths once per behavior; quoted everywhere they are
  # used so the values are never subject to word splitting or globbing.
  pos_weights="finetuned_models/${behavior}_pos_finetune_all.pt"
  neg_weights="finetuned_models/${behavior}_neg_finetune_all.pt"

  # python finetune_llama.py --behavior "$behavior" --direction pos
  # python finetune_llama.py --behavior "$behavior" --direction neg
  # python prompting_with_steering.py --layers 13 --multipliers -1 0 1 --type open_ended --override_model_weights_path "$pos_weights" --behaviors "$behavior"
  # python prompting_with_steering.py --layers 13 --multipliers -1 0 1 --type open_ended --override_model_weights_path "$neg_weights" --behaviors "$behavior"
  # python scoring.py
  python plot_results.py --layers 13 --multipliers -1 0 1 --type open_ended \
    --override_weights "$pos_weights" "$neg_weights" \
    --behaviors "$behavior"
  # python prompting_with_steering.py --layers 13 --multipliers -1 0 1 --type ab --override_model_weights_path "$pos_weights" --behaviors "$behavior"
  # python prompting_with_steering.py --layers 13 --multipliers -1 0 1 --type ab --override_model_weights_path "$neg_weights" --behaviors "$behavior"
  python plot_results.py --layers 13 --multipliers -1 0 1 --type ab \
    --override_weights "$pos_weights" "$neg_weights" \
    --behaviors "$behavior"
done