Feat: experiment make-status command v1 #129

Open
wants to merge 41 commits into base: main
41 commits
b904be6
status command v1 redo
gavin-ho1 Jul 27, 2024
49a4f16
implemented formatting
gavin-ho1 Jul 28, 2024
c47f3d9
fixed formatting
gavin-ho1 Jul 28, 2024
79ea6c1
refactored formatting to be cleaner
gavin-ho1 Jul 28, 2024
f8afa21
added total jobs counter and progress bar on the header
gavin-ho1 Jul 28, 2024
c81e73f
Final formatting and progress bars
gavin-ho1 Jul 28, 2024
5d7cf52
added color and bold formatting
gavin-ho1 Jul 28, 2024
1898295
minor fix to formatting
gavin-ho1 Jul 28, 2024
1706013
added bolding within filepath
gavin-ho1 Jul 28, 2024
8a298fa
update helper function
gavin-ho1 Jul 28, 2024
8493536
added color to number of jobs indicator
gavin-ho1 Jul 28, 2024
b604628
minor fix to number of jobs indicator
gavin-ho1 Jul 28, 2024
f1176a8
added comments to clarify code
gavin-ho1 Jul 28, 2024
c94eb6f
added --make argument
gavin-ho1 Jul 28, 2024
6c6cb78
Added implementation for --make argument
gavin-ho1 Jul 29, 2024
9aedd2f
edits to --make argument
gavin-ho1 Jul 29, 2024
33ddd78
reformatted title progress bar to be a dynamic length
gavin-ho1 Jul 29, 2024
0276c85
changed progress bar colors for better visibility
gavin-ho1 Jul 30, 2024
fa1f96e
fixed bug where no experiments made would error out
gavin-ho1 Jul 30, 2024
6c959df
removed --make subcommand
gavin-ho1 Jul 30, 2024
ba615be
added system argument
gavin-ho1 Jul 31, 2024
bac4cb8
filtered by completeness
gavin-ho1 Jul 31, 2024
0a6a3a6
bug fix
gavin-ho1 Jul 31, 2024
726a38d
redid filter for completeness
gavin-ho1 Jul 31, 2024
be168b6
added system filtering
gavin-ho1 Jul 31, 2024
71fd7e1
Aryav's changes
gavin-ho1 Aug 1, 2024
36230b8
refactored filter by completion
gavin-ho1 Aug 1, 2024
5fccf64
added make-status subcommand
gavin-ho1 Aug 1, 2024
d4b5a1d
added make_status implementation
gavin-ho1 Aug 1, 2024
13bb230
refactored colors to return strings instead of print (this is useful …
gavin-ho1 Aug 1, 2024
c2df7a2
fixed dynamic title loading bar size
gavin-ho1 Aug 1, 2024
6ac17fd
minor fix
gavin-ho1 Aug 1, 2024
d3f9f11
deleted make-status (implemented wrong), and should be seperate branch
gavin-ho1 Aug 1, 2024
41f41cf
bugfix: refactored some print statements in the main.py file that use…
gavin-ho1 Aug 2, 2024
f552e85
added command
gavin-ho1 Aug 9, 2024
6084b34
implemented list print
gavin-ho1 Aug 9, 2024
9802cb6
proof of concept to show that experiments already made are deleted wh…
gavin-ho1 Aug 9, 2024
f91b25f
v1 final with comments
gavin-ho1 Aug 9, 2024
78ef9c2
made the print prettier
gavin-ho1 Aug 9, 2024
3f1d96a
added hint
gavin-ho1 Aug 9, 2024
93db9ef
got rid of commented code from a while back
gavin-ho1 Aug 20, 2024
254 changes: 241 additions & 13 deletions experiments/__main__.py
@@ -52,8 +52,9 @@

import argparse
import os
import json
from .utils import algo_dict, load_from_json, system_dict, challenge_dicts
from .main import load_data, run_challenge, make_plots, save_config, prGreen, prPink
from .main import load_data, run_challenge, make_plots, save_config, green, pink, cyan, red, loadingBar, bold
from dynadojo.challenges import FixedError, FixedComplexity, FixedTrainSize
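
# Note: green/pink/cyan/red/bold/loadingBar (imported above) return
# ANSI-formatted strings rather than printing directly, so they compose
# inside print() calls and f-strings. A rough sketch of the assumed
# helpers, for illustration only:
#
#   def green(text):
#       return f"\033[0;32m{text}\033[0m"
#
#   def loadingBar(completed, total, width):
#       # proportionally filled bar of the given character width
#       filled = int(width * completed / total) if total else 0
#       return '|' + '#' * filled + ' ' * (width - filled) + '|'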


@@ -64,10 +65,13 @@
plot_parser = subparsers.add_parser('plot', help='Plot an experiment results')
check_parser = subparsers.add_parser('check', help='Check for missing jobs')
scale_parser = subparsers.add_parser('scale', help='Temporary utility which rescales losses by dimensionality')
status_parser = subparsers.add_parser('status', help='List all available config.json files that you have already made')
make_status_parser = subparsers.add_parser('make_status', help='List all the available experiments you can call "make" on.')

# Accept command line arguments
make_parser.add_argument('--algo', type=str, default='lr', help='Specify which algo to run')
make_parser.add_argument('--system', type=str, default='lds', choices=system_dict.keys(), help='Specify which system to run')

make_parser.add_argument('--challenge', type=str, default="fc", choices=["fc", "fts", "fe"], help='Specify which challenge to run')
make_parser.add_argument('--output_dir', type=str, default="experiments/outputs", help='where to save config')
make_parser.add_argument('--all', action='store_true', help='if True, make all params')
@@ -89,9 +93,15 @@

scale_parser.add_argument('--data_dir', type=str, help='where to load results from')

status_parser.add_argument('--is_complete', type=str, choices=['true', 'false'], help='Filter by completed experiments')
status_parser.add_argument('--algo', type=str, help='Specify which algo to filter through')
status_parser.add_argument('--system', type=str, help='Specify which system to filter through')
status_parser.add_argument('--challenge', type=str, choices=["fc", "fts", "fe"], help='Specify which challenge to filter through')
status_parser.set_defaults(make=False)
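
# Example invocations of the new subcommands (illustrative filter values):
#   python -m experiments status
#   python -m experiments status --is_complete false --algo lr --challenge fc
#   python -m experiments make_status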

args, rest = program.parse_known_args()

if args.command == 'make':
if args.all:
for c, chall_dict in challenge_dicts.values():
for s in chall_dict.keys():
@@ -100,7 +110,7 @@
if a != "default":
print(f"Making {c.__name__} {s} {a}")
config_file, total_jobs = save_config(s, a, challenge_cls=c, output_dir=args.output_dir)
prPink(f"{config_file} with {total_jobs} jobs")
print(pink(f"{config_file} with {total_jobs} jobs"))
else:
assert args.algo.split("_")[0] in algo_dict.keys(), f"algo {args.algo} must be in algo_dict"
if args.challenge == "fc":
@@ -110,7 +120,7 @@
else:
challenge_cls = FixedError
config_file, total_jobs = save_config(args.system, args.algo, challenge_cls, output_dir=args.output_dir)
prPink(f"{config_file} with {total_jobs} jobs")
print(pink(f"{config_file} with {total_jobs} jobs"))
if rest: #maybe parse more args
args = program.parse_args(rest)
if args.command == 'run':
@@ -126,15 +136,15 @@
total_jobs = config["total_jobs"]
_, data = load_data(os.path.join(args.output_dir, config["folder_path"]))
if data is None:
prGreen("No previous jobs found.")
print(green("No previous jobs found."))
args.jobs = None
else:
completed_jobs = data['job_id'].drop_duplicates().to_list()
missing_jobs = [i for i in range(total_jobs) if i not in completed_jobs]
if len(missing_jobs) == 0:
prGreen("All jobs already completed. Exiting.")
print(green("All jobs already completed. Exiting."))
exit(0)
prGreen(f"{len(missing_jobs)} missing jobs found. Only running missing jobs.")
print(green(f"{len(missing_jobs)} missing jobs found. Only running missing jobs."))
args.jobs = ','.join(map(str, missing_jobs))

if args.node is not None and args.total_nodes > 1:
@@ -147,7 +157,7 @@
jobs_filter=[int(j) for j in args.jobs.split(",")] if args.jobs else None
)
else: # run the whole challenge
prGreen(f"Running {len(args.jobs.split(',')) if args.jobs else 'all'} jobs.")
print(green(f"Running {len(args.jobs.split(',')) if args.jobs else 'all'} jobs."))
run_challenge(
config_file_path=args.config_file,
output_dir=args.output_dir,
@@ -178,12 +188,11 @@
completed_jobs = data['job_id'].drop_duplicates().to_list()
missing_jobs = [i for i in range(total_jobs) if i not in completed_jobs]
if len(missing_jobs) == 0:
prGreen("All jobs completed.")
print(green("All jobs completed."))
exit(0)
print(f"Num of missing jobs: \t {len(missing_jobs)} of {total_jobs}")
print(f"Missing jobs: \n{','.join(map(str, missing_jobs))}")


elif args.command == 'scale':
assert args.data_dir is not None, "must specify data directory"
files, data = load_data(args.data_dir)
@@ -192,18 +201,237 @@
try:
os.makedirs(data_dir_unscaled, exist_ok=False)
except FileExistsError:
prPink(f"Exiting...Already scaled data. {data_dir_unscaled} already exists. ")
print(pink(f"Exiting...Already scaled data. {data_dir_unscaled} already exists. "))
exit(0)

# move all csv files in data_dir to data_dir_unscaled
for filepath in files:
os.rename(filepath, data_dir_unscaled + "/" + os.path.basename(filepath))
prGreen(f"Original data moved to {data_dir_unscaled}")
print(green(f"Original data moved to {data_dir_unscaled}"))

# rescale all losses by dimensionality
data['error'] = data['error'] * data['latent_dim']
data['ood_error'] = data['ood_error'] * data['latent_dim']

# save the new data as csv file in data_dir
data.to_csv(args.data_dir + "/data.csv", index=False)
prGreen(f"Rescaled data saved to {args.data_dir}/data.csv")
print(green(f"Rescaled data saved to {args.data_dir}/data.csv"))

elif args.command == 'status':
experiment_list = [] # all the config.json files in the outputs folder
experiment_dict = {}
algo_filter = args.algo
system_filter = args.system
challenge_filter = args.challenge
complete_filter = args.is_complete

directory_path = 'experiments/outputs'

# Find all 'config.json' files and group their experiments by challenge type
for dirpath, dirnames, filenames in os.walk(directory_path):
for file in filenames:
if file.endswith('config.json'):
file_path = os.path.join(dirpath, file)
with open(file_path, 'r') as f:
    experiment = json.load(f)

algo_cls = experiment.get('evaluate', {}).get('algo_cls', {})
algo_cls_name = algo_cls.get('class_name', '')
system_cls = experiment.get('challenge', {}).get('system_cls', {})
system_cls_name = system_cls.get('class_name', '')
challenge_cls = experiment.get('challenge_cls', {})
challenge_cls_name = challenge_cls.get('class_name', '')
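
# The lookups above assume a config.json of roughly this shape
# (hypothetical values):
#   {
#     "challenge_cls": {"class_name": "FixedComplexity", ...},
#     "challenge": {"system_cls": {"class_name": "LDSystem", ...}, ...},
#     "evaluate": {"algo_cls": {"class_name": "LinearRegression", ...}, ...},
#     "total_jobs": 40,
#     ...
#   }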

# Check algorithm if filter is set
if algo_filter:
algo_filter_name = {
'lr': 'LinearRegression',
'dnn': 'DNN',
'sindy': 'SINDy'
}.get(algo_filter, '')
if algo_cls_name != algo_filter_name:
continue

# Check system if filter is set
if system_filter:
system_filter_name = {
'lds': 'LDSystem',
'lorenz': 'LorenzSystem',
'lv_p': 'PreyPredatorSystem',
'epi_1': 'LorenzSystem',
'nbody': 'LorenzSystem',
'heat': 'LorenzSystem',
'fbsnn_1': 'LorenzSystem',
'fbsnn_2': 'LorenzSystem',
'ctln': 'LorenzSystem',
'kura': 'LorenzSystem'
}.get(system_filter, '')
if system_cls_name != system_filter_name:
continue

# Check challenge if filter is set
if challenge_filter:
challenge_filter_name = {
'fc': 'FixedComplexity',
'fts': 'FixedTrainSize',
'fe': 'FixedError'
}.get(challenge_filter, '')
if challenge_cls_name != challenge_filter_name:
continue


experiment_type = experiment['challenge_cls']['class_name']
total_jobs = experiment.get("total_jobs", 0)
_, data = load_data(dirpath, print_status=False)
if data is None:
completed_jobs = []
else:
completed_jobs = data['job_id'].drop_duplicates().to_list()

complete_cls = str(len(completed_jobs) == total_jobs).lower()
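# (Completion is rendered as the string 'true'/'false' so it can be compared
# directly against the --is_complete choices.)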

#Check complete if filter is set
if complete_filter:
if complete_cls != complete_filter:
continue

# Group entries by challenge type
experiment_dict.setdefault(experiment_type, []).append({
'total_jobs': total_jobs,
'complete_jobs': len(completed_jobs),
'folder_path': file_path
})

# Check if experiments made
if not experiment_dict:
print(cyan(bold('No experiments made')))
print(bold('Experiment configs available: 0'), end=' ')
print(loadingBar(0, 1, 40))

# Hints to make experiments
print('\033[1;31m'+'To make an experiment:'+'\033[0m')
print('\033[0;31m'+' python -m experiments make'+'\033[0m')

else:
# Determine max length for formatting, and count total jobs
max_length = 0
max_length_job = 0
all_jobs = 0
all_finished_jobs = 0
job_dict = {}

# Get max length for formatting
# Get all jobs for each experiment type (for progress bar)
for challenge_type in experiment_dict.keys():
output_list = experiment_dict[challenge_type]

# Track the widest path string and job-count string for column alignment
max_length = max(max_length, max(len(' ' + path['folder_path']) for path in output_list))
max_length_job = max(max_length_job, max(len(f"{path['complete_jobs']} / {path['total_jobs']} Jobs") for path in output_list))

all_jobs += sum(jobs['total_jobs'] for jobs in experiment_dict[challenge_type])
all_finished_jobs += sum(jobs['complete_jobs'] for jobs in experiment_dict[challenge_type])
job_dict[challenge_type] = {'all_jobs': all_jobs, 'all_completed_jobs': all_finished_jobs}

max_title = max(len(challenge_type) for challenge_type in experiment_dict.keys())
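
# (The header bar width passed below is computed from the column widths so
# the title row's progress bar ends flush with the per-experiment rows.)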

# Print Title
print(bold(f'Experiment configs available: {all_jobs}'), end=' ')
print(loadingBar(all_finished_jobs, all_jobs, max_length + max_length_job - len(f'Experiment configs available: {all_jobs}') + 9))
print('\033[1;31m'+'To run an experiment:'+'\033[0m')
print('\033[0;31m'+' python -m experiments run --config_file <name>\n'+'\033[0m')

# Print paths by Challenge Type
for challenge_type in experiment_dict.keys():
print(bold(f"{challenge_type}: " + ' ' * (max_title - len(challenge_type)) + str(len(experiment_dict[challenge_type]))), end=' ')
print(loadingBar(job_dict[challenge_type]['all_completed_jobs'], job_dict[challenge_type]['all_jobs'], 20))

output_list = experiment_dict[challenge_type]

# Print paths
for path in output_list:
output = path['folder_path']

# Bolding experiment part of the filepath
output_bold = str(output).split('/')
output_bold[-2] = bold(output_bold[-2], color='\033[96m')
output_str = cyan('/'.join(output_bold))

print(' '+output_str+' '*((max_length-len(output)+(max_length_job-len(str(path['complete_jobs'])+' / '+str(path['total_jobs'])+' Jobs')))), end='')

# Print number of jobs + progress bar
if path['complete_jobs'] == path['total_jobs']:
print(green(f'{path["complete_jobs"]}'), '/', green(f'{path["total_jobs"]}'), end=' ')
else:
print(red(f'{path["complete_jobs"]}'), '/', red(f'{path["total_jobs"]}'), end=' ')

print(loadingBar(path['complete_jobs'], path['total_jobs'], 10))
print()

elif args.command == 'make_status':
experiment_list = [] # all the config.json files in the outputs folder
experiment_dict = {}

directory_path = 'experiments/outputs'

# Pull the challenge, system, and algo out of each path, using the same os.walk approach as the status command
for dirpath, dirnames, filenames in os.walk(directory_path):
for file in filenames:
if file.endswith('config.json'):
file_path = os.path.join(dirpath, file)
split_path = file_path.split('/')
challenge = split_path[2]
system = split_path[3]
algo_split = split_path[4].split('_')
algo_split1 = algo_split[3].split('=')[0]
algo = f'{algo_split[2]}_{algo_split1}'
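
# The path parsing above assumes a layout like (hypothetical example):
#   experiments/outputs/<challenge>/<system>/<...>_<...>_<algo>_<param>=..._config.json
# so split_path[2] is the challenge, split_path[3] the system, and the algo
# label is rebuilt from two underscore-separated pieces of the filename.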
experiment_list.append({
'challenge' : challenge,
'system' : system,
'algo' : algo
})

# Enumerate every possible experiment combination from challenge_dicts (defined via params.py and utils.py) and list the ones not yet made

#Print Hint
print(red(bold('To make an experiment:')))
print(red(' python -m experiments make --challenge <challenge> --system <system> --algo <algo>\n'))

#Print Title
print(cyan(bold('List of all possible experiments that can be made:')))

chall_list = challenge_dicts.keys()
for challenge in chall_list:
_, sys_dict = challenge_dicts[challenge]
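# challenge_dicts is assumed to map each challenge key ('fc', 'fts', 'fe',
# 'default') to a (challenge_cls, {system: {algo: params}}) pair; the nested
# loops below enumerate every possible (challenge, system, algo) combination.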
sys_list = sys_dict.keys()
for system in sys_list:
if system == 'default':
continue
algo_list = sys_dict[system].keys()
for algo in algo_list:
#Final Print
out_str = ''
if challenge != 'default':
out_str += f'--challenge {challenge} '
if system != 'default':
out_str += f'--system {system} '
if algo != 'default':
out_str += f'--algo {algo} '
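
# Skip any combination that already has a config.json on disk; only
# combinations that have not been made yet are printed.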

pr = True
for experiment in experiment_list:
if challenge == experiment['challenge'] and system == experiment['system'] and algo == experiment['algo']:
pr = False
if pr:
print(cyan(f' {out_str}'))
