feat: CLI status command v2 #122

Open · wants to merge 34 commits into main

Changes from all commits · 34 commits
22daf17
status command v1 redo
gavin-ho1 Jul 27, 2024
dcbe242
implemented formatting
gavin-ho1 Jul 28, 2024
943e9c2
fixed formatting
gavin-ho1 Jul 28, 2024
88e5930
refactored formatting to be cleaner
gavin-ho1 Jul 28, 2024
e84243f
added total jobs counter and progress bar on the header
gavin-ho1 Jul 28, 2024
7e3a85f
Final formatting and progress bars
gavin-ho1 Jul 28, 2024
ce538a7
added color and bold formatting
gavin-ho1 Jul 28, 2024
bbb06d4
minor fix to formatting
gavin-ho1 Jul 28, 2024
88ec488
added bolding within filepath
gavin-ho1 Jul 28, 2024
c9ee614
update helper function
gavin-ho1 Jul 28, 2024
4316021
added color to number of jobs indicator
gavin-ho1 Jul 28, 2024
a780b46
minor fix to number of jobs indicator
gavin-ho1 Jul 28, 2024
09efbc3
added comments to clarify code
gavin-ho1 Jul 28, 2024
e9d7c26
added --make argument
gavin-ho1 Jul 28, 2024
33b65a5
Added implementation for --make argument
gavin-ho1 Jul 29, 2024
0f2ed0a
edits to --make argument
gavin-ho1 Jul 29, 2024
e80ce00
reformatted title progress bar to be a dynamic length
gavin-ho1 Jul 29, 2024
da7c91f
changed progress bar colors for better visibility
gavin-ho1 Jul 30, 2024
a0cbcce
fixed bug where no experiments made would error out
gavin-ho1 Jul 30, 2024
637bd14
removed --make subcommand
gavin-ho1 Jul 30, 2024
5ed4a29
added system argument
gavin-ho1 Jul 31, 2024
7c700e3
filtered by completeness
gavin-ho1 Jul 31, 2024
288d26f
bug fix
gavin-ho1 Jul 31, 2024
ceb943e
redid filter for completeness
gavin-ho1 Jul 31, 2024
24eb98c
added system filtering
gavin-ho1 Jul 31, 2024
c8e9bb0
Aryav's changes
gavin-ho1 Aug 1, 2024
ebec61b
refactored filter by completion
gavin-ho1 Aug 1, 2024
f83d3ed
added make-status subcommand
gavin-ho1 Aug 1, 2024
329e180
added make_status implementation
gavin-ho1 Aug 1, 2024
bad53dd
refactored colors to return strings instead of print (this is useful …
gavin-ho1 Aug 1, 2024
8bf943b
fixed dynamic title loading bar size
gavin-ho1 Aug 1, 2024
0704bf9
minor fix
gavin-ho1 Aug 1, 2024
c17f210
deleted make-status (implemented wrong); it should be a separate branch
gavin-ho1 Aug 1, 2024
d40918d
bugfix: refactored some print statements in the main.py file that use…
gavin-ho1 Aug 2, 2024
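
Taken together, these commits add a status subcommand to the experiments CLI. A sketch of typical invocations, based on the argparse definitions in the diff below (all filter flags are optional; the exact combinations shown are illustrative):

    python -m experiments status
    python -m experiments status --challenge fc --is_complete true
    python -m experiments status --system lds --algo lr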
196 changes: 183 additions & 13 deletions experiments/__main__.py
@@ -52,8 +52,9 @@

import argparse
import os
import json
from .utils import algo_dict, load_from_json, system_dict, challenge_dicts
-from .main import load_data, run_challenge, make_plots, save_config, prGreen, prPink
+from .main import load_data, run_challenge, make_plots, save_config, green, pink, cyan, red, loadingBar, bold
from dynadojo.challenges import FixedError, FixedComplexity, FixedTrainSize


@@ -64,10 +64,12 @@
plot_parser = subparsers.add_parser('plot', help='Plot an experiment results')
check_parser = subparsers.add_parser('check', help='Check for missing jobs')
scale_parser = subparsers.add_parser('scale', help='Temporary utility which rescales losses by dimensionality')
status_parser = subparsers.add_parser('status', help='List all available config.json files that you have already made')

# Accept command line arguments
make_parser.add_argument('--algo', type=str, default='lr', help='Specify which algo to run')
make_parser.add_argument('--system', type=str, default='lds', choices=system_dict.keys(), help='Specify which system to run')

make_parser.add_argument('--challenge', type=str, default="fc", choices=["fc", "fts", "fe"], help='Specify which challenge to run')
make_parser.add_argument('--output_dir', type=str, default="experiments/outputs", help='where to save config')
make_parser.add_argument('--all', action='store_true', help='if True, make all params')
@@ -89,9 +92,16 @@

scale_parser.add_argument('--data_dir', type=str, help='where to load results from')

# status_parser.add_argument('--system', type=str, default=None, choices=system_dict.keys(), help='filter by system')
status_parser.add_argument('--is_complete', type=str, choices=['true', 'false'], help='filter by completed experiments')
status_parser.add_argument('--algo', type=str, help='Specify which algo to filter through')
status_parser.add_argument('--system', type=str, help='Specify which system to filter through')
status_parser.add_argument('--challenge', type=str, choices=["fc", "fts", "fe"], help='Specify which challenge to filter through')
status_parser.set_defaults(make=False)

args, rest = program.parse_known_args()

if args.command == 'make':
if args.all:
for c, chall_dict in challenge_dicts.values():
for s in chall_dict.keys():
@@ -100,7 +110,7 @@
if a != "default":
print(f"Making {c.__name__} {s} {a}")
config_file, total_jobs = save_config(s, a, challenge_cls=c, output_dir=args.output_dir)
-prPink(f"{config_file} with {total_jobs} jobs")
+print(pink(f"{config_file} with {total_jobs} jobs"))
else:
assert args.algo.split("_")[0] in algo_dict.keys(), f"algo {args.algo} must be in algo_dict"
if args.challenge == "fc":
@@ -110,7 +120,7 @@
else:
challenge_cls = FixedError
config_file, total_jobs = save_config(args.system, args.algo, challenge_cls, output_dir=args.output_dir)
-prPink(f"{config_file} with {total_jobs} jobs")
+print(pink(f"{config_file} with {total_jobs} jobs"))
if rest: #maybe parse more args
args = program.parse_args(rest)
if args.command == 'run':
@@ -126,15 +136,15 @@
total_jobs = config["total_jobs"]
_, data = load_data(os.path.join(args.output_dir, config["folder_path"]))
if data is None:
-prGreen("No previous jobs found.")
+print(green("No previous jobs found."))
args.jobs = None
else:
completed_jobs = data['job_id'].drop_duplicates().to_list()
missing_jobs = [i for i in range(total_jobs) if i not in completed_jobs]
if len(missing_jobs) == 0:
-prGreen("All jobs already completed. Exiting.")
+print(green("All jobs already completed. Exiting."))
exit(0)
-prGreen(f"{len(missing_jobs)} missing jobs found. Only running missing jobs.")
+print(green(f"{len(missing_jobs)} missing jobs found. Only running missing jobs."))
args.jobs = ','.join(map(str, missing_jobs))
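# e.g. with total_jobs=34 and completed job_ids 0-27, args.jobs becomes "28,29,30,31,32,33"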

if args.node is not None and args.total_nodes > 1:
@@ -147,7 +157,7 @@
jobs_filter=[int(j) for j in args.jobs.split(",")] if args.jobs else None
)
else: # run the whole challenge
-prGreen(f"Running {len(args.jobs.split(',')) if args.jobs else 'all'} jobs.")
+print(green(f"Running {len(args.jobs.split(',')) if args.jobs else 'all'} jobs."))
run_challenge(
config_file_path=args.config_file,
output_dir=args.output_dir,
@@ -178,12 +188,11 @@
completed_jobs = data['job_id'].drop_duplicates().to_list()
missing_jobs = [i for i in range(total_jobs) if i not in completed_jobs]
if len(missing_jobs) == 0:
-prGreen("All jobs completed.")
+print(green("All jobs completed."))
exit(0)
print(f"Num of missing jobs: \t {len(missing_jobs)} of {total_jobs}")
print(f"Missing jobs: \n{','.join(map(str, missing_jobs))}")


elif args.command == 'scale':
assert args.data_dir is not None, "must specify data directory"
files, data = load_data(args.data_dir)
@@ -192,18 +201,179 @@
try:
os.makedirs(data_dir_unscaled, exist_ok=False)
except FileExistsError:
-prPink(f"Exiting...Already scaled data. {data_dir_unscaled} already exists. ")
+print(pink(f"Exiting...Already scaled data. {data_dir_unscaled} already exists. "))
exit(0)

# move all csv files in data_dir to data_dir_unscaled
for filepath in files:
os.rename(filepath, data_dir_unscaled + "/" + os.path.basename(filepath))
-prGreen(f"Original data moved to {data_dir_unscaled}")
+print(green(f"Original data moved to {data_dir_unscaled}"))

# rescale all losses by dimensionality
data['error'] = data['error'] * data['latent_dim']
data['ood_error'] = data['ood_error'] * data['latent_dim']
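# e.g. an error of 0.5 recorded at latent_dim=10 is rescaled to 5.0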

# save the new data as csv file in data_dir
data.to_csv(args.data_dir + "/data.csv", index=False)
-prGreen(f"Rescaled data saved to {args.data_dir}/data.csv")
+print(green(f"Rescaled data saved to {args.data_dir}/data.csv"))

elif args.command == 'status':
experiment_list = [] # all the config.json files in the outputs folder
experiment_dict = {}
algo_filter = args.algo
system_filter = args.system
challenge_filter = args.challenge
complete_filter = args.is_complete

directory_path = 'experiments/outputs'

# Find all 'config.json' files, add filepath to a list, sorted by challenge type
for dirpath, dirnames, filenames in os.walk(directory_path):
for file in filenames:
if file.endswith('config.json'):
file_path = os.path.join(dirpath, file)
with open(file_path, 'r') as f:
experiment = json.load(f)

algo_cls = experiment.get('evaluate', {}).get('algo_cls', {})
algo_cls_name = algo_cls.get('class_name', '')
system_cls = experiment.get('challenge', {}).get('system_cls', {})
system_cls_name = system_cls.get('class_name', '')
challenge_cls = experiment.get('challenge_cls', {})
challenge_cls_name = challenge_cls.get('class_name', '')

# Check algorithm if filter is set
if algo_filter:
algo_filter_name = {
'lr': 'LinearRegression',
'dnn': 'DNN',
'sindy': 'SINDy'
}.get(algo_filter, '')
if algo_cls_name != algo_filter_name:
continue
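# Note: an unrecognized --algo value maps to '' above, so only configs missing algo_cls.class_name would match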

# Check system if filter is set
if system_filter:
system_filter_name = {
'lds': 'LDSystem',
'lorenz': 'LorenzSystem',
'lv_p': 'PreyPredatorSystem',
'epi_1': 'LorenzSystem',
'nbody': 'LorenzSystem',
'heat': 'LorenzSystem',
'fbsnn_1': 'LorenzSystem',
'fbsnn_2': 'LorenzSystem',
'ctln': 'LorenzSystem',
'kura': 'LorenzSystem'
}.get(system_filter, '')
if system_cls_name != system_filter_name:
continue

# Check challenge if filter is set
if challenge_filter:
challenge_filter_name = {
'fc': 'FixedComplexity',
'fts': 'FixedTrainSize',
'fe': 'FixedError'
}.get(challenge_filter, '')
if challenge_cls_name != challenge_filter_name:
continue


experiment_type = experiment['challenge_cls']['class_name']
total_jobs = experiment.get("total_jobs", 0)
_, data = load_data(dirpath, print_status=False)
if data is None:
completed_jobs = []
else:
completed_jobs = data['job_id'].drop_duplicates().to_list()

complete_cls = str(len(completed_jobs) == total_jobs).lower()
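# e.g. 12 of 34 completed jobs gives complete_cls = "false", matching the --is_complete choices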

# Check complete if filter is set
if complete_filter:
if complete_cls != complete_filter:
continue

# Sort
if experiment_type in experiment_dict.keys():
experiment_dict[experiment_type].append({
'total_jobs': total_jobs,
'complete_jobs': len(completed_jobs),
'folder_path': file_path
})
else:
experiment_dict[experiment_type] = [{
'total_jobs': total_jobs,
'complete_jobs': len(completed_jobs),
'folder_path': file_path
}]

# Check if experiments made
if not experiment_dict:
print(cyan(bold('No experiments made')))
print(bold('Experiment configs available: 0'), end=' ')
print(loadingBar(0, 1, 40))

# Hints to make experiments
print('\033[1;31m'+'To make an experiment:'+'\033[0m')
print('\033[0;31m'+' python -m experiments make'+'\033[0m')

else:
# Determine max length for formatting, and count total jobs
max_length = 0
max_length_job = 0
all_jobs = 0
all_finished_jobs = 0
job_dict = {}

# Get max length for formatting
# Get all jobs for each experiment type (for progress bar)
for challenge_type in experiment_dict.keys():
output_list = [path for path in experiment_dict[challenge_type]]

# Track the widest filepath and job-count strings for column alignment
max_length = max(max_length, max(len(' ' + path['folder_path']) for path in output_list))
max_length_job = max(max_length_job, max(len(f"{path['complete_jobs']} / {path['total_jobs']} Jobs") for path in output_list))

type_jobs = sum(jobs['total_jobs'] for jobs in experiment_dict[challenge_type])
type_finished_jobs = sum(jobs['complete_jobs'] for jobs in experiment_dict[challenge_type])
all_jobs += type_jobs
all_finished_jobs += type_finished_jobs
# Store per-type totals (not the running totals) so each type's progress bar is scoped to itself
job_dict[challenge_type] = {'all_jobs': type_jobs, 'all_completed_jobs': type_finished_jobs}

max_title = max(len(challenge_type) for challenge_type in experiment_dict.keys())

# Print Title
print(bold(f'Experiment configs available: {all_jobs}'), end=' ')
print(loadingBar(all_finished_jobs, all_jobs, max_length + max_length_job - len(f'Experiment configs available: {all_jobs}') + 9))
print('\033[1;31m'+'To run an experiment:'+'\033[0m')
print('\033[0;31m'+' python -m experiments run --config_file <name>\n'+'\033[0m')

# Print paths by Challenge Type
for challenge_type in experiment_dict.keys():
print(bold(f"{challenge_type}: " + ' ' * (max_title - len(challenge_type)) + str(len(experiment_dict[challenge_type]))), end=' ')
print(loadingBar(job_dict[challenge_type]['all_completed_jobs'], job_dict[challenge_type]['all_jobs'], 20))

output_list = [path for path in experiment_dict[challenge_type]]

# Print paths
for path in output_list:
output = path['folder_path']

# Bolding experiment part of the filepath
output_bold = str(output).split('/')
output_bold[-2] = bold(output_bold[-2], color='\033[96m')
output_str = ''
for out in output_bold:
output_str += out + '/'
output_str = cyan(output_str[0:-1])
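# e.g. 'experiments/outputs/fc_lds_lr/config.json' prints in cyan with 'fc_lds_lr' in bold (path illustrative)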

print(' ' + output_str + ' ' * (max_length - len(output) + max_length_job - len(str(path['complete_jobs']) + ' / ' + str(path['total_jobs']) + ' Jobs')), end='')

# Print number of jobs + progress bar
if path['complete_jobs'] == path['total_jobs']:
print(green(f'{path["complete_jobs"]}'), '/', green(f'{path["total_jobs"]}'), end=' ')
else:
print(red(f'{path["complete_jobs"]}'), '/', red(f'{path["total_jobs"]}'), end=' ')

print(loadingBar(path['complete_jobs'], path['total_jobs'], 10))
print()
42 changes: 29 additions & 13 deletions experiments/main.py
@@ -21,7 +21,7 @@ def save_config(
# don't overwrite existing config
config_file_path = os.path.join(output_dir, folder_path, "config.json")
if os.path.exists(config_file_path):
-prGreen(f"Config already exists for {folder_path}...skipping")
+print(green(f"Config already exists for {folder_path}...skipping"))
else:
save_to_json(experiment_config, os.path.join(output_dir, folder_path, "config.json"))
return config_file_path, experiment_config['total_jobs']
@@ -71,9 +71,9 @@ def run_challenge(
assert isinstance(split, tuple), "split must be a tuple, (split_num, total_splits)"
split_num, total_splits = split
jobs = _get_jobs(all_jobs, split_num, total_splits) # list[tuples(trial, l)]
-prGreen(f"Running split {split_num} of {total_splits} with jobs {jobs}")
+print(green(f"Running split {split_num} of {total_splits} with jobs {jobs}"))
if jobs == []:
-prGreen(f"Split {split_num} of {total_splits} has no jobs...skipping")
+print(green(f"Split {split_num} of {total_splits} has no jobs...skipping"))
return
else:
jobs = all_jobs
@@ -105,9 +105,9 @@
csv_output_path = file_path #will save to csv in parallel
)
if split:
-prGreen(f"COMPLETED SPLIT -- {split_num=} / {total_splits=}")
+print(green(f"COMPLETED SPLIT -- {split_num=} / {total_splits=}"))
else:
-prGreen(f"COMPLETED ALL JOBS -- see {folder_path}")
+print(green(f"COMPLETED ALL JOBS -- see {folder_path}"))


def make_plots(
Expand Down Expand Up @@ -135,26 +135,29 @@ def make_plots(
# g.set(xscale="linear", yscale="linear")
if save:
g.figure.savefig(f"{output_dir}/{figure_filename}", bbox_inches='tight')
-prGreen(f"Plot created with {len(files)} files in {data_path} and {len(filtered_data)}/{len(data)} rows: {output_dir}/{figure_filename} ")
+print(green(f"Plot created with {len(files)} files in {data_path} and {len(filtered_data)}/{len(data)} rows: {output_dir}/{figure_filename} "))
# for file in files:
# print(f"\t- {file}")
return g, data

-def load_data(data_path):
+def load_data(data_path, print_status=True):
files = _find_all_csv(data_path)
if len(files) <= 0:
# print(f"No plot created: No files matching {csv_filename} found in {data_path}")
-prGreen(f"No CSV files found in {data_path}")
+if print_status:
+print(green(f"No CSV files found in {data_path}"))
return [], None

data = pd.DataFrame()
# Handling split challenge runs
# Concatenate all files into one dataframe and drop duplicates
for file in files:
try:
-print(file)
+if print_status:
+print(file)
df = pd.read_csv(file)
-prCyan(f"Loaded {len(df)} rows from {file}")
+if print_status:
+print(cyan(f"Loaded {len(df)} rows from {file}"))
except:
continue
data = pd.concat([data, df])
@@ -214,8 +217,21 @@ def _get_jobs(all_jobs: list[int], split_num: int, total_splits: int):
splits = [all_jobs[i*k+min(i, mod):(i+1)*k+min(i+1, mod)] for i in range(total_splits)]
return splits[split_num-1]
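# Worked example (assuming, per the elided context above, k = len(all_jobs) // total_splits
# and mod = len(all_jobs) % total_splits): 10 jobs over 3 splits gives slices of sizes 4, 3, 3;
# split_num is 1-indexed, so split_num=1 returns the first 4 jobs.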

-def prGreen(skk): print("\033[92m{}\033[00m".format(skk))
+# Colors
+def green(skk): return "\033[92m{}\033[00m".format(skk)

-def prCyan(skk): print("\033[96m{}\033[00m".format(skk))
+def cyan(skk): return "\033[96m{}\033[00m".format(skk)

-def prPink(skk): print("\033[95m{}\033[00m".format(skk))
+def pink(skk): return "\033[95m{}\033[00m".format(skk)

def red(skk): return "\033[0;31m{}\033[00m".format(skk)
# Helper function for the status bar:
def loadingBar(num_complete: int, num_total: int, length: int, color: str = '\x1b[38;5;48m', alt_color: str = '\x1b[38;5;237m') -> str:
RESET = '\033[0m'  # ANSI reset code

num_status = int((num_complete / num_total) * length)
return color + '━' * num_status + RESET + alt_color + '━' * (length - num_status) + RESET
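# Example (illustrative): loadingBar(3, 4, 8) fills int((3/4)*8) = 6 segments in `color`,
# followed by 2 segments in `alt_color`.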

def bold(text: str, color=''):
return '\033[1m' + text + '\033[0m' + color
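
For reference, here is a minimal sketch of how the new string-returning helpers compose into a single status line, in the spirit of the per-path output in __main__.py (a hypothetical standalone snippet; the import path and the 28/34 figures are illustrative):

    from experiments.main import green, red, bold, loadingBar

    complete, total = 28, 34
    colorize = green if complete == total else red  # green only once every job is done
    print(bold('FixedComplexity: 1'), colorize(f'{complete} / {total} Jobs'), loadingBar(complete, total, 10))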