-
Notifications
You must be signed in to change notification settings - Fork 0
/
s2_merge.py
73 lines (53 loc) · 2.04 KB
/
s2_merge.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
"""
Merge outputs of metrics from second stage model tests into a single file
for comparison.
"""
import os
import datetime
import time
import json
import glob
from copy import deepcopy
import numpy as np
import pandas as pd
from utils.settings_builder import Settings
# -----------------------------------------------------------------------------
# *****************
# *****************
# Settings file that drives this run. It is resolved relative to the directory
# containing this script rather than the current working directory.
json_path = "settings/settings_example.json"
script_dir = os.path.dirname(os.path.realpath(__file__))
json_path = os.path.join(script_dir, json_path)

# Local time truncated to whole seconds; embedded in the merged file name.
now_seconds = int(time.time())
timestamp = datetime.datetime.fromtimestamp(now_seconds).strftime(
    '%Y_%m_%d_%H_%M_%S')

s = Settings()
s.load(json_path)

base_path = s.base_path

# Tags identifying which set of runs gets merged.
version, predict_tag, model_tag = (
    s.config[key] for key in ("version", "predict_tag", "model_tag")
)

# final merged output
merge_rel_path = "output/s2_merge/merge_{}_{}_{}_{}.csv".format(
    version, predict_tag, model_tag, timestamp)
merge_out_path = os.path.join(base_path, merge_rel_path)

# find input data based on models:
# every metrics file whose name ends with this version / predict tag /
# model tag combination is picked up (the "*" covers the leading part).
metrics_rel_pattern = "output/s2_metrics/metrics_*_{}_{}_{}.csv".format(
    version, predict_tag, model_tag)
regex_str = os.path.join(base_path, metrics_rel_pattern)  # glob pattern, not a regex
regex_search = glob.glob(regex_str)
qlist = regex_search
# Build one DataFrame per metrics file, attach the training parameters of the
# model that produced it as constant columns, then stack everything into a
# single CSV for side-by-side comparison.

# Fail early with an actionable message: pd.concat on an empty list only
# reports "No objects to concatenate", which hides the real cause.
if not qlist:
    raise ValueError(
        "No metrics files matched pattern: {}".format(regex_str))

merge_df_list = []

# glob returns matches in arbitrary order; sorting keeps the row order of the
# merged output reproducible between runs.
for metric_file in sorted(qlist):

    # Only the empty string is treated as missing (default NA markers such as
    # "NA" or "null" are kept as literal text).
    df = pd.read_csv(metric_file, quotechar='"',
                     na_values='', keep_default_na=False,
                     encoding='utf-8')

    # The third "_"-separated token of the file name (extension removed) is
    # used as the model hash.
    # NOTE(review): this depends on the naming used by the script that writes
    # output/s2_metrics - confirm the token position there.
    metric_name = os.path.splitext(os.path.basename(metric_file))[0]
    model_hash = metric_name.split("_")[2]

    param_json_path = os.path.join(
        base_path,
        "output/s1_train/train_{}_{}.json".format(model_hash, version))

    # Read explicitly as UTF-8 (matching the CSV read/write in this script)
    # instead of relying on the platform's locale encoding.
    with open(param_json_path, encoding="utf-8") as f:
        params = json.load(f)

    # Entries under "train" and "static" are hoisted one level so each nested
    # key becomes its own column; every other top-level key maps directly.
    # Later duplicates overwrite earlier ones, as with column assignment.
    flat_params = {}
    for k, v in params.items():
        if k in ("train", "static"):
            flat_params.update(v)
        else:
            flat_params[k] = v

    # Repeat each value once per row. Wrapping in a list (rather than
    # assigning the bare value) keeps list-valued parameters intact as one
    # object per row instead of being spread across rows.
    n_rows = len(df)
    for col, value in flat_params.items():
        df[col] = [value] * n_rows

    merge_df_list.append(df)

merge_df = pd.concat(merge_df_list, axis=0, ignore_index=True)

# Make sure the output directory exists so a first run does not fail at the
# final write after all inputs were already processed.
os.makedirs(os.path.dirname(merge_out_path), exist_ok=True)
merge_df.to_csv(merge_out_path, index=False, encoding='utf-8')