Skip to content

Commit 3b6ea9b

Browse files
committed
cleanup
1 parent 74e0553 commit 3b6ea9b

File tree

9 files changed

+267
-113
lines changed

9 files changed

+267
-113
lines changed

hbw/analysis/create_analysis.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -65,12 +65,12 @@ def create_hbw_analysis(
6565
campaign_run2_2017_nano_v9 = cmsdb.campaigns.run2_2017_nano_v9.campaign_run2_2017_nano_v9
6666

6767
# default config
68-
#c17 = add_config( # noqa
69-
# analysis_inst,
70-
# campaign_run2_2017_nano_v9.copy(),
71-
# config_name="c17",
72-
# config_id=2,
73-
#)
68+
c17 = add_config( # noqa
69+
analysis_inst,
70+
campaign_run2_2017_nano_v9.copy(),
71+
config_name="c17",
72+
config_id=2,
73+
)
7474

7575
# config with limited number of files
7676
l17 = add_config( # noqa

hbw/config/config_run2.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,6 @@ def add_config(
3636
limit_dataset_files: int | None = None,
3737
) -> od.Config:
3838
# validations
39-
print("add", analysis, config_name, config_id)
4039
assert campaign.x.year in [2016, 2017, 2018]
4140
if campaign.x.year == 2016:
4241
assert campaign.x.vfp in ["pre", "post"]

hbw/config/defaults_and_groups.py

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -53,12 +53,7 @@ def ml_inputs_producer(cls, container, task_params):
5353
def default_producers(cls, container, task_params):
5454
""" Default producers chosen based on the Inference model and the ML Model """
5555

56-
# how it was before merge default, use the ml_inputs and event_weights
57-
# TODO: we might need two ml_inputs producers in the future (sl vs dl)
58-
#default_producers = ["dl_ml_inputs"]
59-
#if dataset_inst and dataset_inst.is_mc:
60-
# run event weights producer only if it's a MC dataset
61-
# default_producers.append("event_weights")
56+
# per default, use the ml_inputs and event_weights
6257
default_producers = [ml_inputs_producer(cls, container, task_params), "event_weights"]
6358

6459
# check if a ml_model has been set

hbw/config/dl/variables.py

Lines changed: 156 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,11 @@
1111
np = maybe_import("numpy")
1212
ak = maybe_import("awkward")
1313

14-
from columnflow.columnar_util import EMPTY_FLOAT # noqa
14+
from hbw.config.styling import default_var_binning, default_var_unit
15+
from hbw.util import call_once_on_config
1516

1617

18+
@call_once_on_config()
1719
def add_dl_variables(config: od.Config) -> None:
1820
# bjet features
1921
config.add_variable(
@@ -99,3 +101,156 @@ def add_dl_variables(config: od.Config) -> None:
99101
binning=(40, 0, 3),
100102
x_title=r"$ \Delta \phi(ll,jj)$",
101103
)
104+
105+
106+
@call_once_on_config()
def add_dl_ml_variables(config: od.Config) -> None:
    """
    Adds ML input variables to a *config*.

    Each variable is registered through ``config.add_variable`` and uses the same string for
    its *name* and its *expression*, i.e. the name of the ``mli_*`` column it is read from.

    Binning conventions used below:
        - Delta R variables: 40 bins in [0, 8]
        - Delta phi variables: 40 bins in [0, 3.2]
        - masses, momenta and HT carry the unit "GeV"

    NOTE(review): the ``call_once_on_config`` decorator is defined in ``hbw.util`` and is
    presumably what prevents registering the variables twice on the same config - confirm there.
    """

    # reconstructed variables
    config.add_variable(
        name="mli_ht",
        expression="mli_ht",
        binning=(40, 0, 1200),
        unit="GeV",
        x_title="HT",
    )
    config.add_variable(
        name="mli_n_jet",
        expression="mli_n_jet",
        binning=(11, -0.5, 10.5),
        x_title="Number of jets",
    )
    config.add_variable(
        name="mli_n_deepjet",
        expression="mli_n_deepjet",
        binning=(11, -0.5, 10.5),
        x_title="Number of b-tagged jets (deepjet medium WP)",
    )
    config.add_variable(
        name="mli_deepjetsum",
        expression="mli_deepjetsum",
        binning=(40, 0, 4),
        x_title="sum of deepjet scores",
    )
    config.add_variable(
        name="mli_b_deepjetsum",
        expression="mli_b_deepjetsum",
        binning=(40, 0, 4),
        x_title="sum of bjet deepjet scores",
    )
    config.add_variable(
        name="mli_dr_bb",
        expression="mli_dr_bb",
        binning=(40, 0, 8),
        x_title=r"$\Delta R(b,b)$",
    )
    config.add_variable(
        name="mli_dphi_bb",
        expression="mli_dphi_bb",
        binning=(40, 0, 3.2),
        x_title=r"$\Delta\Phi(b,b)$",
    )
    config.add_variable(
        name="mli_mbb",
        expression="mli_mbb",
        binning=(40, 0, 400),
        unit="GeV",
        x_title=r"m(b,b)",
    )
    config.add_variable(
        name="mli_mindr_lb",
        expression="mli_mindr_lb",
        binning=(40, 0, 8),
        x_title=r"min $\Delta R(l,b)$",
    )
    config.add_variable(
        name="mli_dphi_bb_nu",
        expression="mli_dphi_bb_nu",
        binning=(40, 0, 3.2),
        x_title=r"$\Delta\Phi(bb,\nu)$",
    )
    config.add_variable(
        name="mli_dr_bb_l",
        expression="mli_dr_bb_l",
        binning=(40, 0, 8),
        x_title=r"$\Delta R(bb,l)$",
    )
    config.add_variable(
        name="mli_mll",
        expression="mli_mll",
        binning=(40, 0, 80),
        # a mass, so it gets the same unit as the other mass variables
        unit="GeV",
        x_title=r"$m_{ll}$",
    )
    config.add_variable(
        name="mli_dr_ll",
        expression="mli_dr_ll",
        binning=(40, 0, 8),
        x_title=r"$\Delta R(ll)$",
    )
    config.add_variable(
        # name must match the expression; it previously contained a doubled underscore
        name="mli_min_dr_llbb",
        expression="mli_min_dr_llbb",
        binning=(40, 0, 8),
        x_title=r"$\Delta R(bb,ll)$",
    )
    config.add_variable(
        name="mli_bb_pt",
        expression="mli_bb_pt",
        binning=(40, 0, 500),
        unit="GeV",
        # written like the "ll p_T" title below; "bb_p_T" would be a double subscript
        x_title=r"$bb p_T$",
    )
    config.add_variable(
        name="mli_mllMET",
        expression="mli_mllMET",
        binning=(40, 0, 200),
        # a mass, so it gets the same unit as the other mass variables
        unit="GeV",
        x_title=r"$m_{llMET}$",
    )
    config.add_variable(
        name="mli_dr_bb_llMET",
        expression="mli_dr_bb_llMET",
        binning=(40, 0, 8),
        x_title=r"$\Delta R(bb,llMET)$",
    )
    config.add_variable(
        name="mli_dphi_bb_llMET",
        expression="mli_dphi_bb_llMET",
        # Delta phi range, consistent with the other dphi variables (was the Delta R range)
        binning=(40, 0, 3.2),
        x_title=r"$\Delta \phi(bb,llMET)$",
    )
    config.add_variable(
        name="mli_mbbllMET",
        expression="mli_mbbllMET",
        binning=(40, 0, 500),
        unit="GeV",
        x_title=r"$m_{bbllMET}$",
    )
    config.add_variable(
        name="mli_dphi_ll",
        expression="mli_dphi_ll",
        # an angle: Delta phi range and no "GeV" unit
        binning=(40, 0, 3.2),
        x_title=r"$\Delta \phi_{ll}$",
    )
    config.add_variable(
        name="mli_ll_pt",
        expression="mli_ll_pt",
        binning=(40, 0, 200),
        unit="GeV",
        x_title=r"$ll p_T$",
    )

    # per-object kinematics: pt and eta for each object, except that no eta is added for "met"
    for obj in ["b1", "b2", "lep", "lep2", "met"]:
        for var in ["pt", "eta"]:
            if var == "eta" and obj == "met":
                continue
            config.add_variable(
                name=f"mli_{obj}_{var}",
                expression=f"mli_{obj}_{var}",
                binning=default_var_binning[var],
                # NOTE(review): when *var* has no entry in default_var_unit, the variable
                # name itself (e.g. "eta") is used as the unit - confirm this is intended
                unit=default_var_unit.get(var, var),
                x_title=f"{obj} {var}",
            )

hbw/config/ml_variables.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66

77
import order as od
88

9-
# from columnflow.columnar_util import EMPTY_FLOAT
109
from hbw.config.styling import default_var_binning, default_var_unit
1110
from hbw.util import call_once_on_config
1211

hbw/ml/base.py

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -184,7 +184,7 @@ def prepare_inputs(
184184

185185
# calculate some stats per dataset
186186
filenames = [inp["mlevents"].path for inp in files]
187-
187+
188188
N_events = sum([len(ak.from_parquet(fn)) for fn in filenames])
189189
if N_events == 0:
190190
# skip empty datasets
@@ -378,8 +378,7 @@ def prepare_ml_model(
378378

379379
from keras.models import Sequential
380380
from keras.layers import Dense, BatchNormalization
381-
# from hbw.ml.tf_util import cumulated_crossentropy
382-
import tensorflow as tf
381+
from hbw.ml.tf_util import cumulated_crossentropy
383382

384383
n_inputs = len(set(self.input_features))
385384
n_outputs = len(self.processes)
@@ -400,9 +399,8 @@ def prepare_ml_model(
400399
# compile the network
401400
# NOTE: the custom loss needed due to output layer changes for negative weights
402401
optimizer = keras.optimizers.Adam(learning_rate=0.00050)
403-
categorical_crossentropy = tf.keras.losses.CategoricalCrossentropy(from_logits=False)
404402
model.compile(
405-
loss=categorical_crossentropy, #cumulated_crossentropy,
403+
loss=cumulated_crossentropy,
406404
optimizer=optimizer,
407405
weighted_metrics=["categorical_accuracy"],
408406
)

hbw/ml/dense_classifier.py

Lines changed: 32 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -25,45 +25,40 @@
2525
class DenseClassifier(ModelFitMixin, DenseModelMixin, MLClassifierBase):
2626

2727
processes = [
28-
#"sg",
29-
"ggHH_kl_5_kt_1_dl_hbbhww",
30-
#"tt",
31-
#"st",
28+
"ggHH_kl_1_kt_1_sl_hbbhww",
29+
"qqHH_CV_1_C2V_1_kl_1_sl_hbbhww",
30+
"tt",
31+
"st",
3232
"v_lep",
33-
"t_bkg",
34-
#"w_lnu",
35-
#"dy_lep",
33+
# "w_lnu",
34+
# "dy_lep",
3635
]
3736

3837
ml_process_weights = {
39-
"ggHH_kl_0_kt_1_dl_hbbhww": 1,
40-
"ggHH_kl_1_kt_1_dl_hbbhww": 1,
41-
"ggHH_kl_5_kt_1_dl_hbbhww": 1,
42-
"sg": 1,
43-
"tt": 1,
44-
"st": 1,
45-
"v_lep": 1,
46-
"tt_bkg": 1,
38+
"ggHH_kl_1_kt_1_sl_hbbhww": 1,
39+
"qqHH_CV_1_C2V_1_kl_1_sl_hbbhww": 1,
40+
"tt": 2,
41+
"st": 2,
42+
"v_lep": 2,
4743
"w_lnu": 2,
48-
"dy_lep": 1,
44+
"dy_lep": 2,
4945
}
5046

5147
dataset_names = {
52-
#"ggHH_kl_0_kt_1_dl_hbbhww_powheg",
53-
#"ggHH_kl_1_kt_1_dl_hbbhww_powheg",
54-
"ggHH_kl_5_kt_1_dl_hbbhww_powheg",
48+
"ggHH_kl_1_kt_1_sl_hbbhww_powheg",
49+
"qqHH_CV_1_C2V_1_kl_1_sl_hbbhww_madgraph",
5550
# TTbar
5651
"tt_sl_powheg",
5752
"tt_dl_powheg",
5853
"tt_fh_powheg",
5954
# SingleTop
6055
"st_tchannel_t_powheg",
61-
# "st_tchannel_tbar_powheg", #problem in previous task for production
56+
"st_tchannel_tbar_powheg",
6257
"st_twchannel_t_powheg",
6358
"st_twchannel_tbar_powheg",
64-
#"st_schannel_lep_amcatnlo", #problem with normalizatino weights..
59+
"st_schannel_lep_amcatnlo",
6560
# "st_schannel_had_amcatnlo",
66-
# WJets commented out because no events avaible and hence no nomralization weights
61+
# WJets
6762
"w_lnu_ht70To100_madgraph",
6863
"w_lnu_ht100To200_madgraph",
6964
"w_lnu_ht200To400_madgraph",
@@ -84,41 +79,29 @@ class DenseClassifier(ModelFitMixin, DenseModelMixin, MLClassifierBase):
8479
}
8580

8681
input_features = [
87-
"mli_mll", "mli_min_dr_llbb", "mli_dr_ll", "mli_bb_pt",
8882
"mli_ht", "mli_n_jet", "mli_n_deepjet",
89-
"mli_deepjetsum", "mli_b_deepjetsum", "mli_l_deepjetsum",
83+
# "mli_deepjetsum", "mli_b_deepjetsum", "mli_l_deepjetsum",
9084
"mli_dr_bb", "mli_dphi_bb", "mli_mbb", "mli_mindr_lb",
91-
"mli_dphi_ll", "mli_dphi_bb_nu", "mli_dphi_bb_llMET", "mli_mllMET",
92-
"mli_mbbllMET", "mli_dr_bb_llMET", "mli_ll_pt", "mli_met_pt",
93-
#"mli_met_eta", "meli_met_pt",
94-
#"mli_dr_jj", "mli_dphi_jj", "mli_mjj", "mli_mindr_lj",
95-
#"mli_dphi_lnu", "mli_mlnu", "mli_mjjlnu", "mli_mjjl", "mli_dphi_bb_jjlnu", "mli_dr_bb_jjlnu",
96-
#"mli_dphi_bb_jjl", "mli_dr_bb_jjl", "mli_dphi_bb_nu", "mli_dphi_jj_nu", "mli_dr_bb_l", "mli_dr_jj_l",
97-
#"mli_mbbjjlnu", "mli_mbbjjl", "mli_s_min",
85+
"mli_dr_jj", "mli_dphi_jj", "mli_mjj", "mli_mindr_lj",
86+
"mli_dphi_lnu", "mli_mlnu", "mli_mjjlnu", "mli_mjjl", "mli_dphi_bb_jjlnu", "mli_dr_bb_jjlnu",
87+
"mli_dphi_bb_jjl", "mli_dr_bb_jjl", "mli_dphi_bb_nu", "mli_dphi_jj_nu", "mli_dr_bb_l", "mli_dr_jj_l",
88+
"mli_mbbjjlnu", "mli_mbbjjl", "mli_s_min",
9889
] + [
9990
f"mli_{obj}_{var}"
100-
for obj in ["b1", "b2", "lep", "lep2"]
91+
for obj in ["b1", "b2", "j1", "j2", "lep", "met"]
10192
for var in ["pt", "eta"]
102-
]
103-
"""
104-
+ [
93+
] + [
10594
f"mli_{obj}_{var}"
10695
for obj in ["fj"]
10796
for var in ["pt", "eta", "phi", "mass", "msoftdrop", "deepTagMD_HbbvsQCD"]
10897
]
109-
"""
11098

11199
store_name = "inputs_v1"
112100

113-
folds = 3
114-
layers = (164, 164, 164)
115-
activation = "relu"
116-
learningrate = 0.0005
117-
batchsize = 8000 #2 ** 12
118-
epochs = 150
119-
dropout = 0.50
120-
negative_weights = "abs"
101+
folds = 5
121102
validation_fraction = 0.20
103+
learningrate = 0.00050
104+
negative_weights = "handle"
122105

123106
# overwriting DenseModelMixin parameters
124107
activation = "relu"
@@ -204,21 +187,19 @@ def training_selector(self, config_inst: od.Config, requested_selector: str) ->
204187

205188
def training_producers(self, config_inst: od.Config, requested_producers: Sequence[str]) -> list[str]:
206189
# fix MLTraining Phase Space
207-
return ["dl_ml_inputs"] if self.config_ist.has_tag("is_sl") else [""]
190+
return ["ml_inputs"]
208191

209192

210193
# copies of the default DenseClassifier for testing hard-coded changes
211194
for i in range(10):
212195
dense_copy = DenseClassifier.derive(f"dense_{i}")
213196

214197
cls_dict_test = {
215-
"folds": 5,
216-
"epochs": 100,
217-
"processes": ["ggHH_kl_5_kt_1_dl_hbbhww", "v_lep", "t_bkg"],
198+
"epochs": 4,
199+
"processes": ["ggHH_kl_1_kt_1_sl_hbbhww", "qqHH_CV_1_C2V_1_kl_1_sl_hbbhww", "tt", "st", "v_lep"],
218200
"dataset_names": {
219-
"ggHH_kl_5_kt_1_dl_hbbhww_powheg", # "tt_dl_powheg",
220-
# "st_tchannel_t_powheg", #"w_lnu_ht400To600_madgraph",
221-
"dy_lep_m50_ht400to600_madgraph",
201+
"ggHH_kl_1_kt_1_sl_hbbhww_powheg", "qqHH_CV_1_C2V_1_kl_1_sl_hbbhww_madgraph", "tt_dl_powheg",
202+
"st_tchannel_t_powheg", "w_lnu_ht400To600_madgraph",
222203
},
223204
}
224205

0 commit comments

Comments
 (0)