Skip to content

Commit

Permalink
adding tau variable and temporary cfg changes
Browse files Browse the repository at this point in the history
  • Loading branch information
aalvesan committed Dec 4, 2024
1 parent 5ee7b03 commit 026dcdb
Show file tree
Hide file tree
Showing 3 changed files with 191 additions and 23 deletions.
46 changes: 23 additions & 23 deletions hbt/config/configs_hbt.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ def if_era(
if process_name.startswith(("graviton_hh_", "radion_hh_")):
proc.add_tag("signal")
proc.add_tag("resonant_signal")
if process_name.startswith("tt_"):
if process_name.startswith("tt"):
proc.add_tag({"ttbar", "tt"})
if process_name.startswith("dy_"):
proc.add_tag("dy")
Expand Down Expand Up @@ -200,10 +200,10 @@ def if_era(
"hh_vbf_hbb_htt_kvm2p12_k2v3p87_klm5p96_madgraph",

# some resonances
"radion_hh_ggf_hbb_htt_m450_madgraph",
"radion_hh_ggf_hbb_htt_m1200_madgraph",
"graviton_hh_ggf_hbb_htt_m450_madgraph",
"graviton_hh_ggf_hbb_htt_m1200_madgraph",
# "radion_hh_ggf_hbb_htt_m450_madgraph",
# "radion_hh_ggf_hbb_htt_m1200_madgraph",
# "graviton_hh_ggf_hbb_htt_m450_madgraph",
# "graviton_hh_ggf_hbb_htt_m1200_madgraph",
]),

# backgrounds
Expand All @@ -222,8 +222,8 @@ def if_era(
"st_twchannel_tbar_dl_powheg",
"st_twchannel_t_fh_powheg",
"st_twchannel_tbar_fh_powheg",
"st_schannel_t_lep_4f_amcatnlo",
"st_schannel_tbar_lep_4f_amcatnlo",
# "st_schannel_t_lep_4f_amcatnlo",
# "st_schannel_tbar_lep_4f_amcatnlo",

# tt + v
"ttw_wlnu_amcatnlo",
Expand Down Expand Up @@ -258,19 +258,19 @@ def if_era(

# w + jets
"w_lnu_amcatnlo",
"w_lnu_0j_amcatnlo",
"w_lnu_1j_amcatnlo",
"w_lnu_2j_amcatnlo",
"w_lnu_1j_pt40to100_amcatnlo",
"w_lnu_1j_pt100to200_amcatnlo",
"w_lnu_1j_pt200to400_amcatnlo",
"w_lnu_1j_pt400to600_amcatnlo",
"w_lnu_1j_pt600toinf_amcatnlo",
"w_lnu_2j_pt40to100_amcatnlo",
"w_lnu_2j_pt100to200_amcatnlo",
"w_lnu_2j_pt200to400_amcatnlo",
"w_lnu_2j_pt400to600_amcatnlo",
"w_lnu_2j_pt600toinf_amcatnlo",
# "w_lnu_0j_amcatnlo",
# "w_lnu_1j_amcatnlo",
# "w_lnu_2j_amcatnlo",
# "w_lnu_1j_pt40to100_amcatnlo",
# "w_lnu_1j_pt100to200_amcatnlo",
# "w_lnu_1j_pt200to400_amcatnlo",
# "w_lnu_1j_pt400to600_amcatnlo",
# "w_lnu_1j_pt600toinf_amcatnlo",
# "w_lnu_2j_pt40to100_amcatnlo",
# "w_lnu_2j_pt100to200_amcatnlo",
# "w_lnu_2j_pt200to400_amcatnlo",
# "w_lnu_2j_pt400to600_amcatnlo",
# "w_lnu_2j_pt600toinf_amcatnlo",

# z + jets (not DY but qq)
# decided to drop z_qq for now as their contribution is negligible,
Expand Down Expand Up @@ -303,8 +303,8 @@ def if_era(
"wph_wlnu_hbb_powheg",
"wph_htt_powheg",
"wmh_htt_powheg",
"wph_wqq_hbb_powheg",
"wmh_wqq_hbb_powheg",
# "wph_wqq_hbb_powheg",
# "wmh_wqq_hbb_powheg",
"zh_zll_hbb_powheg",
"zh_zqq_hbb_powheg",
"zh_htt_powheg",
Expand Down Expand Up @@ -464,7 +464,7 @@ def if_era(
}
cfg.x.default_custom_style_config = "small_legend"

cfg.x.default_blinding_threshold = 3e-4
cfg.x.default_blinding_threshold = None

################################################################################################
# luminosity and normalization
Expand Down
158 changes: 158 additions & 0 deletions hbt/config/hist_hooks.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,164 @@ def qcd_estimation(task, hists):
f"could not find index of bin on 'category' axis of qcd histogram {qcd_hist} "
f"for category {group.os_iso}",
)
return hists

def closure_test(task, hists):
print("-- ENTERING CLOSURE TEST HOOK --")
if not hists:
return hists

# get the qcd process
qcd_proc = config.get_process("qcd", default=None)
if not qcd_proc:
return hists

# get the data process
pseudo_data = config.get_process("data", default=None)
if not pseudo_data:
return hists

# extract all unique category ids and verify that the axis order is exactly
# "category -> shift -> variable" which is needed to insert values at the end
CAT_AXIS, SHIFT_AXIS, VAR_AXIS = range(3)
category_ids = set()
for proc, h in hists.items():
# validate axes
assert len(h.axes) == 3
assert h.axes[CAT_AXIS].name == "category"
assert h.axes[SHIFT_AXIS].name == "shift"
# get the category axis
cat_ax = h.axes["category"]
for cat_index in range(cat_ax.size):
category_ids.add(cat_ax.value(cat_index))

# create qcd groups
qcd_groups: dict[str, dict[str, od.Category]] = defaultdict(DotDict)
for cat_id in category_ids:
cat_inst = config.get_category(cat_id)
if cat_inst.has_tag({"os", "iso"}, mode=all):
qcd_groups[cat_inst.x.qcd_group].os_iso = cat_inst
elif cat_inst.has_tag({"os", "noniso"}, mode=all):
qcd_groups[cat_inst.x.qcd_group].os_noniso = cat_inst
elif cat_inst.has_tag({"ss", "iso"}, mode=all):
qcd_groups[cat_inst.x.qcd_group].ss_iso = cat_inst
elif cat_inst.has_tag({"ss", "noniso"}, mode=all):
qcd_groups[cat_inst.x.qcd_group].ss_noniso = cat_inst

# get complete qcd groups
complete_groups = [name for name, cats in qcd_groups.items() if len(cats) == 4]

# nothing to do if there are no complete groups
if not complete_groups:
return hists

# get mc, data and ttbar histograms
mc_hists = [h for p, h in hists.items() if p.is_mc and not p.has_tag("signal")]
data_hists = [h for p, h in hists.items() if p.is_data]
tt_hist = [h for p, h in hists.items() if p.has_tag("ttbar")]

# sum up hists, stop early when empty
if not mc_hists or not data_hists or not tt_hist:
return hists
mc_hist = sum(mc_hists[1:], mc_hists[0].copy())
data_hist = sum(data_hists[1:], data_hists[0].copy())
data_hist = data_hist.copy().reset()

# start by copying the data hist and reset it, then fill it at specific category slices
hists[pseudo_data] = data_hist
# insert values into the qcd histogram

# start by copying the mc hist and reset it, then fill it at specific category slices
hists[qcd_proc] = qcd_hist = mc_hist.copy().reset()
for group_name in complete_groups:
group = qcd_groups[group_name]

# get the corresponding histograms and convert them to number objects,
# each one storing an array of values with uncertainties
# shapes: (SHIFT, VAR)
get_hist = lambda h, region_name: h[{"category": hist.loc(group[region_name].id)}]
os_noniso_mc = hist_to_num(get_hist(mc_hist, "os_noniso"), "os_noniso_mc")
ss_noniso_mc = hist_to_num(get_hist(mc_hist, "ss_noniso"), "ss_noniso_mc")
ss_iso_mc = hist_to_num(get_hist(mc_hist, "ss_iso"), "ss_iso_mc")
os_noniso_data = hist_to_num(get_hist(data_hist, "os_noniso"), "os_noniso_data")
ss_noniso_data = hist_to_num(get_hist(data_hist, "ss_noniso"), "ss_noniso_data")
ss_iso_data = hist_to_num(get_hist(data_hist, "ss_iso"), "ss_iso_data")

# estimate qcd shapes in the three sideband regions
# shapes: (SHIFT, VAR)
os_noniso_qcd = os_noniso_data - os_noniso_mc
ss_iso_qcd = ss_iso_data - ss_iso_mc
ss_noniso_qcd = ss_noniso_data - ss_noniso_mc

# get integrals in ss regions for the transfer factor
# shapes: (SHIFT,)
int_ss_iso = integrate_num(ss_iso_qcd, axis=1)
int_ss_noniso = integrate_num(ss_noniso_qcd, axis=1)

# complain about negative integrals
int_ss_iso_neg = int_ss_iso <= 0
int_ss_noniso_neg = int_ss_noniso <= 0
if int_ss_iso_neg.any():
shift_ids = list(map(mc_hist.axes["shift"].value, np.where(int_ss_iso_neg)[0]))
shifts = list(map(config.get_shift, shift_ids))
logger.warning(
f"negative QCD integral in ss_iso region for group {group_name} and shifts: "
f"{', '.join(map(str, shifts))}",
)
if int_ss_noniso_neg.any():
shift_ids = list(map(mc_hist.axes["shift"].value, np.where(int_ss_noniso_neg)[0]))
shifts = list(map(config.get_shift, shift_ids))
logger.warning(
f"negative QCD integral in ss_noniso region for group {group_name} and shifts: "
f"{', '.join(map(str, shifts))}",
)

# ABCD method
# shape: (SHIFT, VAR)
os_iso_qcd = os_noniso_qcd * ((int_ss_iso / int_ss_noniso)[:, None])

# combine uncertainties and store values in bare arrays
os_iso_qcd_values = os_iso_qcd()
os_iso_qcd_variances = os_iso_qcd(sn.UP, sn.ALL, unc=True)**2

# define uncertainties
unc_data = os_iso_qcd(sn.UP, ["os_noniso_data", "ss_iso_data", "ss_noniso_data"], unc=True)
unc_mc = os_iso_qcd(sn.UP, ["os_noniso_mc", "ss_iso_mc", "ss_noniso_mc"], unc=True)
unc_data_rel = abs(unc_data / os_iso_qcd_values)
unc_mc_rel = abs(unc_mc / os_iso_qcd_values)

# only keep the MC uncertainty if it is larger than the data uncertainty and larger than 15%
keep_variance_mask = (
np.isfinite(unc_mc_rel) &
(unc_mc_rel > unc_data_rel) &
(unc_mc_rel > 0.15)
)
os_iso_qcd_variances[keep_variance_mask] = unc_mc[keep_variance_mask]**2
os_iso_qcd_variances[~keep_variance_mask] = 0

# retro-actively set values to zero for shifts that had negative integrals
neg_int_mask = int_ss_iso_neg | int_ss_noniso_neg
os_iso_qcd_values[neg_int_mask] = 1e-5
os_iso_qcd_variances[neg_int_mask] = 0

# residual zero filling
zero_mask = os_iso_qcd_values <= 0
os_iso_qcd_values[zero_mask] = 1e-5
os_iso_qcd_variances[zero_mask] = 0

# insert values into the qcd histogram
cat_axis = qcd_hist.axes["category"]
for cat_index in range(cat_axis.size):
if cat_axis.value(cat_index) == group.os_iso.id:
qcd_hist.view().value[cat_index, ...] = os_iso_qcd_values
qcd_hist.view().variance[cat_index, ...] = os_iso_qcd_variances
break
else:
raise RuntimeError(
f"could not find index of bin on 'category' axis of qcd histogram {qcd_hist} "
f"for category {group.os_iso}",
)
print("-- EXITING CLOSURE TEST HOOK --")
return hists

def flat_s(task, hists: dict[od.Process, hist.Histogram]) -> dict[od.Process, hist.Histogram]:
Expand Down Expand Up @@ -473,5 +630,6 @@ def apply_edges(h: hist.Hist, edges: np.ndarray, indices: np.ndarray, variable:

config.x.hist_hooks = {
"qcd": qcd_estimation,
"closure_test": closure_test,
"flat_s": flat_s,
}
10 changes: 10 additions & 0 deletions hbt/config/variables.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,9 +98,19 @@ def add_variables(config: od.Config) -> None:
expression="Electron.pt",
null_value=EMPTY_FLOAT,
binning=(400, 0, 400),
unit="GeV",
x_title=r"Electron p$_{T}$",
)

config.add_variable(
name="tau_pt",
expression="Tau.pt",
null_value=EMPTY_FLOAT,
binning=(80, 20, 100),
unit="GeV",
x_title=r"Tau p$_{T}$",
)

# weights
config.add_variable(
name="mc_weight",
Expand Down

0 comments on commit 026dcdb

Please sign in to comment.