From 026dcdb7baec9c327f43f22685d42ae0c1015332 Mon Sep 17 00:00:00 2001 From: Ana A Date: Wed, 4 Dec 2024 14:39:13 +0100 Subject: [PATCH] adding tau variable and temporary cfg changes --- hbt/config/configs_hbt.py | 46 +++++------ hbt/config/hist_hooks.py | 158 ++++++++++++++++++++++++++++++++++++++ hbt/config/variables.py | 10 +++ 3 files changed, 191 insertions(+), 23 deletions(-) diff --git a/hbt/config/configs_hbt.py b/hbt/config/configs_hbt.py index 5d15f626..4a5d54ad 100644 --- a/hbt/config/configs_hbt.py +++ b/hbt/config/configs_hbt.py @@ -159,7 +159,7 @@ def if_era( if process_name.startswith(("graviton_hh_", "radion_hh_")): proc.add_tag("signal") proc.add_tag("resonant_signal") - if process_name.startswith("tt_"): + if process_name.startswith("tt"): proc.add_tag({"ttbar", "tt"}) if process_name.startswith("dy_"): proc.add_tag("dy") @@ -200,10 +200,10 @@ def if_era( "hh_vbf_hbb_htt_kvm2p12_k2v3p87_klm5p96_madgraph", # some resonances - "radion_hh_ggf_hbb_htt_m450_madgraph", - "radion_hh_ggf_hbb_htt_m1200_madgraph", - "graviton_hh_ggf_hbb_htt_m450_madgraph", - "graviton_hh_ggf_hbb_htt_m1200_madgraph", + # "radion_hh_ggf_hbb_htt_m450_madgraph", + # "radion_hh_ggf_hbb_htt_m1200_madgraph", + # "graviton_hh_ggf_hbb_htt_m450_madgraph", + # "graviton_hh_ggf_hbb_htt_m1200_madgraph", ]), # backgrounds @@ -222,8 +222,8 @@ def if_era( "st_twchannel_tbar_dl_powheg", "st_twchannel_t_fh_powheg", "st_twchannel_tbar_fh_powheg", - "st_schannel_t_lep_4f_amcatnlo", - "st_schannel_tbar_lep_4f_amcatnlo", + # "st_schannel_t_lep_4f_amcatnlo", + # "st_schannel_tbar_lep_4f_amcatnlo", # tt + v "ttw_wlnu_amcatnlo", @@ -258,19 +258,19 @@ def if_era( # w + jets "w_lnu_amcatnlo", - "w_lnu_0j_amcatnlo", - "w_lnu_1j_amcatnlo", - "w_lnu_2j_amcatnlo", - "w_lnu_1j_pt40to100_amcatnlo", - "w_lnu_1j_pt100to200_amcatnlo", - "w_lnu_1j_pt200to400_amcatnlo", - "w_lnu_1j_pt400to600_amcatnlo", - "w_lnu_1j_pt600toinf_amcatnlo", - "w_lnu_2j_pt40to100_amcatnlo", - "w_lnu_2j_pt100to200_amcatnlo", - "w_lnu_2j_pt200to400_amcatnlo", - "w_lnu_2j_pt400to600_amcatnlo", - "w_lnu_2j_pt600toinf_amcatnlo", + # "w_lnu_0j_amcatnlo", + # "w_lnu_1j_amcatnlo", + # "w_lnu_2j_amcatnlo", + # "w_lnu_1j_pt40to100_amcatnlo", + # "w_lnu_1j_pt100to200_amcatnlo", + # "w_lnu_1j_pt200to400_amcatnlo", + # "w_lnu_1j_pt400to600_amcatnlo", + # "w_lnu_1j_pt600toinf_amcatnlo", + # "w_lnu_2j_pt40to100_amcatnlo", + # "w_lnu_2j_pt100to200_amcatnlo", + # "w_lnu_2j_pt200to400_amcatnlo", + # "w_lnu_2j_pt400to600_amcatnlo", + # "w_lnu_2j_pt600toinf_amcatnlo", # z + jets (not DY but qq) # decided to drop z_qq for now as their contribution is negligible, @@ -303,8 +303,8 @@ def if_era( "wph_wlnu_hbb_powheg", "wph_htt_powheg", "wmh_htt_powheg", - "wph_wqq_hbb_powheg", - "wmh_wqq_hbb_powheg", + # "wph_wqq_hbb_powheg", + # "wmh_wqq_hbb_powheg", "zh_zll_hbb_powheg", "zh_zqq_hbb_powheg", "zh_htt_powheg", @@ -464,7 +464,7 @@ def if_era( } cfg.x.default_custom_style_config = "small_legend" - cfg.x.default_blinding_threshold = 3e-4 + cfg.x.default_blinding_threshold = None ################################################################################################ # luminosity and normalization diff --git a/hbt/config/hist_hooks.py b/hbt/config/hist_hooks.py index ccc51791..a345e495 100644 --- a/hbt/config/hist_hooks.py +++ b/hbt/config/hist_hooks.py @@ -184,7 +184,164 @@ def qcd_estimation(task, hists): f"could not find index of bin on 'category' axis of qcd histogram {qcd_hist} " f"for category {group.os_iso}", ) + return hists + + def closure_test(task, hists): + print("-- ENTERING CLOSURE TEST HOOK --") + if not hists: + return hists + + # get the qcd process + qcd_proc = config.get_process("qcd", default=None) + if not qcd_proc: + return hists + + # get the data process + pseudo_data = config.get_process("data", default=None) + if not pseudo_data: + return hists + + # extract all unique category ids and verify that the axis order is exactly + # "category -> shift -> variable" which is needed to insert values at the end + CAT_AXIS, SHIFT_AXIS, VAR_AXIS = range(3) + category_ids = set() + for proc, h in hists.items(): + # validate axes + assert len(h.axes) == 3 + assert h.axes[CAT_AXIS].name == "category" + assert h.axes[SHIFT_AXIS].name == "shift" + # get the category axis + cat_ax = h.axes["category"] + for cat_index in range(cat_ax.size): + category_ids.add(cat_ax.value(cat_index)) + + # create qcd groups + qcd_groups: dict[str, dict[str, od.Category]] = defaultdict(DotDict) + for cat_id in category_ids: + cat_inst = config.get_category(cat_id) + if cat_inst.has_tag({"os", "iso"}, mode=all): + qcd_groups[cat_inst.x.qcd_group].os_iso = cat_inst + elif cat_inst.has_tag({"os", "noniso"}, mode=all): + qcd_groups[cat_inst.x.qcd_group].os_noniso = cat_inst + elif cat_inst.has_tag({"ss", "iso"}, mode=all): + qcd_groups[cat_inst.x.qcd_group].ss_iso = cat_inst + elif cat_inst.has_tag({"ss", "noniso"}, mode=all): + qcd_groups[cat_inst.x.qcd_group].ss_noniso = cat_inst + + # get complete qcd groups + complete_groups = [name for name, cats in qcd_groups.items() if len(cats) == 4] + + # nothing to do if there are no complete groups + if not complete_groups: + return hists + + # get mc, data and ttbar histograms + mc_hists = [h for p, h in hists.items() if p.is_mc and not p.has_tag("signal")] + data_hists = [h for p, h in hists.items() if p.is_data] + tt_hist = [h for p, h in hists.items() if p.has_tag("ttbar")] + + # sum up hists, stop early when empty + if not mc_hists or not data_hists or not tt_hist: + return hists + mc_hist = sum(mc_hists[1:], mc_hists[0].copy()) + data_hist = sum(data_hists[1:], data_hists[0].copy()) + data_hist = data_hist.copy().reset() + + # start by copying the data hist and reset it, then fill it at specific category slices + hists[pseudo_data] = data_hist + # insert values into the qcd histogram + + # start by copying the mc hist and reset it, then fill it at specific category slices + hists[qcd_proc] = qcd_hist = mc_hist.copy().reset() + for group_name in complete_groups: + group = qcd_groups[group_name] + + # get the corresponding histograms and convert them to number objects, + # each one storing an array of values with uncertainties + # shapes: (SHIFT, VAR) + get_hist = lambda h, region_name: h[{"category": hist.loc(group[region_name].id)}] + os_noniso_mc = hist_to_num(get_hist(mc_hist, "os_noniso"), "os_noniso_mc") + ss_noniso_mc = hist_to_num(get_hist(mc_hist, "ss_noniso"), "ss_noniso_mc") + ss_iso_mc = hist_to_num(get_hist(mc_hist, "ss_iso"), "ss_iso_mc") + os_noniso_data = hist_to_num(get_hist(data_hist, "os_noniso"), "os_noniso_data") + ss_noniso_data = hist_to_num(get_hist(data_hist, "ss_noniso"), "ss_noniso_data") + ss_iso_data = hist_to_num(get_hist(data_hist, "ss_iso"), "ss_iso_data") + # estimate qcd shapes in the three sideband regions + # shapes: (SHIFT, VAR) + os_noniso_qcd = os_noniso_data - os_noniso_mc + ss_iso_qcd = ss_iso_data - ss_iso_mc + ss_noniso_qcd = ss_noniso_data - ss_noniso_mc + + # get integrals in ss regions for the transfer factor + # shapes: (SHIFT,) + int_ss_iso = integrate_num(ss_iso_qcd, axis=1) + int_ss_noniso = integrate_num(ss_noniso_qcd, axis=1) + + # complain about negative integrals + int_ss_iso_neg = int_ss_iso <= 0 + int_ss_noniso_neg = int_ss_noniso <= 0 + if int_ss_iso_neg.any(): + shift_ids = list(map(mc_hist.axes["shift"].value, np.where(int_ss_iso_neg)[0])) + shifts = list(map(config.get_shift, shift_ids)) + logger.warning( + f"negative QCD integral in ss_iso region for group {group_name} and shifts: " + f"{', '.join(map(str, shifts))}", + ) + if int_ss_noniso_neg.any(): + shift_ids = list(map(mc_hist.axes["shift"].value, np.where(int_ss_noniso_neg)[0])) + shifts = list(map(config.get_shift, shift_ids)) + logger.warning( + f"negative QCD integral in ss_noniso region for group {group_name} and shifts: " + f"{', '.join(map(str, shifts))}", + ) + + # ABCD method + # shape: (SHIFT, VAR) + os_iso_qcd = os_noniso_qcd * ((int_ss_iso / int_ss_noniso)[:, None]) + + # combine uncertainties and store values in bare arrays + os_iso_qcd_values = os_iso_qcd() + os_iso_qcd_variances = os_iso_qcd(sn.UP, sn.ALL, unc=True)**2 + + # define uncertainties + unc_data = os_iso_qcd(sn.UP, ["os_noniso_data", "ss_iso_data", "ss_noniso_data"], unc=True) + unc_mc = os_iso_qcd(sn.UP, ["os_noniso_mc", "ss_iso_mc", "ss_noniso_mc"], unc=True) + unc_data_rel = abs(unc_data / os_iso_qcd_values) + unc_mc_rel = abs(unc_mc / os_iso_qcd_values) + + # only keep the MC uncertainty if it is larger than the data uncertainty and larger than 15% + keep_variance_mask = ( + np.isfinite(unc_mc_rel) & + (unc_mc_rel > unc_data_rel) & + (unc_mc_rel > 0.15) + ) + os_iso_qcd_variances[keep_variance_mask] = unc_mc[keep_variance_mask]**2 + os_iso_qcd_variances[~keep_variance_mask] = 0 + + # retro-actively set values to zero for shifts that had negative integrals + neg_int_mask = int_ss_iso_neg | int_ss_noniso_neg + os_iso_qcd_values[neg_int_mask] = 1e-5 + os_iso_qcd_variances[neg_int_mask] = 0 + + # residual zero filling + zero_mask = os_iso_qcd_values <= 0 + os_iso_qcd_values[zero_mask] = 1e-5 + os_iso_qcd_variances[zero_mask] = 0 + + # insert values into the qcd histogram + cat_axis = qcd_hist.axes["category"] + for cat_index in range(cat_axis.size): + if cat_axis.value(cat_index) == group.os_iso.id: + qcd_hist.view().value[cat_index, ...] = os_iso_qcd_values + qcd_hist.view().variance[cat_index, ...] = os_iso_qcd_variances + break + else: + raise RuntimeError( + f"could not find index of bin on 'category' axis of qcd histogram {qcd_hist} " + f"for category {group.os_iso}", + ) + print("-- EXITING CLOSURE TEST HOOK --") return hists def flat_s(task, hists: dict[od.Process, hist.Histogram]) -> dict[od.Process, hist.Histogram]: @@ -473,5 +630,6 @@ def apply_edges(h: hist.Hist, edges: np.ndarray, indices: np.ndarray, variable: config.x.hist_hooks = { "qcd": qcd_estimation, + "closure_test": closure_test, "flat_s": flat_s, } diff --git a/hbt/config/variables.py b/hbt/config/variables.py index 1f4e83ff..7111c505 100644 --- a/hbt/config/variables.py +++ b/hbt/config/variables.py @@ -98,9 +98,19 @@ def add_variables(config: od.Config) -> None: expression="Electron.pt", null_value=EMPTY_FLOAT, binning=(400, 0, 400), + unit="GeV", x_title=r"Electron p$_{T}$", ) + config.add_variable( + name="tau_pt", + expression="Tau.pt", + null_value=EMPTY_FLOAT, + binning=(80, 20, 100), + unit="GeV", + x_title=r"Tau p$_{T}$", + ) + # weights config.add_variable( name="mc_weight",