From 2074153fae326ea7494349cb126232f711f2ec35 Mon Sep 17 00:00:00 2001 From: Mathis Frahm Date: Fri, 20 Dec 2024 15:51:15 +0100 Subject: [PATCH 01/29] minor fixes --- hbw/config/datasets.py | 10 ++++++---- hbw/ml/base.py | 4 +--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/hbw/config/datasets.py b/hbw/config/datasets.py index a919347..6b3eaa6 100644 --- a/hbw/config/datasets.py +++ b/hbw/config/datasets.py @@ -116,10 +116,12 @@ def hbw_dataset_names(config: od.Config, as_list: bool = False) -> DotDict[str: ], "ttv": [ "ttw_wlnu_amcatnlo", - "ttz_zll_m4to50_amcatnlo", - "ttz_zll_m50toinf_amcatnlo", - "ttz_znunu_amcatnlo", - "ttz_zqq_amcatnlo", + *config.x.if_era(run=3, values=[ + "ttz_zll_m4to50_amcatnlo", + "ttz_zll_m50toinf_amcatnlo", + "ttz_znunu_amcatnlo", + "ttz_zqq_amcatnlo", + ]), ], "h": [ *config.x.if_era(run=3, values=[ diff --git a/hbw/ml/base.py b/hbw/ml/base.py index e03c3bd..9a22c72 100644 --- a/hbw/ml/base.py +++ b/hbw/ml/base.py @@ -210,7 +210,7 @@ def setup(self) -> None: expression=f"mlscore.{proc}", null_value=-1, binning=(1000, 0., 1.), - x_title=f"DNN output score {config_inst.get_process(proc).x.ml_label}", + x_title=f"DNN output score {config_inst.get_process(proc).x('ml_label', proc)}", aux={ "rebin": 25, "rebin_config": { @@ -305,11 +305,9 @@ def output(self, task: law.Task) -> dict[str, law.FileSystemTarget]: # declare the main target target = task.target(f"mlmodel_f{task.branch}of{self.folds}", dir=True) - # TODO: cleanup (produce plots, stats in separate task) outp = { "mlmodel": target, "plots": target.child("plots", type="d", optional=True), - # "dummy": target.child("dummy", type="d", optional=True), "checkpoint": target.child("checkpoint", type="d", optional=True), } From 82ecce32acc724ecdbb70e216353d26fae05360b Mon Sep 17 00:00:00 2001 From: Mathis Frahm Date: Fri, 20 Dec 2024 15:51:26 +0100 Subject: [PATCH 02/29] switch MET column in calibration and add FatJet calibration --- hbw/calibration/default.py | 102 
+++++++++++++++++++++++++---- hbw/config/config_run2.py | 130 +++++++++++++++++-------------------- 2 files changed, 148 insertions(+), 84 deletions(-) diff --git a/hbw/calibration/default.py b/hbw/calibration/default.py index 405d3da..331ade8 100644 --- a/hbw/calibration/default.py +++ b/hbw/calibration/default.py @@ -7,11 +7,12 @@ import law from columnflow.calibration import Calibrator, calibrator +from columnflow.calibration.cms.met import met_phi from columnflow.calibration.cms.jets import jec, jer from columnflow.production.cms.seeds import deterministic_seeds from columnflow.util import maybe_import -from hbw.calibration.jet import jec_nominal, bjet_regression +from hbw.calibration.jet import bjet_regression ak = maybe_import("awkward") @@ -19,13 +20,67 @@ logger = law.logger.get_logger(__name__) +@calibrator( + # jec uncertainty_sources: set to None to use config default + jec_sources=["Total"], + version=1, +) +def fatjet(self: Calibrator, events: ak.Array, **kwargs) -> ak.Array: + """ + FatJet calibrator, combining JEC and JER. 
+ """ + if self.task.local_shift != "nominal": + raise Exception("FatJet Calibrator should not be run for shifts other than nominal") + + # apply the fatjet JEC and JER + events = self[self.fatjet_jec_cls](events, **kwargs) + if self.dataset_inst.is_mc: + events = self[self.fatjet_jer_cls](events, **kwargs) + + return events + + +@fatjet.init +def fatjet_init(self: Calibrator) -> None: + if not getattr(self, "dataset_inst", None): + return + + # list of calibrators to apply (in that order) + self.calibrators = [] + + fatjet_jec_cls_dict = { + "jet_name": "FatJet", + "gen_jet_name": "GenJetAK8", + # MET propagation is performed in AK4 jet calibrator; fatjet should never use any MET columns + "propagate_met": False, + "met_name": "DO_NOT_USE", + "raw_met_name": "DO_NOT_USE", + } + fatjet_jer_cls_dict = fatjet_jec_cls_dict.copy() + # NOTE: deterministic FatJet seeds are not yet possible to produce + # fatjet_jer_cls_dict["deterministic_seed_index"] = 0 + + uncertainty_sources = [] if self.dataset_inst.is_data else self.jec_sources + jec_cls_name = f"fatjet_jec{'_nominal' if uncertainty_sources == [] else ''}" + self.fatjet_jec_cls = jec.derive(jec_cls_name, cls_dict={ + **fatjet_jec_cls_dict, + "uncertainty_sources": uncertainty_sources, + }) + self.fatjet_jer_cls = jer.derive("deterministic_fatjet_jer", cls_dict=fatjet_jer_cls_dict) + + self.uses |= {self.fatjet_jec_cls, self.fatjet_jer_cls} + self.produces |= {self.fatjet_jec_cls, self.fatjet_jer_cls} + + @calibrator( uses={deterministic_seeds}, produces={deterministic_seeds}, - skip_jecunc=True, + # jec uncertainty_sources: set to None to use config default + jec_sources=["Total"], bjet_regression=True, + version=1, ) -def base(self: Calibrator, events: ak.Array, **kwargs) -> ak.Array: +def jet_base(self: Calibrator, events: ak.Array, **kwargs) -> ak.Array: events = self[deterministic_seeds](events, **kwargs) logger.info(f"Running calibrators '{[calib.cls_name for calib in self.calibrators]}' (in that order)") @@ 
-35,31 +90,50 @@ def base(self: Calibrator, events: ak.Array, **kwargs) -> ak.Array: return events -@base.init -def base_init(self: Calibrator) -> None: +@jet_base.init +def jet_base_init(self: Calibrator) -> None: if not getattr(self, "dataset_inst", None): return + met_name = self.config_inst.x.met_name + raw_met_name = self.config_inst.x.raw_met_name + # list of calibrators to apply (in that order) self.calibrators = [] - if self.dataset_inst.is_data or self.skip_jecunc: - self.calibrators.append(jec_nominal) - else: - self.calibrators.append(jec) + uncertainty_sources = [] if self.dataset_inst.is_data else self.jec_sources + jec_cls_name = f"ak4_jec{'_nominal' if uncertainty_sources == [] else ''}" + + jec_cls = jec.derive( + jec_cls_name, + cls_dict={ + "uncertainty_sources": uncertainty_sources, + "met_name": met_name, + "raw_met_name": raw_met_name, + }, + ) + self.calibrators.append(jec_cls) if self.bjet_regression: self.calibrators.append(bjet_regression) # run JER only on MC if self.dataset_inst.is_mc: - self.calibrators.append(jer) + # version of jer that uses the first random number from deterministic_seeds + deterministic_jer_cls = jer.derive( + "deterministic_jer", + cls_dict={"deterministic_seed_index": 0, "met_name": met_name}, + ) + self.calibrators.append(deterministic_jer_cls) + + if self.config_inst.x.run == 2: + # derive met_phi calibrator (currently only for run 2) + met_phi_cls = met_phi.derive("met_phi", cls_dict={"met_name": met_name}) + self.calibrators.append(met_phi_cls) self.uses |= set(self.calibrators) self.produces |= set(self.calibrators) -default = base.derive("default", cls_dict=dict(skip_jecunc=False, bjet_regression=False)) -skip_jecunc = base.derive("skip_jecunc", cls_dict=dict(skip_jecunc=True, bjet_regression=False)) -with_b_reg = base.derive("with_b_reg", cls_dict=dict(skip_jecunc=True, bjet_regression=True)) -full = base.derive("full", cls_dict=dict(skip_jecunc=False, bjet_regression=True)) +skip_jecunc = 
jet_base.derive("skip_jecunc", cls_dict=dict(bjet_regression=False)) +with_b_reg = jet_base.derive("with_b_reg", cls_dict=dict(bjet_regression=True)) diff --git a/hbw/config/config_run2.py b/hbw/config/config_run2.py index 7fac547..d5718e1 100644 --- a/hbw/config/config_run2.py +++ b/hbw/config/config_run2.py @@ -192,83 +192,62 @@ def if_era( if cfg.x.run == 2: jerc_campaign = f"Summer19UL{year2}{jerc_postfix}" jet_type = "AK4PFchs" + fatjet_type = "AK8PFchs" elif cfg.x.run == 3: jerc_campaign = f"Summer{year2}{jerc_postfix}_22Sep2023" jet_type = "AK4PFPuppi" + fatjet_type = "AK8PFPuppi" + + jec_uncertainties = [ + # NOTE: there are many more sources available, but it is likely that we only need Total + "Total", + # "CorrelationGroupMPFInSitu", + # "CorrelationGroupIntercalibration", + # "CorrelationGroupbJES", + # "CorrelationGroupFlavor", + # "CorrelationGroupUncorrelated", + ] - cfg.x.jec = DotDict.wrap({"Jet": { - "campaign": jerc_campaign, - "version": {2016: "V7", 2017: "V5", 2018: "V5", 2022: "V2"}[year], - "jet_type": jet_type, - "levels": ["L1FastJet", "L2Relative", "L2L3Residual", "L3Absolute"], - "levels_for_type1_met": ["L1FastJet"], - "uncertainty_sources": [ - # "AbsoluteStat", - # "AbsoluteScale", - # "AbsoluteSample", - # "AbsoluteFlavMap", - # "AbsoluteMPFBias", - # "Fragmentation", - # "SinglePionECAL", - # "SinglePionHCAL", - # "FlavorQCD", - # "TimePtEta", - # "RelativeJEREC1", - # "RelativeJEREC2", - # "RelativeJERHF", - # "RelativePtBB", - # "RelativePtEC1", - # "RelativePtEC2", - # "RelativePtHF", - # "RelativeBal", - # "RelativeSample", - # "RelativeFSR", - # "RelativeStatFSR", - # "RelativeStatEC", - # "RelativeStatHF", - # "PileUpDataMC", - # "PileUpPtRef", - # "PileUpPtBB", - # "PileUpPtEC1", - # "PileUpPtEC2", - # "PileUpPtHF", - # "PileUpMuZero", - # "PileUpEnvelope", - # "SubTotalPileUp", - # "SubTotalRelative", - # "SubTotalPt", - # "SubTotalScale", - # "SubTotalAbsolute", - # "SubTotalMC", - "Total", - # "TotalNoFlavor", - # 
"TotalNoTime", - # "TotalNoFlavorNoTime", - # "FlavorZJet", - # "FlavorPhotonJet", - # "FlavorPureGluon", - # "FlavorPureQuark", - # "FlavorPureCharm", - # "FlavorPureBottom", - # "TimeRunA", - # "TimeRunB", - # "TimeRunC", - # "TimeRunD", - "CorrelationGroupMPFInSitu", - "CorrelationGroupIntercalibration", - "CorrelationGroupbJES", - "CorrelationGroupFlavor", - "CorrelationGroupUncorrelated", - ], - }}) + cfg.x.jec = DotDict.wrap({ + # NOTE: currently, we set the uncertainty_sources in the calibrator itself + "Jet": { + "campaign": jerc_campaign, + "version": {2016: "V7", 2017: "V5", 2018: "V5", 2022: "V2"}[year], + "jet_type": jet_type, + "external_file_key": "jet_jerc", + "levels": ["L1FastJet", "L2Relative", "L2L3Residual", "L3Absolute"], + "levels_for_type1_met": ["L1FastJet"], + "uncertainty_sources": jec_uncertainties, + }, + "FatJet": { + "campaign": jerc_campaign, + "version": {2016: "V7", 2017: "V5", 2018: "V5", 2022: "V2"}[year], + "jet_type": fatjet_type, + "external_file_key": "fat_jet_jerc", + "levels": ["L1FastJet", "L2Relative", "L2L3Residual", "L3Absolute"], + "levels_for_type1_met": ["L1FastJet"], + "uncertainty_sources": jec_uncertainties, + }, + }) # JER # https://twiki.cern.ch/twiki/bin/view/CMS/JetResolution?rev=107 - cfg.x.jer = DotDict.wrap({"Jet": { - "campaign": jerc_campaign, - "version": {2016: "JRV3", 2017: "JRV2", 2018: "JRV2", 2022: "JRV1"}[year], - "jet_type": jet_type, - }}) + cfg.x.jer = DotDict.wrap({ + "Jet": { + "campaign": jerc_campaign, + "version": {2016: "JRV3", 2017: "JRV2", 2018: "JRV2", 2022: "JRV1"}[year], + "jet_type": jet_type, + "external_file_key": "jet_jerc", + }, + "FatJet": { + "campaign": jerc_campaign, + "version": {2016: "JRV3", 2017: "JRV2", 2018: "JRV2", 2022: "JRV1"}[year], + # "jet_type": "fatjet_type", + # JER info only for AK4 jets, stored in AK4 file + "jet_type": jet_type, + "external_file_key": "jet_jerc", + }, + }) # JEC uncertainty sources propagated to btag scale factors # (names derived from 
contents in BTV correctionlib file) @@ -344,6 +323,16 @@ def if_era( }[cfg.x.run] cfg.x.btag_wp = "medium" + # met configuration + cfg.x.met_name = { + 2: "MET", + 3: "PuppiMET", + }[cfg.x.run] + cfg.x.raw_met_name = { + 2: "RawMET", + 3: "RawPuppiMET", + }[cfg.x.run] + # top pt reweighting parameters # https://twiki.cern.ch/twiki/bin/viewauth/CMS/TopPtReweighting#TOP_PAG_corrections_based_on_dat?rev=31 cfg.x.top_pt_reweighting_params = { @@ -603,6 +592,7 @@ def add_external(name, value): add_external("pu_sf", (f"{json_mirror}/POG/LUM/{corr_tag}/puWeights.json.gz", "v1")) # jet energy correction add_external("jet_jerc", (f"{json_mirror}/POG/JME/{corr_tag}/jet_jerc.json.gz", "v1")) + add_external("fat_jet_jerc", (f"{json_mirror}/POG/JME/{corr_tag}/fatJet_jerc.json.gz", "v1")) # jet veto map add_external("jet_veto_map", (f"{json_mirror}/POG/JME/{corr_tag}/jetvetomaps.json.gz", "v1")) # electron scale factors From a35f433f3bb03259a8d91fc42bb55efbb1f1abd4 Mon Sep 17 00:00:00 2001 From: Mathis Frahm Date: Fri, 20 Dec 2024 16:13:06 +0100 Subject: [PATCH 03/29] add correct met_column in downstream modules --- hbw/categorization/categories.py | 10 ++- hbw/config/variables.py | 12 +-- hbw/production/ml_inputs.py | 53 ++++++----- hbw/production/neutrino.py | 7 +- hbw/production/prepare_objects.py | 9 +- hbw/production/resonant_features.py | 3 +- hbw/production/synchronization.py | 5 +- hbw/production/trigger.py | 134 ++++++++++++++++++++++++++++ hbw/scripts/synchronization.py | 5 +- hbw/util.py | 20 +++++ 10 files changed, 214 insertions(+), 44 deletions(-) create mode 100644 hbw/production/trigger.py diff --git a/hbw/categorization/categories.py b/hbw/categorization/categories.py index c5799e3..bfde28f 100644 --- a/hbw/categorization/categories.py +++ b/hbw/categorization/categories.py @@ -13,6 +13,8 @@ from columnflow.selection import SelectionResult from columnflow.columnar_util import has_ak_column, optional_column +from hbw.util import MET_COLUMN + np = 
maybe_import("numpy") ak = maybe_import("awkward") @@ -160,15 +162,15 @@ def catid_fake( return events, mask -@categorizer(uses={"MET.pt"}) +@categorizer(uses={MET_COLUMN("pt")}) def catid_highmet(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: - mask = events.MET.pt >= 20 + mask = events[self.config_inst.x.met_name].pt >= 20 return events, mask -@categorizer(uses={"MET.pt"}) +@categorizer(uses={MET_COLUMN("pt")}) def catid_lowmet(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: - mask = events.MET.pt < 20 + mask = events[self.config_inst.x.met_name].pt < 20 return events, mask # diff --git a/hbw/config/variables.py b/hbw/config/variables.py index 20fbe34..c48992a 100644 --- a/hbw/config/variables.py +++ b/hbw/config/variables.py @@ -365,10 +365,11 @@ def add_variables(config: od.Config) -> None: unit="GeV", x_title="HT", ) + met_name = config.x.met_name config.add_variable( name="lt", expression=lambda events: ( - ak.sum(events.Muon.pt, axis=1) + ak.sum(events.Muon.pt, axis=1) + events.MET.pt + ak.sum(events.Muon.pt, axis=1) + ak.sum(events.Muon.pt, axis=1) + events[met_name].pt ), aux={"inputs": {"Muon.pt", "Electron.pt", "MET.pt"}}, binning=(40, 0, 1200), @@ -646,16 +647,17 @@ def add_variables(config: od.Config) -> None: ) # MET + config.add_variable( name="met_pt", - expression="MET.pt", + expression=f"{met_name}.pt", binning=(40, 0., 400.), unit="GeV", - x_title=r"MET $p_{T}$", + x_title=r"{met_name} $p_{{T}}$".format(met_name=met_name), ) config.add_variable( name="met_phi", - expression="MET.phi", + expression=f"{met_name}.phi", binning=(40, -3.2, 3.2), - x_title=r"MET $\phi$", + x_title=r"{met_name} $\phi$".format(met_name=met_name), ) diff --git a/hbw/production/ml_inputs.py b/hbw/production/ml_inputs.py index c56f4f9..d5612ed 100644 --- a/hbw/production/ml_inputs.py +++ b/hbw/production/ml_inputs.py @@ -17,6 +17,8 @@ from hbw.config.dl.variables import add_dl_ml_variables from 
hbw.config.sl_res.variables import add_sl_res_ml_variables +from hbw.util import MET_COLUMN + ak = maybe_import("awkward") np = maybe_import("numpy") @@ -60,7 +62,7 @@ def check_column_bookkeeping(self: Producer, events: ak.Array) -> None: prepare_objects, "HbbJet.msoftdrop", "{Electron,Muon,Jet,Bjet,Lightjet,VBFJet,HbbJet}.{pt,eta,phi,mass}", - "MET.{pt,phi}", + MET_COLUMN("pt"), MET_COLUMN("phi"), }, # produced columns set in the init function ) @@ -71,6 +73,8 @@ def common_ml_inputs(self: Producer, events: ak.Array, **kwargs) -> ak.Array: # add behavior and define new collections (e.g. Lepton) events = self[prepare_objects](events, **kwargs) + met_name = self.config_inst.x.met_name + # object padding events = set_ak_column(events, "Lightjet", ak.pad_none(events.Lightjet, 2)) events = set_ak_column(events, "Bjet", ak.pad_none(events.Bjet, 2)) @@ -99,12 +103,12 @@ def common_ml_inputs(self: Producer, events: ak.Array, **kwargs) -> ak.Array: events = set_ak_column_f32(events, "mli_lep_pt", events.Lepton[:, 0].pt) events = set_ak_column_f32(events, "mli_lep_eta", events.Lepton[:, 0].eta) - events = set_ak_column_f32(events, "mli_met_pt", events.MET.pt) - events = set_ak_column_f32(events, "mli_met_phi", events.MET.phi) + events = set_ak_column_f32(events, "mli_met_pt", events[met_name].pt) + events = set_ak_column_f32(events, "mli_met_phi", events[met_name].phi) # general events = set_ak_column_f32(events, "mli_ht", ak.sum(events.Jet.pt, axis=1)) - events = set_ak_column_f32(events, "mli_lt", ak.sum(events.Lepton.pt, axis=1) + events.MET.pt) + events = set_ak_column_f32(events, "mli_lt", ak.sum(events.Lepton.pt, axis=1) + events[met_name].pt) events = set_ak_column_f32(events, "mli_n_jet", ak.num(events.Jet.pt, axis=1)) # vbf jet pair features @@ -197,6 +201,7 @@ def sl_ml_inputs(self: Producer, events: ak.Array, **kwargs) -> ak.Array: """ Producer used for ML Training in the SL analysis. 
""" + met_name = self.config_inst.x.met_name # produce common input features events = self[common_ml_inputs](events, **kwargs) @@ -209,9 +214,9 @@ def sl_ml_inputs(self: Producer, events: ak.Array, **kwargs) -> ak.Array: # wlnu features # NOTE: we might want to consider neutrino reconstruction or transverse masses instead when including MET - wlnu = events.MET + events.Lepton[:, 0] + wlnu = events[met_name] + events.Lepton[:, 0] events = set_ak_column_f32(events, "mli_mlnu", wlnu.mass) - events = set_ak_column_f32(events, "mli_dphi_lnu", abs(events.Lepton[:, 0].delta_phi(events.MET))) + events = set_ak_column_f32(events, "mli_dphi_lnu", abs(events.Lepton[:, 0].delta_phi(events[met_name]))) events = set_ak_column_f32(events, "mli_dphi_wl", abs(wlnu.delta_phi(events.Lepton[:, 0]))) # hww features @@ -230,8 +235,8 @@ def sl_ml_inputs(self: Producer, events: ak.Array, **kwargs) -> ak.Array: events = set_ak_column_f32(events, "mli_dphi_bb_jjl", abs(hbb.delta_phi(hww_vis))) events = set_ak_column_f32(events, "mli_dr_bb_jjl", hbb.delta_r(hww_vis)) - events = set_ak_column_f32(events, "mli_dphi_bb_nu", abs(hbb.delta_phi(events.MET))) - events = set_ak_column_f32(events, "mli_dphi_jj_nu", abs(wjj.delta_phi(events.MET))) + events = set_ak_column_f32(events, "mli_dphi_bb_nu", abs(hbb.delta_phi(events[met_name]))) + events = set_ak_column_f32(events, "mli_dphi_jj_nu", abs(wjj.delta_phi(events[met_name]))) events = set_ak_column_f32(events, "mli_dr_bb_l", hbb.delta_r(events.Lepton[:, 0])) events = set_ak_column_f32(events, "mli_dr_jj_l", hbb.delta_r(events.Lepton[:, 0])) @@ -243,8 +248,8 @@ def sl_ml_inputs(self: Producer, events: ak.Array, **kwargs) -> ak.Array: events = set_ak_column_f32(events, "mli_mbbjjl", hh_vis.mass) s_min = ( - 2 * events.MET.pt * ((hh_vis.mass ** 2 + hh_vis.energy ** 2) ** 0.5 - - hh_vis.pt * np.cos(hh_vis.delta_phi(events.MET)) + hh_vis.mass ** 2) + 2 * events[met_name].pt * ((hh_vis.mass ** 2 + hh_vis.energy ** 2) ** 0.5 - + hh_vis.pt * 
np.cos(hh_vis.delta_phi(events[met_name])) + hh_vis.mass ** 2) ) ** 0.5 events = set_ak_column_f32(events, "mli_s_min", s_min) @@ -294,6 +299,7 @@ def dl_ml_inputs(self: Producer, events: ak.Array, **kwargs) -> ak.Array: """ Producer used for ML Training in the DL analysis. """ + met_name = self.config_inst.x.met_name # produce common input features events = self[common_ml_inputs](events, **kwargs) @@ -307,7 +313,7 @@ def dl_ml_inputs(self: Producer, events: ak.Array, **kwargs) -> ak.Array: hll = (events.Lepton[:, 0] + events.Lepton[:, 1]) events = set_ak_column_f32(events, "mli_ll_pt", hll.pt) events = set_ak_column_f32(events, "mli_mll", hll.mass) - events = set_ak_column_f32(events, "mli_mllMET", (hll + events.MET[:]).mass) + events = set_ak_column_f32(events, "mli_mllMET", (hll + events[met_name][:]).mass) events = set_ak_column_f32(events, "mli_dr_ll", events.Lepton[:, 0].delta_r(events.Lepton[:, 1])) events = set_ak_column_f32(events, "mli_dphi_ll", events.Lepton[:, 0].delta_phi(events.Lepton[:, 1])) @@ -319,10 +325,10 @@ def dl_ml_inputs(self: Producer, events: ak.Array, **kwargs) -> ak.Array: # hh system hbb = (events.Bjet[:, 0] + events.Bjet[:, 1]) * 1 # NOTE: *1 so it is a Lorentzvector not a candidate vector - events = set_ak_column_f32(events, "mli_mbbllMET", (hll + hbb + events.MET[:]).mass) - events = set_ak_column_f32(events, "mli_dr_bb_llMET", hbb.delta_r(hll + events.MET[:])) - events = set_ak_column_f32(events, "mli_dphi_bb_nu", abs(hbb.delta_phi(events.MET))) - events = set_ak_column_f32(events, "mli_dphi_bb_llMET", hbb.delta_phi(hll + events.MET[:])) + events = set_ak_column_f32(events, "mli_mbbllMET", (hll + hbb + events[met_name][:]).mass) + events = set_ak_column_f32(events, "mli_dr_bb_llMET", hbb.delta_r(hll + events[met_name][:])) + events = set_ak_column_f32(events, "mli_dphi_bb_nu", abs(hbb.delta_phi(events[met_name]))) + events = set_ak_column_f32(events, "mli_dphi_bb_llMET", hbb.delta_phi(hll + events[met_name][:])) # fill nan/none 
values of all produced columns for col in self.ml_input_columns: @@ -363,6 +369,7 @@ def sl_res_ml_inputs(self: Producer, events: ak.Array, **kwargs) -> ak.Array: """ Producer used for ML Training in the SL analysis. """ + met_name = self.config_inst.x.met_name # produce common input features events = self[common_ml_inputs](events, **kwargs) @@ -382,8 +389,8 @@ def sl_res_ml_inputs(self: Producer, events: ak.Array, **kwargs) -> ak.Array: events = set_ak_column_f32(events, "mli_phi_jj", wjj.phi) # wlnu features - wlnu = events.MET + events.Lepton[:, 0] - events = set_ak_column_f32(events, "mli_dphi_lnu", abs(events.Lepton[:, 0].delta_phi(events.MET))) + wlnu = events[met_name] + events.Lepton[:, 0] + events = set_ak_column_f32(events, "mli_dphi_lnu", abs(events.Lepton[:, 0].delta_phi(events[met_name]))) # NOTE: this column can be set to nan value events = set_ak_column_f32(events, "mli_mlnu", wlnu.mass) events = set_ak_column_f32(events, "mli_pt_lnu", wlnu.pt) @@ -410,10 +417,10 @@ def sl_res_ml_inputs(self: Producer, events: ak.Array, **kwargs) -> ak.Array: events = set_ak_column_f32(events, "mli_dphi_bb_jjl", abs(hbb.delta_phi(hww_vis))) events = set_ak_column_f32(events, "mli_dr_bb_jjl", hbb.delta_r(hww_vis)) - events = set_ak_column_f32(events, "mli_dphi_bb_nu", abs(hbb.delta_phi(events.MET))) - events = set_ak_column_f32(events, "mli_dphi_jj_nu", abs(wjj.delta_phi(events.MET))) - events = set_ak_column_f32(events, "mli_dr_bb_l", hbb.delta_r(events.MET)) - events = set_ak_column_f32(events, "mli_dr_jj_l", hbb.delta_r(events.MET)) + events = set_ak_column_f32(events, "mli_dphi_bb_nu", abs(hbb.delta_phi(events[met_name]))) + events = set_ak_column_f32(events, "mli_dphi_jj_nu", abs(wjj.delta_phi(events[met_name]))) + events = set_ak_column_f32(events, "mli_dr_bb_l", hbb.delta_r(events[met_name])) + events = set_ak_column_f32(events, "mli_dr_jj_l", hbb.delta_r(events[met_name])) # hh features hh = hbb + hww @@ -423,8 +430,8 @@ def sl_res_ml_inputs(self: Producer, 
events: ak.Array, **kwargs) -> ak.Array: events = set_ak_column_f32(events, "mli_mbbjjl", hh_vis.mass) s_min = ( - 2 * events.MET.pt * ((hh_vis.mass ** 2 + hh_vis.energy ** 2) ** 0.5 - - hh_vis.pt * np.cos(hh_vis.delta_phi(events.MET)) + hh_vis.mass ** 2) + 2 * events[met_name].pt * ((hh_vis.mass ** 2 + hh_vis.energy ** 2) ** 0.5 - + hh_vis.pt * np.cos(hh_vis.delta_phi(events[met_name])) + hh_vis.mass ** 2) ) ** 0.5 events = set_ak_column_f32(events, "mli_s_min", s_min) diff --git a/hbw/production/neutrino.py b/hbw/production/neutrino.py index 963f32e..0483100 100644 --- a/hbw/production/neutrino.py +++ b/hbw/production/neutrino.py @@ -39,6 +39,7 @@ def neutrino_reconstruction(self: Producer, events: ak.Array, **kwargs) -> ak.Ar TODO: reference """ + met_name = self.config_inst.x.met_name # add behavior and define new collections (e.g. Lepton) events = self[prepare_objects](events, **kwargs) @@ -49,9 +50,9 @@ def neutrino_reconstruction(self: Producer, events: ak.Array, **kwargs) -> ak.Ar E_l = events.Lepton.E[:, 0] pt_l = events.Lepton.pt[:, 0] pz_l = events.Lepton.pz[:, 0] - pt_nu = events.MET.pt + pt_nu = events[met_name].pt - delta_phi = abs(events.Lepton[:, 0].delta_phi(events.MET)) + delta_phi = abs(events.Lepton[:, 0].delta_phi(events[met_name])) mu = w_mass**2 / 2 + pt_nu * pt_l * np.cos(delta_phi) # Neutrino pz will be calculated as: pz_nu = A +- sqrt(B-C) @@ -86,7 +87,7 @@ def neutrino_reconstruction(self: Producer, events: ak.Array, **kwargs) -> ak.Ar p_nu_1 = np.sqrt(pt_nu**2 + pz_nu**2) eta_nu_1 = np.log((p_nu_1 + pz_nu) / (p_nu_1 - pz_nu)) / 2 # store Neutrino 4 vector components - events[f"Neutrino{i}"] = events.MET + events[f"Neutrino{i}"] = events[met_name] events = set_ak_column_f32(events, f"Neutrino{i}.eta", eta_nu_1) # sanity check: Neutrino pz should be the same as pz_nu within rounding errors diff --git a/hbw/production/prepare_objects.py b/hbw/production/prepare_objects.py index fdc73c5..7c32069 100644 --- a/hbw/production/prepare_objects.py 
+++ b/hbw/production/prepare_objects.py @@ -110,9 +110,10 @@ def prepare_objects(self: Producer, events: ak.Array, results: SelectionResult = events = set_ak_column(events, "Lepton", lepton[ak.argsort(lepton.pt, ascending=False)]) # transform MET into 4-vector - if "MET" in events.fields: - events["MET"] = set_ak_column(events.MET, "mass", 0) - events["MET"] = set_ak_column(events.MET, "eta", 0) - events["MET"] = ak.with_name(events["MET"], "PtEtaPhiMLorentzVector") + met_name = self.config_inst.x.met_name + if met_name in events.fields: + events[met_name] = set_ak_column(events[met_name], "mass", 0) + events[met_name] = set_ak_column(events[met_name], "eta", 0) + events[met_name] = ak.with_name(events[met_name], "PtEtaPhiMLorentzVector") return events diff --git a/hbw/production/resonant_features.py b/hbw/production/resonant_features.py index d8aafe5..b408344 100644 --- a/hbw/production/resonant_features.py +++ b/hbw/production/resonant_features.py @@ -53,6 +53,7 @@ def resonant_features(self: Producer, events: ak.Array, **kwargs) -> ak.Array: # \ # q' # + met_name = self.config_inst.x.met_name # object padding events = set_ak_column(events, "Jet", ak.pad_none(events.Jet, 2)) @@ -64,7 +65,7 @@ def resonant_features(self: Producer, events: ak.Array, **kwargs) -> ak.Array: if "Whadron" not in events.fields: events = set_ak_column(events, "Whadron", events.Lightjet[:, 0] + events.Lightjet[:, 1]) if "Wlepton" not in events.fields: - events = set_ak_column(events, "Wlepton", events.Lepton[:, 0] + events.MET[:]) + events = set_ak_column(events, "Wlepton", events.Lepton[:, 0] + events[met_name][:]) if "Higgs_WW" not in events.fields: events = set_ak_column(events, "Higgs_WW", events.Whadron[:] + events.Wlepton[:]) if "Higgs_bb" not in events.fields: diff --git a/hbw/production/synchronization.py b/hbw/production/synchronization.py index b1aa3a5..4ce0dbe 100644 --- a/hbw/production/synchronization.py +++ b/hbw/production/synchronization.py @@ -19,6 +19,7 @@ def 
get_columns_to_store(config_inst): + met_name = config_inst.x.met_name columns_to_store = { "event_nr": lambda events: events.event, "run_nr": lambda events: events.run, @@ -66,8 +67,8 @@ def get_columns_to_store(config_inst): "ak8jet0_eta": lambda events: events.HbbJet.eta[:, 0], "ak8jet0_phi": lambda events: events.HbbJet.phi[:, 0], "ak8jet0_msoftdrop": lambda events: events.HbbJet.msoftdrop[:, 0], - "met_pt": lambda events: events.MET.pt, - "met_phi": lambda events: events.MET.phi, + "met_pt": lambda events: events[met_name].pt, + "met_phi": lambda events: events[met_name].phi, "mc_weight": lambda events: events.mc_weight, "normalized_pu_weight": lambda events: events.pu_weight, "normalized_btag_weight": lambda events: events.normalized_btag_weight, diff --git a/hbw/production/trigger.py b/hbw/production/trigger.py new file mode 100644 index 0000000..61ea829 --- /dev/null +++ b/hbw/production/trigger.py @@ -0,0 +1,134 @@ +# coding: utf-8 + +""" +Trigger related event weights. +""" + +from __future__ import annotations + +from columnflow.production import Producer, producer +from columnflow.util import maybe_import, InsertableDict +from columnflow.columnar_util import set_ak_column, flat_np_view, layout_ak_array + +np = maybe_import("numpy") +ak = maybe_import("awkward") + + +@producer( + uses={ + "Trigger.pt", "Trigger.eta", + }, + # produces in the init + # only run on mc + mc_only=True, + # function to determine the correction file + get_trigger_file=(lambda self, external_files: external_files.trigger_sf), + # function to determine the trigger weight config + # get_trigger_config=(lambda self: self.config_inst.x.trigger_sf_names), + weight_name="trigger_weight", +) +def trigger_weights( + self: Producer, + events: ak.Array, + trigger_mask: ak.Array | type(Ellipsis) = Ellipsis, + **kwargs, +) -> ak.Array: + """ + Creates trigger weights using the correctionlib. Requires an external file in the config under + ``trigger_sf``: + + .. 
code-block:: python + + cfg.x.external_files = DotDict.wrap({ + "trigger_sf": "/afs/cern.ch/work/m/mrieger/public/mirrors/jsonpog-integration-9ea86c4c/POG/MUO/2017_UL/trigger_z.json.gz", # noqa + }) + + *get_trigger_file* can be adapted in a subclass in case it is stored differently in the external + files. + + The name of the correction set and the year string for the weight evaluation should be given as + an auxiliary entry in the config: + + .. code-block:: python + + cfg.x.trigger_sf_names = ("NUM_TightRelIso_DEN_TightIDandIPCut", "2017_UL") + + *get_trigger_config* can be adapted in a subclass in case it is stored differently in the config. + + Optionally, a *trigger_mask* can be supplied to compute the scale factor weight based only on a + subset of triggers. + """ + # flat absolute eta and pt views + abs_eta = flat_np_view(abs(events.Trigger.eta[trigger_mask]), axis=1) + pt = flat_np_view(events.Trigger.pt[trigger_mask], axis=1) + + variable_map = { + "year": self.year, + "abseta": abs_eta, + "eta": abs_eta, + "pt": pt, + } + + # loop over systematics + for syst, postfix in [ + ("sf", ""), + ("systup", "_up"), + ("systdown", "_down"), + ]: + # get the inputs for this type of variation + variable_map_syst = { + **variable_map, + "scale_factors": "nominal" if syst == "sf" else syst, # syst key in 2022 + "ValType": syst, # syst key in 2017 + } + inputs = [variable_map_syst[inp.name] for inp in self.trigger_sf_corrector.inputs] + sf_flat = self.trigger_sf_corrector(*inputs) + + # add the correct layout to it + sf = layout_ak_array(sf_flat, events.Trigger.pt[trigger_mask]) + + # create the product over all triggers in one event + weight = ak.prod(sf, axis=1, mask_identity=False) + + # store it + events = set_ak_column(events, f"{self.weight_name}{postfix}", weight, value_type=np.float32) + + return events + + +@trigger_weights.requires +def trigger_weights_requires(self: Producer, reqs: dict) -> None: + if "external_files" in reqs: + return + + from 
columnflow.tasks.external import BundleExternalFiles + reqs["external_files"] = BundleExternalFiles.req(self.task) + + +@trigger_weights.setup +def trigger_weights_setup( + self: Producer, + reqs: dict, + inputs: dict, + reader_targets: InsertableDict, +) -> None: + bundle = reqs["external_files"] + + # create the corrector + import correctionlib + correctionlib.highlevel.Correction.__call__ = correctionlib.highlevel.Correction.evaluate + correction_set = correctionlib.CorrectionSet.from_string( + self.get_trigger_file(bundle.files), + ) + corrector_name, self.year = self.get_trigger_config() + self.trigger_sf_corrector = correction_set[corrector_name] + + # check versions + if self.supported_versions and self.trigger_sf_corrector.version not in self.supported_versions: + raise Exception(f"unsuppprted trigger sf corrector version {self.trigger_sf_corrector.version}") + + +@trigger_weights.init +def trigger_weights_init(self: Producer, **kwargs) -> None: + weight_name = self.weight_name + self.produces |= {weight_name, f"{weight_name}_up", f"{weight_name}_down"} diff --git a/hbw/scripts/synchronization.py b/hbw/scripts/synchronization.py index 5dd2a89..5d44e95 100644 --- a/hbw/scripts/synchronization.py +++ b/hbw/scripts/synchronization.py @@ -17,6 +17,7 @@ def get_columns_to_store(config_inst): + met_name = config_inst.x.met_name columns_to_store = { "event_nr": lambda events: events.event, "run_nr": lambda events: events.run, @@ -64,8 +65,8 @@ def get_columns_to_store(config_inst): "ak8jet0_eta": lambda events: events.HbbJet.eta[:, 0], "ak8jet0_phi": lambda events: events.HbbJet.phi[:, 0], "ak8jet0_msoftdrop": lambda events: events.HbbJet.msoftdrop[:, 0], - "met_pt": lambda events: events.MET.pt, - "met_phi": lambda events: events.MET.phi, + "met_pt": lambda events: events[met_name].pt, + "met_phi": lambda events: events[met_name].phi, "mc_weight": lambda events: events.mc_weight, "pu_weight": lambda events: events.pu_weight, "btag_weight": lambda events: 
events.btag_weight, diff --git a/hbw/util.py b/hbw/util.py index 67082e9..aae4bea 100644 --- a/hbw/util.py +++ b/hbw/util.py @@ -615,6 +615,26 @@ def my_producer(self, events): return f"{self.get()}.{btag_column}" +@deferred_column +def MET_COLUMN(self: ArrayFunction.DeferredColumn, func: ArrayFunction) -> Any | set[Any]: + """ + This helper allows adding the correct MET column based on the met_name configuration. + Requires the met_name aux to be set in the config. Example use case: + + .. code-block:: python + + @producer(uses={MET_COLUMN("pt")}) + def my_producer(self, events): + met_pt = events[self.config_inst.x.met_name].pt + ... + return events + """ + met_name = func.config_inst.x("met_name", None) + if not met_name: + raise Exception("the met_name has not been configured") + return f"{met_name}.{self.get()}" + + @deferred_column def IF_DATASET_HAS_LHE_WEIGHTS( self: ArrayFunction.DeferredColumn, From 508dc2fecd09cdfb2c5e76beb00f23e06010ecfb Mon Sep 17 00:00:00 2001 From: Mathis Frahm Date: Fri, 20 Dec 2024 16:23:17 +0100 Subject: [PATCH 04/29] loosen FatJet pt cut --- hbw/selection/jet.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/hbw/selection/jet.py b/hbw/selection/jet.py index b80558a..9141b92 100644 --- a/hbw/selection/jet.py +++ b/hbw/selection/jet.py @@ -294,9 +294,9 @@ def sl_boosted_jet_selection( # baseline fatjet selection fatjet_mask = ( - (events.FatJet.pt > 200) & + (events.FatJet.pt > 170) & (abs(events.FatJet.eta) < 2.4) & - (events.FatJet.jetId == 6) & + (events.FatJet.jetId >= 6) & (ak.all(events.FatJet.metric_table(electron) > 0.8, axis=2)) & (ak.all(events.FatJet.metric_table(muon) > 0.8, axis=2)) ) @@ -305,6 +305,7 @@ def sl_boosted_jet_selection( # H->bb fatjet definition based on Aachen analysis hbbJet_mask = ( fatjet_mask & + (events.FatJet.pt > 200) & (events.FatJet.msoftdrop > 30) & (events.FatJet.msoftdrop < 210) & (events.FatJet.subJetIdx1 >= 0) & From 8df61dc3f117763bf0cf29957520be3033364a26 Mon Sep 
17 00:00:00 2001 From: Mathis Frahm Date: Fri, 20 Dec 2024 16:24:27 +0100 Subject: [PATCH 05/29] redefine order of output paths --- hbw/analysis/create_analysis.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/hbw/analysis/create_analysis.py b/hbw/analysis/create_analysis.py index 5879523..71ac05a 100644 --- a/hbw/analysis/create_analysis.py +++ b/hbw/analysis/create_analysis.py @@ -113,7 +113,7 @@ def analysis_factory(configs: od.UniqueObjectIndex): ) else: raise ValueError( - f"Campaign used for {config_name} is not yet initialized; to initialize, run: \n", + f"Campaign used for {config_name} is not yet initialized; to initialize, run: \n" f"law run {cpn_task.task_family} --config {config_name} --remove-output 0,a,y", ) # cpn_task.run() @@ -176,7 +176,7 @@ def analysis_factory(configs: od.UniqueObjectIndex): software_tasks = ("cf.BundleBashSandbox", "cf.BundleCMSSWSandbox", "cf.BundleSoftware") shareable_analysis_tasks = ("cf.CalibrateEvents", "cf.GetDatasetLFNs") limited_config_shared_tasks = ("cf.CalibrateEvents", "cf.GetDatasetLFNs", "cf.SelectEvents", "cf.ReduceEvents") - skip_new_version_schema = ("cf.CalibrateEvents", "cf.GetDatasetLFNs") + skip_new_version_schema = () known_parts = ( # from cf "analysis", "task_family", "config", "configs", "dataset", "shift", "version", @@ -237,11 +237,13 @@ def reorganize_parts(task, store_parts): "analysis", "calibrator", "calibrators", "calib", "selector", "sel", - "producer", "producers", "prod", + "config", "configs", + "producers", "prod", "ml_data", "ml_model", "ml_models", "weightprod", "inf_model", "task_family", - "config", "dataset", "shift", + "calibrator", "producer", + "shift", "dataset", ] parts_order_end = ["version"] From f621b9cfe62d41a4d6cf373f43afc9e13dc83101 Mon Sep 17 00:00:00 2001 From: Mathis Frahm Date: Fri, 20 Dec 2024 16:55:16 +0100 Subject: [PATCH 06/29] update default calibrators --- hbw/config/defaults_and_groups.py | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/hbw/config/defaults_and_groups.py b/hbw/config/defaults_and_groups.py index 4bb544b..bb59762 100644 --- a/hbw/config/defaults_and_groups.py +++ b/hbw/config/defaults_and_groups.py @@ -8,7 +8,7 @@ def default_calibrator(container): - return "with_b_reg" + return ["with_b_reg", "fatjet"] def default_selector(container): From 252938631010cbb0fb11ae4a1c4cb5e14698edd9 Mon Sep 17 00:00:00 2001 From: Mathis Frahm Date: Fri, 20 Dec 2024 16:55:35 +0100 Subject: [PATCH 07/29] add nonisolated electron triggers --- hbw/config/trigger.py | 65 ++++++++++++++++++++++++++++++++++++------- 1 file changed, 55 insertions(+), 10 deletions(-) diff --git a/hbw/config/trigger.py b/hbw/config/trigger.py index 4ebe5b2..65e7a76 100644 --- a/hbw/config/trigger.py +++ b/hbw/config/trigger.py @@ -249,6 +249,11 @@ def add_triggers(config: od.Config) -> od.UniqueObjectIndex[Trigger]: Electron Trigger: https://twiki.cern.ch/twiki/bin/view/CMS/EgHLTRunIIISummary Muon Trigger: https://twiki.cern.ch/twiki/bin/view/CMS/MuonHLT2022 + trigger_bits are obtained from the TrigObj.filterBits docstring, by running some task and + starting an embed shell, e.g. via: + law run cf.SelectEvents --selector check_columns + events.TrigObj.filterBits? + Auxiliary data in use: - "channels": list of channels during selection that the trigger applies to, e.g. 
["e", "ee", "emu", "mue"] (TODO: use this in SL aswell) @@ -264,7 +269,7 @@ def add_triggers(config: od.Config) -> od.UniqueObjectIndex[Trigger]: legs=[ TriggerLeg( pdg_id=13, - min_pt=25.0, + min_pt=24.0, # filter names: # hltL3crIsoL1sSingleMu22L1f0L2f10QL3f24QL3trkIsoFiltered0p08 (1mu + Iso) trigger_bits=2**1 + 2**3, # Iso (bit 1) + 1mu (bit 3) @@ -282,14 +287,14 @@ def add_triggers(config: od.Config) -> od.UniqueObjectIndex[Trigger]: legs=[ TriggerLeg( pdg_id=13, - min_pt=18.0, + min_pt=17.0, # filter names: # TODO trigger_bits=2**0 + 2**4, # TrkIsoVVL (bit 0) + 2mu (bit 4) ), TriggerLeg( pdg_id=13, - min_pt=9.0, + min_pt=8.0, # filter names: # TODO trigger_bits=2**0 + 2**4, # TrkIsoVVL (bit 0) + 2mu (bit 4) + DZ_Mass3p8 (bit ?) @@ -307,7 +312,7 @@ def add_triggers(config: od.Config) -> od.UniqueObjectIndex[Trigger]: legs=[ TriggerLeg( pdg_id=11, - min_pt=31.0, + min_pt=30.0, # filter names: # hltEle30WPTightGsfTrackIsoFilter trigger_bits=2**1, # 1e (WPTight) (bit 1) @@ -326,14 +331,14 @@ def add_triggers(config: od.Config) -> od.UniqueObjectIndex[Trigger]: legs=[ TriggerLeg( pdg_id=11, - min_pt=24.0, + min_pt=23.0, # filter names: # TODO trigger_bits=2**4 + 2**0, # 2e (bit 4) + CaloIdL_TrackIdL_IsoVL (bit 0) ), TriggerLeg( pdg_id=11, - min_pt=13.0, + min_pt=12.0, # filter names: # TODO trigger_bits=2**4 + 2**0, # 2e (bit 4) + CaloIdL_TrackIdL_IsoVL (bit 0) @@ -345,20 +350,58 @@ def add_triggers(config: od.Config) -> od.UniqueObjectIndex[Trigger]: }, tags={"di_trigger", "di_e"}, ) + single_e50_noniso = Trigger( + name="HLT_Ele50_CaloIdVT_GsfTrkIdT_PFJet165", + id=203, + legs=[ + TriggerLeg( + pdg_id=11, + min_pt=50.0, + # filter names: TODO + trigger_bits=2**11 + 2**12, # CaloIdVT_GsfTrkIdT (bit 11) + PFJet (bit 12) + ), + ], + aux={ + "channels": ["e", "ee", "emu", "mue", "mixed"], + "data_stream": "data_egamma" if config.x.run == 3 else "data_e", + } + ) + di_e33_noniso = Trigger( + name="HLT_DoubleEle33_CaloIdL_MW", + id=204, + legs=[ + TriggerLeg( + 
pdg_id=11, + min_pt=33.0, + # filter names: TODO + trigger_bits=2**4, # 2e (bit 4) + CaloIdL_MW (no bit?) + ), + TriggerLeg( + pdg_id=11, + min_pt=33.0, + # filter names: TODO + trigger_bits=2**4, # 2e (bit 4) + CaloIdL_MW (no bit?) + ), + ], + aux={ + "channels": ["ee"], + "data_stream": "data_egamma" if config.x.run == 3 else "data_e", + } + ) mixed_mue = Trigger( name="HLT_Mu23_TrkIsoVVL_Ele12_CaloIdL_TrackIdL_IsoVL", id=301, legs=[ TriggerLeg( pdg_id=13, - min_pt=24.0, + min_pt=23.0, # filter names: # TODO trigger_bits=2**5 + 2**0, # 1e-1mu (bit 5) + TrkIsoVVL (bit 0) ), TriggerLeg( pdg_id=11, - min_pt=13.0, + min_pt=12.0, # filter names: # TODO trigger_bits=2**5 + 2**0, # 1mu-1e (bit 5) + CaloIdL_TrackIdL_IsoVL (bit 0) @@ -376,14 +419,14 @@ def add_triggers(config: od.Config) -> od.UniqueObjectIndex[Trigger]: legs=[ TriggerLeg( pdg_id=13, - min_pt=9.0, + min_pt=8.0, # filter names: # TODO trigger_bits=2**5 + 2**0, # 1mu-1e (bit 5) + TrkIsoVVL (bit 0) ), TriggerLeg( pdg_id=11, - min_pt=24.0, + min_pt=23.0, # filter names: # TODO trigger_bits=2**5 + 2**0, # 1mu-1e (bit 5) + CaloIdL_TrackIdL_IsoVL (bit 0) @@ -400,8 +443,10 @@ def add_triggers(config: od.Config) -> od.UniqueObjectIndex[Trigger]: if config.has_tag("is_dl"): config.x.triggers = od.UniqueObjectIndex(Trigger, [ single_e, + single_e50_noniso, single_mu, di_e, + di_e33_noniso, di_mu, mixed_mue, mixed_emu, From 5932aaa61b80e8c6e737a3123693c84d6f061b5f Mon Sep 17 00:00:00 2001 From: Mathis Frahm Date: Fri, 20 Dec 2024 16:56:10 +0100 Subject: [PATCH 08/29] update columnflow --- hbw/config/trigger.py | 4 ++-- modules/columnflow | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/hbw/config/trigger.py b/hbw/config/trigger.py index 65e7a76..2b906ab 100644 --- a/hbw/config/trigger.py +++ b/hbw/config/trigger.py @@ -364,7 +364,7 @@ def add_triggers(config: od.Config) -> od.UniqueObjectIndex[Trigger]: aux={ "channels": ["e", "ee", "emu", "mue", "mixed"], "data_stream": "data_egamma" if 
config.x.run == 3 else "data_e", - } + }, ) di_e33_noniso = Trigger( name="HLT_DoubleEle33_CaloIdL_MW", @@ -386,7 +386,7 @@ def add_triggers(config: od.Config) -> od.UniqueObjectIndex[Trigger]: aux={ "channels": ["ee"], "data_stream": "data_egamma" if config.x.run == 3 else "data_e", - } + }, ) mixed_mue = Trigger( name="HLT_Mu23_TrkIsoVVL_Ele12_CaloIdL_TrackIdL_IsoVL", diff --git a/modules/columnflow b/modules/columnflow index 312bd05..c4139ca 160000 --- a/modules/columnflow +++ b/modules/columnflow @@ -1 +1 @@ -Subproject commit 312bd05015de0f6edfea656353cd60ff02d8c608 +Subproject commit c4139cabf44246492b4afa8217750612d401d3d3 From 28738c131cb538f8ce4b3b61fa1ef1bae481339a Mon Sep 17 00:00:00 2001 From: Mathis Frahm Date: Fri, 20 Dec 2024 17:17:14 +0100 Subject: [PATCH 09/29] use uhh campaigns per default --- hbw/tasks/campaigns.py | 19 +++---------------- 1 file changed, 3 insertions(+), 16 deletions(-) diff --git a/hbw/tasks/campaigns.py b/hbw/tasks/campaigns.py index 71cc3d1..b31d5a9 100644 --- a/hbw/tasks/campaigns.py +++ b/hbw/tasks/campaigns.py @@ -23,14 +23,14 @@ "cmsdb.campaigns.run2_2017_nano_v9": "campaign_run2_2017_nano_v9", }, "c22pre": { + "cmsdb.campaigns.run3_2022_preEE_nano_uhh_v12": "campaign_run3_2022_preEE_nano_uhh_v12", "cmsdb.campaigns.run3_2022_preEE_nano_v12": "campaign_run3_2022_preEE_nano_v12", "cmsdb.campaigns.run3_2022_preEE_nano_v13": "campaign_run3_2022_preEE_nano_v13", - "cmsdb.campaigns.run3_2022_preEE_nano_uhh_v12": "campaign_run3_2022_preEE_nano_uhh_v12", }, "c22post": { + "cmsdb.campaigns.run3_2022_postEE_nano_uhh_v12": "campaign_run3_2022_postEE_nano_uhh_v12", "cmsdb.campaigns.run3_2022_postEE_nano_v12": "campaign_run3_2022_postEE_nano_v12", "cmsdb.campaigns.run3_2022_postEE_nano_v13": "campaign_run3_2022_postEE_nano_v13", - "cmsdb.campaigns.run3_2022_postEE_nano_uhh_v12": "campaign_run3_2022_postEE_nano_uhh_v12", }, } @@ -69,25 +69,12 @@ def campaign_insts(self): for mod, campaign in self.campaigns.items() ] - 
dataset_from_uhh_identifier = { - # TODO: use DY from uhh campaign - # "dy_m10to50_amcatnlo", - # "dy_m4to10_amcatnlo", - "ttw_", - "ttz_", - } - def get_dataset_prio(self, dataset_name, campaign): """ If dataset should be overwritten from this campaign, return True. Otherwise, return False. + (not currently used, but could be used to prioritize e.g. the central tt dataset (less stats)) """ - if "uhh" in campaign.name and any( - dataset_identifier in dataset_name - for dataset_identifier in self.dataset_from_uhh_identifier - ): - return True - return False def output(self): From 2e529678bcbd61aaf578c2c3eba2e94e99e28e04 Mon Sep 17 00:00:00 2001 From: Mathis Frahm Date: Fri, 20 Dec 2024 19:24:51 +0100 Subject: [PATCH 10/29] prioritize central data datasets --- hbw/tasks/campaigns.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/hbw/tasks/campaigns.py b/hbw/tasks/campaigns.py index b31d5a9..f000a45 100644 --- a/hbw/tasks/campaigns.py +++ b/hbw/tasks/campaigns.py @@ -75,6 +75,11 @@ def get_dataset_prio(self, dataset_name, campaign): Otherwise, return False. (not currently used, but could be used to prioritize e.g. 
the central tt dataset (less stats)) """ + if "v12" in campaign.name and "uhh" not in campaign.name: + # Take data from the central v12 campaign + if "data" in dataset_name: + return True + return False def output(self): From 4eb31b7de6b3dabfdb60d7abee8a0395a84b39e6 Mon Sep 17 00:00:00 2001 From: Mathis Frahm Date: Fri, 20 Dec 2024 19:25:09 +0100 Subject: [PATCH 11/29] reduce init overhead --- hbw/calibration/default.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/hbw/calibration/default.py b/hbw/calibration/default.py index 331ade8..928de15 100644 --- a/hbw/calibration/default.py +++ b/hbw/calibration/default.py @@ -42,6 +42,10 @@ def fatjet(self: Calibrator, events: ak.Array, **kwargs) -> ak.Array: @fatjet.init def fatjet_init(self: Calibrator) -> None: + if not self.task or self.task.task_family != "cf.CalibrateEvents": + # init only required for task itself + return + if not getattr(self, "dataset_inst", None): return @@ -92,6 +96,10 @@ def jet_base(self: Calibrator, events: ak.Array, **kwargs) -> ak.Array: @jet_base.init def jet_base_init(self: Calibrator) -> None: + if not self.task or self.task.task_family != "cf.CalibrateEvents": + # init only required for task itself + return + if not getattr(self, "dataset_inst", None): return @@ -122,7 +130,10 @@ def jet_base_init(self: Calibrator) -> None: # version of jer that uses the first random number from deterministic_seeds deterministic_jer_cls = jer.derive( "deterministic_jer", - cls_dict={"deterministic_seed_index": 0, "met_name": met_name}, + cls_dict={ + "deterministic_seed_index": 0, + "met_name": met_name, + }, ) self.calibrators.append(deterministic_jer_cls) From 11724049e82ece05204d416d6750b4b385de7f31 Mon Sep 17 00:00:00 2001 From: Mathis Frahm Date: Tue, 7 Jan 2025 08:59:12 +0100 Subject: [PATCH 12/29] minor fixes --- hbw/ml/base.py | 1 + hbw/production/neutrino.py | 2 +- hbw/selection/dl_remastered.py | 7 ++++++- 3 files changed, 8 insertions(+), 2 deletions(-) diff 
--git a/hbw/ml/base.py b/hbw/ml/base.py index 9a22c72..dfb134a 100644 --- a/hbw/ml/base.py +++ b/hbw/ml/base.py @@ -110,6 +110,7 @@ def __init__( for param in self.settings_parameters: # overwrite the default value with the value from the parameters + # TODO: this is quite dangerous, as it overwrites a class attribute instead of an instance attribute setattr(self, param, self.parameters.get(param, getattr(self, param))) # cast the ml parameters to the correct types if necessary diff --git a/hbw/production/neutrino.py b/hbw/production/neutrino.py index 0483100..a89dd4f 100644 --- a/hbw/production/neutrino.py +++ b/hbw/production/neutrino.py @@ -167,7 +167,7 @@ def top_reconstruction(self: Producer, events: ak.Array, **kwargs) -> ak.Array: # replace nan, none, and inf values with EMPTY_FLOAT col = route.apply(events) col = ak.fill_none(ak.nan_to_none(route.apply(events)), EMPTY_FLOAT) - col = ak.where(np.isinf(col), EMPTY_FLOAT, col) + col = ak.where(~np.isfinite(col), EMPTY_FLOAT, col) events = set_ak_column(events, route.string_column, col) diff --git a/hbw/selection/dl_remastered.py b/hbw/selection/dl_remastered.py index 89afc61..486038d 100644 --- a/hbw/selection/dl_remastered.py +++ b/hbw/selection/dl_remastered.py @@ -106,7 +106,12 @@ def dl_lepton_selection( dilepton = ak.pad_none(lepton, 2) dilepton = dilepton[:, 0] + dilepton[:, 1] - events = set_ak_column(events, "mll", ak.fill_none(dilepton.mass, EMPTY_FLOAT), value_type=np.float32) + events = set_ak_column( + events, + "mll", + ak.fill_none(ak.nan_to_none(dilepton.mass), EMPTY_FLOAT), + value_type=np.float32, + ) lepton_results.steps["DiLeptonMass81"] = ak.fill_none(dilepton.mass <= m_z.nominal - 10, False) # lepton channel masks lepton_results.steps["Lep_mm"] = mm_mask = ( From 5dada6849f1c53d0534dc91c3a05a4257d542fd7 Mon Sep 17 00:00:00 2001 From: Mathis Frahm Date: Tue, 7 Jan 2025 09:00:09 +0100 Subject: [PATCH 13/29] fix infinite values in MET during calibration --- hbw/calibration/default.py | 26 
+++++++++++++++++++++++++- hbw/selection/stats.py | 8 ++++++-- hbw/util.py | 11 +++++++++++ 3 files changed, 42 insertions(+), 3 deletions(-) diff --git a/hbw/calibration/default.py b/hbw/calibration/default.py index 928de15..1a0e793 100644 --- a/hbw/calibration/default.py +++ b/hbw/calibration/default.py @@ -10,11 +10,13 @@ from columnflow.calibration.cms.met import met_phi from columnflow.calibration.cms.jets import jec, jer from columnflow.production.cms.seeds import deterministic_seeds -from columnflow.util import maybe_import +from columnflow.util import maybe_import, try_float +from columnflow.columnar_util import set_ak_column, EMPTY_FLOAT from hbw.calibration.jet import bjet_regression ak = maybe_import("awkward") +np = maybe_import("numpy") logger = law.logger.get_logger(__name__) @@ -24,6 +26,9 @@ # jec uncertainty_sources: set to None to use config default jec_sources=["Total"], version=1, + # add dummy produces such that this calibrator will always be run when requested + # (temporary workaround until init's are only run as often as necessary) + produces={"FatJet.pt"}, ) def fatjet(self: Calibrator, events: ak.Array, **kwargs) -> ak.Array: """ @@ -87,10 +92,29 @@ def fatjet_init(self: Calibrator) -> None: def jet_base(self: Calibrator, events: ak.Array, **kwargs) -> ak.Array: events = self[deterministic_seeds](events, **kwargs) + # keep a copy of non-propagated MET to replace infinite values + pre_calib_met = events[self.config_inst.x.met_name] + logger.info(f"Running calibrators '{[calib.cls_name for calib in self.calibrators]}' (in that order)") for calibrator_inst in self.calibrators: events = self[calibrator_inst](events, **kwargs) + # workaround for infinite values in MET pt/phi + for route in self.produced_columns: + col = route.string_column + m = ~np.isfinite(route.apply(events)) + if ak.any(m): + # replace infinite values + replace_value = EMPTY_FLOAT + if self.config_inst.x.met_name in col: + # use pre-calibrated MET to replace infinite values 
of MET pt/phi + replace_value = pre_calib_met[col.split(".")[-1].split("_")[0]] + logger.info( + f"Found infinite values in {col}; Values will be replaced with " + f"{replace_value if try_float(replace_value) else replace_value[m]}" + ) + events = set_ak_column(events, col, ak.where(m, replace_value, route.apply(events))) + return events diff --git a/hbw/selection/stats.py b/hbw/selection/stats.py index af31f37..17d96cf 100644 --- a/hbw/selection/stats.py +++ b/hbw/selection/stats.py @@ -11,7 +11,7 @@ from columnflow.columnar_util import optional_column as optional from columnflow.util import maybe_import -from hbw.util import has_tag +from hbw.util import has_tag, RAW_MET_COLUMN np = maybe_import("numpy") ak = maybe_import("awkward") @@ -50,7 +50,7 @@ def hbw_selection_step_stats( @selector( - uses={increment_stats, event_weights_to_normalize}, + uses={increment_stats, event_weights_to_normalize, RAW_MET_COLUMN("pt")}, ) def hbw_increment_stats( self: Selector, @@ -79,6 +79,10 @@ def hbw_increment_stats( weight_map["num_negative_weights"] = (events.mc_weight < 0) weight_map["num_pu_0"] = (events.pu_weight == 0) weight_map["num_pu_100"] = (events.pu_weight >= 100) + + raw_puppi_met = events[self.config_inst.x.raw_met_name] + weight_map["num_raw_met_isinf"] = (~np.isfinite(raw_puppi_met.pt)) + weight_map["num_raw_met_isinf_selected"] = (~np.isfinite(raw_puppi_met.pt) & event_mask) # "sum" operations weight_map["sum_mc_weight"] = events.mc_weight # weights of all events weight_map["sum_mc_weight_selected"] = (events.mc_weight, event_mask) # weights of selected events diff --git a/hbw/util.py b/hbw/util.py index aae4bea..261ed3f 100644 --- a/hbw/util.py +++ b/hbw/util.py @@ -635,6 +635,17 @@ def my_producer(self, events): return f"{met_name}.{self.get()}" +@deferred_column +def RAW_MET_COLUMN(self: ArrayFunction.DeferredColumn, func: ArrayFunction) -> Any | set[Any]: + """ + Similar to MET_COLUMN, see MET_COLUMN for more information. 
+ """ + raw_met_name = func.config_inst.x("raw_met_name", None) + if not raw_met_name: + raise Exception("the raw_met_name has not been configured") + return f"{raw_met_name}.{self.get()}" + + @deferred_column def IF_DATASET_HAS_LHE_WEIGHTS( self: ArrayFunction.DeferredColumn, From 6f43549a3d78a4ed5669aa2f2d78b36b845db532 Mon Sep 17 00:00:00 2001 From: Mathis Frahm Date: Tue, 7 Jan 2025 09:47:09 +0100 Subject: [PATCH 14/29] remove broken files from uhh 22postEE campaign --- hbw/config/datasets.py | 3 +++ hbw/tasks/campaigns.py | 33 +++++++++++++++++++++++++++++---- 2 files changed, 32 insertions(+), 4 deletions(-) diff --git a/hbw/config/datasets.py b/hbw/config/datasets.py index 6b3eaa6..300fda3 100644 --- a/hbw/config/datasets.py +++ b/hbw/config/datasets.py @@ -570,10 +570,13 @@ def get_dataset_lfns_uhh( fs=f"wlcg_fs_{cpn_name}", ) + broken_files = dataset_inst[shift_inst.name].get_aux("broken_files", []) + print(broken_files) # loop though files and interpret paths as lfns return [ lfn_base.child(basename, type="f").path for basename in lfn_base.listdir(pattern="*.root") + if lfn_base.child(basename, type="f").path not in broken_files ] if any("uhh" in cpn_name for cpn_name in cfg.campaign.x("campaigns", [])): diff --git a/hbw/tasks/campaigns.py b/hbw/tasks/campaigns.py index f000a45..ab3a2d7 100644 --- a/hbw/tasks/campaigns.py +++ b/hbw/tasks/campaigns.py @@ -62,12 +62,37 @@ def campaigns(self): raise ValueError(f"Unknown config {self.config}") return campaign_map[self.config] + def modify_campaign(self, campaign_inst): + """ + Modify the campaign instance, e.g. by adding datasets or changing dataset properties. 
+ """ + if campaign_inst.name == "run3_2022_postEE_nano_uhh_v12": + # remove broken files + dy_m10to50_nominal = campaign_inst.get_dataset("dy_m10to50_amcatnlo").info["nominal"] + dy_m10to50_nominal.x.broken_files = [ + # missing scale weights + "/store/mc/Run3Summer22EEMiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-10to50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_postEE_v6-v2/0/4B7063C8-D7B7-A45F-0B56-817AECEAFB43.root", # noqa: E501 + ] + dy_m10to50_nominal.n_files = dy_m10to50_nominal.n_files - 1 + dy_m10to50_nominal.n_events = dy_m10to50_nominal.n_events - 1651814 + + dy_m50toinf_nominal = campaign_inst.get_dataset("dy_m50toinf_amcatnlo").info["nominal"] + dy_m50toinf_nominal.x.broken_files = [ + # broken file + "/store/mc/Run3Summer22EEMiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_postEE_v6_ext1-v1/0/3FE6B8C0-4234-4EE4-5BEA-E232539E0D85.root", # noqa: E501 + ] + dy_m50toinf_nominal.n_files = dy_m50toinf_nominal.n_files - 1 + dy_m50toinf_nominal.n_events = -1 + # dy_m50toinf.x.n_events = dy_m50toinf.x.n_events - ???? 
+ @cached_property def campaign_insts(self): - return [ - getattr(importlib.import_module(mod), campaign).copy() - for mod, campaign in self.campaigns.items() - ] + campaign_insts = [] + for mod, campaign in self.campaigns.items(): + campaign_inst = getattr(importlib.import_module(mod), campaign).copy() + self.modify_campaign(campaign_inst) + campaign_insts.append(campaign_inst) + return campaign_insts def get_dataset_prio(self, dataset_name, campaign): """ From e787673cbd024a7b4e83099723937fc8ecc0d78d Mon Sep 17 00:00:00 2001 From: Mathis Frahm Date: Thu, 9 Jan 2025 10:39:07 +0100 Subject: [PATCH 15/29] fix non-unique processes issue and add broken files --- hbw/analysis/create_analysis.py | 21 ++--- hbw/analysis/processes.py | 83 ++++++++++++++++ hbw/config/config_run2.py | 17 ++++ hbw/config/datasets.py | 8 +- hbw/config/processes.py | 162 +++++--------------------------- hbw/tasks/campaigns.py | 128 +++++++++++++++++++++---- 6 files changed, 245 insertions(+), 174 deletions(-) create mode 100644 hbw/analysis/processes.py diff --git a/hbw/analysis/create_analysis.py b/hbw/analysis/create_analysis.py index 71ac05a..21e5aba 100644 --- a/hbw/analysis/create_analysis.py +++ b/hbw/analysis/create_analysis.py @@ -141,34 +141,31 @@ def analysis_factory(configs: od.UniqueObjectIndex): # 2017 add_lazy_config( - # { - # "cmsdb.campaigns.run2_2017_nano_v9": "campaign_run2_2017_nano_v9", - # }, "c17", 1700, ) # 2022 preEE add_lazy_config( - # { - # "cmsdb.campaigns.run3_2022_preEE_nano_v12": "campaign_run3_2022_preEE_nano_v12", - # "cmsdb.campaigns.run3_2022_preEE_nano_v13": "campaign_run3_2022_preEE_nano_v13", - # }, "c22pre", 2200, ) # 2022 postEE add_lazy_config( - # { - # "cmsdb.campaigns.run3_2022_postEE_nano_v12": "campaign_run3_2022_postEE_nano_v12", - # "cmsdb.campaigns.run3_2022_postEE_nano_v13": "campaign_run3_2022_postEE_nano_v13", - # "cmsdb.campaigns.run3_2022_postEE_nano_uhh_v12": "campaign_run3_2022_postEE_nano_uhh_v12", - # }, "c22post", 2210, ) + 
add_lazy_config( + "c22pre_das", + 2201, + ) + add_lazy_config( + "c22post_das", + 2211, + ) + # # modify store_parts # diff --git a/hbw/analysis/processes.py b/hbw/analysis/processes.py new file mode 100644 index 0000000..2ece609 --- /dev/null +++ b/hbw/analysis/processes.py @@ -0,0 +1,83 @@ +# coding: utf-8 + +""" +Creation and modification of processes in the HH -> bbWW analysis. +NOTE: it is crucial to modify processes before the campaign is created. Otherwise, +the changes will not be reflected in the campaign and there will be inconsistencies. +""" + +# import order as od + + +from hbw.config.processes import create_parent_process +from hbw.config.styling import color_palette +from cmsdb.util import add_decay_process + + +def modify_cmsdb_processes(): + from cmsdb.processes import ( + qcd_mu, qcd_em, qcd_bctoe, + tt, ttv, st, w_lnu, vv, h, + dy, dy_m4to10, dy_m10to50, dy_m50toinf, dy_m50toinf_0j, dy_m50toinf_1j, dy_m50toinf_2j, + ) + + qcd_mu.label = "QCD Muon enriched" + qcd_ele = create_parent_process( + [qcd_em, qcd_bctoe], + name="qcd_ele", + id=31199, + label="QCD Electron enriched", + ) + + v_lep = create_parent_process( + [w_lnu, dy], + name="v_lep", + id=64575573, # random number + label="W and DY", + ) + + t_bkg = create_parent_process( + [st, tt, ttv], + name="t_bkg", + id=97842611, # random number + label="tt + st", + ) + + background = create_parent_process( # noqa: F841 + [t_bkg, v_lep, vv, w_lnu, h, qcd_ele, qcd_mu], + name="background", + id=99999, + label="background", + color=color_palette["blue"], + ) + + decay_map = { + "lf": { + "name": "lf", + "id": 50, + "label": "(lf)", + "br": -1, + }, + "hf": { + "name": "hf", + "id": 60, + "label": "(hf)", + "br": -1, + }, + } + + for dy_proc_inst in ( + dy, dy_m4to10, dy_m10to50, dy_m50toinf, dy_m50toinf_0j, dy_m50toinf_1j, dy_m50toinf_2j, + ): + add_production_mode_parent = dy_proc_inst.name != "dy" + for flavour in ("hf", "lf"): + # the 'add_decay_process' function helps us to create all 
parent-daughter relationships + add_decay_process( + dy_proc_inst, + decay_map[flavour], + add_production_mode_parent=add_production_mode_parent, + name_func=lambda parent_name, decay_name: f"{parent_name}_{decay_name}", + label_func=lambda parent_label, decay_label: f"{parent_label} {decay_label}", + xsecs=None, + aux={"flavour": flavour}, + ) diff --git a/hbw/config/config_run2.py b/hbw/config/config_run2.py index d5718e1..d89932a 100644 --- a/hbw/config/config_run2.py +++ b/hbw/config/config_run2.py @@ -169,6 +169,17 @@ def if_era( "lumi_13TeV_2022": 0.01j, "lumi_13TeV_correlated": 0.006j, }) + elif year == 2023: + if campaign.has_tag("preBPix"): + cfg.x.luminosity = Number(17.794, { + "lumi_13TeV_2023": 0.01j, + "lumi_13TeV_correlated": 0.006j, + }) + elif campaign.has_tag("postBPix"): + cfg.x.luminosity = Number(9.451, { + "lumi_13TeV_2023": 0.01j, + "lumi_13TeV_correlated": 0.006j, + }) else: raise NotImplementedError(f"Luminosity for year {year} is not defined.") @@ -738,4 +749,10 @@ def add_external(name, value): from hbw.config.sl_res import configure_sl_res configure_sl_res(cfg) + # sanity check: sometimes the process is not the same as the one in the dataset + p1 = cfg.get_process("dy_m50toinf") + p2 = campaign.get_dataset("dy_m50toinf_amcatnlo").processes.get_first() + if p1 != p2: + raise Exception(f"Processes are not the same: {repr(p1)} != {repr(p2)}") + return cfg diff --git a/hbw/config/datasets.py b/hbw/config/datasets.py index 300fda3..a4041f1 100644 --- a/hbw/config/datasets.py +++ b/hbw/config/datasets.py @@ -571,13 +571,13 @@ def get_dataset_lfns_uhh( ) broken_files = dataset_inst[shift_inst.name].get_aux("broken_files", []) - print(broken_files) + # loop though files and interpret paths as lfns - return [ + lfns = [ lfn_base.child(basename, type="f").path for basename in lfn_base.listdir(pattern="*.root") - if lfn_base.child(basename, type="f").path not in broken_files ] + return [lfn for lfn in lfns if lfn not in broken_files] if any("uhh" 
in cpn_name for cpn_name in cfg.campaign.x("campaigns", [])): # define the lfn retrieval function @@ -586,6 +586,6 @@ def get_dataset_lfns_uhh( # define custom remote fs's to look at cfg.x.get_dataset_lfns_remote_fs = lambda dataset_inst: ( None if "uhh" not in dataset_inst.x("campaign", "") else [ - f"local_fs_{dataset_inst.x.campaign}", f"wlcg_fs_{dataset_inst.x.campaign}", + f"local_fs_{dataset_inst.x.campaign}", ]) diff --git a/hbw/config/processes.py b/hbw/config/processes.py index ac624a6..4659d86 100644 --- a/hbw/config/processes.py +++ b/hbw/config/processes.py @@ -4,18 +4,14 @@ Configuration of the Run 2 HH -> bbWW processes. """ -import cmsdb import order as od from scinum import Number -from cmsdb.util import add_decay_process from columnflow.util import DotDict -from hbw.config.styling import color_palette - -def add_parent_process(config: od.Config, child_procs: list[od.Process], **kwargs): +def create_parent_process(child_proces: list[od.Process], **kwargs): """ Helper function to create processes from multiple processes *child_procs* """ @@ -28,18 +24,26 @@ def add_parent_process(config: od.Config, child_procs: list[od.Process], **kwarg if "xsecs" not in kwargs: # set the xsec as sum of all xsecs when the ecm key exists for all processes - valid_ecms = set.intersection(*[set(proc.xsecs.keys()) for proc in child_procs]) - proc_kwargs["xsecs"] = {ecm: sum([proc.get_xsec(ecm) for proc in child_procs]) for ecm in valid_ecms} + valid_ecms = set.intersection(*[set(proc.xsecs.keys()) for proc in child_proces]) + proc_kwargs["xsecs"] = {ecm: sum([proc.get_xsec(ecm) for proc in child_proces]) for ecm in valid_ecms} - parent_process = config.add_process(**proc_kwargs) + parent_process = od.Process(**proc_kwargs) # add child processes to parent - for child_proc in child_procs: + for child_proc in child_proces: parent_process.add_process(child_proc) return parent_process +def add_parent_process(config: od.Config, child_procs: list[od.Process], **kwargs): + """ + 
Helper function to create a parent process and add it to the config instance + """ + parent_process = config.add_process(create_parent_process(child_procs, **kwargs)) + return parent_process + + def add_dummy_xsecs(config: od.Config, dummy_xsec: float = 0.1): """ Helper that adds some dummy xsecs when missing for the campaign's correspondign ecm """ ecm = config.campaign.ecm @@ -65,142 +69,20 @@ def add_dummy_xsecs(config: od.Config, dummy_xsec: float = 0.1): def configure_hbw_processes(config: od.Config): + """ + Function to modify the processes present in the config instance. + NOTE: we should not rely on modifying process instances themselves as part of the config initialization. + """ # add main HH process - config.add_process(cmsdb.processes.hh_ggf.copy()) + config.add_process(config.x.procs.n.hh_ggf) + + config.add_process(config.x.procs.n.t_bkg) + config.add_process(config.x.procs.n.v_lep) + config.add_process(config.x.procs.n.background) # Set dummy xsec for all processes if missing add_dummy_xsecs(config) - # QCD process customization - qcd_mu = config.get_process("qcd_mu", default=None) - if qcd_mu: - qcd_mu = "QCD Muon enriched" - - # add custom qcd_ele process - qcd_em = config.get_process("qcd_em", default=None) - qcd_bctoe = config.get_process("qcd_bctoe", default=None) - if qcd_em and qcd_bctoe: - qcd_ele = add_parent_process( # noqa - config, - [qcd_em, qcd_bctoe], - name="qcd_ele", - id=31199, - label="QCD Electron enriched", - ) - elif qcd_em: - qcd_ele = add_parent_process( # noqa - config, - [qcd_em], - name="qcd_ele", - id=31199, - label="QCD Electron enriched", - ) - - # custom v_lep process for ML Training, combining W+DY - w_lnu = config.get_process("w_lnu", default=None) - dy = config.get_process("dy", default=None) - if w_lnu and dy: - v_lep = add_parent_process( # noqa - config, - [w_lnu, dy], - name="v_lep", - id=64575573, # random number - label="W and DY", - ) - - # Custom t_bkg process for ML Training, combining tt+st - st = 
config.get_process("st", default=None) - tt = config.get_process("tt", default=None) - if st and tt: - t_bkg = add_parent_process( # noqa - config, - [st, tt], - name="t_bkg", - id=97842611, # random number - label="tt + st", - ) - - if config.has_tag("is_dl") and config.has_tag("is_nonresonant") and config.x.run == 2: - # Custom signal process for ML Training, combining multiple kl signal samples - # NOTE: only built for run 2 because kl variations are missing in run 3 - signal_processes = [ - config.get_process(f"hh_ggf_hbb_hvv2l2nu_kl{kl}_kt1", deep=True) - for kl in [0, 1, "2p45"] - ] - sig = config.add_process( - name="sig", - id=75835213, # random number - xsecs={ - 13: sum([proc.get_xsec(13) for proc in signal_processes]), - }, - label="signal", - ) - for proc in signal_processes: - try: - sig.add_process(proc) - except Exception: - # this also adds 'sig' as parent to 'proc', but sometimes this is happening - # multiple times, since we create multiple configs - pass - - # add auxiliary information if process is signal - for proc_inst, _, _ in config.walk_processes(): - is_signal = any([ - signal_tag in proc_inst.name - for signal_tag in ("hh_vbf", "hh_ggf", "radion", "gravition") - ]) - if is_signal: - proc_inst.add_tag("is_signal") - - decay_map = { - "lf": { - "name": "lf", - "id": 50, - "label": "(lf)", - "br": -1, - }, - "hf": { - "name": "hf", - "id": 60, - "label": "(hf)", - "br": -1, - }, - } - - # add heavy flavour and light flavour dy processes - for proc in ( - "dy", - "dy_m4to10", "dy_m10to50", - "dy_m50toinf", - "dy_m50toinf_0j", "dy_m50toinf_1j", "dy_m50toinf_2j", - ): - dy_proc_inst = config.get_process(proc, default=None) - if dy_proc_inst: - add_production_mode_parent = proc != "dy" - for flavour in ("hf", "lf"): - # the 'add_decay_process' function helps us to create all parent-daughter relationships - add_decay_process( - dy_proc_inst, - decay_map[flavour], - add_production_mode_parent=add_production_mode_parent, - name_func=lambda 
parent_name, decay_name: f"{parent_name}_{decay_name}", - label_func=lambda parent_label, decay_label: f"{parent_label} {decay_label}", - xsecs=None, - aux={"flavour": flavour}, - ) - - # create main background process - background = config.add_process( - name="background", - id=99999, - label="background", - color=color_palette["blue"], - ) - for bg in ["tt", "dy", "st", "vv", "w_lnu", "h"]: - if config.has_process(bg): - bg = config.get_process(bg) - background.add_process(bg) - from random import randint diff --git a/hbw/tasks/campaigns.py b/hbw/tasks/campaigns.py index ab3a2d7..16df5b5 100644 --- a/hbw/tasks/campaigns.py +++ b/hbw/tasks/campaigns.py @@ -32,6 +32,101 @@ "cmsdb.campaigns.run3_2022_postEE_nano_v12": "campaign_run3_2022_postEE_nano_v12", "cmsdb.campaigns.run3_2022_postEE_nano_v13": "campaign_run3_2022_postEE_nano_v13", }, + "c22post_das": { + "cmsdb.campaigns.run3_2022_postEE_nano_v12": "campaign_run3_2022_postEE_nano_v12", + "cmsdb.campaigns.run3_2022_postEE_nano_v13": "campaign_run3_2022_postEE_nano_v13", + "cmsdb.campaigns.run3_2022_postEE_nano_uhh_v12": "campaign_run3_2022_postEE_nano_uhh_v12", + }, + "c22pre_das": { + "cmsdb.campaigns.run3_2022_preEE_nano_v12": "campaign_run3_2022_preEE_nano_v12", + "cmsdb.campaigns.run3_2022_preEE_nano_v13": "campaign_run3_2022_preEE_nano_v13", + "cmsdb.campaigns.run3_2022_preEE_nano_uhh_v12": "campaign_run3_2022_preEE_nano_uhh_v12", + }, +} + +broken_files = { + "run3_2022_postEE_nano_uhh_v12": { + "dy_m10to50_amcatnlo": [ + # missing LHEScaleWeights + "/store/mc/Run3Summer22EEMiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-10to50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_postEE_v6-v2/0/4B7063C8-D7B7-A45F-0B56-817AECEAFB43.root", # noqa: E501 + "/store/mc/Run3Summer22EEMiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-10to50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_postEE_v6-v2/0/D4D70538-4AF1-A95C-3A57-5EB5D2FFAB08.root", # noqa: E501 + 
"/store/mc/Run3Summer22EEMiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-10to50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_postEE_v6-v2/0/24934037-F730-CFB5-A82E-5D6669E8C85B.root", # noqa: E501 + "/store/mc/Run3Summer22EEMiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-10to50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_postEE_v6-v2/0/EB93CCFF-F013-D816-7586-1051CA0BC3C8.root", # noqa: E501 + "/store/mc/Run3Summer22EEMiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-10to50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_postEE_v6-v2/0/08C7ABCF-F7DE-F73F-218E-12A85C1A6E89.root", # noqa: E501 + ], + "dy_m50toinf_amcatnlo": [ + # broken + "/store/mc/Run3Summer22EEMiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_postEE_v6_ext1-v1/0/3FE6B8C0-4234-4EE4-5BEA-E232539E0D85.root", # noqa: E501 + # missing LHEScaleWeights + "/store/mc/Run3Summer22EEMiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_postEE_v6-v2/0/10B3DD52-F1B9-F8FD-E6FD-D59ECCE90963.root", # noqa: E501 + "/store/mc/Run3Summer22EEMiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_postEE_v6-v2/0/13DA9D04-5A59-51B8-67EC-54723C6DB4F3.root", # noqa: E501 + "/store/mc/Run3Summer22EEMiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_postEE_v6_ext1-v1/0/F31A7CD7-F9CF-2A51-42B6-26E82E134DE7.root", # noqa: E501 + "/store/mc/Run3Summer22EEMiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_postEE_v6_ext1-v1/0/5B7AFD98-EC30-D01C-59FA-162D86E82C61.root", # noqa: E501 + 
"/store/mc/Run3Summer22EEMiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_postEE_v6_ext1-v1/0/4E19FA69-9612-E1AF-A537-099F0119CC60.root", # noqa: E501 + "/store/mc/Run3Summer22EEMiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_postEE_v6_ext1-v1/0/197F9F10-660F-AC8B-83DF-AE02CA2AEA71.root", # noqa: E501 + "/store/mc/Run3Summer22EEMiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_postEE_v6_ext1-v1/0/FAFAECB9-A1C2-A07C-16F7-C7A8008A404E.root", # noqa: E501 + "/store/mc/Run3Summer22EEMiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_postEE_v6_ext1-v1/0/F541A987-BD0F-09AA-156F-2836570E8886.root", # noqa: E501 + "/store/mc/Run3Summer22EEMiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_postEE_v6_ext1-v1/0/B600A38B-9418-1EA3-8B4E-8969BE8ECDDE.root", # noqa: E501 + "/store/mc/Run3Summer22EEMiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_postEE_v6_ext1-v1/0/3B2EEDD7-0767-6112-8C60-B522A4A1910C.root", # noqa: E501 + "/store/mc/Run3Summer22EEMiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_postEE_v6_ext1-v1/0/5AC98FFE-A2A1-EAD4-BFD5-59F64E2A3465.root", # noqa: E501 + "/store/mc/Run3Summer22EEMiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_postEE_v6_ext1-v1/0/442D7323-2E81-9EFA-B9C1-E3414FF2C5B4.root", # noqa: E501 + "/store/mc/Run3Summer22EEMiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_postEE_v6_ext1-v1/0/198C8C10-BD66-5B2B-C70A-34EC4EEFB65C.root", # noqa: 
E501 + "/store/mc/Run3Summer22EEMiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_postEE_v6_ext1-v1/0/C0D75D2C-1A95-A416-E2BA-3E16E3249333.root", # noqa: E501 + "/store/mc/Run3Summer22EEMiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_postEE_v6_ext1-v1/0/DDBA1F4E-4795-A218-E0A0-4FF036B5CB68.root", # noqa: E501 + "/store/mc/Run3Summer22EEMiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_postEE_v6_ext2-v2/0/770ADB5F-4F37-50A4-1FA2-34D04AD062B8.root", # noqa: E501 + "/store/mc/Run3Summer22EEMiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_postEE_v6_ext2-v2/0/81E8769E-6D9A-674A-419A-40227862E8CC.root", # noqa: E501 + "/store/mc/Run3Summer22EEMiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_postEE_v6_ext2-v2/0/7CA623D4-4E9F-E689-ECFA-6F251291FAB3.root", # noqa: E501 + "/store/mc/Run3Summer22EEMiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_postEE_v6_ext2-v2/0/5FDA4334-32A3-0262-C0F5-5AA2AF906F94.root", # noqa: E501 + "/store/mc/Run3Summer22EEMiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_postEE_v6_ext2-v2/0/CE34DA61-BB00-E50C-76F6-591032050F6F.root", # noqa: E501 + "/store/mc/Run3Summer22EEMiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_postEE_v6_ext2-v2/0/0E2C013A-B1CD-63AA-4FBC-92AB1171BDF7.root", # noqa: E501 + "/store/mc/Run3Summer22EEMiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_postEE_v6_ext2-v2/0/4CC2A468-5DC4-3513-C484-CF10B96DD7E1.root", 
# noqa: E501 + "/store/mc/Run3Summer22EEMiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_postEE_v6_ext2-v2/0/0568BFAE-B3C4-FF86-2E8E-ABFEB3F418BC.root", # noqa: E501 + "/store/mc/Run3Summer22EEMiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_postEE_v6_ext2-v2/0/A95D7402-87A5-C41B-3B89-211DCE48A4BB.root", # noqa: E501 + "/store/mc/Run3Summer22EEMiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_postEE_v6_ext2-v2/0/836369D5-B667-F3DD-78D7-9D075766A182.root", # noqa: E501 + "/store/mc/Run3Summer22EEMiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_postEE_v6_ext2-v2/0/65B3170C-8F95-F3AA-2B8F-056AAF05905D.root", # noqa: E501 + "/store/mc/Run3Summer22EEMiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_postEE_v6_ext2-v2/0/EE0F4D89-CC83-02CC-19EE-8BEA0AC9EB88.root", # noqa: E501 + "/store/mc/Run3Summer22EEMiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_postEE_v6_ext2-v2/0/69DBFCA4-B503-DA49-8972-D8EFFA69DA7C.root", # noqa: E501 + "/store/mc/Run3Summer22EEMiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_postEE_v6_ext2-v2/0/20988817-825D-C5DD-3AC0-5A929F768A5F.root", # noqa: E501 + "/store/mc/Run3Summer22EEMiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_postEE_v6_ext2-v2/0/BD755398-6344-C786-2BBE-B648C5056544.root", # noqa: E501 + 
"/store/mc/Run3Summer22EEMiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_postEE_v6_ext2-v2/0/E80FA718-9A41-22EF-B2C2-ABE91B334447.root", # noqa: E501 + "/store/mc/Run3Summer22EEMiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_postEE_v6_ext2-v2/0/F095A2DC-3D9D-540A-D77A-E0881A062F06.root", # noqa: E501 + "/store/mc/Run3Summer22EEMiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_postEE_v6_ext2-v2/0/7BAFD1A3-6EF1-18A5-AC03-9158A4D965E0.root", # noqa: E501 + "/store/mc/Run3Summer22EEMiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_postEE_v6_ext2-v2/0/C71C0C70-9F3D-C581-219E-6FE00957D3CB.root", # noqa: E501 + "/store/mc/Run3Summer22EEMiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_postEE_v6_ext2-v2/0/2307BFB8-74FF-BB2A-52A2-909D5F57C221.root", # noqa: E501 + # missing LHEScaleWeights + ], + "dy_m50toinf_2j_amcatnlo": [ + # broken + "/store/mc/Run3Summer22EEMiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-50_2J_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_postEE_v6-v2/0/FD427E18-2F78-5055-7B38-8929DDF4F1EA.root", # noqa: E501 + ], + }, + "run3_2022_preEE_nano_uhh_v12": { + "dy_m50toinf_amcatnlo": [ + # missing LHEScaleWeights + "/store/mc/Run3Summer22MiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_v5-v2/0/140BABD5-F5C1-543C-7425-92CDA4A385B9.root", # noqa: E501 + "/store/mc/Run3Summer22MiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_v5-v2/0/F96C5BD4-8AFF-3B01-A17B-62F17F74895B.root", # noqa: E501 + 
"/store/mc/Run3Summer22MiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_v5_ext2-v2/0/39B93A78-FF63-8552-5C58-257144882E6B.root", # noqa: E501 + "/store/mc/Run3Summer22MiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_v5_ext2-v2/0/12615068-4201-0739-6128-21B694B3CF6E.root", # noqa: E501 + "/store/mc/Run3Summer22MiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_v5_ext2-v2/0/3163B05D-3FFB-1C6B-60BB-B5CD14166ACE.root", # noqa: E501 + "/store/mc/Run3Summer22MiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_v5-v2/0/7BE26782-9B31-D8AC-E317-EF6F32C391BF.root", # noqa: E501 + "/store/mc/Run3Summer22MiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_v5_ext1-v1/0/17AB951D-549D-89D1-345D-CE6CD5B5B3D0.root", # noqa: E501 + "/store/mc/Run3Summer22MiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_v5_ext2-v2/0/22B6C39B-3332-7E8A-B8C7-F23367A5F297.root", # noqa: E501 + "/store/mc/Run3Summer22MiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_v5_ext2-v2/0/BAE33AA8-086B-7D5D-26EA-C52C9C6D31FE.root", # noqa: E501 + "/store/mc/Run3Summer22MiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_v5_ext2-v2/0/C70989AC-334C-EB82-4ACC-B8C48FFE2433.root", # noqa: E501 + "/store/mc/Run3Summer22MiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_v5_ext2-v2/0/385E6DCC-4FB6-ED71-2B3A-5B23C5A3ACC2.root", # noqa: E501 + + ], + "dy_m10to50_amcatnlo": [ + # missing LHEScaleWeights + 
"/store/mc/Run3Summer22MiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-10to50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_v5-v2/0/315BBEDB-FF7D-B3FB-0355-F6DA23E297BE.root", # noqa: E501 + "/store/mc/Run3Summer22MiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-10to50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_v5-v2/0/2E0573CC-695C-340B-5720-85278B31496E.root", # noqa: E501 + "/store/mc/Run3Summer22MiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-10to50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_v5_ext1-v1/0/31669005-269B-419C-B93E-E3E4A607B644.root", # noqa: E501 + ], + "w_lnu_amcatnlo": [ + # missing LHEScaleWeights + "/store/mc/Run3Summer22MiniAODv4_NanoAODv12UHH/WtoLNu-2Jets_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_v5-v2/0/33E9A5A9-73C5-42C6-D337-08D23E9144BF.root", # noqa: E501 + ], + }, } @@ -66,24 +161,19 @@ def modify_campaign(self, campaign_inst): """ Modify the campaign instance, e.g. by adding datasets or changing dataset properties. 
""" - if campaign_inst.name == "run3_2022_postEE_nano_uhh_v12": - # remove broken files - dy_m10to50_nominal = campaign_inst.get_dataset("dy_m10to50_amcatnlo").info["nominal"] - dy_m10to50_nominal.x.broken_files = [ - # missing scale weights - "/store/mc/Run3Summer22EEMiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-10to50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_postEE_v6-v2/0/4B7063C8-D7B7-A45F-0B56-817AECEAFB43.root", # noqa: E501 - ] - dy_m10to50_nominal.n_files = dy_m10to50_nominal.n_files - 1 - dy_m10to50_nominal.n_events = dy_m10to50_nominal.n_events - 1651814 - - dy_m50toinf_nominal = campaign_inst.get_dataset("dy_m50toinf_amcatnlo").info["nominal"] - dy_m50toinf_nominal.x.broken_files = [ - # broken file - "/store/mc/Run3Summer22EEMiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_postEE_v6_ext1-v1/0/3FE6B8C0-4234-4EE4-5BEA-E232539E0D85.root", # noqa: E501 - ] - dy_m50toinf_nominal.n_files = dy_m50toinf_nominal.n_files - 1 - dy_m50toinf_nominal.n_events = -1 - # dy_m50toinf.x.n_events = dy_m50toinf.x.n_events - ???? 
+ if campaign_inst.name not in broken_files: + return + + for dataset_name, broken_files_list in broken_files[campaign_inst.name].items(): + dataset_inst_nominal = campaign_inst.get_dataset(dataset_name).info["nominal"] + + if len(set(broken_files_list)) != len(broken_files_list): + raise ValueError(f"Duplicate broken files in {dataset_name}") + + dataset_inst_nominal.x.broken_files = dataset_inst_nominal.x("broken_files", []) + broken_files_list + dataset_inst_nominal.n_files = dataset_inst_nominal.n_files - len(broken_files_list) + # n_events not known for all broken files, but is not used anyways + dataset_inst_nominal.n_events = -1 @cached_property def campaign_insts(self): @@ -163,6 +253,8 @@ def get_custom_campaign(self): @timeit_multiple def run(self): + from hbw.analysis.processes import modify_cmsdb_processes + modify_cmsdb_processes() output = self.output() # cross check if the dataset summary did change From bdb3a1d399760c626a8878fe4a89213030347cdf Mon Sep 17 00:00:00 2001 From: Mathis Frahm Date: Thu, 9 Jan 2025 10:39:58 +0100 Subject: [PATCH 16/29] load MET for data in calibration --- hbw/calibration/default.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/hbw/calibration/default.py b/hbw/calibration/default.py index 1a0e793..b3e48cf 100644 --- a/hbw/calibration/default.py +++ b/hbw/calibration/default.py @@ -13,6 +13,8 @@ from columnflow.util import maybe_import, try_float from columnflow.columnar_util import set_ak_column, EMPTY_FLOAT +from hbw.util import MET_COLUMN + from hbw.calibration.jet import bjet_regression ak = maybe_import("awkward") @@ -82,7 +84,7 @@ def fatjet_init(self: Calibrator) -> None: @calibrator( - uses={deterministic_seeds}, + uses={deterministic_seeds, MET_COLUMN("{pt,phi}")}, produces={deterministic_seeds}, # jec uncertainty_sources: set to None to use config default jec_sources=["Total"], @@ -111,7 +113,7 @@ def jet_base(self: Calibrator, events: ak.Array, **kwargs) -> ak.Array: replace_value = 
pre_calib_met[col.split(".")[-1].split("_")[0]] logger.info( f"Found infinite values in {col}; Values will be replaced with " - f"{replace_value if try_float(replace_value) else replace_value[m]}" + f"{replace_value if try_float(replace_value) else replace_value[m]}", ) events = set_ak_column(events, col, ak.where(m, replace_value, route.apply(events))) From 99d81e623ccee8ca4023f7fe8c1f30c8a8ea4444 Mon Sep 17 00:00:00 2001 From: Mathis Frahm Date: Thu, 9 Jan 2025 10:48:59 +0100 Subject: [PATCH 17/29] add memory configuration and load gen columns only when required --- hbw/production/weights.py | 6 +++--- hbw/util.py | 24 ++++++++++++++++++++++++ law.cfg | 12 ++++++++++++ 3 files changed, 39 insertions(+), 3 deletions(-) diff --git a/hbw/production/weights.py b/hbw/production/weights.py index 4fa4b49..7b3b658 100644 --- a/hbw/production/weights.py +++ b/hbw/production/weights.py @@ -27,7 +27,7 @@ from hbw.production.normalized_weights import normalized_weight_factory from hbw.production.normalized_btag import normalized_btag_weights from hbw.production.dataset_normalization import dataset_normalization_weight -from hbw.util import has_tag +from hbw.util import has_tag, IF_DY, IF_TOP np = maybe_import("numpy") @@ -39,8 +39,8 @@ @producer( - uses={gen_parton_top, gen_v_boson, pu_weight}, - produces={gen_parton_top, gen_v_boson, pu_weight}, + uses={IF_TOP(gen_parton_top), IF_DY(gen_v_boson), pu_weight}, + produces={IF_TOP(gen_parton_top), IF_DY(gen_v_boson), pu_weight}, mc_only=True, ) def event_weights_to_normalize(self: Producer, events: ak.Array, results: SelectionResult, **kwargs) -> ak.Array: diff --git a/hbw/util.py b/hbw/util.py index 261ed3f..09ee8f6 100644 --- a/hbw/util.py +++ b/hbw/util.py @@ -663,3 +663,27 @@ def IF_MC(self: ArrayFunction.DeferredColumn, func: ArrayFunction) -> Any | set[ return self.get() return self.get() if func.dataset_inst.is_mc else None + + +@deferred_column +def IF_DY(self: ArrayFunction.DeferredColumn, func: ArrayFunction) -> Any | 
set[Any]: + if getattr(func, "dataset_inst", None) is None: + return self.get() + + return self.get() if func.dataset_inst.has_tag("is_v_jets") else None + + +@deferred_column +def IF_TOP(self: ArrayFunction.DeferredColumn, func: ArrayFunction) -> Any | set[Any]: + if getattr(func, "dataset_inst", None) is None: + return self.get() + + return self.get() if func.dataset_inst.has_tag("has_top") else None + + +@deferred_column +def IF_TT(self: ArrayFunction.DeferredColumn, func: ArrayFunction) -> Any | set[Any]: + if getattr(func, "dataset_inst", None) is None: + return self.get() + + return self.get() if func.dataset_inst.has_tag("is_ttbar") else None diff --git a/law.cfg b/law.cfg index edebd04..3f50a3c 100644 --- a/law.cfg +++ b/law.cfg @@ -126,6 +126,18 @@ lfn_sources: local_desy_dcache, wlcg_fs_desy_store, wlcg_fs_infn_redirector, wlc ; c22post__cf.CalibrateEvents__nomin*: htcondor_memory=5GB ; cf.MLTraining: htcondor_memory=10GB, htcondor_gpus=1 +# use cluster default for signals +# cf.SelectEvents__hh*: dummy=0 +# cf.ReduceEvents__hh*: dummy=0 +# DY and W needs more memory due to gen_v_boson Producer +cf.SelectEvents__dy*: htcondor_memory=3GB +cf.ReduceEvents__dy*: htcondor_memory=3GB +cf.SelectEvents__w_lnu: htcondor_memory=3GB +cf.ReduceEvents__w_lnu: htcondor_memory=3GB +# default resources for all other datasets +cf.SelectEvents: htcondor_memory=2GB +cf.ReduceEvents: htcondor_memory=2GB + [luigi_cf.DummyTask] # To set defaults on a per-task basis From cea5ad6379ac29050e7e56340641c7dcbc5bbcff Mon Sep 17 00:00:00 2001 From: Mathis Frahm Date: Thu, 9 Jan 2025 16:04:11 +0100 Subject: [PATCH 18/29] use stitching producer only where necessary --- hbw/production/weights.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/hbw/production/weights.py b/hbw/production/weights.py index 7b3b658..b122a09 100644 --- a/hbw/production/weights.py +++ b/hbw/production/weights.py @@ -241,8 +241,10 @@ def combined_normalization_weights_init(self: Producer) -> 
None: if self.dataset_inst.has_tag("is_hbv"): self.norm_weights_producer = stitched_normalization_weights_brs_from_processes - else: + elif "dy_m50toinf" in self.dataset_inst.name: self.norm_weights_producer = stitched_normalization_weights + else: + self.norm_weights_producer = normalization_weights self.norm_weights_producer.weight_name = "stitched_normalization_weight" From 4f74b018f603c12b146a7b385c4c45b34ae280c6 Mon Sep 17 00:00:00 2001 From: Mathis Frahm Date: Thu, 9 Jan 2025 16:04:25 +0100 Subject: [PATCH 19/29] keep columns for electronSS corrections --- hbw/config/config_run2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hbw/config/config_run2.py b/hbw/config/config_run2.py index d89932a..92e73b0 100644 --- a/hbw/config/config_run2.py +++ b/hbw/config/config_run2.py @@ -709,7 +709,7 @@ def add_external(name, value): "{FatJet,HbbJet}.{pt,eta,phi,mass,msoftdrop,tau1,tau2,tau3,btagHbb,deepTagMD_HbbvsQCD,particleNet_HbbvsQCD}", # Leptons "{Electron,Muon}.{pt,eta,phi,mass,charge,pdgId,jetRelIso,is_tight,dxy,dz}", - "Electron.deltaEtaSC", "mll", + "Electron.{deltaEtaSC,r9,seedGain}", "mll", # MET "{MET,PuppiMET}.{pt,phi}", # all columns added during selection using a ColumnCollection flag, but skip cutflow ones From 6c2652243280084d85d785220be7f4ace986bf49 Mon Sep 17 00:00:00 2001 From: Mathis Frahm Date: Thu, 9 Jan 2025 16:05:08 +0100 Subject: [PATCH 20/29] change retries parameter default --- hbw/columnflow_patches.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/hbw/columnflow_patches.py b/hbw/columnflow_patches.py index 06ba7c5..45f3375 100644 --- a/hbw/columnflow_patches.py +++ b/hbw/columnflow_patches.py @@ -136,6 +136,10 @@ def patched_init(self, *args, **kwargs): @memoize def patch_all(): + # change the "retries" parameter default + from columnflow.tasks.framework.remote import RemoteWorkflow + RemoteWorkflow.retries = RemoteWorkflow.retries.copy(default=2) + patch_mltraining() patch_htcondor_workflow_naf_resources() # 
patch_column_alias_strategy() From 5f6942c790a7224d58d186c6893c64586d1339b9 Mon Sep 17 00:00:00 2001 From: Mathis Frahm Date: Fri, 10 Jan 2025 22:20:23 +0100 Subject: [PATCH 21/29] update columnflow and fixes --- hbw/columnflow_patches.py | 6 ++---- hbw/config/variables.py | 18 ++++++++++-------- hbw/production/normalized_weights.py | 3 ++- hbw/production/weights.py | 2 +- hbw/selection/stats.py | 2 +- modules/columnflow | 2 +- 6 files changed, 17 insertions(+), 16 deletions(-) diff --git a/hbw/columnflow_patches.py b/hbw/columnflow_patches.py index 45f3375..d40214b 100644 --- a/hbw/columnflow_patches.py +++ b/hbw/columnflow_patches.py @@ -93,9 +93,7 @@ def TaskArrayFunction_str(self): TaskArrayFunction.__str__ = TaskArrayFunction_str logger.info( - "patched TaskArrayFunction.__str__ to include the CSP version attribute " - "(NOTE that this currently does not work for the " - "MLTrainingMixin tasks (e.g. MLPreTraining and MLTraining))", + "patched TaskArrayFunction.__str__ to include the CSP version attribute", ) @@ -138,7 +136,7 @@ def patched_init(self, *args, **kwargs): def patch_all(): # change the "retries" parameter default from columnflow.tasks.framework.remote import RemoteWorkflow - RemoteWorkflow.retries = RemoteWorkflow.retries.copy(default=2) + RemoteWorkflow.retries = RemoteWorkflow.retries.copy(default=3) patch_mltraining() patch_htcondor_workflow_naf_resources() diff --git a/hbw/config/variables.py b/hbw/config/variables.py index c48992a..a1878a7 100644 --- a/hbw/config/variables.py +++ b/hbw/config/variables.py @@ -317,9 +317,11 @@ def add_variables(config: od.Config) -> None: x_title="Number of pnet jets (tight WP)", discrete_x=True, ) + # NOTE: there is some issue when loading columns via aux, but not loading all 4-vector components + # but no error is raised, when changing to the `object["pt"]` notation config.add_variable( name="n_fatjet", - expression=lambda events: ak.num(events.FatJet.pt, axis=1), + expression=lambda events: 
ak.num(events.FatJet["pt"], axis=1), aux={"inputs": {"FatJet.pt"}}, binning=(7, -0.5, 6.5), x_title="Number of fatjets", @@ -327,7 +329,7 @@ def add_variables(config: od.Config) -> None: ) config.add_variable( name="n_hbbjet", - expression=lambda events: ak.num(events.HbbJet.pt, axis=1), + expression=lambda events: ak.num(events.HbbJet["pt"], axis=1), aux={"inputs": {"HbbJet.pt"}}, binning=(4, -0.5, 3.5), x_title="Number of hbbjets", @@ -335,7 +337,7 @@ def add_variables(config: od.Config) -> None: ) config.add_variable( name="n_electron", - expression=lambda events: ak.num(events.Electron.pt, axis=1), + expression=lambda events: ak.num(events.Electron["pt"], axis=1), aux={"inputs": {"Electron.pt"}}, binning=(4, -0.5, 3.5), x_title="Number of electrons", @@ -343,7 +345,7 @@ def add_variables(config: od.Config) -> None: ) config.add_variable( name="n_muon", - expression=lambda events: ak.num(events.Muon.pt, axis=1), + expression=lambda events: ak.num(events.Muon["pt"], axis=1), aux={"inputs": {"Muon.pt"}}, binning=(4, -0.5, 3.5), x_title="Number of muons", @@ -351,7 +353,7 @@ def add_variables(config: od.Config) -> None: ) config.add_variable( name="n_bjet", - expression=lambda events: ak.num(events.Bjet.pt, axis=1), + expression=lambda events: ak.num(events.Bjet["pt"], axis=1), aux={"inputs": {"Bjet.pt"}}, binning=(4, -0.5, 3.5), x_title="Number of bjets", @@ -359,7 +361,7 @@ def add_variables(config: od.Config) -> None: ) config.add_variable( name="ht", - expression=lambda events: ak.sum(events.Jet.pt, axis=1), + expression=lambda events: ak.sum(events.Jet["pt"], axis=1), aux={"inputs": {"Jet.pt"}}, binning=(40, 0, 1200), unit="GeV", @@ -369,7 +371,7 @@ def add_variables(config: od.Config) -> None: config.add_variable( name="lt", expression=lambda events: ( - ak.sum(events.Muon.pt, axis=1) + ak.sum(events.Muon.pt, axis=1) + events[met_name].pt + ak.sum(events.Muon["pt"], axis=1) + ak.sum(events.Muon["pt"], axis=1) + events[met_name]["pt"] ), aux={"inputs": 
{"Muon.pt", "Electron.pt", "MET.pt"}}, binning=(40, 0, 1200), @@ -378,7 +380,7 @@ def add_variables(config: od.Config) -> None: ) config.add_variable( name="ht_bjet_norm", - expression=lambda events: ak.sum(events.Jet.pt, axis=1), + expression=lambda events: ak.sum(events.Jet["pt"], axis=1), aux={"inputs": {"Jet.pt"}}, binning=[0, 100, 200, 300, 400, 500, 600, 700, 800, 900, 1000, 1100, 1200, 1300, 1450, 1700, 2400], unit="GeV", diff --git a/hbw/production/normalized_weights.py b/hbw/production/normalized_weights.py index 3dc8948..324668e 100644 --- a/hbw/production/normalized_weights.py +++ b/hbw/production/normalized_weights.py @@ -26,7 +26,8 @@ def normalized_weight_factory( ) -> Callable: @producer( - uses=set(weight_producers) | set().union(*[w.produces for w in weight_producers]) | {"process_id"}, + # TODO: w.produces does not work as intended anymore, so we have to initialize the Producers here + uses=set(weight_producers) | set().union(*[w().produced_columns for w in weight_producers]) | {"process_id"}, cls_name=producer_name, mc_only=True, # skip the checking existence of used/produced columns because not all columns are there diff --git a/hbw/production/weights.py b/hbw/production/weights.py index b122a09..28e9e6f 100644 --- a/hbw/production/weights.py +++ b/hbw/production/weights.py @@ -241,7 +241,7 @@ def combined_normalization_weights_init(self: Producer) -> None: if self.dataset_inst.has_tag("is_hbv"): self.norm_weights_producer = stitched_normalization_weights_brs_from_processes - elif "dy_m50toinf" in self.dataset_inst.name: + elif "dy_" in self.dataset_inst.name: self.norm_weights_producer = stitched_normalization_weights else: self.norm_weights_producer = normalization_weights diff --git a/hbw/selection/stats.py b/hbw/selection/stats.py index 17d96cf..b7ab446 100644 --- a/hbw/selection/stats.py +++ b/hbw/selection/stats.py @@ -50,7 +50,7 @@ def hbw_selection_step_stats( @selector( - uses={increment_stats, event_weights_to_normalize, 
RAW_MET_COLUMN("pt")}, + uses={increment_stats, event_weights_to_normalize, RAW_MET_COLUMN("{pt,phi}")}, ) def hbw_increment_stats( self: Selector, diff --git a/modules/columnflow b/modules/columnflow index c4139ca..608ef91 160000 --- a/modules/columnflow +++ b/modules/columnflow @@ -1 +1 @@ -Subproject commit c4139cabf44246492b4afa8217750612d401d3d3 +Subproject commit 608ef912fa7ba33a82fa1e0af868380030fb3e75 From d4f57f0357817a64844fc9e12d5d1fa409ab4c16 Mon Sep 17 00:00:00 2001 From: Mathis Frahm Date: Mon, 13 Jan 2025 14:19:09 +0100 Subject: [PATCH 22/29] add missing variable titles --- hbw/config/variables.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/hbw/config/variables.py b/hbw/config/variables.py index a1878a7..c1f6fef 100644 --- a/hbw/config/variables.py +++ b/hbw/config/variables.py @@ -569,6 +569,7 @@ def add_variables(config: od.Config) -> None: binning=(40, 0., 400.), unit="GeV", null_value=EMPTY_FLOAT, + x_title=f"Lepton {i} $p_{{T}}$", ) config.add_variable( name=f"lepton{i}_eta", @@ -579,6 +580,7 @@ def add_variables(config: od.Config) -> None: binning=(40, -3.2, 3.2), unit="GeV", null_value=EMPTY_FLOAT, + x_title=f"Lepton {i} $\eta$", ) config.add_variable( name=f"lepton{i}_phi", @@ -589,6 +591,7 @@ def add_variables(config: od.Config) -> None: binning=(50, -2.5, 2.5), unit="GeV", null_value=EMPTY_FLOAT, + x_title=f"Lepton {i} $\phi$", ) config.add_variable( name=f"lepton{i}_mass", @@ -599,6 +602,7 @@ def add_variables(config: od.Config) -> None: binning=(40, 0., 400.), unit="GeV", null_value=EMPTY_FLOAT, + x_title=f"Lepton {i} mass", ) for obj in ["Electron", "Muon"]: From ad5b98d402683401b62617a8b3280963159c70c3 Mon Sep 17 00:00:00 2001 From: Mathis Frahm Date: Mon, 13 Jan 2025 15:56:30 +0100 Subject: [PATCH 23/29] store ml inputs as float32 --- hbw/production/ml_inputs.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/hbw/production/ml_inputs.py b/hbw/production/ml_inputs.py index d5612ed..2fc06e0 100644 
--- a/hbw/production/ml_inputs.py +++ b/hbw/production/ml_inputs.py @@ -147,7 +147,7 @@ def common_ml_inputs(self: Producer, events: ak.Array, **kwargs) -> ak.Array: # fill nan/none values of all produced columns for col in self.ml_input_columns: - events = set_ak_column(events, col, ak.fill_none(ak.nan_to_none(events[col]), ZERO_PADDING_VALUE)) + events = set_ak_column_f32(events, col, ak.fill_none(ak.nan_to_none(events[col]), ZERO_PADDING_VALUE)) check_column_bookkeeping(self, events) @@ -257,7 +257,7 @@ def sl_ml_inputs(self: Producer, events: ak.Array, **kwargs) -> ak.Array: # fill nan/none values of all produced columns for col in self.ml_input_columns: - events = set_ak_column(events, col, ak.fill_none(ak.nan_to_none(events[col]), ZERO_PADDING_VALUE)) + events = set_ak_column_f32(events, col, ak.fill_none(ak.nan_to_none(events[col]), ZERO_PADDING_VALUE)) check_column_bookkeeping(self, events) return events @@ -332,7 +332,7 @@ def dl_ml_inputs(self: Producer, events: ak.Array, **kwargs) -> ak.Array: # fill nan/none values of all produced columns for col in self.ml_input_columns: - events = set_ak_column(events, col, ak.fill_none(ak.nan_to_none(events[col]), ZERO_PADDING_VALUE)) + events = set_ak_column_f32(events, col, ak.fill_none(ak.nan_to_none(events[col]), ZERO_PADDING_VALUE)) check_column_bookkeeping(self, events) return events @@ -360,6 +360,9 @@ def dl_ml_inputs_init(self: Producer) -> None: check_variable_existence(self) +test_dl_ml_inputs = dl_ml_inputs.derive("test_dl_ml_inputs") + + @producer( uses={common_ml_inputs}, produces={common_ml_inputs}, @@ -437,7 +440,7 @@ def sl_res_ml_inputs(self: Producer, events: ak.Array, **kwargs) -> ak.Array: # fill nan/none values of all produced columns for col in self.ml_input_columns: - events = set_ak_column(events, col, ak.fill_none(ak.nan_to_none(events[col]), ZERO_PADDING_VALUE)) + events = set_ak_column_f32(events, col, ak.fill_none(ak.nan_to_none(events[col]), ZERO_PADDING_VALUE)) 
check_column_bookkeeping(self, events) return events From 63a6bf35645f2e6f8cacb43d9e1c8a9b967a5442 Mon Sep 17 00:00:00 2001 From: Mathis Frahm Date: Wed, 15 Jan 2025 17:25:37 +0100 Subject: [PATCH 24/29] add implementation + first set of (incomplete) trigger SFs --- hbw/config/config_run2.py | 17 ++++ hbw/production/trigger.py | 198 ++++++++++++++++++++++++-------------- hbw/production/weights.py | 63 +++--------- hbw/weight/default.py | 8 ++ 4 files changed, 166 insertions(+), 120 deletions(-) diff --git a/hbw/config/config_run2.py b/hbw/config/config_run2.py index 92e73b0..8089952 100644 --- a/hbw/config/config_run2.py +++ b/hbw/config/config_run2.py @@ -608,8 +608,22 @@ def add_external(name, value): add_external("jet_veto_map", (f"{json_mirror}/POG/JME/{corr_tag}/jetvetomaps.json.gz", "v1")) # electron scale factors add_external("electron_sf", (f"{json_mirror}/POG/EGM/{corr_tag}/electron.json.gz", "v1")) + add_external("electron_ss", (f"{json_mirror}/POG/EGM/{corr_tag}/electronSS.json.gz", "v1")) # muon scale factors add_external("muon_sf", (f"{json_mirror}/POG/MUO/{corr_tag}/muon_Z.json.gz", "v1")) + # trigger_sf from Balduin + # # files with uncertainties, not loadable because there are some NaNs in the json :/ + # trigger_sf_path = "/afs/desy.de/user/f/frahmmat/Projects/hh2bbww/data/software/trig_sf" + # add_external("trigger_sf_ee", (f"{trigger_sf_path}/sf_ee+Ele50_CaloI+DoubleEle33_mli_lep_pt-trig_ids.json", "v1")) + # add_external("trigger_sf_mm", (f"{trigger_sf_path}/sf_mm_mli_lep_pt-trig_ids.json", "v1")) + # add_external("trigger_sf_mixed", (f"{trigger_sf_path}/sf_mixed+Ele50_CaloI+DoubleEle33_mli_lep_pt-trig_ids.json", "v1")) # noqa: E501 + + # files without uncertainties and with wrong triggers + trigger_sf_path = 
"/nfs/dust/cms/user/letzerba/hh2bbww/data/cf_store/hbw_dl/cf.CalculateTriggerScaleFactors/c22post/nominal/calib__with_b_reg/sel__dl1_no_triggerV11__steps_no_trigger/prod__event_weightsV2__trigger_prodV2__pre_ml_catsV1__dl_ml_inputsV1/weight__ref_cut/datasets_4_10839b14e3/prod3/" # noqa: E501 + add_external("trigger_sf_ee", (f"{trigger_sf_path}/sf_ee_mli_lep_pt-trig_ids.json", "v1")) + add_external("trigger_sf_mm", (f"{trigger_sf_path}/sf_mm_mli_lep_pt-trig_ids.json", "v1")) + add_external("trigger_sf_mixed", (f"{trigger_sf_path}/sf_mixed_mli_lep_pt-trig_ids.json", "v1")) # noqa: E501 + # btag scale factor add_external("btag_sf_corr", (f"{json_mirror}/POG/BTV/{corr_tag}/btagging.json.gz", "v1")) # V+jets reweighting (derived for 13 TeV, custom json converted from ROOT, not centrally produced) @@ -710,6 +724,9 @@ def add_external(name, value): # Leptons "{Electron,Muon}.{pt,eta,phi,mass,charge,pdgId,jetRelIso,is_tight,dxy,dz}", "Electron.{deltaEtaSC,r9,seedGain}", "mll", + # isolations for testing + "Electron.{pfRelIso03_all,miniPFRelIso_all,mvaIso,mvaTTH}", + "Muon.{pfRelIso03_all,miniPFRelIso_all,mvaMuID,mvaTTH}", # MET "{MET,PuppiMET}.{pt,phi}", # all columns added during selection using a ColumnCollection flag, but skip cutflow ones diff --git a/hbw/production/trigger.py b/hbw/production/trigger.py index 61ea829..bb08c15 100644 --- a/hbw/production/trigger.py +++ b/hbw/production/trigger.py @@ -6,98 +6,122 @@ from __future__ import annotations +import functools + +# from dataclasses import dataclass + from columnflow.production import Producer, producer from columnflow.util import maybe_import, InsertableDict -from columnflow.columnar_util import set_ak_column, flat_np_view, layout_ak_array +from columnflow.columnar_util import set_ak_column, fill_at +from columnflow.production.cms.muon import muon_weights, MuonSFConfig + +from hbw.production.prepare_objects import prepare_objects np = maybe_import("numpy") ak = maybe_import("awkward") -@producer( - uses={ - 
"Trigger.pt", "Trigger.eta", +set_ak_column_f32 = functools.partial(set_ak_column, value_type=np.float32) +fill_at_f32 = functools.partial(fill_at, value_type=np.float32) + + +# @dataclass +# class TriggerSFConfig: + + +from hbw.categorization.categories import catid_2e, catid_2mu, catid_emu + + +# NOTE: dummy up/down variation at the moment +trigger_sf_config = { + "trigger_sf_ee": { + "corr_keys": { + "nominal": "sf_ee_mli_lep_pt-trig_ids", + "up": "sf_ee_mli_lep_pt-trig_ids", + "down": "sf_ee_mli_lep_pt-trig_ids", + }, + "category": catid_2e, + }, + "trigger_sf_mm": { + "corr_keys": { + "nominal": "sf_mm_mli_lep_pt-trig_ids", + "up": "sf_mm_mli_lep_pt-trig_ids", + "down": "sf_mm_mli_lep_pt-trig_ids", + }, + "category": catid_2mu, }, + "trigger_sf_mixed": { + "corr_keys": { + "nominal": "sf_mixed_mli_lep_pt-trig_ids", + "up": "sf_mixed_mli_lep_pt-trig_ids", + "down": "sf_mixed_mli_lep_pt-trig_ids", + }, + "category": catid_emu, + }, +} + + +@producer( + uses={"{Electron,Muon}.{pt,eta,phi,mass}", prepare_objects}, # produces in the init # only run on mc mc_only=True, # function to determine the correction file - get_trigger_file=(lambda self, external_files: external_files.trigger_sf), - # function to determine the trigger weight config - # get_trigger_config=(lambda self: self.config_inst.x.trigger_sf_names), + trigger_sf_config=trigger_sf_config, weight_name="trigger_weight", ) -def trigger_weights( +def dl_trigger_weights( self: Producer, events: ak.Array, trigger_mask: ak.Array | type(Ellipsis) = Ellipsis, **kwargs, ) -> ak.Array: """ - Creates trigger weights using the correctionlib. Requires an external file in the config under - ``trigger_sf``: + Creates trigger weights using custom trigger SF jsons. + """ - .. 
code-block:: python + events = self[prepare_objects](events, **kwargs) - cfg.x.external_files = DotDict.wrap({ - "trigger_sf": "/afs/cern.ch/work/m/mrieger/public/mirrors/jsonpog-integration-9ea86c4c/POG/MUO/2017_UL/trigger_z.json.gz", # noqa - }) + variable_map = { + "mli_lep_pt": events.Lepton[:, 0].pt, + } - *get_trigger_file* can be adapted in a subclass in case it is stored differently in the external - files. + full_mask = ak.zeros_like(events.event, dtype=bool) - The name of the correction set and the year string for the weight evaluation should be given as - an auxiliary entry in the config: + for key, corr_set in self.correction_sets.items(): + sf_config = self.trigger_sf_config[key] - .. code-block:: python + categorizer = sf_config["category"] + events, mask = self[categorizer](events, **kwargs) - cfg.x.trigger_sf_names = ("NUM_TightRelIso_DEN_TightIDandIPCut", "2017_UL") + # ensure that no event is assigned to multiple categories + if ak.any(mask & full_mask): + raise Exception(f"Overlapping categories in {dl_trigger_weights.cls_name}") + full_mask = mask | full_mask - *get_trigger_config* can be adapted in a subclass in case it is stored differently in the config. + for sys, corr_key in sf_config["corr_keys"].items(): + sysfix = "" if sys == "nominal" else f"_{sys}" + col_name = f"{self.weight_name}{sysfix}" + if col_name not in events.fields: + events = set_ak_column_f32(events, col_name, ak.ones_like(events.event)) - Optionally, a *trigger_mask* can be supplied to compute the scale factor weight based only on a - subset of triggers. 
- """ - # flat absolute eta and pt views - abs_eta = flat_np_view(abs(events.Trigger.eta[trigger_mask]), axis=1) - pt = flat_np_view(events.Trigger.pt[trigger_mask], axis=1) + corr = corr_set[corr_key] + inputs = [variable_map[inp.name] for inp in corr.inputs] - variable_map = { - "year": self.year, - "abseta": abs_eta, - "eta": abs_eta, - "pt": pt, - } + _sf = corr.evaluate(*inputs) - # loop over systematics - for syst, postfix in [ - ("sf", ""), - ("systup", "_up"), - ("systdown", "_down"), - ]: - # get the inputs for this type of variation - variable_map_syst = { - **variable_map, - "scale_factors": "nominal" if syst == "sf" else syst, # syst key in 2022 - "ValType": syst, # syst key in 2017 - } - inputs = [variable_map_syst[inp.name] for inp in self.trigger_sf_corrector.inputs] - sf_flat = self.trigger_sf_corrector(*inputs) - - # add the correct layout to it - sf = layout_ak_array(sf_flat, events.Trigger.pt[trigger_mask]) - - # create the product over all triggers in one event - weight = ak.prod(sf, axis=1, mask_identity=False) - - # store it - events = set_ak_column(events, f"{self.weight_name}{postfix}", weight, value_type=np.float32) + events = fill_at_f32( + ak_array=events, + where=mask, + route=col_name, + value=_sf, + ) return events -@trigger_weights.requires -def trigger_weights_requires(self: Producer, reqs: dict) -> None: +@dl_trigger_weights.requires +def dl_trigger_weights_requires(self: Producer, reqs: dict) -> None: if "external_files" in reqs: return @@ -105,30 +129,58 @@ def trigger_weights_requires(self: Producer, reqs: dict) -> None: reqs["external_files"] = BundleExternalFiles.req(self.task) -@trigger_weights.setup -def trigger_weights_setup( +@dl_trigger_weights.setup +def dl_trigger_weights_setup( self: Producer, reqs: dict, inputs: dict, reader_targets: InsertableDict, ) -> None: - bundle = reqs["external_files"] + bundle_files = reqs["external_files"].files # create the corrector import correctionlib - 
correctionlib.highlevel.Correction.__call__ = correctionlib.highlevel.Correction.evaluate - correction_set = correctionlib.CorrectionSet.from_string( - self.get_trigger_file(bundle.files), - ) - corrector_name, self.year = self.get_trigger_config() - self.trigger_sf_corrector = correction_set[corrector_name] + self.correction_sets = {} + for key, sf_config in self.trigger_sf_config.items(): + target = bundle_files[key] + correction_set = correctionlib.CorrectionSet.from_string(target.load(formatter="json")) + self.correction_sets[key] = correction_set - # check versions - if self.supported_versions and self.trigger_sf_corrector.version not in self.supported_versions: - raise Exception(f"unsuppprted trigger sf corrector version {self.trigger_sf_corrector.version}") - -@trigger_weights.init -def trigger_weights_init(self: Producer, **kwargs) -> None: +@dl_trigger_weights.init +def dl_trigger_weights_init(self: Producer, **kwargs) -> None: weight_name = self.weight_name self.produces |= {weight_name, f"{weight_name}_up", f"{weight_name}_down"} + + for key, sf_config in self.trigger_sf_config.items(): + self.uses.add(sf_config["category"]) + + +muon_trigger_weights = muon_weights.derive("muon_trigger_weights", cls_dict={ + "weight_name": "muon_trigger_weight", + "get_muon_config": (lambda self: MuonSFConfig.new(self.config_inst.x.muon_trigger_sf_names)), +}) + + +@producer( + uses={muon_trigger_weights}, +) +def sl_trigger_weights(self: Producer, events: ak.Array, **kwargs) -> ak.Array: + """ + Producer that calculates the single lepton trigger weights. + NOTE: this only includes the trigger weights from the muon channel. They should be combined with + the electron trigger weights in this producer. 
+ """ + if not self.config_inst.has_aux("muon_trigger_sf_names"): + raise Exception(f"In {sl_trigger_weights.__name__}: missing 'muon_trigger_sf_names' in config") + + # compute muon trigger SF weights (NOTE: trigger SFs are only defined for muons with + # pt > 26 GeV, so create a copy of the events array with with all muon pt < 26 GeV set to 26 GeV) + trigger_sf_events = set_ak_column_f32(events, "Muon.pt", ak.where(events.Muon.pt > 26., events.Muon.pt, 26.)) + trigger_sf_events = self[muon_trigger_weights](trigger_sf_events, **kwargs) + for route in self[muon_trigger_weights].produced_columns: + events = set_ak_column_f32(events, route, route.apply(trigger_sf_events)) + # memory cleanup + del trigger_sf_events + + return events diff --git a/hbw/production/weights.py b/hbw/production/weights.py index 28e9e6f..6e297f2 100644 --- a/hbw/production/weights.py +++ b/hbw/production/weights.py @@ -27,6 +27,7 @@ from hbw.production.normalized_weights import normalized_weight_factory from hbw.production.normalized_btag import normalized_btag_weights from hbw.production.dataset_normalization import dataset_normalization_weight +from hbw.production.trigger import sl_trigger_weights, dl_trigger_weights from hbw.util import has_tag, IF_DY, IF_TOP @@ -129,10 +130,7 @@ def event_weights_to_normalize_init(self) -> None: "weight_name": "muon_iso_weight", "get_muon_config": (lambda self: MuonSFConfig.new(self.config_inst.x.muon_id_sf_names)), }) -muon_trigger_weights = muon_weights.derive("muon_trigger_weights", cls_dict={ - "weight_name": "muon_trigger_weight", - "get_muon_config": (lambda self: MuonSFConfig.new(self.config_inst.x.muon_trigger_sf_names)), -}) + @producer( @@ -151,43 +149,6 @@ def muon_id_iso_weights(self: Producer, events: ak.Array, **kwargs) -> ak.Array: return events -@producer( - uses={muon_trigger_weights}, -) -def sl_trigger_weights(self: Producer, events: ak.Array, **kwargs) -> ak.Array: - """ - Producer that calculates the single lepton trigger weights. 
- """ - if not self.config_inst.has_aux("muon_trigger_sf_names"): - raise Exception(f"In {sl_trigger_weights.__name__}: missing 'muon_trigger_sf_names' in config") - - # compute muon trigger SF weights (NOTE: trigger SFs are only defined for muons with - # pt > 26 GeV, so create a copy of the events array with with all muon pt < 26 GeV set to 26 GeV) - trigger_sf_events = set_ak_column_f32(events, "Muon.pt", ak.where(events.Muon.pt > 26., events.Muon.pt, 26.)) - trigger_sf_events = self[muon_trigger_weights](trigger_sf_events, **kwargs) - for route in self[muon_trigger_weights].produced_columns: - events = set_ak_column_f32(events, route, route.apply(trigger_sf_events)) - # memory cleanup - del trigger_sf_events - - return events - - -def sl_trigger_weights_skip_func(self: Producer) -> bool: - if not getattr(self, "config_inst", None) or not getattr(self, "dataset_inst", None): - # do not skip when config or dataset is not set - return False - - if self.config_inst.x.lepton_tag == "sl": - # do not skip when lepton tag is single lepton - return False - else: - return True - - -sl_trigger_weights.skip_func = sl_trigger_weights_skip_func - - @producer( uses={ normalization_weights, @@ -266,7 +227,7 @@ def combined_normalization_weights_init(self: Producer) -> None: normalized_pu_weights, }, mc_only=True, - version=law.config.get_expanded("analysis", "event_weights_version", 1), + version=law.config.get_expanded("analysis", "event_weights_version", 2), ) def event_weights(self: Producer, events: ak.Array, **kwargs) -> ak.Array: """ @@ -295,9 +256,8 @@ def event_weights(self: Producer, events: ak.Array, **kwargs) -> ak.Array: if not has_tag("skip_muon_weights", self.config_inst, self.dataset_inst, operator=any): events = self[muon_id_iso_weights](events, **kwargs) - if self.config_inst.x.lepton_tag == "sl": - # compute single lepton trigger SF weights - events = self[sl_trigger_weights](events, **kwargs) + if not has_tag("skip_trigger_weights", self.config_inst, 
self.dataset_inst, operator=any): + events = self[self.trigger_weights_producer](events, **kwargs) # normalize event weights using stats events = self[normalized_pu_weights](events, **kwargs) @@ -324,8 +284,13 @@ def event_weights_init(self: Producer) -> None: self.uses |= {muon_id_iso_weights} self.produces |= {muon_id_iso_weights} - self.uses |= {sl_trigger_weights} - self.produces |= {sl_trigger_weights} + if not has_tag("skip_trigger_weights", self.config_inst, self.dataset_inst, operator=any): + self.trigger_weights_producer = ( + sl_trigger_weights if self.config_inst.x.lepton_tag == "sl" + else dl_trigger_weights + ) + self.uses |= {self.trigger_weights_producer} + self.produces |= {self.trigger_weights_producer} if not has_tag("skip_btag_weights", self.config_inst, self.dataset_inst, operator=any): self.uses |= {btag_weights, normalized_btag_weights} @@ -358,3 +323,7 @@ def large_weights_killer(self: Producer, events: ak.Array, **kwargs) -> ak.Array events = set_ak_column(events, "mc_weight", ak.where(weight_too_large, 0, events.mc_weight)) return events + + +# for testing +test_event_weights = event_weights.derive("test_event_weights") diff --git a/hbw/weight/default.py b/hbw/weight/default.py index ef5db3c..01a1097 100644 --- a/hbw/weight/default.py +++ b/hbw/weight/default.py @@ -191,6 +191,14 @@ def base_init(self: WeightProducer) -> None: "vjets_weight": [], # TODO: corrections/shift missing "stitched_normalization_weight": [], }}) +with_trigger_weight = default_weight_producer.derive("with_trigger_weight", cls_dict={"weight_columns": { + **default_correction_weights, + "vjets_weight": [], # TODO: corrections/shift missing + "trigger_weight": [], # TODO: corrections/shift missing + "stitched_normalization_weight": [], +}}) + + base.derive("unstitched", cls_dict={"weight_columns": { **default_correction_weights, "normalization_weight": [], }}) From 99847e705435efc60845e50ce0bfcb6c380d1f7f Mon Sep 17 00:00:00 2001 From: Mathis Frahm Date: Wed, 15 Jan 
2025 17:26:13 +0100 Subject: [PATCH 25/29] update applied jetId --- hbw/selection/jet.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hbw/selection/jet.py b/hbw/selection/jet.py index 9141b92..83df528 100644 --- a/hbw/selection/jet.py +++ b/hbw/selection/jet.py @@ -55,7 +55,7 @@ def jet_selection( jet_mask_loose = ( (events.Jet.pt >= self.jet_pt) & (abs(events.Jet.eta) <= 2.4) & - (events.Jet.jetId >= 2) # 1: loose, 2: tight, 4: isolated, 6: tight+isolated + (events.Jet.jetId >= 6) # 1: loose, 2: tight, 4: isolated, 6: tight+isolated ) electron = events.Electron[lepton_results.objects.Electron.LooseElectron] @@ -64,7 +64,7 @@ def jet_selection( jet_mask = ( (events.Jet.pt >= self.jet_pt) & (abs(events.Jet.eta) <= 2.4) & - (events.Jet.jetId >= 2) & # 1: loose, 2: tight, 4: isolated, 6: tight+isolated + (events.Jet.jetId >= 6) & # 1: loose, 2: tight, 4: isolated, 6: tight+isolated # ak.all(events.Jet.metric_table(lepton_results.x.lepton) > 0.4, axis=2) ak.all(events.Jet.metric_table(electron) > 0.4, axis=2) & ak.all(events.Jet.metric_table(muon) > 0.4, axis=2) From 2a0a7bb73313fbed05eaad95a8e2d001cd58a388 Mon Sep 17 00:00:00 2001 From: Mathis Frahm Date: Wed, 15 Jan 2025 17:30:53 +0100 Subject: [PATCH 26/29] minor configuration --- hbw/config/datasets.py | 5 +++-- hbw/config/defaults_and_groups.py | 4 +++- hbw/config/variables.py | 15 +++++++++------ hbw/production/weights.py | 1 - 4 files changed, 15 insertions(+), 10 deletions(-) diff --git a/hbw/config/datasets.py b/hbw/config/datasets.py index a4041f1..8acdbfd 100644 --- a/hbw/config/datasets.py +++ b/hbw/config/datasets.py @@ -152,8 +152,9 @@ def hbw_dataset_names(config: od.Config, as_list: bool = False) -> DotDict[str: "wmh_hzg_zll_powheg", "tth_hbb_powheg", "tth_hnonbb_powheg", # overlap with other samples, so be careful - "ttzh_madgraph", - "ttwh_madgraph", + # TODO: no cross sections setup for these samples + # "ttzh_madgraph", + # "ttwh_madgraph", ]), ], "hh_ggf_hbb_hvv": [ diff 
--git a/hbw/config/defaults_and_groups.py b/hbw/config/defaults_and_groups.py index bb59762..fc16a50 100644 --- a/hbw/config/defaults_and_groups.py +++ b/hbw/config/defaults_and_groups.py @@ -225,7 +225,9 @@ def set_config_defaults_and_groups(config_inst): "sl_much_boosted": ["sr__1mu__boosted"], "sl_ech_boosted": ["sr__1e__boosted"], "dl": ["sr", "dycr", "ttcr", "sr__1b", "sr__2b", "dycr__1b", "dycr__2b", "ttcr__1b", "ttcr__2b"], - "dl_preml": bracket_expansion(["incl", "{sr,ttcr,dycr}{,__2e,__2mu,__emu}{,__1b,__2b}"]), + "dl_preml_small": bracket_expansion(["incl", "{sr,ttcr,dycr}{,__2e,__2mu,__emu}__resolved{,__1b,__2b}"]), + "dl_preml_large": bracket_expansion(["incl", "{,sr__,ttcr__,dycr__}{,2e__,2mu__,emu__}resolved{,__1b,__2b}"]), + "dl_preml_boosted": bracket_expansion(["{,sr__,ttcr__,dycr__}{,2e__,2mu__,emu__}boosted"]), "dl_ttcr": ["ttcr", "ttcr__1b", "ttcr__2b", "ttcr__2e", "ttcr__2mu", "ttcr__emu"], "dl_dycr": ["dycr", "dycr__1b", "dycr__2b", "dycr__2e", "dycr__2mu", "dycr__emu"], "dl_sr": ["sr", "sr__1b", "sr__2b", "sr__2e", "sr__2mu", "sr__emu"], diff --git a/hbw/config/variables.py b/hbw/config/variables.py index c1f6fef..ad0cf22 100644 --- a/hbw/config/variables.py +++ b/hbw/config/variables.py @@ -560,9 +560,12 @@ def add_variables(config: od.Config) -> None: # Leptons for i in range(2): + # NOTE: inputs aux is only being used when the expression is a function and not a string; + # to define expression as a function, define as lambda function with passing i=i to avoid + # the late binding issue config.add_variable( name=f"lepton{i}_pt", - expression=f"Lepton[:, {i}].pt", + expression=lambda events, i=i: events.Lepton[:, i].pt, aux=dict( inputs={"{Electron,Muon}.{pt,eta,phi,mass}"}, ), @@ -573,29 +576,29 @@ def add_variables(config: od.Config) -> None: ) config.add_variable( name=f"lepton{i}_eta", - expression=f"Lepton[:, {i}].eta", + expression=lambda events, i=i: events.Lepton[:, i].eta, aux=dict( inputs={"{Electron,Muon}.{pt,eta,phi,mass}"}, ), 
binning=(40, -3.2, 3.2), unit="GeV", null_value=EMPTY_FLOAT, - x_title=f"Lepton {i} $\eta$", + x_title=f"Lepton {i} $\\eta$", ) config.add_variable( name=f"lepton{i}_phi", - expression=f"Lepton[:, {i}].phi", + expression=lambda events, i=i: events.Lepton[:, i].phi, aux=dict( inputs={"{Electron,Muon}.{pt,eta,phi,mass}"}, ), binning=(50, -2.5, 2.5), unit="GeV", null_value=EMPTY_FLOAT, - x_title=f"Lepton {i} $\phi$", + x_title=f"Lepton {i} $\\phi$", ) config.add_variable( name=f"lepton{i}_mass", - expression=f"Lepton[:, {i}].mass", + expression=lambda events, i=i: events.Lepton[:, i].mass, aux=dict( inputs={"{Electron,Muon}.{pt,eta,phi,mass}"}, ), diff --git a/hbw/production/weights.py b/hbw/production/weights.py index 6e297f2..fd431f3 100644 --- a/hbw/production/weights.py +++ b/hbw/production/weights.py @@ -132,7 +132,6 @@ def event_weights_to_normalize_init(self) -> None: }) - @producer( uses={muon_id_weights, muon_iso_weights}, produces={muon_id_weights, muon_iso_weights}, From 8c4f8f5672dcb2b3af5fc66d4f2c288aeb1000c1 Mon Sep 17 00:00:00 2001 From: Mathis Frahm Date: Fri, 17 Jan 2025 08:44:26 +0100 Subject: [PATCH 27/29] update columnflow + fixes --- hbw/production/dataset_normalization.py | 2 - hbw/production/normalized_btag.py | 2 - hbw/production/normalized_weights.py | 2 - hbw/production/process_ids.py | 2 +- hbw/tasks/corrections.py | 2 - hbw/tasks/inspection.py | 2 - hbw/tasks/resolve_dummy.py | 59 +++++++++++++++++++++++++ modules/columnflow | 2 +- 8 files changed, 61 insertions(+), 12 deletions(-) create mode 100644 hbw/tasks/resolve_dummy.py diff --git a/hbw/production/dataset_normalization.py b/hbw/production/dataset_normalization.py index eb92353..cb69dac 100644 --- a/hbw/production/dataset_normalization.py +++ b/hbw/production/dataset_normalization.py @@ -50,9 +50,7 @@ def dataset_normalization_weight_requires(self: Producer, reqs: dict) -> None: from columnflow.tasks.selection import MergeSelectionStats reqs["selection_stats"] = 
MergeSelectionStats.req( self.task, - tree_index=0, branch=-1, - _exclude=MergeSelectionStats.exclude_params_forest_merge, ) diff --git a/hbw/production/normalized_btag.py b/hbw/production/normalized_btag.py index 7c6bbad..6b3903f 100644 --- a/hbw/production/normalized_btag.py +++ b/hbw/production/normalized_btag.py @@ -151,9 +151,7 @@ def normalized_btag_weights_from_json_requires(self: Producer, reqs: dict) -> No from columnflow.tasks.selection import MergeSelectionStats reqs["selection_stats"] = MergeSelectionStats.req( self.task, - tree_index=0, branch=-1, - _exclude=MergeSelectionStats.exclude_params_forest_merge, ) diff --git a/hbw/production/normalized_weights.py b/hbw/production/normalized_weights.py index 324668e..074fc3b 100644 --- a/hbw/production/normalized_weights.py +++ b/hbw/production/normalized_weights.py @@ -89,9 +89,7 @@ def normalized_weight_requires(self: Producer, reqs: dict) -> None: from columnflow.tasks.selection import MergeSelectionStats reqs["selection_stats"] = MergeSelectionStats.req( self.task, - tree_index=0, branch=-1, - _exclude=MergeSelectionStats.exclude_params_forest_merge, ) @normalized_weight.setup diff --git a/hbw/production/process_ids.py b/hbw/production/process_ids.py index 25f78b8..c326bfb 100644 --- a/hbw/production/process_ids.py +++ b/hbw/production/process_ids.py @@ -213,7 +213,7 @@ def dy_nlo_process_producer(self: Producer, events: ak.Array, **kwargs) -> ak.Ar """ n_partons = events.LHE.NpNLO - genjet_mask = (events.GenJet.pt >= 20) & (abs(events.GenJet.eta) < 2.4) + genjet_mask = (events.GenJet["pt"] >= 20) & (abs(events.GenJet["eta"]) < 2.4) genjet = (events.GenJet[genjet_mask]) hf_genjet_mask = (genjet.hadronFlavour == 4) | (genjet.hadronFlavour == 5) is_hf = ak.any(hf_genjet_mask, axis=1) diff --git a/hbw/tasks/corrections.py b/hbw/tasks/corrections.py index 841da01..2ed4c5a 100644 --- a/hbw/tasks/corrections.py +++ b/hbw/tasks/corrections.py @@ -65,9 +65,7 @@ def requires(self): dataset.name: 
self.reqs.MergeSelectionStats.req( self, dataset=dataset.name, - tree_index=0, branch=-1, - _exclude=self.reqs.MergeSelectionStats.exclude_params_forest_merge, ) for dataset in self.dataset_insts } diff --git a/hbw/tasks/inspection.py b/hbw/tasks/inspection.py index be49ac7..df60235 100644 --- a/hbw/tasks/inspection.py +++ b/hbw/tasks/inspection.py @@ -76,9 +76,7 @@ def requires(self): reqs[dataset] = self.reqs.MergeSelectionStats.req( self, dataset=dataset, - tree_index=0, branch=-1, - _exclude=self.reqs.MergeSelectionStats.exclude_params_forest_merge, ) return reqs diff --git a/hbw/tasks/resolve_dummy.py b/hbw/tasks/resolve_dummy.py new file mode 100644 index 0000000..4e1788a --- /dev/null +++ b/hbw/tasks/resolve_dummy.py @@ -0,0 +1,59 @@ +import law + +from columnflow.util import DotDict + +from columnflow.tasks.framework.base import MultiConfigTask +from columnflow.tasks.framework.remote import RemoteWorkflow +from columnflow.tasks.framework.mixins import ( + CalibratorsMixin, SelectorStepsMixin, ProducersMixin, MLModelsMixin, WeightProducerMixin, + CategoriesMixin, HistHookMixin, MultiConfigDatasetsProcessesMixin, + # ShiftSourcesMixin, +) +from columnflow.tasks.framework.plotting import ( + ProcessPlotSettingMixin, VariablePlotSettingMixin, PlotBase, +) + +from hbw.util import timeit_multiple + + +class ResolveDummy( + HistHookMixin, + VariablePlotSettingMixin, + ProcessPlotSettingMixin, + MultiConfigDatasetsProcessesMixin, + CategoriesMixin, + MLModelsMixin, + WeightProducerMixin, + ProducersMixin, + SelectorStepsMixin, + CalibratorsMixin, + MultiConfigTask, + law.LocalWorkflow, + RemoteWorkflow, +): + plot_function = PlotBase.plot_function.copy( + default="columnflow.plotting.plot_functions_2d.plot_2d", + add_default_to_description=True, + ) + + def create_branch_map(self): + return [ + DotDict({"category": cat_name, "variable": var_name}) + for cat_name in sorted(self.categories) + for var_name in sorted(self.variables) + ] + + @classmethod + 
@timeit_multiple + def resolve_param_values(cls, params): + params = super().resolve_param_values(params) + return params + + def run(self): + pass + + def output(self): + output = { + "always_incomplete_dummy": self.target("dummy.txt"), + } + return output diff --git a/modules/columnflow b/modules/columnflow index 608ef91..cb362cf 160000 --- a/modules/columnflow +++ b/modules/columnflow @@ -1 +1 @@ -Subproject commit 608ef912fa7ba33a82fa1e0af868380030fb3e75 +Subproject commit cb362cf8b00b074847de4ff270836599230b9fbd From c8bd74f47a349ea42dba1feb175e6bfcdc5cf80f Mon Sep 17 00:00:00 2001 From: Mathis Frahm Date: Fri, 17 Jan 2025 10:51:03 +0100 Subject: [PATCH 28/29] cleanup variables --- hbw/config/defaults_and_groups.py | 2 +- hbw/config/variables.py | 21 ++++++++++----------- hbw/selection/common.py | 10 ++++++++++ hbw/selection/dl_remastered.py | 6 +++++- 4 files changed, 26 insertions(+), 13 deletions(-) diff --git a/hbw/config/defaults_and_groups.py b/hbw/config/defaults_and_groups.py index fc16a50..87d7cf1 100644 --- a/hbw/config/defaults_and_groups.py +++ b/hbw/config/defaults_and_groups.py @@ -310,7 +310,7 @@ def set_config_defaults_and_groups(config_inst): "met_{pt,phi}", "jet{0,1,2,3}_{pt,eta,phi,mass,btagpnetb}", "bjet{0,1}_{pt,eta,phi,mass,btagpnetb}", - "ht", "lt", "mll", "ptll", + "ht", "lt", "mll", "ptll", "npvs", ]), "dl_resolved": ["n_*", "electron_*", "muon_*", "met_*", "jet*", "bjet*", "ht", "lt", "mll", "ptll"], "dl_boosted": ["n_*", "electron_*", "muon_*", "met_*", "fatjet_*", "lt", "mll", "ptll"], diff --git a/hbw/config/variables.py b/hbw/config/variables.py index ad0cf22..98d42e8 100644 --- a/hbw/config/variables.py +++ b/hbw/config/variables.py @@ -191,7 +191,7 @@ def add_variables(config: od.Config) -> None: name="high_jet_pt_strcat", # NOTE: for some reason passing the string directly produces ValueError due to different shapes, e.g. 
# ValueError: cannot broadcast RegularArray of size 7 with RegularArray of size 264 - expression=lambda events: ak.where(events.Jet.pt > 50, ["high_pt"], ["low_pt"]), + expression=lambda events: ak.where(events.Jet["pt"] > 50, ["high_pt"], ["low_pt"]), aux={ "inputs": {"Jet.pt"}, "axis_type": "strcat", @@ -203,7 +203,7 @@ def add_variables(config: od.Config) -> None: # h[{"high_jet_pt_intcat": hist.loc(0)}] picks the bin with value 0 config.add_variable( name="high_jet_pt_intcat", - expression=lambda events: ak.where(events.Jet.pt > 50, 1, 0), + expression=lambda events: ak.where(events.Jet["pt"] > 50, 1, 0), aux={ "inputs": {"Jet.pt"}, "axis_type": "intcat", @@ -212,7 +212,7 @@ def add_variables(config: od.Config) -> None: ) config.add_variable( name="high_jet_pt_bool", - expression=lambda events: events.Jet.pt > 50, + expression=lambda events: events.Jet["pt"] > 50, aux={ "inputs": {"Jet.pt"}, "axis_type": "bool", @@ -258,19 +258,18 @@ def add_variables(config: od.Config) -> None: config.add_variable( name="n_jet", - expression=lambda events: ak.num(events.Jet.pt, axis=1), + expression=lambda events: ak.num(events.Jet["pt"], axis=1), aux={"inputs": {"Jet.pt"}}, binning=(12, -0.5, 11.5), x_title="Number of jets", discrete_x=True, ) - if config.x.run == 2: deepjet_wps = config.x.btag_working_points.deepjet config.add_variable( name="n_deepjet_loose", expression=lambda events: ak.sum(events.Jet.btagDeepFlavB > deepjet_wps.loose, axis=1), - aux={"inputs": {"Jet.pt", "Jet.btagDeepFlavB"}}, + aux={"inputs": {"Jet.btagDeepFlavB"}}, binning=(7, -0.5, 6.5), x_title="Number of deepjets (loose WP)", discrete_x=True, @@ -278,7 +277,7 @@ def add_variables(config: od.Config) -> None: config.add_variable( name="n_deepjet_medium", expression=lambda events: ak.sum(events.Jet.btagDeepFlavB > deepjet_wps.medium, axis=1), - aux={"inputs": {"Jet.pt", "Jet.btagDeepFlavB"}}, + aux={"inputs": {"Jet.btagDeepFlavB"}}, binning=(7, -0.5, 6.5), x_title="Number of deepjets (medium WP)", 
discrete_x=True, @@ -286,7 +285,7 @@ def add_variables(config: od.Config) -> None: config.add_variable( name="n_deepjet_tight", expression=lambda events: ak.sum(events.Jet.btagDeepFlavB > deepjet_wps.tight, axis=1), - aux={"inputs": {"Jet.pt", "Jet.btagDeepFlavB"}}, + aux={"inputs": {"Jet.btagDeepFlavB"}}, binning=(7, -0.5, 6.5), x_title="Number of deepjets (tight WP)", discrete_x=True, @@ -296,7 +295,7 @@ def add_variables(config: od.Config) -> None: config.add_variable( name="n_particlenet_loose", expression=lambda events: ak.sum(events.Jet.btagPNetB > particlenet_wps.loose, axis=1), - aux={"inputs": {"Jet.pt", "Jet.btagPNetB"}}, + aux={"inputs": {"Jet.btagPNetB"}}, binning=(7, -0.5, 6.5), x_title="Number of pnet jets (loose WP)", discrete_x=True, @@ -304,7 +303,7 @@ def add_variables(config: od.Config) -> None: config.add_variable( name="n_particlenet_medium", expression=lambda events: ak.sum(events.Jet.btagPNetB > particlenet_wps.medium, axis=1), - aux={"inputs": {"Jet.pt", "Jet.btagPNetB"}}, + aux={"inputs": {"Jet.btagPNetB"}}, binning=(7, -0.5, 6.5), x_title="Number of pnet jets (medium WP)", discrete_x=True, @@ -312,7 +311,7 @@ def add_variables(config: od.Config) -> None: config.add_variable( name="n_particlenet_tight", expression=lambda events: ak.sum(events.Jet.btagPNetB > particlenet_wps.tight, axis=1), - aux={"inputs": {"Jet.pt", "Jet.btagPNetB"}}, + aux={"inputs": {"Jet.btagPNetB"}}, binning=(7, -0.5, 6.5), x_title="Number of pnet jets (tight WP)", discrete_x=True, diff --git a/hbw/selection/common.py b/hbw/selection/common.py index abf90ea..af91ee4 100644 --- a/hbw/selection/common.py +++ b/hbw/selection/common.py @@ -312,3 +312,13 @@ def configure_selector(self: Selector): self.config_inst.x.btag_wp_score = ( self.config_inst.x.btag_working_points[self.config_inst.x.b_tagger][self.config_inst.x.btag_wp] ) + + btag_column = self.config_inst.x.btag_column + self.config_inst.add_variable( + name="n_btag", + expression=lambda events: 
ak.num(events.Jet[btag_column] > self.config_inst.x.btag_wp_score, axis=1), + aux={"inputs": {f"Jet.{btag_column}"}}, + binning=(7, -0.5, 6.5), + x_title=f"Number of b-tagged jets ({btag_column})", + discrete_x=True, + ) diff --git a/hbw/selection/dl_remastered.py b/hbw/selection/dl_remastered.py index 486038d..d3412d2 100644 --- a/hbw/selection/dl_remastered.py +++ b/hbw/selection/dl_remastered.py @@ -8,6 +8,8 @@ from collections import defaultdict +import law + from cmsdb.constants import m_z from columnflow.util import maybe_import, DotDict @@ -211,7 +213,7 @@ def dl_lepton_selection_init(self: Selector) -> None: b_tagger=None, btag_wp=None, n_btag=None, - version=1, + version=law.config.get_expanded("analysis", "dl1_version", 2), ) @timeit def dl1( @@ -294,6 +296,8 @@ def dl1_init(self: Selector) -> None: # by only adding the used selectors in the init configure_selector(self) + # NOTE: since we add these uses so late, init's of these Producers will not run + # e.g. during Plotting tasks self.uses = { pre_selection, vbf_jet_selection, dl_boosted_jet_selection, From 8399405f7f0452bf65d627bc5d4094ad6301cb7b Mon Sep 17 00:00:00 2001 From: Mathis Frahm Date: Fri, 17 Jan 2025 11:32:44 +0100 Subject: [PATCH 29/29] rename IF_DY deferred column --- hbw/production/weights.py | 6 +++--- hbw/util.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/hbw/production/weights.py b/hbw/production/weights.py index fd431f3..8b6f4d4 100644 --- a/hbw/production/weights.py +++ b/hbw/production/weights.py @@ -28,7 +28,7 @@ from hbw.production.normalized_btag import normalized_btag_weights from hbw.production.dataset_normalization import dataset_normalization_weight from hbw.production.trigger import sl_trigger_weights, dl_trigger_weights -from hbw.util import has_tag, IF_DY, IF_TOP +from hbw.util import has_tag, IF_VJETS, IF_TOP np = maybe_import("numpy") @@ -40,8 +40,8 @@ @producer( - uses={IF_TOP(gen_parton_top), IF_DY(gen_v_boson), pu_weight}, - 
produces={IF_TOP(gen_parton_top), IF_DY(gen_v_boson), pu_weight}, + uses={IF_TOP(gen_parton_top), IF_VJETS(gen_v_boson), pu_weight}, + produces={IF_TOP(gen_parton_top), IF_VJETS(gen_v_boson), pu_weight}, mc_only=True, ) def event_weights_to_normalize(self: Producer, events: ak.Array, results: SelectionResult, **kwargs) -> ak.Array: diff --git a/hbw/util.py b/hbw/util.py index 09ee8f6..598f3ec 100644 --- a/hbw/util.py +++ b/hbw/util.py @@ -666,7 +666,7 @@ def IF_MC(self: ArrayFunction.DeferredColumn, func: ArrayFunction) -> Any | set[ @deferred_column -def IF_DY(self: ArrayFunction.DeferredColumn, func: ArrayFunction) -> Any | set[Any]: +def IF_VJETS(self: ArrayFunction.DeferredColumn, func: ArrayFunction) -> Any | set[Any]: if getattr(func, "dataset_inst", None) is None: return self.get()