From 2074153fae326ea7494349cb126232f711f2ec35 Mon Sep 17 00:00:00 2001 From: Mathis Frahm Date: Fri, 20 Dec 2024 15:51:15 +0100 Subject: [PATCH 01/29] minor fixes --- hbw/config/datasets.py | 10 ++++++---- hbw/ml/base.py | 4 +--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/hbw/config/datasets.py b/hbw/config/datasets.py index a919347..6b3eaa6 100644 --- a/hbw/config/datasets.py +++ b/hbw/config/datasets.py @@ -116,10 +116,12 @@ def hbw_dataset_names(config: od.Config, as_list: bool = False) -> DotDict[str: ], "ttv": [ "ttw_wlnu_amcatnlo", - "ttz_zll_m4to50_amcatnlo", - "ttz_zll_m50toinf_amcatnlo", - "ttz_znunu_amcatnlo", - "ttz_zqq_amcatnlo", + *config.x.if_era(run=3, values=[ + "ttz_zll_m4to50_amcatnlo", + "ttz_zll_m50toinf_amcatnlo", + "ttz_znunu_amcatnlo", + "ttz_zqq_amcatnlo", + ]), ], "h": [ *config.x.if_era(run=3, values=[ diff --git a/hbw/ml/base.py b/hbw/ml/base.py index e03c3bd..9a22c72 100644 --- a/hbw/ml/base.py +++ b/hbw/ml/base.py @@ -210,7 +210,7 @@ def setup(self) -> None: expression=f"mlscore.{proc}", null_value=-1, binning=(1000, 0., 1.), - x_title=f"DNN output score {config_inst.get_process(proc).x.ml_label}", + x_title=f"DNN output score {config_inst.get_process(proc).x('ml_label', proc)}", aux={ "rebin": 25, "rebin_config": { @@ -305,11 +305,9 @@ def output(self, task: law.Task) -> dict[str, law.FileSystemTarget]: # declare the main target target = task.target(f"mlmodel_f{task.branch}of{self.folds}", dir=True) - # TODO: cleanup (produce plots, stats in separate task) outp = { "mlmodel": target, "plots": target.child("plots", type="d", optional=True), - # "dummy": target.child("dummy", type="d", optional=True), "checkpoint": target.child("checkpoint", type="d", optional=True), } From 82ecce32acc724ecdbb70e216353d26fae05360b Mon Sep 17 00:00:00 2001 From: Mathis Frahm Date: Fri, 20 Dec 2024 15:51:26 +0100 Subject: [PATCH 02/29] switch MET column in calibration and add FatJet calibration --- hbw/calibration/default.py | 102 
+++++++++++++++++++++++++---- hbw/config/config_run2.py | 130 +++++++++++++++++-------------------- 2 files changed, 148 insertions(+), 84 deletions(-) diff --git a/hbw/calibration/default.py b/hbw/calibration/default.py index 405d3da..331ade8 100644 --- a/hbw/calibration/default.py +++ b/hbw/calibration/default.py @@ -7,11 +7,12 @@ import law from columnflow.calibration import Calibrator, calibrator +from columnflow.calibration.cms.met import met_phi from columnflow.calibration.cms.jets import jec, jer from columnflow.production.cms.seeds import deterministic_seeds from columnflow.util import maybe_import -from hbw.calibration.jet import jec_nominal, bjet_regression +from hbw.calibration.jet import bjet_regression ak = maybe_import("awkward") @@ -19,13 +20,67 @@ logger = law.logger.get_logger(__name__) +@calibrator( + # jec uncertainty_sources: set to None to use config default + jec_sources=["Total"], + version=1, +) +def fatjet(self: Calibrator, events: ak.Array, **kwargs) -> ak.Array: + """ + FatJet calibrator, combining JEC and JER. 
+ """ + if self.task.local_shift != "nominal": + raise Exception("FatJet Calibrator should not be run for shifts other than nominal") + + # apply the fatjet JEC and JER + events = self[self.fatjet_jec_cls](events, **kwargs) + if self.dataset_inst.is_mc: + events = self[self.fatjet_jer_cls](events, **kwargs) + + return events + + +@fatjet.init +def fatjet_init(self: Calibrator) -> None: + if not getattr(self, "dataset_inst", None): + return + + # list of calibrators to apply (in that order) + self.calibrators = [] + + fatjet_jec_cls_dict = { + "jet_name": "FatJet", + "gen_jet_name": "GenJetAK8", + # MET propagation is performed in AK4 jet calibrator; fatjet should never use any MET columns + "propagate_met": False, + "met_name": "DO_NOT_USE", + "raw_met_name": "DO_NOT_USE", + } + fatjet_jer_cls_dict = fatjet_jec_cls_dict.copy() + # NOTE: deterministic FatJet seeds are not yet possible to produce + # fatjet_jer_cls_dict["deterministic_seed_index"] = 0 + + uncertainty_sources = [] if self.dataset_inst.is_data else self.jec_sources + jec_cls_name = f"fatjet_jec{'_nominal' if uncertainty_sources == [] else ''}" + self.fatjet_jec_cls = jec.derive(jec_cls_name, cls_dict={ + **fatjet_jec_cls_dict, + "uncertainty_sources": uncertainty_sources, + }) + self.fatjet_jer_cls = jer.derive("deterministic_fatjet_jer", cls_dict=fatjet_jer_cls_dict) + + self.uses |= {self.fatjet_jec_cls, self.fatjet_jer_cls} + self.produces |= {self.fatjet_jec_cls, self.fatjet_jer_cls} + + @calibrator( uses={deterministic_seeds}, produces={deterministic_seeds}, - skip_jecunc=True, + # jec uncertainty_sources: set to None to use config default + jec_sources=["Total"], bjet_regression=True, + version=1, ) -def base(self: Calibrator, events: ak.Array, **kwargs) -> ak.Array: +def jet_base(self: Calibrator, events: ak.Array, **kwargs) -> ak.Array: events = self[deterministic_seeds](events, **kwargs) logger.info(f"Running calibrators '{[calib.cls_name for calib in self.calibrators]}' (in that order)") @@ 
-35,31 +90,50 @@ def base(self: Calibrator, events: ak.Array, **kwargs) -> ak.Array: return events -@base.init -def base_init(self: Calibrator) -> None: +@jet_base.init +def jet_base_init(self: Calibrator) -> None: if not getattr(self, "dataset_inst", None): return + met_name = self.config_inst.x.met_name + raw_met_name = self.config_inst.x.raw_met_name + # list of calibrators to apply (in that order) self.calibrators = [] - if self.dataset_inst.is_data or self.skip_jecunc: - self.calibrators.append(jec_nominal) - else: - self.calibrators.append(jec) + uncertainty_sources = [] if self.dataset_inst.is_data else self.jec_sources + jec_cls_name = f"ak4_jec{'_nominal' if uncertainty_sources == [] else ''}" + + jec_cls = jec.derive( + jec_cls_name, + cls_dict={ + "uncertainty_sources": uncertainty_sources, + "met_name": met_name, + "raw_met_name": raw_met_name, + }, + ) + self.calibrators.append(jec_cls) if self.bjet_regression: self.calibrators.append(bjet_regression) # run JER only on MC if self.dataset_inst.is_mc: - self.calibrators.append(jer) + # version of jer that uses the first random number from deterministic_seeds + deterministic_jer_cls = jer.derive( + "deterministic_jer", + cls_dict={"deterministic_seed_index": 0, "met_name": met_name}, + ) + self.calibrators.append(deterministic_jer_cls) + + if self.config_inst.x.run == 2: + # derive met_phi calibrator (currently only for run 2) + met_phi_cls = met_phi.derive("met_phi", cls_dict={"met_name": met_name}) + self.calibrators.append(met_phi_cls) self.uses |= set(self.calibrators) self.produces |= set(self.calibrators) -default = base.derive("default", cls_dict=dict(skip_jecunc=False, bjet_regression=False)) -skip_jecunc = base.derive("skip_jecunc", cls_dict=dict(skip_jecunc=True, bjet_regression=False)) -with_b_reg = base.derive("with_b_reg", cls_dict=dict(skip_jecunc=True, bjet_regression=True)) -full = base.derive("full", cls_dict=dict(skip_jecunc=False, bjet_regression=True)) +skip_jecunc = 
jet_base.derive("skip_jecunc", cls_dict=dict(bjet_regression=False)) +with_b_reg = jet_base.derive("with_b_reg", cls_dict=dict(bjet_regression=True)) diff --git a/hbw/config/config_run2.py b/hbw/config/config_run2.py index 7fac547..d5718e1 100644 --- a/hbw/config/config_run2.py +++ b/hbw/config/config_run2.py @@ -192,83 +192,62 @@ def if_era( if cfg.x.run == 2: jerc_campaign = f"Summer19UL{year2}{jerc_postfix}" jet_type = "AK4PFchs" + fatjet_type = "AK8PFchs" elif cfg.x.run == 3: jerc_campaign = f"Summer{year2}{jerc_postfix}_22Sep2023" jet_type = "AK4PFPuppi" + fatjet_type = "AK8PFPuppi" + + jec_uncertainties = [ + # NOTE: there are many more sources available, but it is likely that we only need Total + "Total", + # "CorrelationGroupMPFInSitu", + # "CorrelationGroupIntercalibration", + # "CorrelationGroupbJES", + # "CorrelationGroupFlavor", + # "CorrelationGroupUncorrelated", + ] - cfg.x.jec = DotDict.wrap({"Jet": { - "campaign": jerc_campaign, - "version": {2016: "V7", 2017: "V5", 2018: "V5", 2022: "V2"}[year], - "jet_type": jet_type, - "levels": ["L1FastJet", "L2Relative", "L2L3Residual", "L3Absolute"], - "levels_for_type1_met": ["L1FastJet"], - "uncertainty_sources": [ - # "AbsoluteStat", - # "AbsoluteScale", - # "AbsoluteSample", - # "AbsoluteFlavMap", - # "AbsoluteMPFBias", - # "Fragmentation", - # "SinglePionECAL", - # "SinglePionHCAL", - # "FlavorQCD", - # "TimePtEta", - # "RelativeJEREC1", - # "RelativeJEREC2", - # "RelativeJERHF", - # "RelativePtBB", - # "RelativePtEC1", - # "RelativePtEC2", - # "RelativePtHF", - # "RelativeBal", - # "RelativeSample", - # "RelativeFSR", - # "RelativeStatFSR", - # "RelativeStatEC", - # "RelativeStatHF", - # "PileUpDataMC", - # "PileUpPtRef", - # "PileUpPtBB", - # "PileUpPtEC1", - # "PileUpPtEC2", - # "PileUpPtHF", - # "PileUpMuZero", - # "PileUpEnvelope", - # "SubTotalPileUp", - # "SubTotalRelative", - # "SubTotalPt", - # "SubTotalScale", - # "SubTotalAbsolute", - # "SubTotalMC", - "Total", - # "TotalNoFlavor", - # 
"TotalNoTime", - # "TotalNoFlavorNoTime", - # "FlavorZJet", - # "FlavorPhotonJet", - # "FlavorPureGluon", - # "FlavorPureQuark", - # "FlavorPureCharm", - # "FlavorPureBottom", - # "TimeRunA", - # "TimeRunB", - # "TimeRunC", - # "TimeRunD", - "CorrelationGroupMPFInSitu", - "CorrelationGroupIntercalibration", - "CorrelationGroupbJES", - "CorrelationGroupFlavor", - "CorrelationGroupUncorrelated", - ], - }}) + cfg.x.jec = DotDict.wrap({ + # NOTE: currently, we set the uncertainty_sources in the calibrator itself + "Jet": { + "campaign": jerc_campaign, + "version": {2016: "V7", 2017: "V5", 2018: "V5", 2022: "V2"}[year], + "jet_type": jet_type, + "external_file_key": "jet_jerc", + "levels": ["L1FastJet", "L2Relative", "L2L3Residual", "L3Absolute"], + "levels_for_type1_met": ["L1FastJet"], + "uncertainty_sources": jec_uncertainties, + }, + "FatJet": { + "campaign": jerc_campaign, + "version": {2016: "V7", 2017: "V5", 2018: "V5", 2022: "V2"}[year], + "jet_type": fatjet_type, + "external_file_key": "fat_jet_jerc", + "levels": ["L1FastJet", "L2Relative", "L2L3Residual", "L3Absolute"], + "levels_for_type1_met": ["L1FastJet"], + "uncertainty_sources": jec_uncertainties, + }, + }) # JER # https://twiki.cern.ch/twiki/bin/view/CMS/JetResolution?rev=107 - cfg.x.jer = DotDict.wrap({"Jet": { - "campaign": jerc_campaign, - "version": {2016: "JRV3", 2017: "JRV2", 2018: "JRV2", 2022: "JRV1"}[year], - "jet_type": jet_type, - }}) + cfg.x.jer = DotDict.wrap({ + "Jet": { + "campaign": jerc_campaign, + "version": {2016: "JRV3", 2017: "JRV2", 2018: "JRV2", 2022: "JRV1"}[year], + "jet_type": jet_type, + "external_file_key": "jet_jerc", + }, + "FatJet": { + "campaign": jerc_campaign, + "version": {2016: "JRV3", 2017: "JRV2", 2018: "JRV2", 2022: "JRV1"}[year], + # "jet_type": "fatjet_type", + # JER info only for AK4 jets, stored in AK4 file + "jet_type": jet_type, + "external_file_key": "jet_jerc", + }, + }) # JEC uncertainty sources propagated to btag scale factors # (names derived from 
contents in BTV correctionlib file) @@ -344,6 +323,16 @@ def if_era( }[cfg.x.run] cfg.x.btag_wp = "medium" + # met configuration + cfg.x.met_name = { + 2: "MET", + 3: "PuppiMET", + }[cfg.x.run] + cfg.x.raw_met_name = { + 2: "RawMET", + 3: "RawPuppiMET", + }[cfg.x.run] + # top pt reweighting parameters # https://twiki.cern.ch/twiki/bin/viewauth/CMS/TopPtReweighting#TOP_PAG_corrections_based_on_dat?rev=31 cfg.x.top_pt_reweighting_params = { @@ -603,6 +592,7 @@ def add_external(name, value): add_external("pu_sf", (f"{json_mirror}/POG/LUM/{corr_tag}/puWeights.json.gz", "v1")) # jet energy correction add_external("jet_jerc", (f"{json_mirror}/POG/JME/{corr_tag}/jet_jerc.json.gz", "v1")) + add_external("fat_jet_jerc", (f"{json_mirror}/POG/JME/{corr_tag}/fatJet_jerc.json.gz", "v1")) # jet veto map add_external("jet_veto_map", (f"{json_mirror}/POG/JME/{corr_tag}/jetvetomaps.json.gz", "v1")) # electron scale factors From a35f433f3bb03259a8d91fc42bb55efbb1f1abd4 Mon Sep 17 00:00:00 2001 From: Mathis Frahm Date: Fri, 20 Dec 2024 16:13:06 +0100 Subject: [PATCH 03/29] add correct met_column in downstream modules --- hbw/categorization/categories.py | 10 ++- hbw/config/variables.py | 12 +-- hbw/production/ml_inputs.py | 53 ++++++----- hbw/production/neutrino.py | 7 +- hbw/production/prepare_objects.py | 9 +- hbw/production/resonant_features.py | 3 +- hbw/production/synchronization.py | 5 +- hbw/production/trigger.py | 134 ++++++++++++++++++++++++++++ hbw/scripts/synchronization.py | 5 +- hbw/util.py | 20 +++++ 10 files changed, 214 insertions(+), 44 deletions(-) create mode 100644 hbw/production/trigger.py diff --git a/hbw/categorization/categories.py b/hbw/categorization/categories.py index c5799e3..bfde28f 100644 --- a/hbw/categorization/categories.py +++ b/hbw/categorization/categories.py @@ -13,6 +13,8 @@ from columnflow.selection import SelectionResult from columnflow.columnar_util import has_ak_column, optional_column +from hbw.util import MET_COLUMN + np = 
maybe_import("numpy") ak = maybe_import("awkward") @@ -160,15 +162,15 @@ def catid_fake( return events, mask -@categorizer(uses={"MET.pt"}) +@categorizer(uses={MET_COLUMN("pt")}) def catid_highmet(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: - mask = events.MET.pt >= 20 + mask = events[self.config_inst.x.met_name].pt >= 20 return events, mask -@categorizer(uses={"MET.pt"}) +@categorizer(uses={MET_COLUMN("pt")}) def catid_lowmet(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: - mask = events.MET.pt < 20 + mask = events[self.config_inst.x.met_name].pt < 20 return events, mask # diff --git a/hbw/config/variables.py b/hbw/config/variables.py index 20fbe34..c48992a 100644 --- a/hbw/config/variables.py +++ b/hbw/config/variables.py @@ -365,10 +365,11 @@ def add_variables(config: od.Config) -> None: unit="GeV", x_title="HT", ) + met_name = config.x.met_name config.add_variable( name="lt", expression=lambda events: ( - ak.sum(events.Muon.pt, axis=1) + ak.sum(events.Muon.pt, axis=1) + events.MET.pt + ak.sum(events.Muon.pt, axis=1) + ak.sum(events.Muon.pt, axis=1) + events[met_name].pt ), aux={"inputs": {"Muon.pt", "Electron.pt", "MET.pt"}}, binning=(40, 0, 1200), @@ -646,16 +647,17 @@ def add_variables(config: od.Config) -> None: ) # MET + config.add_variable( name="met_pt", - expression="MET.pt", + expression=f"{met_name}.pt", binning=(40, 0., 400.), unit="GeV", - x_title=r"MET $p_{T}$", + x_title=r"{met_name} $p_{{T}}$".format(met_name=met_name), ) config.add_variable( name="met_phi", - expression="MET.phi", + expression=f"{met_name}.phi", binning=(40, -3.2, 3.2), - x_title=r"MET $\phi$", + x_title=r"{met_name} $\phi$".format(met_name=met_name), ) diff --git a/hbw/production/ml_inputs.py b/hbw/production/ml_inputs.py index c56f4f9..d5612ed 100644 --- a/hbw/production/ml_inputs.py +++ b/hbw/production/ml_inputs.py @@ -17,6 +17,8 @@ from hbw.config.dl.variables import add_dl_ml_variables from 
hbw.config.sl_res.variables import add_sl_res_ml_variables +from hbw.util import MET_COLUMN + ak = maybe_import("awkward") np = maybe_import("numpy") @@ -60,7 +62,7 @@ def check_column_bookkeeping(self: Producer, events: ak.Array) -> None: prepare_objects, "HbbJet.msoftdrop", "{Electron,Muon,Jet,Bjet,Lightjet,VBFJet,HbbJet}.{pt,eta,phi,mass}", - "MET.{pt,phi}", + MET_COLUMN("pt"), MET_COLUMN("phi"), }, # produced columns set in the init function ) @@ -71,6 +73,8 @@ def common_ml_inputs(self: Producer, events: ak.Array, **kwargs) -> ak.Array: # add behavior and define new collections (e.g. Lepton) events = self[prepare_objects](events, **kwargs) + met_name = self.config_inst.x.met_name + # object padding events = set_ak_column(events, "Lightjet", ak.pad_none(events.Lightjet, 2)) events = set_ak_column(events, "Bjet", ak.pad_none(events.Bjet, 2)) @@ -99,12 +103,12 @@ def common_ml_inputs(self: Producer, events: ak.Array, **kwargs) -> ak.Array: events = set_ak_column_f32(events, "mli_lep_pt", events.Lepton[:, 0].pt) events = set_ak_column_f32(events, "mli_lep_eta", events.Lepton[:, 0].eta) - events = set_ak_column_f32(events, "mli_met_pt", events.MET.pt) - events = set_ak_column_f32(events, "mli_met_phi", events.MET.phi) + events = set_ak_column_f32(events, "mli_met_pt", events[met_name].pt) + events = set_ak_column_f32(events, "mli_met_phi", events[met_name].phi) # general events = set_ak_column_f32(events, "mli_ht", ak.sum(events.Jet.pt, axis=1)) - events = set_ak_column_f32(events, "mli_lt", ak.sum(events.Lepton.pt, axis=1) + events.MET.pt) + events = set_ak_column_f32(events, "mli_lt", ak.sum(events.Lepton.pt, axis=1) + events[met_name].pt) events = set_ak_column_f32(events, "mli_n_jet", ak.num(events.Jet.pt, axis=1)) # vbf jet pair features @@ -197,6 +201,7 @@ def sl_ml_inputs(self: Producer, events: ak.Array, **kwargs) -> ak.Array: """ Producer used for ML Training in the SL analysis. 
""" + met_name = self.config_inst.x.met_name # produce common input features events = self[common_ml_inputs](events, **kwargs) @@ -209,9 +214,9 @@ def sl_ml_inputs(self: Producer, events: ak.Array, **kwargs) -> ak.Array: # wlnu features # NOTE: we might want to consider neutrino reconstruction or transverse masses instead when including MET - wlnu = events.MET + events.Lepton[:, 0] + wlnu = events[met_name] + events.Lepton[:, 0] events = set_ak_column_f32(events, "mli_mlnu", wlnu.mass) - events = set_ak_column_f32(events, "mli_dphi_lnu", abs(events.Lepton[:, 0].delta_phi(events.MET))) + events = set_ak_column_f32(events, "mli_dphi_lnu", abs(events.Lepton[:, 0].delta_phi(events[met_name]))) events = set_ak_column_f32(events, "mli_dphi_wl", abs(wlnu.delta_phi(events.Lepton[:, 0]))) # hww features @@ -230,8 +235,8 @@ def sl_ml_inputs(self: Producer, events: ak.Array, **kwargs) -> ak.Array: events = set_ak_column_f32(events, "mli_dphi_bb_jjl", abs(hbb.delta_phi(hww_vis))) events = set_ak_column_f32(events, "mli_dr_bb_jjl", hbb.delta_r(hww_vis)) - events = set_ak_column_f32(events, "mli_dphi_bb_nu", abs(hbb.delta_phi(events.MET))) - events = set_ak_column_f32(events, "mli_dphi_jj_nu", abs(wjj.delta_phi(events.MET))) + events = set_ak_column_f32(events, "mli_dphi_bb_nu", abs(hbb.delta_phi(events[met_name]))) + events = set_ak_column_f32(events, "mli_dphi_jj_nu", abs(wjj.delta_phi(events[met_name]))) events = set_ak_column_f32(events, "mli_dr_bb_l", hbb.delta_r(events.Lepton[:, 0])) events = set_ak_column_f32(events, "mli_dr_jj_l", hbb.delta_r(events.Lepton[:, 0])) @@ -243,8 +248,8 @@ def sl_ml_inputs(self: Producer, events: ak.Array, **kwargs) -> ak.Array: events = set_ak_column_f32(events, "mli_mbbjjl", hh_vis.mass) s_min = ( - 2 * events.MET.pt * ((hh_vis.mass ** 2 + hh_vis.energy ** 2) ** 0.5 - - hh_vis.pt * np.cos(hh_vis.delta_phi(events.MET)) + hh_vis.mass ** 2) + 2 * events[met_name].pt * ((hh_vis.mass ** 2 + hh_vis.energy ** 2) ** 0.5 - + hh_vis.pt * 
np.cos(hh_vis.delta_phi(events[met_name])) + hh_vis.mass ** 2) ) ** 0.5 events = set_ak_column_f32(events, "mli_s_min", s_min) @@ -294,6 +299,7 @@ def dl_ml_inputs(self: Producer, events: ak.Array, **kwargs) -> ak.Array: """ Producer used for ML Training in the DL analysis. """ + met_name = self.config_inst.x.met_name # produce common input features events = self[common_ml_inputs](events, **kwargs) @@ -307,7 +313,7 @@ def dl_ml_inputs(self: Producer, events: ak.Array, **kwargs) -> ak.Array: hll = (events.Lepton[:, 0] + events.Lepton[:, 1]) events = set_ak_column_f32(events, "mli_ll_pt", hll.pt) events = set_ak_column_f32(events, "mli_mll", hll.mass) - events = set_ak_column_f32(events, "mli_mllMET", (hll + events.MET[:]).mass) + events = set_ak_column_f32(events, "mli_mllMET", (hll + events[met_name][:]).mass) events = set_ak_column_f32(events, "mli_dr_ll", events.Lepton[:, 0].delta_r(events.Lepton[:, 1])) events = set_ak_column_f32(events, "mli_dphi_ll", events.Lepton[:, 0].delta_phi(events.Lepton[:, 1])) @@ -319,10 +325,10 @@ def dl_ml_inputs(self: Producer, events: ak.Array, **kwargs) -> ak.Array: # hh system hbb = (events.Bjet[:, 0] + events.Bjet[:, 1]) * 1 # NOTE: *1 so it is a Lorentzvector not a candidate vector - events = set_ak_column_f32(events, "mli_mbbllMET", (hll + hbb + events.MET[:]).mass) - events = set_ak_column_f32(events, "mli_dr_bb_llMET", hbb.delta_r(hll + events.MET[:])) - events = set_ak_column_f32(events, "mli_dphi_bb_nu", abs(hbb.delta_phi(events.MET))) - events = set_ak_column_f32(events, "mli_dphi_bb_llMET", hbb.delta_phi(hll + events.MET[:])) + events = set_ak_column_f32(events, "mli_mbbllMET", (hll + hbb + events[met_name][:]).mass) + events = set_ak_column_f32(events, "mli_dr_bb_llMET", hbb.delta_r(hll + events[met_name][:])) + events = set_ak_column_f32(events, "mli_dphi_bb_nu", abs(hbb.delta_phi(events[met_name]))) + events = set_ak_column_f32(events, "mli_dphi_bb_llMET", hbb.delta_phi(hll + events[met_name][:])) # fill nan/none 
values of all produced columns for col in self.ml_input_columns: @@ -363,6 +369,7 @@ def sl_res_ml_inputs(self: Producer, events: ak.Array, **kwargs) -> ak.Array: """ Producer used for ML Training in the SL analysis. """ + met_name = self.config_inst.x.met_name # produce common input features events = self[common_ml_inputs](events, **kwargs) @@ -382,8 +389,8 @@ def sl_res_ml_inputs(self: Producer, events: ak.Array, **kwargs) -> ak.Array: events = set_ak_column_f32(events, "mli_phi_jj", wjj.phi) # wlnu features - wlnu = events.MET + events.Lepton[:, 0] - events = set_ak_column_f32(events, "mli_dphi_lnu", abs(events.Lepton[:, 0].delta_phi(events.MET))) + wlnu = events[met_name] + events.Lepton[:, 0] + events = set_ak_column_f32(events, "mli_dphi_lnu", abs(events.Lepton[:, 0].delta_phi(events[met_name]))) # NOTE: this column can be set to nan value events = set_ak_column_f32(events, "mli_mlnu", wlnu.mass) events = set_ak_column_f32(events, "mli_pt_lnu", wlnu.pt) @@ -410,10 +417,10 @@ def sl_res_ml_inputs(self: Producer, events: ak.Array, **kwargs) -> ak.Array: events = set_ak_column_f32(events, "mli_dphi_bb_jjl", abs(hbb.delta_phi(hww_vis))) events = set_ak_column_f32(events, "mli_dr_bb_jjl", hbb.delta_r(hww_vis)) - events = set_ak_column_f32(events, "mli_dphi_bb_nu", abs(hbb.delta_phi(events.MET))) - events = set_ak_column_f32(events, "mli_dphi_jj_nu", abs(wjj.delta_phi(events.MET))) - events = set_ak_column_f32(events, "mli_dr_bb_l", hbb.delta_r(events.MET)) - events = set_ak_column_f32(events, "mli_dr_jj_l", hbb.delta_r(events.MET)) + events = set_ak_column_f32(events, "mli_dphi_bb_nu", abs(hbb.delta_phi(events[met_name]))) + events = set_ak_column_f32(events, "mli_dphi_jj_nu", abs(wjj.delta_phi(events[met_name]))) + events = set_ak_column_f32(events, "mli_dr_bb_l", hbb.delta_r(events[met_name])) + events = set_ak_column_f32(events, "mli_dr_jj_l", hbb.delta_r(events[met_name])) # hh features hh = hbb + hww @@ -423,8 +430,8 @@ def sl_res_ml_inputs(self: Producer, 
events: ak.Array, **kwargs) -> ak.Array: events = set_ak_column_f32(events, "mli_mbbjjl", hh_vis.mass) s_min = ( - 2 * events.MET.pt * ((hh_vis.mass ** 2 + hh_vis.energy ** 2) ** 0.5 - - hh_vis.pt * np.cos(hh_vis.delta_phi(events.MET)) + hh_vis.mass ** 2) + 2 * events[met_name].pt * ((hh_vis.mass ** 2 + hh_vis.energy ** 2) ** 0.5 - + hh_vis.pt * np.cos(hh_vis.delta_phi(events[met_name])) + hh_vis.mass ** 2) ) ** 0.5 events = set_ak_column_f32(events, "mli_s_min", s_min) diff --git a/hbw/production/neutrino.py b/hbw/production/neutrino.py index 963f32e..0483100 100644 --- a/hbw/production/neutrino.py +++ b/hbw/production/neutrino.py @@ -39,6 +39,7 @@ def neutrino_reconstruction(self: Producer, events: ak.Array, **kwargs) -> ak.Ar TODO: reference """ + met_name = self.config_inst.x.met_name # add behavior and define new collections (e.g. Lepton) events = self[prepare_objects](events, **kwargs) @@ -49,9 +50,9 @@ def neutrino_reconstruction(self: Producer, events: ak.Array, **kwargs) -> ak.Ar E_l = events.Lepton.E[:, 0] pt_l = events.Lepton.pt[:, 0] pz_l = events.Lepton.pz[:, 0] - pt_nu = events.MET.pt + pt_nu = events[met_name].pt - delta_phi = abs(events.Lepton[:, 0].delta_phi(events.MET)) + delta_phi = abs(events.Lepton[:, 0].delta_phi(events[met_name])) mu = w_mass**2 / 2 + pt_nu * pt_l * np.cos(delta_phi) # Neutrino pz will be calculated as: pz_nu = A +- sqrt(B-C) @@ -86,7 +87,7 @@ def neutrino_reconstruction(self: Producer, events: ak.Array, **kwargs) -> ak.Ar p_nu_1 = np.sqrt(pt_nu**2 + pz_nu**2) eta_nu_1 = np.log((p_nu_1 + pz_nu) / (p_nu_1 - pz_nu)) / 2 # store Neutrino 4 vector components - events[f"Neutrino{i}"] = events.MET + events[f"Neutrino{i}"] = events[met_name] events = set_ak_column_f32(events, f"Neutrino{i}.eta", eta_nu_1) # sanity check: Neutrino pz should be the same as pz_nu within rounding errors diff --git a/hbw/production/prepare_objects.py b/hbw/production/prepare_objects.py index fdc73c5..7c32069 100644 --- a/hbw/production/prepare_objects.py 
+++ b/hbw/production/prepare_objects.py @@ -110,9 +110,10 @@ def prepare_objects(self: Producer, events: ak.Array, results: SelectionResult = events = set_ak_column(events, "Lepton", lepton[ak.argsort(lepton.pt, ascending=False)]) # transform MET into 4-vector - if "MET" in events.fields: - events["MET"] = set_ak_column(events.MET, "mass", 0) - events["MET"] = set_ak_column(events.MET, "eta", 0) - events["MET"] = ak.with_name(events["MET"], "PtEtaPhiMLorentzVector") + met_name = self.config_inst.x.met_name + if met_name in events.fields: + events[met_name] = set_ak_column(events[met_name], "mass", 0) + events[met_name] = set_ak_column(events[met_name], "eta", 0) + events[met_name] = ak.with_name(events[met_name], "PtEtaPhiMLorentzVector") return events diff --git a/hbw/production/resonant_features.py b/hbw/production/resonant_features.py index d8aafe5..b408344 100644 --- a/hbw/production/resonant_features.py +++ b/hbw/production/resonant_features.py @@ -53,6 +53,7 @@ def resonant_features(self: Producer, events: ak.Array, **kwargs) -> ak.Array: # \ # q' # + met_name = self.config_inst.x.met_name # object padding events = set_ak_column(events, "Jet", ak.pad_none(events.Jet, 2)) @@ -64,7 +65,7 @@ def resonant_features(self: Producer, events: ak.Array, **kwargs) -> ak.Array: if "Whadron" not in events.fields: events = set_ak_column(events, "Whadron", events.Lightjet[:, 0] + events.Lightjet[:, 1]) if "Wlepton" not in events.fields: - events = set_ak_column(events, "Wlepton", events.Lepton[:, 0] + events.MET[:]) + events = set_ak_column(events, "Wlepton", events.Lepton[:, 0] + events[met_name][:]) if "Higgs_WW" not in events.fields: events = set_ak_column(events, "Higgs_WW", events.Whadron[:] + events.Wlepton[:]) if "Higgs_bb" not in events.fields: diff --git a/hbw/production/synchronization.py b/hbw/production/synchronization.py index b1aa3a5..4ce0dbe 100644 --- a/hbw/production/synchronization.py +++ b/hbw/production/synchronization.py @@ -19,6 +19,7 @@ def 
get_columns_to_store(config_inst): + met_name = config_inst.x.met_name columns_to_store = { "event_nr": lambda events: events.event, "run_nr": lambda events: events.run, @@ -66,8 +67,8 @@ def get_columns_to_store(config_inst): "ak8jet0_eta": lambda events: events.HbbJet.eta[:, 0], "ak8jet0_phi": lambda events: events.HbbJet.phi[:, 0], "ak8jet0_msoftdrop": lambda events: events.HbbJet.msoftdrop[:, 0], - "met_pt": lambda events: events.MET.pt, - "met_phi": lambda events: events.MET.phi, + "met_pt": lambda events: events[met_name].pt, + "met_phi": lambda events: events[met_name].phi, "mc_weight": lambda events: events.mc_weight, "normalized_pu_weight": lambda events: events.pu_weight, "normalized_btag_weight": lambda events: events.normalized_btag_weight, diff --git a/hbw/production/trigger.py b/hbw/production/trigger.py new file mode 100644 index 0000000..61ea829 --- /dev/null +++ b/hbw/production/trigger.py @@ -0,0 +1,134 @@ +# coding: utf-8 + +""" +Trigger related event weights. +""" + +from __future__ import annotations + +from columnflow.production import Producer, producer +from columnflow.util import maybe_import, InsertableDict +from columnflow.columnar_util import set_ak_column, flat_np_view, layout_ak_array + +np = maybe_import("numpy") +ak = maybe_import("awkward") + + +@producer( + uses={ + "Trigger.pt", "Trigger.eta", + }, + # produces in the init + # only run on mc + mc_only=True, + # function to determine the correction file + get_trigger_file=(lambda self, external_files: external_files.trigger_sf), + # function to determine the trigger weight config + # get_trigger_config=(lambda self: self.config_inst.x.trigger_sf_names), + weight_name="trigger_weight", +) +def trigger_weights( + self: Producer, + events: ak.Array, + trigger_mask: ak.Array | type(Ellipsis) = Ellipsis, + **kwargs, +) -> ak.Array: + """ + Creates trigger weights using the correctionlib. Requires an external file in the config under + ``trigger_sf``: + + .. 
code-block:: python + + cfg.x.external_files = DotDict.wrap({ + "trigger_sf": "/afs/cern.ch/work/m/mrieger/public/mirrors/jsonpog-integration-9ea86c4c/POG/MUO/2017_UL/trigger_z.json.gz", # noqa + }) + + *get_trigger_file* can be adapted in a subclass in case it is stored differently in the external + files. + + The name of the correction set and the year string for the weight evaluation should be given as + an auxiliary entry in the config: + + .. code-block:: python + + cfg.x.trigger_sf_names = ("NUM_TightRelIso_DEN_TightIDandIPCut", "2017_UL") + + *get_trigger_config* can be adapted in a subclass in case it is stored differently in the config. + + Optionally, a *trigger_mask* can be supplied to compute the scale factor weight based only on a + subset of triggers. + """ + # flat absolute eta and pt views + abs_eta = flat_np_view(abs(events.Trigger.eta[trigger_mask]), axis=1) + pt = flat_np_view(events.Trigger.pt[trigger_mask], axis=1) + + variable_map = { + "year": self.year, + "abseta": abs_eta, + "eta": abs_eta, + "pt": pt, + } + + # loop over systematics + for syst, postfix in [ + ("sf", ""), + ("systup", "_up"), + ("systdown", "_down"), + ]: + # get the inputs for this type of variation + variable_map_syst = { + **variable_map, + "scale_factors": "nominal" if syst == "sf" else syst, # syst key in 2022 + "ValType": syst, # syst key in 2017 + } + inputs = [variable_map_syst[inp.name] for inp in self.trigger_sf_corrector.inputs] + sf_flat = self.trigger_sf_corrector(*inputs) + + # add the correct layout to it + sf = layout_ak_array(sf_flat, events.Trigger.pt[trigger_mask]) + + # create the product over all triggers in one event + weight = ak.prod(sf, axis=1, mask_identity=False) + + # store it + events = set_ak_column(events, f"{self.weight_name}{postfix}", weight, value_type=np.float32) + + return events + + +@trigger_weights.requires +def trigger_weights_requires(self: Producer, reqs: dict) -> None: + if "external_files" in reqs: + return + + from 
columnflow.tasks.external import BundleExternalFiles + reqs["external_files"] = BundleExternalFiles.req(self.task) + + +@trigger_weights.setup +def trigger_weights_setup( + self: Producer, + reqs: dict, + inputs: dict, + reader_targets: InsertableDict, +) -> None: + bundle = reqs["external_files"] + + # create the corrector + import correctionlib + correctionlib.highlevel.Correction.__call__ = correctionlib.highlevel.Correction.evaluate + correction_set = correctionlib.CorrectionSet.from_string( + self.get_trigger_file(bundle.files), + ) + corrector_name, self.year = self.get_trigger_config() + self.trigger_sf_corrector = correction_set[corrector_name] + + # check versions + if self.supported_versions and self.trigger_sf_corrector.version not in self.supported_versions: + raise Exception(f"unsuppprted trigger sf corrector version {self.trigger_sf_corrector.version}") + + +@trigger_weights.init +def trigger_weights_init(self: Producer, **kwargs) -> None: + weight_name = self.weight_name + self.produces |= {weight_name, f"{weight_name}_up", f"{weight_name}_down"} diff --git a/hbw/scripts/synchronization.py b/hbw/scripts/synchronization.py index 5dd2a89..5d44e95 100644 --- a/hbw/scripts/synchronization.py +++ b/hbw/scripts/synchronization.py @@ -17,6 +17,7 @@ def get_columns_to_store(config_inst): + met_name = config_inst.x.met_name columns_to_store = { "event_nr": lambda events: events.event, "run_nr": lambda events: events.run, @@ -64,8 +65,8 @@ def get_columns_to_store(config_inst): "ak8jet0_eta": lambda events: events.HbbJet.eta[:, 0], "ak8jet0_phi": lambda events: events.HbbJet.phi[:, 0], "ak8jet0_msoftdrop": lambda events: events.HbbJet.msoftdrop[:, 0], - "met_pt": lambda events: events.MET.pt, - "met_phi": lambda events: events.MET.phi, + "met_pt": lambda events: events[met_name].pt, + "met_phi": lambda events: events[met_name].phi, "mc_weight": lambda events: events.mc_weight, "pu_weight": lambda events: events.pu_weight, "btag_weight": lambda events: 
events.btag_weight, diff --git a/hbw/util.py b/hbw/util.py index 67082e9..aae4bea 100644 --- a/hbw/util.py +++ b/hbw/util.py @@ -615,6 +615,26 @@ def my_producer(self, events): return f"{self.get()}.{btag_column}" +@deferred_column +def MET_COLUMN(self: ArrayFunction.DeferredColumn, func: ArrayFunction) -> Any | set[Any]: + """ + This helper allows adding the correct MET column based on the met_name configuration. + Requires the met_name aux to be set in the config. Example use case: + + .. code-block:: python + + @producer(uses={MET_COLUMN("pt")}) + def my_producer(self, events): + met_pt = events[self.config_inst.x.met_name].pt + ... + return events + """ + met_name = func.config_inst.x("met_name", None) + if not met_name: + raise Exception("the met_name has not been configured") + return f"{met_name}.{self.get()}" + + @deferred_column def IF_DATASET_HAS_LHE_WEIGHTS( self: ArrayFunction.DeferredColumn, From 508dc2fecd09cdfb2c5e76beb00f23e06010ecfb Mon Sep 17 00:00:00 2001 From: Mathis Frahm Date: Fri, 20 Dec 2024 16:23:17 +0100 Subject: [PATCH 04/29] loosen FatJet pt cut --- hbw/selection/jet.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/hbw/selection/jet.py b/hbw/selection/jet.py index b80558a..9141b92 100644 --- a/hbw/selection/jet.py +++ b/hbw/selection/jet.py @@ -294,9 +294,9 @@ def sl_boosted_jet_selection( # baseline fatjet selection fatjet_mask = ( - (events.FatJet.pt > 200) & + (events.FatJet.pt > 170) & (abs(events.FatJet.eta) < 2.4) & - (events.FatJet.jetId == 6) & + (events.FatJet.jetId >= 6) & (ak.all(events.FatJet.metric_table(electron) > 0.8, axis=2)) & (ak.all(events.FatJet.metric_table(muon) > 0.8, axis=2)) ) @@ -305,6 +305,7 @@ def sl_boosted_jet_selection( # H->bb fatjet definition based on Aachen analysis hbbJet_mask = ( fatjet_mask & + (events.FatJet.pt > 200) & (events.FatJet.msoftdrop > 30) & (events.FatJet.msoftdrop < 210) & (events.FatJet.subJetIdx1 >= 0) & From 8df61dc3f117763bf0cf29957520be3033364a26 Mon Sep 
17 00:00:00 2001 From: Mathis Frahm Date: Fri, 20 Dec 2024 16:24:27 +0100 Subject: [PATCH 05/29] redefine order of output paths --- hbw/analysis/create_analysis.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/hbw/analysis/create_analysis.py b/hbw/analysis/create_analysis.py index 5879523..71ac05a 100644 --- a/hbw/analysis/create_analysis.py +++ b/hbw/analysis/create_analysis.py @@ -113,7 +113,7 @@ def analysis_factory(configs: od.UniqueObjectIndex): ) else: raise ValueError( - f"Campaign used for {config_name} is not yet initialized; to initialize, run: \n", + f"Campaign used for {config_name} is not yet initialized; to initialize, run: \n" f"law run {cpn_task.task_family} --config {config_name} --remove-output 0,a,y", ) # cpn_task.run() @@ -176,7 +176,7 @@ def analysis_factory(configs: od.UniqueObjectIndex): software_tasks = ("cf.BundleBashSandbox", "cf.BundleCMSSWSandbox", "cf.BundleSoftware") shareable_analysis_tasks = ("cf.CalibrateEvents", "cf.GetDatasetLFNs") limited_config_shared_tasks = ("cf.CalibrateEvents", "cf.GetDatasetLFNs", "cf.SelectEvents", "cf.ReduceEvents") - skip_new_version_schema = ("cf.CalibrateEvents", "cf.GetDatasetLFNs") + skip_new_version_schema = () known_parts = ( # from cf "analysis", "task_family", "config", "configs", "dataset", "shift", "version", @@ -237,11 +237,13 @@ def reorganize_parts(task, store_parts): "analysis", "calibrator", "calibrators", "calib", "selector", "sel", - "producer", "producers", "prod", + "config", "configs", + "producers", "prod", "ml_data", "ml_model", "ml_models", "weightprod", "inf_model", "task_family", - "config", "dataset", "shift", + "calibrator", "producer", + "shift", "dataset", ] parts_order_end = ["version"] From f621b9cfe62d41a4d6cf373f43afc9e13dc83101 Mon Sep 17 00:00:00 2001 From: Mathis Frahm Date: Fri, 20 Dec 2024 16:55:16 +0100 Subject: [PATCH 06/29] update default calibrators --- hbw/config/defaults_and_groups.py | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/hbw/config/defaults_and_groups.py b/hbw/config/defaults_and_groups.py index 4bb544b..bb59762 100644 --- a/hbw/config/defaults_and_groups.py +++ b/hbw/config/defaults_and_groups.py @@ -8,7 +8,7 @@ def default_calibrator(container): - return "with_b_reg" + return ["with_b_reg", "fatjet"] def default_selector(container): From 252938631010cbb0fb11ae4a1c4cb5e14698edd9 Mon Sep 17 00:00:00 2001 From: Mathis Frahm Date: Fri, 20 Dec 2024 16:55:35 +0100 Subject: [PATCH 07/29] add nonisolated electron triggers --- hbw/config/trigger.py | 65 ++++++++++++++++++++++++++++++++++++------- 1 file changed, 55 insertions(+), 10 deletions(-) diff --git a/hbw/config/trigger.py b/hbw/config/trigger.py index 4ebe5b2..65e7a76 100644 --- a/hbw/config/trigger.py +++ b/hbw/config/trigger.py @@ -249,6 +249,11 @@ def add_triggers(config: od.Config) -> od.UniqueObjectIndex[Trigger]: Electron Trigger: https://twiki.cern.ch/twiki/bin/view/CMS/EgHLTRunIIISummary Muon Trigger: https://twiki.cern.ch/twiki/bin/view/CMS/MuonHLT2022 + trigger_bits are obtained from the TrigObj.filterBits docstring, by running some task and + starting an embed shell, e.g. via: + law run cf.SelectEvents --selector check_columns + events.TrigObj.filterBits? + Auxiliary data in use: - "channels": list of channels during selection that the trigger applies to, e.g. 
["e", "ee", "emu", "mue"] (TODO: use this in SL aswell) @@ -264,7 +269,7 @@ def add_triggers(config: od.Config) -> od.UniqueObjectIndex[Trigger]: legs=[ TriggerLeg( pdg_id=13, - min_pt=25.0, + min_pt=24.0, # filter names: # hltL3crIsoL1sSingleMu22L1f0L2f10QL3f24QL3trkIsoFiltered0p08 (1mu + Iso) trigger_bits=2**1 + 2**3, # Iso (bit 1) + 1mu (bit 3) @@ -282,14 +287,14 @@ def add_triggers(config: od.Config) -> od.UniqueObjectIndex[Trigger]: legs=[ TriggerLeg( pdg_id=13, - min_pt=18.0, + min_pt=17.0, # filter names: # TODO trigger_bits=2**0 + 2**4, # TrkIsoVVL (bit 0) + 2mu (bit 4) ), TriggerLeg( pdg_id=13, - min_pt=9.0, + min_pt=8.0, # filter names: # TODO trigger_bits=2**0 + 2**4, # TrkIsoVVL (bit 0) + 2mu (bit 4) + DZ_Mass3p8 (bit ?) @@ -307,7 +312,7 @@ def add_triggers(config: od.Config) -> od.UniqueObjectIndex[Trigger]: legs=[ TriggerLeg( pdg_id=11, - min_pt=31.0, + min_pt=30.0, # filter names: # hltEle30WPTightGsfTrackIsoFilter trigger_bits=2**1, # 1e (WPTight) (bit 1) @@ -326,14 +331,14 @@ def add_triggers(config: od.Config) -> od.UniqueObjectIndex[Trigger]: legs=[ TriggerLeg( pdg_id=11, - min_pt=24.0, + min_pt=23.0, # filter names: # TODO trigger_bits=2**4 + 2**0, # 2e (bit 4) + CaloIdL_TrackIdL_IsoVL (bit 0) ), TriggerLeg( pdg_id=11, - min_pt=13.0, + min_pt=12.0, # filter names: # TODO trigger_bits=2**4 + 2**0, # 2e (bit 4) + CaloIdL_TrackIdL_IsoVL (bit 0) @@ -345,20 +350,58 @@ def add_triggers(config: od.Config) -> od.UniqueObjectIndex[Trigger]: }, tags={"di_trigger", "di_e"}, ) + single_e50_noniso = Trigger( + name="HLT_Ele50_CaloIdVT_GsfTrkIdT_PFJet165", + id=203, + legs=[ + TriggerLeg( + pdg_id=11, + min_pt=50.0, + # filter names: TODO + trigger_bits=2**11 + 2**12, # CaloIdVT_GsfTrkIdT (bit 11) + PFJet (bit 12) + ), + ], + aux={ + "channels": ["e", "ee", "emu", "mue", "mixed"], + "data_stream": "data_egamma" if config.x.run == 3 else "data_e", + } + ) + di_e33_noniso = Trigger( + name="HLT_DoubleEle33_CaloIdL_MW", + id=204, + legs=[ + TriggerLeg( + 
pdg_id=11, + min_pt=33.0, + # filter names: TODO + trigger_bits=2**4, # 2e (bit 4) + CaloIdL_MW (no bit?) + ), + TriggerLeg( + pdg_id=11, + min_pt=33.0, + # filter names: TODO + trigger_bits=2**4, # 2e (bit 4) + CaloIdL_MW (no bit?) + ), + ], + aux={ + "channels": ["ee"], + "data_stream": "data_egamma" if config.x.run == 3 else "data_e", + } + ) mixed_mue = Trigger( name="HLT_Mu23_TrkIsoVVL_Ele12_CaloIdL_TrackIdL_IsoVL", id=301, legs=[ TriggerLeg( pdg_id=13, - min_pt=24.0, + min_pt=23.0, # filter names: # TODO trigger_bits=2**5 + 2**0, # 1e-1mu (bit 5) + TrkIsoVVL (bit 0) ), TriggerLeg( pdg_id=11, - min_pt=13.0, + min_pt=12.0, # filter names: # TODO trigger_bits=2**5 + 2**0, # 1mu-1e (bit 5) + CaloIdL_TrackIdL_IsoVL (bit 0) @@ -376,14 +419,14 @@ def add_triggers(config: od.Config) -> od.UniqueObjectIndex[Trigger]: legs=[ TriggerLeg( pdg_id=13, - min_pt=9.0, + min_pt=8.0, # filter names: # TODO trigger_bits=2**5 + 2**0, # 1mu-1e (bit 5) + TrkIsoVVL (bit 0) ), TriggerLeg( pdg_id=11, - min_pt=24.0, + min_pt=23.0, # filter names: # TODO trigger_bits=2**5 + 2**0, # 1mu-1e (bit 5) + CaloIdL_TrackIdL_IsoVL (bit 0) @@ -400,8 +443,10 @@ def add_triggers(config: od.Config) -> od.UniqueObjectIndex[Trigger]: if config.has_tag("is_dl"): config.x.triggers = od.UniqueObjectIndex(Trigger, [ single_e, + single_e50_noniso, single_mu, di_e, + di_e33_noniso, di_mu, mixed_mue, mixed_emu, From 5932aaa61b80e8c6e737a3123693c84d6f061b5f Mon Sep 17 00:00:00 2001 From: Mathis Frahm Date: Fri, 20 Dec 2024 16:56:10 +0100 Subject: [PATCH 08/29] update columnflow --- hbw/config/trigger.py | 4 ++-- modules/columnflow | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/hbw/config/trigger.py b/hbw/config/trigger.py index 65e7a76..2b906ab 100644 --- a/hbw/config/trigger.py +++ b/hbw/config/trigger.py @@ -364,7 +364,7 @@ def add_triggers(config: od.Config) -> od.UniqueObjectIndex[Trigger]: aux={ "channels": ["e", "ee", "emu", "mue", "mixed"], "data_stream": "data_egamma" if 
config.x.run == 3 else "data_e", - } + }, ) di_e33_noniso = Trigger( name="HLT_DoubleEle33_CaloIdL_MW", @@ -386,7 +386,7 @@ def add_triggers(config: od.Config) -> od.UniqueObjectIndex[Trigger]: aux={ "channels": ["ee"], "data_stream": "data_egamma" if config.x.run == 3 else "data_e", - } + }, ) mixed_mue = Trigger( name="HLT_Mu23_TrkIsoVVL_Ele12_CaloIdL_TrackIdL_IsoVL", diff --git a/modules/columnflow b/modules/columnflow index 312bd05..c4139ca 160000 --- a/modules/columnflow +++ b/modules/columnflow @@ -1 +1 @@ -Subproject commit 312bd05015de0f6edfea656353cd60ff02d8c608 +Subproject commit c4139cabf44246492b4afa8217750612d401d3d3 From 28738c131cb538f8ce4b3b61fa1ef1bae481339a Mon Sep 17 00:00:00 2001 From: Mathis Frahm Date: Fri, 20 Dec 2024 17:17:14 +0100 Subject: [PATCH 09/29] use uhh campaigns per default --- hbw/tasks/campaigns.py | 19 +++---------------- 1 file changed, 3 insertions(+), 16 deletions(-) diff --git a/hbw/tasks/campaigns.py b/hbw/tasks/campaigns.py index 71cc3d1..b31d5a9 100644 --- a/hbw/tasks/campaigns.py +++ b/hbw/tasks/campaigns.py @@ -23,14 +23,14 @@ "cmsdb.campaigns.run2_2017_nano_v9": "campaign_run2_2017_nano_v9", }, "c22pre": { + "cmsdb.campaigns.run3_2022_preEE_nano_uhh_v12": "campaign_run3_2022_preEE_nano_uhh_v12", "cmsdb.campaigns.run3_2022_preEE_nano_v12": "campaign_run3_2022_preEE_nano_v12", "cmsdb.campaigns.run3_2022_preEE_nano_v13": "campaign_run3_2022_preEE_nano_v13", - "cmsdb.campaigns.run3_2022_preEE_nano_uhh_v12": "campaign_run3_2022_preEE_nano_uhh_v12", }, "c22post": { + "cmsdb.campaigns.run3_2022_postEE_nano_uhh_v12": "campaign_run3_2022_postEE_nano_uhh_v12", "cmsdb.campaigns.run3_2022_postEE_nano_v12": "campaign_run3_2022_postEE_nano_v12", "cmsdb.campaigns.run3_2022_postEE_nano_v13": "campaign_run3_2022_postEE_nano_v13", - "cmsdb.campaigns.run3_2022_postEE_nano_uhh_v12": "campaign_run3_2022_postEE_nano_uhh_v12", }, } @@ -69,25 +69,12 @@ def campaign_insts(self): for mod, campaign in self.campaigns.items() ] - 
dataset_from_uhh_identifier = { - # TODO: use DY from uhh campaign - # "dy_m10to50_amcatnlo", - # "dy_m4to10_amcatnlo", - "ttw_", - "ttz_", - } - def get_dataset_prio(self, dataset_name, campaign): """ If dataset should be overwritten from this campaign, return True. Otherwise, return False. + (not currently used, but could be used to prioritize e.g. the central tt dataset (less stats)) """ - if "uhh" in campaign.name and any( - dataset_identifier in dataset_name - for dataset_identifier in self.dataset_from_uhh_identifier - ): - return True - return False def output(self): From 2e529678bcbd61aaf578c2c3eba2e94e99e28e04 Mon Sep 17 00:00:00 2001 From: Mathis Frahm Date: Fri, 20 Dec 2024 19:24:51 +0100 Subject: [PATCH 10/29] prioritize central data datasets --- hbw/tasks/campaigns.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/hbw/tasks/campaigns.py b/hbw/tasks/campaigns.py index b31d5a9..f000a45 100644 --- a/hbw/tasks/campaigns.py +++ b/hbw/tasks/campaigns.py @@ -75,6 +75,11 @@ def get_dataset_prio(self, dataset_name, campaign): Otherwise, return False. (not currently used, but could be used to prioritize e.g. 
the central tt dataset (less stats)) """ + if "v12" in campaign.name and "uhh" not in campaign.name: + # Take data from the central v12 campaign + if "data" in dataset_name: + return True + return False def output(self): From 4eb31b7de6b3dabfdb60d7abee8a0395a84b39e6 Mon Sep 17 00:00:00 2001 From: Mathis Frahm Date: Fri, 20 Dec 2024 19:25:09 +0100 Subject: [PATCH 11/29] reduce init overhead --- hbw/calibration/default.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/hbw/calibration/default.py b/hbw/calibration/default.py index 331ade8..928de15 100644 --- a/hbw/calibration/default.py +++ b/hbw/calibration/default.py @@ -42,6 +42,10 @@ def fatjet(self: Calibrator, events: ak.Array, **kwargs) -> ak.Array: @fatjet.init def fatjet_init(self: Calibrator) -> None: + if not self.task or self.task.task_family != "cf.CalibrateEvents": + # init only required for task itself + return + if not getattr(self, "dataset_inst", None): return @@ -92,6 +96,10 @@ def jet_base(self: Calibrator, events: ak.Array, **kwargs) -> ak.Array: @jet_base.init def jet_base_init(self: Calibrator) -> None: + if not self.task or self.task.task_family != "cf.CalibrateEvents": + # init only required for task itself + return + if not getattr(self, "dataset_inst", None): return @@ -122,7 +130,10 @@ def jet_base_init(self: Calibrator) -> None: # version of jer that uses the first random number from deterministic_seeds deterministic_jer_cls = jer.derive( "deterministic_jer", - cls_dict={"deterministic_seed_index": 0, "met_name": met_name}, + cls_dict={ + "deterministic_seed_index": 0, + "met_name": met_name, + }, ) self.calibrators.append(deterministic_jer_cls) From 11724049e82ece05204d416d6750b4b385de7f31 Mon Sep 17 00:00:00 2001 From: Mathis Frahm Date: Tue, 7 Jan 2025 08:59:12 +0100 Subject: [PATCH 12/29] minor fixes --- hbw/ml/base.py | 1 + hbw/production/neutrino.py | 2 +- hbw/selection/dl_remastered.py | 7 ++++++- 3 files changed, 8 insertions(+), 2 deletions(-) diff 
--git a/hbw/ml/base.py b/hbw/ml/base.py index 9a22c72..dfb134a 100644 --- a/hbw/ml/base.py +++ b/hbw/ml/base.py @@ -110,6 +110,7 @@ def __init__( for param in self.settings_parameters: # overwrite the default value with the value from the parameters + # TODO: this is quite dangerous, as it overwrites a class attribute instead of an instance attribute setattr(self, param, self.parameters.get(param, getattr(self, param))) # cast the ml parameters to the correct types if necessary diff --git a/hbw/production/neutrino.py b/hbw/production/neutrino.py index 0483100..a89dd4f 100644 --- a/hbw/production/neutrino.py +++ b/hbw/production/neutrino.py @@ -167,7 +167,7 @@ def top_reconstruction(self: Producer, events: ak.Array, **kwargs) -> ak.Array: # replace nan, none, and inf values with EMPTY_FLOAT col = route.apply(events) col = ak.fill_none(ak.nan_to_none(route.apply(events)), EMPTY_FLOAT) - col = ak.where(np.isinf(col), EMPTY_FLOAT, col) + col = ak.where(~np.isfinite(col), EMPTY_FLOAT, col) events = set_ak_column(events, route.string_column, col) diff --git a/hbw/selection/dl_remastered.py b/hbw/selection/dl_remastered.py index 89afc61..486038d 100644 --- a/hbw/selection/dl_remastered.py +++ b/hbw/selection/dl_remastered.py @@ -106,7 +106,12 @@ def dl_lepton_selection( dilepton = ak.pad_none(lepton, 2) dilepton = dilepton[:, 0] + dilepton[:, 1] - events = set_ak_column(events, "mll", ak.fill_none(dilepton.mass, EMPTY_FLOAT), value_type=np.float32) + events = set_ak_column( + events, + "mll", + ak.fill_none(ak.nan_to_none(dilepton.mass), EMPTY_FLOAT), + value_type=np.float32, + ) lepton_results.steps["DiLeptonMass81"] = ak.fill_none(dilepton.mass <= m_z.nominal - 10, False) # lepton channel masks lepton_results.steps["Lep_mm"] = mm_mask = ( From 5dada6849f1c53d0534dc91c3a05a4257d542fd7 Mon Sep 17 00:00:00 2001 From: Mathis Frahm Date: Tue, 7 Jan 2025 09:00:09 +0100 Subject: [PATCH 13/29] fix infinite values in MET during calibration --- hbw/calibration/default.py | 26 
+++++++++++++++++++++++++- hbw/selection/stats.py | 8 ++++++-- hbw/util.py | 11 +++++++++++ 3 files changed, 42 insertions(+), 3 deletions(-) diff --git a/hbw/calibration/default.py b/hbw/calibration/default.py index 928de15..1a0e793 100644 --- a/hbw/calibration/default.py +++ b/hbw/calibration/default.py @@ -10,11 +10,13 @@ from columnflow.calibration.cms.met import met_phi from columnflow.calibration.cms.jets import jec, jer from columnflow.production.cms.seeds import deterministic_seeds -from columnflow.util import maybe_import +from columnflow.util import maybe_import, try_float +from columnflow.columnar_util import set_ak_column, EMPTY_FLOAT from hbw.calibration.jet import bjet_regression ak = maybe_import("awkward") +np = maybe_import("numpy") logger = law.logger.get_logger(__name__) @@ -24,6 +26,9 @@ # jec uncertainty_sources: set to None to use config default jec_sources=["Total"], version=1, + # add dummy produces such that this calibrator will always be run when requested + # (temporary workaround until init's are only run as often as necessary) + produces={"FatJet.pt"}, ) def fatjet(self: Calibrator, events: ak.Array, **kwargs) -> ak.Array: """ @@ -87,10 +92,29 @@ def fatjet_init(self: Calibrator) -> None: def jet_base(self: Calibrator, events: ak.Array, **kwargs) -> ak.Array: events = self[deterministic_seeds](events, **kwargs) + # keep a copy of non-propagated MET to replace infinite values + pre_calib_met = events[self.config_inst.x.met_name] + logger.info(f"Running calibrators '{[calib.cls_name for calib in self.calibrators]}' (in that order)") for calibrator_inst in self.calibrators: events = self[calibrator_inst](events, **kwargs) + # workaround for infinite values in MET pt/phi + for route in self.produced_columns: + col = route.string_column + m = ~np.isfinite(route.apply(events)) + if ak.any(m): + # replace infinite values + replace_value = EMPTY_FLOAT + if self.config_inst.x.met_name in col: + # use pre-calibrated MET to replace infinite values 
of MET pt/phi + replace_value = pre_calib_met[col.split(".")[-1].split("_")[0]] + logger.info( + f"Found infinite values in {col}; Values will be replaced with " + f"{replace_value if try_float(replace_value) else replace_value[m]}" + ) + events = set_ak_column(events, col, ak.where(m, replace_value, route.apply(events))) + return events diff --git a/hbw/selection/stats.py b/hbw/selection/stats.py index af31f37..17d96cf 100644 --- a/hbw/selection/stats.py +++ b/hbw/selection/stats.py @@ -11,7 +11,7 @@ from columnflow.columnar_util import optional_column as optional from columnflow.util import maybe_import -from hbw.util import has_tag +from hbw.util import has_tag, RAW_MET_COLUMN np = maybe_import("numpy") ak = maybe_import("awkward") @@ -50,7 +50,7 @@ def hbw_selection_step_stats( @selector( - uses={increment_stats, event_weights_to_normalize}, + uses={increment_stats, event_weights_to_normalize, RAW_MET_COLUMN("pt")}, ) def hbw_increment_stats( self: Selector, @@ -79,6 +79,10 @@ def hbw_increment_stats( weight_map["num_negative_weights"] = (events.mc_weight < 0) weight_map["num_pu_0"] = (events.pu_weight == 0) weight_map["num_pu_100"] = (events.pu_weight >= 100) + + raw_puppi_met = events[self.config_inst.x.raw_met_name] + weight_map["num_raw_met_isinf"] = (~np.isfinite(raw_puppi_met.pt)) + weight_map["num_raw_met_isinf_selected"] = (~np.isfinite(raw_puppi_met.pt) & event_mask) # "sum" operations weight_map["sum_mc_weight"] = events.mc_weight # weights of all events weight_map["sum_mc_weight_selected"] = (events.mc_weight, event_mask) # weights of selected events diff --git a/hbw/util.py b/hbw/util.py index aae4bea..261ed3f 100644 --- a/hbw/util.py +++ b/hbw/util.py @@ -635,6 +635,17 @@ def my_producer(self, events): return f"{met_name}.{self.get()}" +@deferred_column +def RAW_MET_COLUMN(self: ArrayFunction.DeferredColumn, func: ArrayFunction) -> Any | set[Any]: + """ + Similar to MET_COLUMN, see MET_COLUMN for more information. 
+ """ + raw_met_name = func.config_inst.x("raw_met_name", None) + if not raw_met_name: + raise Exception("the raw_met_name has not been configured") + return f"{raw_met_name}.{self.get()}" + + @deferred_column def IF_DATASET_HAS_LHE_WEIGHTS( self: ArrayFunction.DeferredColumn, From 6f43549a3d78a4ed5669aa2f2d78b36b845db532 Mon Sep 17 00:00:00 2001 From: Mathis Frahm Date: Tue, 7 Jan 2025 09:47:09 +0100 Subject: [PATCH 14/29] remove broken files from uhh 22postEE campaign --- hbw/config/datasets.py | 3 +++ hbw/tasks/campaigns.py | 33 +++++++++++++++++++++++++++++---- 2 files changed, 32 insertions(+), 4 deletions(-) diff --git a/hbw/config/datasets.py b/hbw/config/datasets.py index 6b3eaa6..300fda3 100644 --- a/hbw/config/datasets.py +++ b/hbw/config/datasets.py @@ -570,10 +570,13 @@ def get_dataset_lfns_uhh( fs=f"wlcg_fs_{cpn_name}", ) + broken_files = dataset_inst[shift_inst.name].get_aux("broken_files", []) + print(broken_files) # loop though files and interpret paths as lfns return [ lfn_base.child(basename, type="f").path for basename in lfn_base.listdir(pattern="*.root") + if lfn_base.child(basename, type="f").path not in broken_files ] if any("uhh" in cpn_name for cpn_name in cfg.campaign.x("campaigns", [])): diff --git a/hbw/tasks/campaigns.py b/hbw/tasks/campaigns.py index f000a45..ab3a2d7 100644 --- a/hbw/tasks/campaigns.py +++ b/hbw/tasks/campaigns.py @@ -62,12 +62,37 @@ def campaigns(self): raise ValueError(f"Unknown config {self.config}") return campaign_map[self.config] + def modify_campaign(self, campaign_inst): + """ + Modify the campaign instance, e.g. by adding datasets or changing dataset properties. 
+ """ + if campaign_inst.name == "run3_2022_postEE_nano_uhh_v12": + # remove broken files + dy_m10to50_nominal = campaign_inst.get_dataset("dy_m10to50_amcatnlo").info["nominal"] + dy_m10to50_nominal.x.broken_files = [ + # missing scale weights + "/store/mc/Run3Summer22EEMiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-10to50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_postEE_v6-v2/0/4B7063C8-D7B7-A45F-0B56-817AECEAFB43.root", # noqa: E501 + ] + dy_m10to50_nominal.n_files = dy_m10to50_nominal.n_files - 1 + dy_m10to50_nominal.n_events = dy_m10to50_nominal.n_events - 1651814 + + dy_m50toinf_nominal = campaign_inst.get_dataset("dy_m50toinf_amcatnlo").info["nominal"] + dy_m50toinf_nominal.x.broken_files = [ + # broken file + "/store/mc/Run3Summer22EEMiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_postEE_v6_ext1-v1/0/3FE6B8C0-4234-4EE4-5BEA-E232539E0D85.root", # noqa: E501 + ] + dy_m50toinf_nominal.n_files = dy_m50toinf_nominal.n_files - 1 + dy_m50toinf_nominal.n_events = -1 + # dy_m50toinf.x.n_events = dy_m50toinf.x.n_events - ???? 
+ @cached_property def campaign_insts(self): - return [ - getattr(importlib.import_module(mod), campaign).copy() - for mod, campaign in self.campaigns.items() - ] + campaign_insts = [] + for mod, campaign in self.campaigns.items(): + campaign_inst = getattr(importlib.import_module(mod), campaign).copy() + self.modify_campaign(campaign_inst) + campaign_insts.append(campaign_inst) + return campaign_insts def get_dataset_prio(self, dataset_name, campaign): """ From e787673cbd024a7b4e83099723937fc8ecc0d78d Mon Sep 17 00:00:00 2001 From: Mathis Frahm Date: Thu, 9 Jan 2025 10:39:07 +0100 Subject: [PATCH 15/29] fix non-unique processes issue and add broken files --- hbw/analysis/create_analysis.py | 21 ++--- hbw/analysis/processes.py | 83 ++++++++++++++++ hbw/config/config_run2.py | 17 ++++ hbw/config/datasets.py | 8 +- hbw/config/processes.py | 162 +++++--------------------------- hbw/tasks/campaigns.py | 128 +++++++++++++++++++++---- 6 files changed, 245 insertions(+), 174 deletions(-) create mode 100644 hbw/analysis/processes.py diff --git a/hbw/analysis/create_analysis.py b/hbw/analysis/create_analysis.py index 71ac05a..21e5aba 100644 --- a/hbw/analysis/create_analysis.py +++ b/hbw/analysis/create_analysis.py @@ -141,34 +141,31 @@ def analysis_factory(configs: od.UniqueObjectIndex): # 2017 add_lazy_config( - # { - # "cmsdb.campaigns.run2_2017_nano_v9": "campaign_run2_2017_nano_v9", - # }, "c17", 1700, ) # 2022 preEE add_lazy_config( - # { - # "cmsdb.campaigns.run3_2022_preEE_nano_v12": "campaign_run3_2022_preEE_nano_v12", - # "cmsdb.campaigns.run3_2022_preEE_nano_v13": "campaign_run3_2022_preEE_nano_v13", - # }, "c22pre", 2200, ) # 2022 postEE add_lazy_config( - # { - # "cmsdb.campaigns.run3_2022_postEE_nano_v12": "campaign_run3_2022_postEE_nano_v12", - # "cmsdb.campaigns.run3_2022_postEE_nano_v13": "campaign_run3_2022_postEE_nano_v13", - # "cmsdb.campaigns.run3_2022_postEE_nano_uhh_v12": "campaign_run3_2022_postEE_nano_uhh_v12", - # }, "c22post", 2210, ) + 
add_lazy_config( + "c22pre_das", + 2201, + ) + add_lazy_config( + "c22post_das", + 2211, + ) + # # modify store_parts # diff --git a/hbw/analysis/processes.py b/hbw/analysis/processes.py new file mode 100644 index 0000000..2ece609 --- /dev/null +++ b/hbw/analysis/processes.py @@ -0,0 +1,83 @@ +# coding: utf-8 + +""" +Creation and modification of processes in the HH -> bbWW analysis. +NOTE: it is crucial to modify processes before the campaign is created. Otherwise, +the changes will not be reflected in the campaign and there will be inconsistencies. +""" + +# import order as od + + +from hbw.config.processes import create_parent_process +from hbw.config.styling import color_palette +from cmsdb.util import add_decay_process + + +def modify_cmsdb_processes(): + from cmsdb.processes import ( + qcd_mu, qcd_em, qcd_bctoe, + tt, ttv, st, w_lnu, vv, h, + dy, dy_m4to10, dy_m10to50, dy_m50toinf, dy_m50toinf_0j, dy_m50toinf_1j, dy_m50toinf_2j, + ) + + qcd_mu.label = "QCD Muon enriched" + qcd_ele = create_parent_process( + [qcd_em, qcd_bctoe], + name="qcd_ele", + id=31199, + label="QCD Electron enriched", + ) + + v_lep = create_parent_process( + [w_lnu, dy], + name="v_lep", + id=64575573, # random number + label="W and DY", + ) + + t_bkg = create_parent_process( + [st, tt, ttv], + name="t_bkg", + id=97842611, # random number + label="tt + st", + ) + + background = create_parent_process( # noqa: F841 + [t_bkg, v_lep, vv, w_lnu, h, qcd_ele, qcd_mu], + name="background", + id=99999, + label="background", + color=color_palette["blue"], + ) + + decay_map = { + "lf": { + "name": "lf", + "id": 50, + "label": "(lf)", + "br": -1, + }, + "hf": { + "name": "hf", + "id": 60, + "label": "(hf)", + "br": -1, + }, + } + + for dy_proc_inst in ( + dy, dy_m4to10, dy_m10to50, dy_m50toinf, dy_m50toinf_0j, dy_m50toinf_1j, dy_m50toinf_2j, + ): + add_production_mode_parent = dy_proc_inst.name != "dy" + for flavour in ("hf", "lf"): + # the 'add_decay_process' function helps us to create all 
parent-daughter relationships + add_decay_process( + dy_proc_inst, + decay_map[flavour], + add_production_mode_parent=add_production_mode_parent, + name_func=lambda parent_name, decay_name: f"{parent_name}_{decay_name}", + label_func=lambda parent_label, decay_label: f"{parent_label} {decay_label}", + xsecs=None, + aux={"flavour": flavour}, + ) diff --git a/hbw/config/config_run2.py b/hbw/config/config_run2.py index d5718e1..d89932a 100644 --- a/hbw/config/config_run2.py +++ b/hbw/config/config_run2.py @@ -169,6 +169,17 @@ def if_era( "lumi_13TeV_2022": 0.01j, "lumi_13TeV_correlated": 0.006j, }) + elif year == 2023: + if campaign.has_tag("preBPix"): + cfg.x.luminosity = Number(17.794, { + "lumi_13TeV_2023": 0.01j, + "lumi_13TeV_correlated": 0.006j, + }) + elif campaign.has_tag("postBPix"): + cfg.x.luminosity = Number(9.451, { + "lumi_13TeV_2023": 0.01j, + "lumi_13TeV_correlated": 0.006j, + }) else: raise NotImplementedError(f"Luminosity for year {year} is not defined.") @@ -738,4 +749,10 @@ def add_external(name, value): from hbw.config.sl_res import configure_sl_res configure_sl_res(cfg) + # sanity check: sometimes the process is not the same as the one in the dataset + p1 = cfg.get_process("dy_m50toinf") + p2 = campaign.get_dataset("dy_m50toinf_amcatnlo").processes.get_first() + if p1 != p2: + raise Exception(f"Processes are not the same: {repr(p1)} != {repr(p2)}") + return cfg diff --git a/hbw/config/datasets.py b/hbw/config/datasets.py index 300fda3..a4041f1 100644 --- a/hbw/config/datasets.py +++ b/hbw/config/datasets.py @@ -571,13 +571,13 @@ def get_dataset_lfns_uhh( ) broken_files = dataset_inst[shift_inst.name].get_aux("broken_files", []) - print(broken_files) + # loop though files and interpret paths as lfns - return [ + lfns = [ lfn_base.child(basename, type="f").path for basename in lfn_base.listdir(pattern="*.root") - if lfn_base.child(basename, type="f").path not in broken_files ] + return [lfn for lfn in lfns if lfn not in broken_files] if any("uhh" 
in cpn_name for cpn_name in cfg.campaign.x("campaigns", [])): # define the lfn retrieval function @@ -586,6 +586,6 @@ def get_dataset_lfns_uhh( # define custom remote fs's to look at cfg.x.get_dataset_lfns_remote_fs = lambda dataset_inst: ( None if "uhh" not in dataset_inst.x("campaign", "") else [ - f"local_fs_{dataset_inst.x.campaign}", f"wlcg_fs_{dataset_inst.x.campaign}", + f"local_fs_{dataset_inst.x.campaign}", ]) diff --git a/hbw/config/processes.py b/hbw/config/processes.py index ac624a6..4659d86 100644 --- a/hbw/config/processes.py +++ b/hbw/config/processes.py @@ -4,18 +4,14 @@ Configuration of the Run 2 HH -> bbWW processes. """ -import cmsdb import order as od from scinum import Number -from cmsdb.util import add_decay_process from columnflow.util import DotDict -from hbw.config.styling import color_palette - -def add_parent_process(config: od.Config, child_procs: list[od.Process], **kwargs): +def create_parent_process(child_proces: list[od.Process], **kwargs): """ Helper function to create processes from multiple processes *child_procs* """ @@ -28,18 +24,26 @@ def add_parent_process(config: od.Config, child_procs: list[od.Process], **kwarg if "xsecs" not in kwargs: # set the xsec as sum of all xsecs when the ecm key exists for all processes - valid_ecms = set.intersection(*[set(proc.xsecs.keys()) for proc in child_procs]) - proc_kwargs["xsecs"] = {ecm: sum([proc.get_xsec(ecm) for proc in child_procs]) for ecm in valid_ecms} + valid_ecms = set.intersection(*[set(proc.xsecs.keys()) for proc in child_proces]) + proc_kwargs["xsecs"] = {ecm: sum([proc.get_xsec(ecm) for proc in child_proces]) for ecm in valid_ecms} - parent_process = config.add_process(**proc_kwargs) + parent_process = od.Process(**proc_kwargs) # add child processes to parent - for child_proc in child_procs: + for child_proc in child_proces: parent_process.add_process(child_proc) return parent_process +def add_parent_process(config: od.Config, child_procs: list[od.Process], **kwargs): + """ + 
Helper function to create a parent process and add it to the config instance + """ + parent_process = config.add_process(create_parent_process(child_procs, **kwargs)) + return parent_process + + def add_dummy_xsecs(config: od.Config, dummy_xsec: float = 0.1): """ Helper that adds some dummy xsecs when missing for the campaign's correspondign ecm """ ecm = config.campaign.ecm @@ -65,142 +69,20 @@ def add_dummy_xsecs(config: od.Config, dummy_xsec: float = 0.1): def configure_hbw_processes(config: od.Config): + """ + Function to modify the processes present in the config instance. + NOTE: we should not rely on modifying process instances themselves as part of the config initialization. + """ # add main HH process - config.add_process(cmsdb.processes.hh_ggf.copy()) + config.add_process(config.x.procs.n.hh_ggf) + + config.add_process(config.x.procs.n.t_bkg) + config.add_process(config.x.procs.n.v_lep) + config.add_process(config.x.procs.n.background) # Set dummy xsec for all processes if missing add_dummy_xsecs(config) - # QCD process customization - qcd_mu = config.get_process("qcd_mu", default=None) - if qcd_mu: - qcd_mu = "QCD Muon enriched" - - # add custom qcd_ele process - qcd_em = config.get_process("qcd_em", default=None) - qcd_bctoe = config.get_process("qcd_bctoe", default=None) - if qcd_em and qcd_bctoe: - qcd_ele = add_parent_process( # noqa - config, - [qcd_em, qcd_bctoe], - name="qcd_ele", - id=31199, - label="QCD Electron enriched", - ) - elif qcd_em: - qcd_ele = add_parent_process( # noqa - config, - [qcd_em], - name="qcd_ele", - id=31199, - label="QCD Electron enriched", - ) - - # custom v_lep process for ML Training, combining W+DY - w_lnu = config.get_process("w_lnu", default=None) - dy = config.get_process("dy", default=None) - if w_lnu and dy: - v_lep = add_parent_process( # noqa - config, - [w_lnu, dy], - name="v_lep", - id=64575573, # random number - label="W and DY", - ) - - # Custom t_bkg process for ML Training, combining tt+st - st = 
config.get_process("st", default=None) - tt = config.get_process("tt", default=None) - if st and tt: - t_bkg = add_parent_process( # noqa - config, - [st, tt], - name="t_bkg", - id=97842611, # random number - label="tt + st", - ) - - if config.has_tag("is_dl") and config.has_tag("is_nonresonant") and config.x.run == 2: - # Custom signal process for ML Training, combining multiple kl signal samples - # NOTE: only built for run 2 because kl variations are missing in run 3 - signal_processes = [ - config.get_process(f"hh_ggf_hbb_hvv2l2nu_kl{kl}_kt1", deep=True) - for kl in [0, 1, "2p45"] - ] - sig = config.add_process( - name="sig", - id=75835213, # random number - xsecs={ - 13: sum([proc.get_xsec(13) for proc in signal_processes]), - }, - label="signal", - ) - for proc in signal_processes: - try: - sig.add_process(proc) - except Exception: - # this also adds 'sig' as parent to 'proc', but sometimes this is happening - # multiple times, since we create multiple configs - pass - - # add auxiliary information if process is signal - for proc_inst, _, _ in config.walk_processes(): - is_signal = any([ - signal_tag in proc_inst.name - for signal_tag in ("hh_vbf", "hh_ggf", "radion", "gravition") - ]) - if is_signal: - proc_inst.add_tag("is_signal") - - decay_map = { - "lf": { - "name": "lf", - "id": 50, - "label": "(lf)", - "br": -1, - }, - "hf": { - "name": "hf", - "id": 60, - "label": "(hf)", - "br": -1, - }, - } - - # add heavy flavour and light flavour dy processes - for proc in ( - "dy", - "dy_m4to10", "dy_m10to50", - "dy_m50toinf", - "dy_m50toinf_0j", "dy_m50toinf_1j", "dy_m50toinf_2j", - ): - dy_proc_inst = config.get_process(proc, default=None) - if dy_proc_inst: - add_production_mode_parent = proc != "dy" - for flavour in ("hf", "lf"): - # the 'add_decay_process' function helps us to create all parent-daughter relationships - add_decay_process( - dy_proc_inst, - decay_map[flavour], - add_production_mode_parent=add_production_mode_parent, - name_func=lambda 
parent_name, decay_name: f"{parent_name}_{decay_name}", - label_func=lambda parent_label, decay_label: f"{parent_label} {decay_label}", - xsecs=None, - aux={"flavour": flavour}, - ) - - # create main background process - background = config.add_process( - name="background", - id=99999, - label="background", - color=color_palette["blue"], - ) - for bg in ["tt", "dy", "st", "vv", "w_lnu", "h"]: - if config.has_process(bg): - bg = config.get_process(bg) - background.add_process(bg) - from random import randint diff --git a/hbw/tasks/campaigns.py b/hbw/tasks/campaigns.py index ab3a2d7..16df5b5 100644 --- a/hbw/tasks/campaigns.py +++ b/hbw/tasks/campaigns.py @@ -32,6 +32,101 @@ "cmsdb.campaigns.run3_2022_postEE_nano_v12": "campaign_run3_2022_postEE_nano_v12", "cmsdb.campaigns.run3_2022_postEE_nano_v13": "campaign_run3_2022_postEE_nano_v13", }, + "c22post_das": { + "cmsdb.campaigns.run3_2022_postEE_nano_v12": "campaign_run3_2022_postEE_nano_v12", + "cmsdb.campaigns.run3_2022_postEE_nano_v13": "campaign_run3_2022_postEE_nano_v13", + "cmsdb.campaigns.run3_2022_postEE_nano_uhh_v12": "campaign_run3_2022_postEE_nano_uhh_v12", + }, + "c22pre_das": { + "cmsdb.campaigns.run3_2022_preEE_nano_v12": "campaign_run3_2022_preEE_nano_v12", + "cmsdb.campaigns.run3_2022_preEE_nano_v13": "campaign_run3_2022_preEE_nano_v13", + "cmsdb.campaigns.run3_2022_preEE_nano_uhh_v12": "campaign_run3_2022_preEE_nano_uhh_v12", + }, +} + +broken_files = { + "run3_2022_postEE_nano_uhh_v12": { + "dy_m10to50_amcatnlo": [ + # missing LHEScaleWeights + "/store/mc/Run3Summer22EEMiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-10to50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_postEE_v6-v2/0/4B7063C8-D7B7-A45F-0B56-817AECEAFB43.root", # noqa: E501 + "/store/mc/Run3Summer22EEMiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-10to50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_postEE_v6-v2/0/D4D70538-4AF1-A95C-3A57-5EB5D2FFAB08.root", # noqa: E501 + 
"/store/mc/Run3Summer22EEMiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-10to50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_postEE_v6-v2/0/24934037-F730-CFB5-A82E-5D6669E8C85B.root", # noqa: E501 + "/store/mc/Run3Summer22EEMiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-10to50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_postEE_v6-v2/0/EB93CCFF-F013-D816-7586-1051CA0BC3C8.root", # noqa: E501 + "/store/mc/Run3Summer22EEMiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-10to50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_postEE_v6-v2/0/08C7ABCF-F7DE-F73F-218E-12A85C1A6E89.root", # noqa: E501 + ], + "dy_m50toinf_amcatnlo": [ + # broken + "/store/mc/Run3Summer22EEMiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_postEE_v6_ext1-v1/0/3FE6B8C0-4234-4EE4-5BEA-E232539E0D85.root", # noqa: E501 + # missing LHEScaleWeights + "/store/mc/Run3Summer22EEMiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_postEE_v6-v2/0/10B3DD52-F1B9-F8FD-E6FD-D59ECCE90963.root", # noqa: E501 + "/store/mc/Run3Summer22EEMiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_postEE_v6-v2/0/13DA9D04-5A59-51B8-67EC-54723C6DB4F3.root", # noqa: E501 + "/store/mc/Run3Summer22EEMiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_postEE_v6_ext1-v1/0/F31A7CD7-F9CF-2A51-42B6-26E82E134DE7.root", # noqa: E501 + "/store/mc/Run3Summer22EEMiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_postEE_v6_ext1-v1/0/5B7AFD98-EC30-D01C-59FA-162D86E82C61.root", # noqa: E501 + 
"/store/mc/Run3Summer22EEMiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_postEE_v6_ext1-v1/0/4E19FA69-9612-E1AF-A537-099F0119CC60.root", # noqa: E501 + "/store/mc/Run3Summer22EEMiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_postEE_v6_ext1-v1/0/197F9F10-660F-AC8B-83DF-AE02CA2AEA71.root", # noqa: E501 + "/store/mc/Run3Summer22EEMiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_postEE_v6_ext1-v1/0/FAFAECB9-A1C2-A07C-16F7-C7A8008A404E.root", # noqa: E501 + "/store/mc/Run3Summer22EEMiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_postEE_v6_ext1-v1/0/F541A987-BD0F-09AA-156F-2836570E8886.root", # noqa: E501 + "/store/mc/Run3Summer22EEMiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_postEE_v6_ext1-v1/0/B600A38B-9418-1EA3-8B4E-8969BE8ECDDE.root", # noqa: E501 + "/store/mc/Run3Summer22EEMiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_postEE_v6_ext1-v1/0/3B2EEDD7-0767-6112-8C60-B522A4A1910C.root", # noqa: E501 + "/store/mc/Run3Summer22EEMiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_postEE_v6_ext1-v1/0/5AC98FFE-A2A1-EAD4-BFD5-59F64E2A3465.root", # noqa: E501 + "/store/mc/Run3Summer22EEMiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_postEE_v6_ext1-v1/0/442D7323-2E81-9EFA-B9C1-E3414FF2C5B4.root", # noqa: E501 + "/store/mc/Run3Summer22EEMiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_postEE_v6_ext1-v1/0/198C8C10-BD66-5B2B-C70A-34EC4EEFB65C.root", # noqa: 
E501 + "/store/mc/Run3Summer22EEMiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_postEE_v6_ext1-v1/0/C0D75D2C-1A95-A416-E2BA-3E16E3249333.root", # noqa: E501 + "/store/mc/Run3Summer22EEMiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_postEE_v6_ext1-v1/0/DDBA1F4E-4795-A218-E0A0-4FF036B5CB68.root", # noqa: E501 + "/store/mc/Run3Summer22EEMiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_postEE_v6_ext2-v2/0/770ADB5F-4F37-50A4-1FA2-34D04AD062B8.root", # noqa: E501 + "/store/mc/Run3Summer22EEMiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_postEE_v6_ext2-v2/0/81E8769E-6D9A-674A-419A-40227862E8CC.root", # noqa: E501 + "/store/mc/Run3Summer22EEMiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_postEE_v6_ext2-v2/0/7CA623D4-4E9F-E689-ECFA-6F251291FAB3.root", # noqa: E501 + "/store/mc/Run3Summer22EEMiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_postEE_v6_ext2-v2/0/5FDA4334-32A3-0262-C0F5-5AA2AF906F94.root", # noqa: E501 + "/store/mc/Run3Summer22EEMiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_postEE_v6_ext2-v2/0/CE34DA61-BB00-E50C-76F6-591032050F6F.root", # noqa: E501 + "/store/mc/Run3Summer22EEMiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_postEE_v6_ext2-v2/0/0E2C013A-B1CD-63AA-4FBC-92AB1171BDF7.root", # noqa: E501 + "/store/mc/Run3Summer22EEMiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_postEE_v6_ext2-v2/0/4CC2A468-5DC4-3513-C484-CF10B96DD7E1.root", 
# noqa: E501 + "/store/mc/Run3Summer22EEMiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_postEE_v6_ext2-v2/0/0568BFAE-B3C4-FF86-2E8E-ABFEB3F418BC.root", # noqa: E501 + "/store/mc/Run3Summer22EEMiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_postEE_v6_ext2-v2/0/A95D7402-87A5-C41B-3B89-211DCE48A4BB.root", # noqa: E501 + "/store/mc/Run3Summer22EEMiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_postEE_v6_ext2-v2/0/836369D5-B667-F3DD-78D7-9D075766A182.root", # noqa: E501 + "/store/mc/Run3Summer22EEMiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_postEE_v6_ext2-v2/0/65B3170C-8F95-F3AA-2B8F-056AAF05905D.root", # noqa: E501 + "/store/mc/Run3Summer22EEMiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_postEE_v6_ext2-v2/0/EE0F4D89-CC83-02CC-19EE-8BEA0AC9EB88.root", # noqa: E501 + "/store/mc/Run3Summer22EEMiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_postEE_v6_ext2-v2/0/69DBFCA4-B503-DA49-8972-D8EFFA69DA7C.root", # noqa: E501 + "/store/mc/Run3Summer22EEMiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_postEE_v6_ext2-v2/0/20988817-825D-C5DD-3AC0-5A929F768A5F.root", # noqa: E501 + "/store/mc/Run3Summer22EEMiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_postEE_v6_ext2-v2/0/BD755398-6344-C786-2BBE-B648C5056544.root", # noqa: E501 + 
"/store/mc/Run3Summer22EEMiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_postEE_v6_ext2-v2/0/E80FA718-9A41-22EF-B2C2-ABE91B334447.root", # noqa: E501 + "/store/mc/Run3Summer22EEMiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_postEE_v6_ext2-v2/0/F095A2DC-3D9D-540A-D77A-E0881A062F06.root", # noqa: E501 + "/store/mc/Run3Summer22EEMiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_postEE_v6_ext2-v2/0/7BAFD1A3-6EF1-18A5-AC03-9158A4D965E0.root", # noqa: E501 + "/store/mc/Run3Summer22EEMiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_postEE_v6_ext2-v2/0/C71C0C70-9F3D-C581-219E-6FE00957D3CB.root", # noqa: E501 + "/store/mc/Run3Summer22EEMiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_postEE_v6_ext2-v2/0/2307BFB8-74FF-BB2A-52A2-909D5F57C221.root", # noqa: E501 + # missing LHEScaleWeights + ], + "dy_m50toinf_2j_amcatnlo": [ + # broken + "/store/mc/Run3Summer22EEMiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-50_2J_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_postEE_v6-v2/0/FD427E18-2F78-5055-7B38-8929DDF4F1EA.root", # noqa: E501 + ], + }, + "run3_2022_preEE_nano_uhh_v12": { + "dy_m50toinf_amcatnlo": [ + # missing LHEScaleWeights + "/store/mc/Run3Summer22MiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_v5-v2/0/140BABD5-F5C1-543C-7425-92CDA4A385B9.root", # noqa: E501 + "/store/mc/Run3Summer22MiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_v5-v2/0/F96C5BD4-8AFF-3B01-A17B-62F17F74895B.root", # noqa: E501 + 
"/store/mc/Run3Summer22MiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_v5_ext2-v2/0/39B93A78-FF63-8552-5C58-257144882E6B.root", # noqa: E501 + "/store/mc/Run3Summer22MiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_v5_ext2-v2/0/12615068-4201-0739-6128-21B694B3CF6E.root", # noqa: E501 + "/store/mc/Run3Summer22MiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_v5_ext2-v2/0/3163B05D-3FFB-1C6B-60BB-B5CD14166ACE.root", # noqa: E501 + "/store/mc/Run3Summer22MiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_v5-v2/0/7BE26782-9B31-D8AC-E317-EF6F32C391BF.root", # noqa: E501 + "/store/mc/Run3Summer22MiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_v5_ext1-v1/0/17AB951D-549D-89D1-345D-CE6CD5B5B3D0.root", # noqa: E501 + "/store/mc/Run3Summer22MiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_v5_ext2-v2/0/22B6C39B-3332-7E8A-B8C7-F23367A5F297.root", # noqa: E501 + "/store/mc/Run3Summer22MiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_v5_ext2-v2/0/BAE33AA8-086B-7D5D-26EA-C52C9C6D31FE.root", # noqa: E501 + "/store/mc/Run3Summer22MiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_v5_ext2-v2/0/C70989AC-334C-EB82-4ACC-B8C48FFE2433.root", # noqa: E501 + "/store/mc/Run3Summer22MiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_v5_ext2-v2/0/385E6DCC-4FB6-ED71-2B3A-5B23C5A3ACC2.root", # noqa: E501 + + ], + "dy_m10to50_amcatnlo": [ + # missing LHEScaleWeights + 
"/store/mc/Run3Summer22MiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-10to50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_v5-v2/0/315BBEDB-FF7D-B3FB-0355-F6DA23E297BE.root", # noqa: E501 + "/store/mc/Run3Summer22MiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-10to50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_v5-v2/0/2E0573CC-695C-340B-5720-85278B31496E.root", # noqa: E501 + "/store/mc/Run3Summer22MiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-10to50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_v5_ext1-v1/0/31669005-269B-419C-B93E-E3E4A607B644.root", # noqa: E501 + ], + "w_lnu_amcatnlo": [ + # missing LHEScaleWeights + "/store/mc/Run3Summer22MiniAODv4_NanoAODv12UHH/WtoLNu-2Jets_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_v5-v2/0/33E9A5A9-73C5-42C6-D337-08D23E9144BF.root", # noqa: E501 + ], + }, } @@ -66,24 +161,19 @@ def modify_campaign(self, campaign_inst): """ Modify the campaign instance, e.g. by adding datasets or changing dataset properties. 
""" - if campaign_inst.name == "run3_2022_postEE_nano_uhh_v12": - # remove broken files - dy_m10to50_nominal = campaign_inst.get_dataset("dy_m10to50_amcatnlo").info["nominal"] - dy_m10to50_nominal.x.broken_files = [ - # missing scale weights - "/store/mc/Run3Summer22EEMiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-10to50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_postEE_v6-v2/0/4B7063C8-D7B7-A45F-0B56-817AECEAFB43.root", # noqa: E501 - ] - dy_m10to50_nominal.n_files = dy_m10to50_nominal.n_files - 1 - dy_m10to50_nominal.n_events = dy_m10to50_nominal.n_events - 1651814 - - dy_m50toinf_nominal = campaign_inst.get_dataset("dy_m50toinf_amcatnlo").info["nominal"] - dy_m50toinf_nominal.x.broken_files = [ - # broken file - "/store/mc/Run3Summer22EEMiniAODv4_NanoAODv12UHH/DYto2L-2Jets_MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_postEE_v6_ext1-v1/0/3FE6B8C0-4234-4EE4-5BEA-E232539E0D85.root", # noqa: E501 - ] - dy_m50toinf_nominal.n_files = dy_m50toinf_nominal.n_files - 1 - dy_m50toinf_nominal.n_events = -1 - # dy_m50toinf.x.n_events = dy_m50toinf.x.n_events - ???? 
+ if campaign_inst.name not in broken_files: + return + + for dataset_name, broken_files_list in broken_files[campaign_inst.name].items(): + dataset_inst_nominal = campaign_inst.get_dataset(dataset_name).info["nominal"] + + if len(set(broken_files_list)) != len(broken_files_list): + raise ValueError(f"Duplicate broken files in {dataset_name}") + + dataset_inst_nominal.x.broken_files = dataset_inst_nominal.x("broken_files", []) + broken_files_list + dataset_inst_nominal.n_files = dataset_inst_nominal.n_files - len(broken_files_list) + # n_events not known for all broken files, but is not used anyways + dataset_inst_nominal.n_events = -1 @cached_property def campaign_insts(self): @@ -163,6 +253,8 @@ def get_custom_campaign(self): @timeit_multiple def run(self): + from hbw.analysis.processes import modify_cmsdb_processes + modify_cmsdb_processes() output = self.output() # cross check if the dataset summary did change From bdb3a1d399760c626a8878fe4a89213030347cdf Mon Sep 17 00:00:00 2001 From: Mathis Frahm Date: Thu, 9 Jan 2025 10:39:58 +0100 Subject: [PATCH 16/29] load MET for data in calibration --- hbw/calibration/default.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/hbw/calibration/default.py b/hbw/calibration/default.py index 1a0e793..b3e48cf 100644 --- a/hbw/calibration/default.py +++ b/hbw/calibration/default.py @@ -13,6 +13,8 @@ from columnflow.util import maybe_import, try_float from columnflow.columnar_util import set_ak_column, EMPTY_FLOAT +from hbw.util import MET_COLUMN + from hbw.calibration.jet import bjet_regression ak = maybe_import("awkward") @@ -82,7 +84,7 @@ def fatjet_init(self: Calibrator) -> None: @calibrator( - uses={deterministic_seeds}, + uses={deterministic_seeds, MET_COLUMN("{pt,phi}")}, produces={deterministic_seeds}, # jec uncertainty_sources: set to None to use config default jec_sources=["Total"], @@ -111,7 +113,7 @@ def jet_base(self: Calibrator, events: ak.Array, **kwargs) -> ak.Array: replace_value = 
pre_calib_met[col.split(".")[-1].split("_")[0]] logger.info( f"Found infinite values in {col}; Values will be replaced with " - f"{replace_value if try_float(replace_value) else replace_value[m]}" + f"{replace_value if try_float(replace_value) else replace_value[m]}", ) events = set_ak_column(events, col, ak.where(m, replace_value, route.apply(events))) From 99d81e623ccee8ca4023f7fe8c1f30c8a8ea4444 Mon Sep 17 00:00:00 2001 From: Mathis Frahm Date: Thu, 9 Jan 2025 10:48:59 +0100 Subject: [PATCH 17/29] add memory configuration and load gen columns only when required --- hbw/production/weights.py | 6 +++--- hbw/util.py | 24 ++++++++++++++++++++++++ law.cfg | 12 ++++++++++++ 3 files changed, 39 insertions(+), 3 deletions(-) diff --git a/hbw/production/weights.py b/hbw/production/weights.py index 4fa4b49..7b3b658 100644 --- a/hbw/production/weights.py +++ b/hbw/production/weights.py @@ -27,7 +27,7 @@ from hbw.production.normalized_weights import normalized_weight_factory from hbw.production.normalized_btag import normalized_btag_weights from hbw.production.dataset_normalization import dataset_normalization_weight -from hbw.util import has_tag +from hbw.util import has_tag, IF_DY, IF_TOP np = maybe_import("numpy") @@ -39,8 +39,8 @@ @producer( - uses={gen_parton_top, gen_v_boson, pu_weight}, - produces={gen_parton_top, gen_v_boson, pu_weight}, + uses={IF_TOP(gen_parton_top), IF_DY(gen_v_boson), pu_weight}, + produces={IF_TOP(gen_parton_top), IF_DY(gen_v_boson), pu_weight}, mc_only=True, ) def event_weights_to_normalize(self: Producer, events: ak.Array, results: SelectionResult, **kwargs) -> ak.Array: diff --git a/hbw/util.py b/hbw/util.py index 261ed3f..09ee8f6 100644 --- a/hbw/util.py +++ b/hbw/util.py @@ -663,3 +663,27 @@ def IF_MC(self: ArrayFunction.DeferredColumn, func: ArrayFunction) -> Any | set[ return self.get() return self.get() if func.dataset_inst.is_mc else None + + +@deferred_column +def IF_DY(self: ArrayFunction.DeferredColumn, func: ArrayFunction) -> Any | 
set[Any]: + if getattr(func, "dataset_inst", None) is None: + return self.get() + + return self.get() if func.dataset_inst.has_tag("is_v_jets") else None + + +@deferred_column +def IF_TOP(self: ArrayFunction.DeferredColumn, func: ArrayFunction) -> Any | set[Any]: + if getattr(func, "dataset_inst", None) is None: + return self.get() + + return self.get() if func.dataset_inst.has_tag("has_top") else None + + +@deferred_column +def IF_TT(self: ArrayFunction.DeferredColumn, func: ArrayFunction) -> Any | set[Any]: + if getattr(func, "dataset_inst", None) is None: + return self.get() + + return self.get() if func.dataset_inst.has_tag("is_ttbar") else None diff --git a/law.cfg b/law.cfg index edebd04..3f50a3c 100644 --- a/law.cfg +++ b/law.cfg @@ -126,6 +126,18 @@ lfn_sources: local_desy_dcache, wlcg_fs_desy_store, wlcg_fs_infn_redirector, wlc ; c22post__cf.CalibrateEvents__nomin*: htcondor_memory=5GB ; cf.MLTraining: htcondor_memory=10GB, htcondor_gpus=1 +# use cluster default for signals +# cf.SelectEvents__hh*: dummy=0 +# cf.ReduceEvents__hh*: dummy=0 +# DY and W needs more memory due to gen_v_boson Producer +cf.SelectEvents__dy*: htcondor_memory=3GB +cf.ReduceEvents__dy*: htcondor_memory=3GB +cf.SelectEvents__w_lnu: htcondor_memory=3GB +cf.ReduceEvents__w_lnu: htcondor_memory=3GB +# default resources for all other datasets +cf.SelectEvents: htcondor_memory=2GB +cf.ReduceEvents: htcondor_memory=2GB + [luigi_cf.DummyTask] # To set defaults on a per-task basis From cea5ad6379ac29050e7e56340641c7dcbc5bbcff Mon Sep 17 00:00:00 2001 From: Mathis Frahm Date: Thu, 9 Jan 2025 16:04:11 +0100 Subject: [PATCH 18/29] use stitching producer only where necessary --- hbw/production/weights.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/hbw/production/weights.py b/hbw/production/weights.py index 7b3b658..b122a09 100644 --- a/hbw/production/weights.py +++ b/hbw/production/weights.py @@ -241,8 +241,10 @@ def combined_normalization_weights_init(self: Producer) -> 
None: if self.dataset_inst.has_tag("is_hbv"): self.norm_weights_producer = stitched_normalization_weights_brs_from_processes - else: + elif "dy_m50toinf" in self.dataset_inst.name: self.norm_weights_producer = stitched_normalization_weights + else: + self.norm_weights_producer = normalization_weights self.norm_weights_producer.weight_name = "stitched_normalization_weight" From 4f74b018f603c12b146a7b385c4c45b34ae280c6 Mon Sep 17 00:00:00 2001 From: Mathis Frahm Date: Thu, 9 Jan 2025 16:04:25 +0100 Subject: [PATCH 19/29] keep columns for electronSS corrections --- hbw/config/config_run2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hbw/config/config_run2.py b/hbw/config/config_run2.py index d89932a..92e73b0 100644 --- a/hbw/config/config_run2.py +++ b/hbw/config/config_run2.py @@ -709,7 +709,7 @@ def add_external(name, value): "{FatJet,HbbJet}.{pt,eta,phi,mass,msoftdrop,tau1,tau2,tau3,btagHbb,deepTagMD_HbbvsQCD,particleNet_HbbvsQCD}", # Leptons "{Electron,Muon}.{pt,eta,phi,mass,charge,pdgId,jetRelIso,is_tight,dxy,dz}", - "Electron.deltaEtaSC", "mll", + "Electron.{deltaEtaSC,r9,seedGain}", "mll", # MET "{MET,PuppiMET}.{pt,phi}", # all columns added during selection using a ColumnCollection flag, but skip cutflow ones From 6c2652243280084d85d785220be7f4ace986bf49 Mon Sep 17 00:00:00 2001 From: Mathis Frahm Date: Thu, 9 Jan 2025 16:05:08 +0100 Subject: [PATCH 20/29] change retries parameter default --- hbw/columnflow_patches.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/hbw/columnflow_patches.py b/hbw/columnflow_patches.py index 06ba7c5..45f3375 100644 --- a/hbw/columnflow_patches.py +++ b/hbw/columnflow_patches.py @@ -136,6 +136,10 @@ def patched_init(self, *args, **kwargs): @memoize def patch_all(): + # change the "retries" parameter default + from columnflow.tasks.framework.remote import RemoteWorkflow + RemoteWorkflow.retries = RemoteWorkflow.retries.copy(default=2) + patch_mltraining() patch_htcondor_workflow_naf_resources() # 
patch_column_alias_strategy() From 5f6942c790a7224d58d186c6893c64586d1339b9 Mon Sep 17 00:00:00 2001 From: Mathis Frahm Date: Fri, 10 Jan 2025 22:20:23 +0100 Subject: [PATCH 21/29] update columnflow and fixes --- hbw/columnflow_patches.py | 6 ++---- hbw/config/variables.py | 18 ++++++++++-------- hbw/production/normalized_weights.py | 3 ++- hbw/production/weights.py | 2 +- hbw/selection/stats.py | 2 +- modules/columnflow | 2 +- 6 files changed, 17 insertions(+), 16 deletions(-) diff --git a/hbw/columnflow_patches.py b/hbw/columnflow_patches.py index 45f3375..d40214b 100644 --- a/hbw/columnflow_patches.py +++ b/hbw/columnflow_patches.py @@ -93,9 +93,7 @@ def TaskArrayFunction_str(self): TaskArrayFunction.__str__ = TaskArrayFunction_str logger.info( - "patched TaskArrayFunction.__str__ to include the CSP version attribute " - "(NOTE that this currently does not work for the " - "MLTrainingMixin tasks (e.g. MLPreTraining and MLTraining))", + "patched TaskArrayFunction.__str__ to include the CSP version attribute", ) @@ -138,7 +136,7 @@ def patched_init(self, *args, **kwargs): def patch_all(): # change the "retries" parameter default from columnflow.tasks.framework.remote import RemoteWorkflow - RemoteWorkflow.retries = RemoteWorkflow.retries.copy(default=2) + RemoteWorkflow.retries = RemoteWorkflow.retries.copy(default=3) patch_mltraining() patch_htcondor_workflow_naf_resources() diff --git a/hbw/config/variables.py b/hbw/config/variables.py index c48992a..a1878a7 100644 --- a/hbw/config/variables.py +++ b/hbw/config/variables.py @@ -317,9 +317,11 @@ def add_variables(config: od.Config) -> None: x_title="Number of pnet jets (tight WP)", discrete_x=True, ) + # NOTE: there is some issue when loading columns via aux, but not loading all 4-vector components + # but no error is raised, when changing to the `object["pt"]` notation config.add_variable( name="n_fatjet", - expression=lambda events: ak.num(events.FatJet.pt, axis=1), + expression=lambda events: 
ak.num(events.FatJet["pt"], axis=1), aux={"inputs": {"FatJet.pt"}}, binning=(7, -0.5, 6.5), x_title="Number of fatjets", @@ -327,7 +329,7 @@ def add_variables(config: od.Config) -> None: ) config.add_variable( name="n_hbbjet", - expression=lambda events: ak.num(events.HbbJet.pt, axis=1), + expression=lambda events: ak.num(events.HbbJet["pt"], axis=1), aux={"inputs": {"HbbJet.pt"}}, binning=(4, -0.5, 3.5), x_title="Number of hbbjets", @@ -335,7 +337,7 @@ def add_variables(config: od.Config) -> None: ) config.add_variable( name="n_electron", - expression=lambda events: ak.num(events.Electron.pt, axis=1), + expression=lambda events: ak.num(events.Electron["pt"], axis=1), aux={"inputs": {"Electron.pt"}}, binning=(4, -0.5, 3.5), x_title="Number of electrons", @@ -343,7 +345,7 @@ def add_variables(config: od.Config) -> None: ) config.add_variable( name="n_muon", - expression=lambda events: ak.num(events.Muon.pt, axis=1), + expression=lambda events: ak.num(events.Muon["pt"], axis=1), aux={"inputs": {"Muon.pt"}}, binning=(4, -0.5, 3.5), x_title="Number of muons", @@ -351,7 +353,7 @@ def add_variables(config: od.Config) -> None: ) config.add_variable( name="n_bjet", - expression=lambda events: ak.num(events.Bjet.pt, axis=1), + expression=lambda events: ak.num(events.Bjet["pt"], axis=1), aux={"inputs": {"Bjet.pt"}}, binning=(4, -0.5, 3.5), x_title="Number of bjets", @@ -359,7 +361,7 @@ def add_variables(config: od.Config) -> None: ) config.add_variable( name="ht", - expression=lambda events: ak.sum(events.Jet.pt, axis=1), + expression=lambda events: ak.sum(events.Jet["pt"], axis=1), aux={"inputs": {"Jet.pt"}}, binning=(40, 0, 1200), unit="GeV", @@ -369,7 +371,7 @@ def add_variables(config: od.Config) -> None: config.add_variable( name="lt", expression=lambda events: ( - ak.sum(events.Muon.pt, axis=1) + ak.sum(events.Muon.pt, axis=1) + events[met_name].pt + ak.sum(events.Muon["pt"], axis=1) + ak.sum(events.Muon["pt"], axis=1) + events[met_name]["pt"] ), aux={"inputs": 
{"Muon.pt", "Electron.pt", "MET.pt"}}, binning=(40, 0, 1200), @@ -378,7 +380,7 @@ def add_variables(config: od.Config) -> None: ) config.add_variable( name="ht_bjet_norm", - expression=lambda events: ak.sum(events.Jet.pt, axis=1), + expression=lambda events: ak.sum(events.Jet["pt"], axis=1), aux={"inputs": {"Jet.pt"}}, binning=[0, 100, 200, 300, 400, 500, 600, 700, 800, 900, 1000, 1100, 1200, 1300, 1450, 1700, 2400], unit="GeV", diff --git a/hbw/production/normalized_weights.py b/hbw/production/normalized_weights.py index 3dc8948..324668e 100644 --- a/hbw/production/normalized_weights.py +++ b/hbw/production/normalized_weights.py @@ -26,7 +26,8 @@ def normalized_weight_factory( ) -> Callable: @producer( - uses=set(weight_producers) | set().union(*[w.produces for w in weight_producers]) | {"process_id"}, + # TODO: w.produces does not work as intended anymore, so we have to initialize the Producers here + uses=set(weight_producers) | set().union(*[w().produced_columns for w in weight_producers]) | {"process_id"}, cls_name=producer_name, mc_only=True, # skip the checking existence of used/produced columns because not all columns are there diff --git a/hbw/production/weights.py b/hbw/production/weights.py index b122a09..28e9e6f 100644 --- a/hbw/production/weights.py +++ b/hbw/production/weights.py @@ -241,7 +241,7 @@ def combined_normalization_weights_init(self: Producer) -> None: if self.dataset_inst.has_tag("is_hbv"): self.norm_weights_producer = stitched_normalization_weights_brs_from_processes - elif "dy_m50toinf" in self.dataset_inst.name: + elif "dy_" in self.dataset_inst.name: self.norm_weights_producer = stitched_normalization_weights else: self.norm_weights_producer = normalization_weights diff --git a/hbw/selection/stats.py b/hbw/selection/stats.py index 17d96cf..b7ab446 100644 --- a/hbw/selection/stats.py +++ b/hbw/selection/stats.py @@ -50,7 +50,7 @@ def hbw_selection_step_stats( @selector( - uses={increment_stats, event_weights_to_normalize, 
RAW_MET_COLUMN("pt")}, + uses={increment_stats, event_weights_to_normalize, RAW_MET_COLUMN("{pt,phi}")}, ) def hbw_increment_stats( self: Selector, diff --git a/modules/columnflow b/modules/columnflow index c4139ca..608ef91 160000 --- a/modules/columnflow +++ b/modules/columnflow @@ -1 +1 @@ -Subproject commit c4139cabf44246492b4afa8217750612d401d3d3 +Subproject commit 608ef912fa7ba33a82fa1e0af868380030fb3e75 From d4f57f0357817a64844fc9e12d5d1fa409ab4c16 Mon Sep 17 00:00:00 2001 From: Mathis Frahm Date: Mon, 13 Jan 2025 14:19:09 +0100 Subject: [PATCH 22/29] add missing variable titles --- hbw/config/variables.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/hbw/config/variables.py b/hbw/config/variables.py index a1878a7..c1f6fef 100644 --- a/hbw/config/variables.py +++ b/hbw/config/variables.py @@ -569,6 +569,7 @@ def add_variables(config: od.Config) -> None: binning=(40, 0., 400.), unit="GeV", null_value=EMPTY_FLOAT, + x_title=f"Lepton {i} $p_{{T}}$", ) config.add_variable( name=f"lepton{i}_eta", @@ -579,6 +580,7 @@ def add_variables(config: od.Config) -> None: binning=(40, -3.2, 3.2), unit="GeV", null_value=EMPTY_FLOAT, + x_title=f"Lepton {i} $\eta$", ) config.add_variable( name=f"lepton{i}_phi", @@ -589,6 +591,7 @@ def add_variables(config: od.Config) -> None: binning=(50, -2.5, 2.5), unit="GeV", null_value=EMPTY_FLOAT, + x_title=f"Lepton {i} $\phi$", ) config.add_variable( name=f"lepton{i}_mass", @@ -599,6 +602,7 @@ def add_variables(config: od.Config) -> None: binning=(40, 0., 400.), unit="GeV", null_value=EMPTY_FLOAT, + x_title=f"Lepton {i} mass", ) for obj in ["Electron", "Muon"]: From ad5b98d402683401b62617a8b3280963159c70c3 Mon Sep 17 00:00:00 2001 From: Mathis Frahm Date: Mon, 13 Jan 2025 15:56:30 +0100 Subject: [PATCH 23/29] store ml inputs as float32 --- hbw/production/ml_inputs.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/hbw/production/ml_inputs.py b/hbw/production/ml_inputs.py index d5612ed..2fc06e0 100644 
--- a/hbw/production/ml_inputs.py +++ b/hbw/production/ml_inputs.py @@ -147,7 +147,7 @@ def common_ml_inputs(self: Producer, events: ak.Array, **kwargs) -> ak.Array: # fill nan/none values of all produced columns for col in self.ml_input_columns: - events = set_ak_column(events, col, ak.fill_none(ak.nan_to_none(events[col]), ZERO_PADDING_VALUE)) + events = set_ak_column_f32(events, col, ak.fill_none(ak.nan_to_none(events[col]), ZERO_PADDING_VALUE)) check_column_bookkeeping(self, events) @@ -257,7 +257,7 @@ def sl_ml_inputs(self: Producer, events: ak.Array, **kwargs) -> ak.Array: # fill nan/none values of all produced columns for col in self.ml_input_columns: - events = set_ak_column(events, col, ak.fill_none(ak.nan_to_none(events[col]), ZERO_PADDING_VALUE)) + events = set_ak_column_f32(events, col, ak.fill_none(ak.nan_to_none(events[col]), ZERO_PADDING_VALUE)) check_column_bookkeeping(self, events) return events @@ -332,7 +332,7 @@ def dl_ml_inputs(self: Producer, events: ak.Array, **kwargs) -> ak.Array: # fill nan/none values of all produced columns for col in self.ml_input_columns: - events = set_ak_column(events, col, ak.fill_none(ak.nan_to_none(events[col]), ZERO_PADDING_VALUE)) + events = set_ak_column_f32(events, col, ak.fill_none(ak.nan_to_none(events[col]), ZERO_PADDING_VALUE)) check_column_bookkeeping(self, events) return events @@ -360,6 +360,9 @@ def dl_ml_inputs_init(self: Producer) -> None: check_variable_existence(self) +test_dl_ml_inputs = dl_ml_inputs.derive("test_dl_ml_inputs") + + @producer( uses={common_ml_inputs}, produces={common_ml_inputs}, @@ -437,7 +440,7 @@ def sl_res_ml_inputs(self: Producer, events: ak.Array, **kwargs) -> ak.Array: # fill nan/none values of all produced columns for col in self.ml_input_columns: - events = set_ak_column(events, col, ak.fill_none(ak.nan_to_none(events[col]), ZERO_PADDING_VALUE)) + events = set_ak_column_f32(events, col, ak.fill_none(ak.nan_to_none(events[col]), ZERO_PADDING_VALUE)) 
check_column_bookkeeping(self, events) return events From 63a6bf35645f2e6f8cacb43d9e1c8a9b967a5442 Mon Sep 17 00:00:00 2001 From: Mathis Frahm Date: Wed, 15 Jan 2025 17:25:37 +0100 Subject: [PATCH 24/29] add implementation + first set of (incomplete) trigger SFs --- hbw/config/config_run2.py | 17 ++++ hbw/production/trigger.py | 198 ++++++++++++++++++++++++-------------- hbw/production/weights.py | 63 +++--------- hbw/weight/default.py | 8 ++ 4 files changed, 166 insertions(+), 120 deletions(-) diff --git a/hbw/config/config_run2.py b/hbw/config/config_run2.py index 92e73b0..8089952 100644 --- a/hbw/config/config_run2.py +++ b/hbw/config/config_run2.py @@ -608,8 +608,22 @@ def add_external(name, value): add_external("jet_veto_map", (f"{json_mirror}/POG/JME/{corr_tag}/jetvetomaps.json.gz", "v1")) # electron scale factors add_external("electron_sf", (f"{json_mirror}/POG/EGM/{corr_tag}/electron.json.gz", "v1")) + add_external("electron_ss", (f"{json_mirror}/POG/EGM/{corr_tag}/electronSS.json.gz", "v1")) # muon scale factors add_external("muon_sf", (f"{json_mirror}/POG/MUO/{corr_tag}/muon_Z.json.gz", "v1")) + # trigger_sf from Balduin + # # files with uncertainties, not loadable because there are some NaNs in the json :/ + # trigger_sf_path = "/afs/desy.de/user/f/frahmmat/Projects/hh2bbww/data/software/trig_sf" + # add_external("trigger_sf_ee", (f"{trigger_sf_path}/sf_ee+Ele50_CaloI+DoubleEle33_mli_lep_pt-trig_ids.json", "v1")) + # add_external("trigger_sf_mm", (f"{trigger_sf_path}/sf_mm_mli_lep_pt-trig_ids.json", "v1")) + # add_external("trigger_sf_mixed", (f"{trigger_sf_path}/sf_mixed+Ele50_CaloI+DoubleEle33_mli_lep_pt-trig_ids.json", "v1")) # noqa: E501 + + # files without uncertainties and with wrong triggers + trigger_sf_path = 
"/nfs/dust/cms/user/letzerba/hh2bbww/data/cf_store/hbw_dl/cf.CalculateTriggerScaleFactors/c22post/nominal/calib__with_b_reg/sel__dl1_no_triggerV11__steps_no_trigger/prod__event_weightsV2__trigger_prodV2__pre_ml_catsV1__dl_ml_inputsV1/weight__ref_cut/datasets_4_10839b14e3/prod3/" # noqa: E501 + add_external("trigger_sf_ee", (f"{trigger_sf_path}/sf_ee_mli_lep_pt-trig_ids.json", "v1")) + add_external("trigger_sf_mm", (f"{trigger_sf_path}/sf_mm_mli_lep_pt-trig_ids.json", "v1")) + add_external("trigger_sf_mixed", (f"{trigger_sf_path}/sf_mixed_mli_lep_pt-trig_ids.json", "v1")) # noqa: E501 + # btag scale factor add_external("btag_sf_corr", (f"{json_mirror}/POG/BTV/{corr_tag}/btagging.json.gz", "v1")) # V+jets reweighting (derived for 13 TeV, custom json converted from ROOT, not centrally produced) @@ -710,6 +724,9 @@ def add_external(name, value): # Leptons "{Electron,Muon}.{pt,eta,phi,mass,charge,pdgId,jetRelIso,is_tight,dxy,dz}", "Electron.{deltaEtaSC,r9,seedGain}", "mll", + # isolations for testing + "Electron.{pfRelIso03_all,miniPFRelIso_all,mvaIso,mvaTTH}", + "Muon.{pfRelIso03_all,miniPFRelIso_all,mvaMuID,mvaTTH}", # MET "{MET,PuppiMET}.{pt,phi}", # all columns added during selection using a ColumnCollection flag, but skip cutflow ones diff --git a/hbw/production/trigger.py b/hbw/production/trigger.py index 61ea829..bb08c15 100644 --- a/hbw/production/trigger.py +++ b/hbw/production/trigger.py @@ -6,98 +6,122 @@ from __future__ import annotations +import functools + +# from dataclasses import dataclass + from columnflow.production import Producer, producer from columnflow.util import maybe_import, InsertableDict -from columnflow.columnar_util import set_ak_column, flat_np_view, layout_ak_array +from columnflow.columnar_util import set_ak_column, fill_at +from columnflow.production.cms.muon import muon_weights, MuonSFConfig + +from hbw.production.prepare_objects import prepare_objects np = maybe_import("numpy") ak = maybe_import("awkward") -@producer( - uses={ - 
"Trigger.pt", "Trigger.eta", +set_ak_column_f32 = functools.partial(set_ak_column, value_type=np.float32) +fill_at_f32 = functools.partial(fill_at, value_type=np.float32) + + +# @dataclass +# class TriggerSFConfig: + + +from hbw.categorization.categories import catid_2e, catid_2mu, catid_emu + + +# NOTE: dummy up/down variation at the moment +trigger_sf_config = { + "trigger_sf_ee": { + "corr_keys": { + "nominal": "sf_ee_mli_lep_pt-trig_ids", + "up": "sf_ee_mli_lep_pt-trig_ids", + "down": "sf_ee_mli_lep_pt-trig_ids", + }, + "category": catid_2e, + }, + "trigger_sf_mm": { + "corr_keys": { + "nominal": "sf_mm_mli_lep_pt-trig_ids", + "up": "sf_mm_mli_lep_pt-trig_ids", + "down": "sf_mm_mli_lep_pt-trig_ids", + }, + "category": catid_2mu, }, + "trigger_sf_mixed": { + "corr_keys": { + "nominal": "sf_mixed_mli_lep_pt-trig_ids", + "up": "sf_mixed_mli_lep_pt-trig_ids", + "down": "sf_mixed_mli_lep_pt-trig_ids", + }, + "category": catid_emu, + }, +} + + +@producer( + uses={"{Electron,Muon}.{pt,eta,phi,mass}", prepare_objects}, # produces in the init # only run on mc mc_only=True, # function to determine the correction file - get_trigger_file=(lambda self, external_files: external_files.trigger_sf), - # function to determine the trigger weight config - # get_trigger_config=(lambda self: self.config_inst.x.trigger_sf_names), + trigger_sf_config=trigger_sf_config, weight_name="trigger_weight", ) -def trigger_weights( +def dl_trigger_weights( self: Producer, events: ak.Array, trigger_mask: ak.Array | type(Ellipsis) = Ellipsis, **kwargs, ) -> ak.Array: """ - Creates trigger weights using the correctionlib. Requires an external file in the config under - ``trigger_sf``: + Creates trigger weights using custom trigger SF jsons. + """ - .. 
code-block:: python + events = self[prepare_objects](events, **kwargs) - cfg.x.external_files = DotDict.wrap({ - "trigger_sf": "/afs/cern.ch/work/m/mrieger/public/mirrors/jsonpog-integration-9ea86c4c/POG/MUO/2017_UL/trigger_z.json.gz", # noqa - }) + variable_map = { + "mli_lep_pt": events.Lepton[:, 0].pt, + } - *get_trigger_file* can be adapted in a subclass in case it is stored differently in the external - files. + full_mask = ak.zeros_like(events.event, dtype=bool) - The name of the correction set and the year string for the weight evaluation should be given as - an auxiliary entry in the config: + for key, corr_set in self.correction_sets.items(): + sf_config = self.trigger_sf_config[key] - .. code-block:: python + categorizer = sf_config["category"] + events, mask = self[categorizer](events, **kwargs) - cfg.x.trigger_sf_names = ("NUM_TightRelIso_DEN_TightIDandIPCut", "2017_UL") + # ensure that no event is assigned to multiple categories + if ak.any(mask & full_mask): + raise Exception(f"Overlapping categories in {dl_trigger_weights.cls_name}") + full_mask = mask | full_mask - *get_trigger_config* can be adapted in a subclass in case it is stored differently in the config. + for sys, corr_key in sf_config["corr_keys"].items(): + sysfix = "" if sys == "nominal" else f"_{sys}" + col_name = f"{self.weight_name}{sysfix}" + if col_name not in events.fields: + events = set_ak_column_f32(events, col_name, ak.ones_like(events.event)) - Optionally, a *trigger_mask* can be supplied to compute the scale factor weight based only on a - subset of triggers. 
- """ - # flat absolute eta and pt views - abs_eta = flat_np_view(abs(events.Trigger.eta[trigger_mask]), axis=1) - pt = flat_np_view(events.Trigger.pt[trigger_mask], axis=1) + corr = corr_set[corr_key] + inputs = [variable_map[inp.name] for inp in corr.inputs] - variable_map = { - "year": self.year, - "abseta": abs_eta, - "eta": abs_eta, - "pt": pt, - } + _sf = corr.evaluate(*inputs) - # loop over systematics - for syst, postfix in [ - ("sf", ""), - ("systup", "_up"), - ("systdown", "_down"), - ]: - # get the inputs for this type of variation - variable_map_syst = { - **variable_map, - "scale_factors": "nominal" if syst == "sf" else syst, # syst key in 2022 - "ValType": syst, # syst key in 2017 - } - inputs = [variable_map_syst[inp.name] for inp in self.trigger_sf_corrector.inputs] - sf_flat = self.trigger_sf_corrector(*inputs) - - # add the correct layout to it - sf = layout_ak_array(sf_flat, events.Trigger.pt[trigger_mask]) - - # create the product over all triggers in one event - weight = ak.prod(sf, axis=1, mask_identity=False) - - # store it - events = set_ak_column(events, f"{self.weight_name}{postfix}", weight, value_type=np.float32) + events = fill_at_f32( + ak_array=events, + where=mask, + route=col_name, + value=_sf, + ) return events -@trigger_weights.requires -def trigger_weights_requires(self: Producer, reqs: dict) -> None: +@dl_trigger_weights.requires +def dl_trigger_weights_requires(self: Producer, reqs: dict) -> None: if "external_files" in reqs: return @@ -105,30 +129,58 @@ def trigger_weights_requires(self: Producer, reqs: dict) -> None: reqs["external_files"] = BundleExternalFiles.req(self.task) -@trigger_weights.setup -def trigger_weights_setup( +@dl_trigger_weights.setup +def dl_trigger_weights_setup( self: Producer, reqs: dict, inputs: dict, reader_targets: InsertableDict, ) -> None: - bundle = reqs["external_files"] + bundle_files = reqs["external_files"].files # create the corrector import correctionlib - 
correctionlib.highlevel.Correction.__call__ = correctionlib.highlevel.Correction.evaluate - correction_set = correctionlib.CorrectionSet.from_string( - self.get_trigger_file(bundle.files), - ) - corrector_name, self.year = self.get_trigger_config() - self.trigger_sf_corrector = correction_set[corrector_name] + self.correction_sets = {} + for key, sf_config in self.trigger_sf_config.items(): + target = bundle_files[key] + correction_set = correctionlib.CorrectionSet.from_string(target.load(formatter="json")) + self.correction_sets[key] = correction_set - # check versions - if self.supported_versions and self.trigger_sf_corrector.version not in self.supported_versions: - raise Exception(f"unsuppprted trigger sf corrector version {self.trigger_sf_corrector.version}") - -@trigger_weights.init -def trigger_weights_init(self: Producer, **kwargs) -> None: +@dl_trigger_weights.init +def dl_trigger_weights_init(self: Producer, **kwargs) -> None: weight_name = self.weight_name self.produces |= {weight_name, f"{weight_name}_up", f"{weight_name}_down"} + + for key, sf_config in self.trigger_sf_config.items(): + self.uses.add(sf_config["category"]) + + +muon_trigger_weights = muon_weights.derive("muon_trigger_weights", cls_dict={ + "weight_name": "muon_trigger_weight", + "get_muon_config": (lambda self: MuonSFConfig.new(self.config_inst.x.muon_trigger_sf_names)), +}) + + +@producer( + uses={muon_trigger_weights}, +) +def sl_trigger_weights(self: Producer, events: ak.Array, **kwargs) -> ak.Array: + """ + Producer that calculates the single lepton trigger weights. + NOTE: this only includes the trigger weights from the muon channel. They should be combined with + the electron trigger weights in this producer. 
+ """ + if not self.config_inst.has_aux("muon_trigger_sf_names"): + raise Exception(f"In {sl_trigger_weights.__name__}: missing 'muon_trigger_sf_names' in config") + + # compute muon trigger SF weights (NOTE: trigger SFs are only defined for muons with + # pt > 26 GeV, so create a copy of the events array with with all muon pt < 26 GeV set to 26 GeV) + trigger_sf_events = set_ak_column_f32(events, "Muon.pt", ak.where(events.Muon.pt > 26., events.Muon.pt, 26.)) + trigger_sf_events = self[muon_trigger_weights](trigger_sf_events, **kwargs) + for route in self[muon_trigger_weights].produced_columns: + events = set_ak_column_f32(events, route, route.apply(trigger_sf_events)) + # memory cleanup + del trigger_sf_events + + return events diff --git a/hbw/production/weights.py b/hbw/production/weights.py index 28e9e6f..6e297f2 100644 --- a/hbw/production/weights.py +++ b/hbw/production/weights.py @@ -27,6 +27,7 @@ from hbw.production.normalized_weights import normalized_weight_factory from hbw.production.normalized_btag import normalized_btag_weights from hbw.production.dataset_normalization import dataset_normalization_weight +from hbw.production.trigger import sl_trigger_weights, dl_trigger_weights from hbw.util import has_tag, IF_DY, IF_TOP @@ -129,10 +130,7 @@ def event_weights_to_normalize_init(self) -> None: "weight_name": "muon_iso_weight", "get_muon_config": (lambda self: MuonSFConfig.new(self.config_inst.x.muon_id_sf_names)), }) -muon_trigger_weights = muon_weights.derive("muon_trigger_weights", cls_dict={ - "weight_name": "muon_trigger_weight", - "get_muon_config": (lambda self: MuonSFConfig.new(self.config_inst.x.muon_trigger_sf_names)), -}) + @producer( @@ -151,43 +149,6 @@ def muon_id_iso_weights(self: Producer, events: ak.Array, **kwargs) -> ak.Array: return events -@producer( - uses={muon_trigger_weights}, -) -def sl_trigger_weights(self: Producer, events: ak.Array, **kwargs) -> ak.Array: - """ - Producer that calculates the single lepton trigger weights. 
- """ - if not self.config_inst.has_aux("muon_trigger_sf_names"): - raise Exception(f"In {sl_trigger_weights.__name__}: missing 'muon_trigger_sf_names' in config") - - # compute muon trigger SF weights (NOTE: trigger SFs are only defined for muons with - # pt > 26 GeV, so create a copy of the events array with with all muon pt < 26 GeV set to 26 GeV) - trigger_sf_events = set_ak_column_f32(events, "Muon.pt", ak.where(events.Muon.pt > 26., events.Muon.pt, 26.)) - trigger_sf_events = self[muon_trigger_weights](trigger_sf_events, **kwargs) - for route in self[muon_trigger_weights].produced_columns: - events = set_ak_column_f32(events, route, route.apply(trigger_sf_events)) - # memory cleanup - del trigger_sf_events - - return events - - -def sl_trigger_weights_skip_func(self: Producer) -> bool: - if not getattr(self, "config_inst", None) or not getattr(self, "dataset_inst", None): - # do not skip when config or dataset is not set - return False - - if self.config_inst.x.lepton_tag == "sl": - # do not skip when lepton tag is single lepton - return False - else: - return True - - -sl_trigger_weights.skip_func = sl_trigger_weights_skip_func - - @producer( uses={ normalization_weights, @@ -266,7 +227,7 @@ def combined_normalization_weights_init(self: Producer) -> None: normalized_pu_weights, }, mc_only=True, - version=law.config.get_expanded("analysis", "event_weights_version", 1), + version=law.config.get_expanded("analysis", "event_weights_version", 2), ) def event_weights(self: Producer, events: ak.Array, **kwargs) -> ak.Array: """ @@ -295,9 +256,8 @@ def event_weights(self: Producer, events: ak.Array, **kwargs) -> ak.Array: if not has_tag("skip_muon_weights", self.config_inst, self.dataset_inst, operator=any): events = self[muon_id_iso_weights](events, **kwargs) - if self.config_inst.x.lepton_tag == "sl": - # compute single lepton trigger SF weights - events = self[sl_trigger_weights](events, **kwargs) + if not has_tag("skip_trigger_weights", self.config_inst, 
self.dataset_inst, operator=any): + events = self[self.trigger_weights_producer](events, **kwargs) # normalize event weights using stats events = self[normalized_pu_weights](events, **kwargs) @@ -324,8 +284,13 @@ def event_weights_init(self: Producer) -> None: self.uses |= {muon_id_iso_weights} self.produces |= {muon_id_iso_weights} - self.uses |= {sl_trigger_weights} - self.produces |= {sl_trigger_weights} + if not has_tag("skip_trigger_weights", self.config_inst, self.dataset_inst, operator=any): + self.trigger_weights_producer = ( + sl_trigger_weights if self.config_inst.x.lepton_tag == "sl" + else dl_trigger_weights + ) + self.uses |= {self.trigger_weights_producer} + self.produces |= {self.trigger_weights_producer} if not has_tag("skip_btag_weights", self.config_inst, self.dataset_inst, operator=any): self.uses |= {btag_weights, normalized_btag_weights} @@ -358,3 +323,7 @@ def large_weights_killer(self: Producer, events: ak.Array, **kwargs) -> ak.Array events = set_ak_column(events, "mc_weight", ak.where(weight_too_large, 0, events.mc_weight)) return events + + +# for testing +test_event_weights = event_weights.derive("test_event_weights") diff --git a/hbw/weight/default.py b/hbw/weight/default.py index ef5db3c..01a1097 100644 --- a/hbw/weight/default.py +++ b/hbw/weight/default.py @@ -191,6 +191,14 @@ def base_init(self: WeightProducer) -> None: "vjets_weight": [], # TODO: corrections/shift missing "stitched_normalization_weight": [], }}) +with_trigger_weight = default_weight_producer.derive("with_trigger_weight", cls_dict={"weight_columns": { + **default_correction_weights, + "vjets_weight": [], # TODO: corrections/shift missing + "trigger_weight": [], # TODO: corrections/shift missing + "stitched_normalization_weight": [], +}}) + + base.derive("unstitched", cls_dict={"weight_columns": { **default_correction_weights, "normalization_weight": [], }}) From 99847e705435efc60845e50ce0bfcb6c380d1f7f Mon Sep 17 00:00:00 2001 From: Mathis Frahm Date: Wed, 15 Jan 
2025 17:26:13 +0100 Subject: [PATCH 25/29] update applied jetId --- hbw/selection/jet.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hbw/selection/jet.py b/hbw/selection/jet.py index 9141b92..83df528 100644 --- a/hbw/selection/jet.py +++ b/hbw/selection/jet.py @@ -55,7 +55,7 @@ def jet_selection( jet_mask_loose = ( (events.Jet.pt >= self.jet_pt) & (abs(events.Jet.eta) <= 2.4) & - (events.Jet.jetId >= 2) # 1: loose, 2: tight, 4: isolated, 6: tight+isolated + (events.Jet.jetId >= 6) # 1: loose, 2: tight, 4: isolated, 6: tight+isolated ) electron = events.Electron[lepton_results.objects.Electron.LooseElectron] @@ -64,7 +64,7 @@ def jet_selection( jet_mask = ( (events.Jet.pt >= self.jet_pt) & (abs(events.Jet.eta) <= 2.4) & - (events.Jet.jetId >= 2) & # 1: loose, 2: tight, 4: isolated, 6: tight+isolated + (events.Jet.jetId >= 6) & # 1: loose, 2: tight, 4: isolated, 6: tight+isolated # ak.all(events.Jet.metric_table(lepton_results.x.lepton) > 0.4, axis=2) ak.all(events.Jet.metric_table(electron) > 0.4, axis=2) & ak.all(events.Jet.metric_table(muon) > 0.4, axis=2) From 2a0a7bb73313fbed05eaad95a8e2d001cd58a388 Mon Sep 17 00:00:00 2001 From: Mathis Frahm Date: Wed, 15 Jan 2025 17:30:53 +0100 Subject: [PATCH 26/29] minor configuration --- hbw/config/datasets.py | 5 +++-- hbw/config/defaults_and_groups.py | 4 +++- hbw/config/variables.py | 15 +++++++++------ hbw/production/weights.py | 1 - 4 files changed, 15 insertions(+), 10 deletions(-) diff --git a/hbw/config/datasets.py b/hbw/config/datasets.py index a4041f1..8acdbfd 100644 --- a/hbw/config/datasets.py +++ b/hbw/config/datasets.py @@ -152,8 +152,9 @@ def hbw_dataset_names(config: od.Config, as_list: bool = False) -> DotDict[str: "wmh_hzg_zll_powheg", "tth_hbb_powheg", "tth_hnonbb_powheg", # overlap with other samples, so be careful - "ttzh_madgraph", - "ttwh_madgraph", + # TODO: no cross sections setup for these samples + # "ttzh_madgraph", + # "ttwh_madgraph", ]), ], "hh_ggf_hbb_hvv": [ diff 
--git a/hbw/config/defaults_and_groups.py b/hbw/config/defaults_and_groups.py index bb59762..fc16a50 100644 --- a/hbw/config/defaults_and_groups.py +++ b/hbw/config/defaults_and_groups.py @@ -225,7 +225,9 @@ def set_config_defaults_and_groups(config_inst): "sl_much_boosted": ["sr__1mu__boosted"], "sl_ech_boosted": ["sr__1e__boosted"], "dl": ["sr", "dycr", "ttcr", "sr__1b", "sr__2b", "dycr__1b", "dycr__2b", "ttcr__1b", "ttcr__2b"], - "dl_preml": bracket_expansion(["incl", "{sr,ttcr,dycr}{,__2e,__2mu,__emu}{,__1b,__2b}"]), + "dl_preml_small": bracket_expansion(["incl", "{sr,ttcr,dycr}{,__2e,__2mu,__emu}__resolved{,__1b,__2b}"]), + "dl_preml_large": bracket_expansion(["incl", "{,sr__,ttcr__,dycr__}{,2e__,2mu__,emu__}resolved{,__1b,__2b}"]), + "dl_preml_boosted": bracket_expansion(["{,sr__,ttcr__,dycr__}{,2e__,2mu__,emu__}boosted"]), "dl_ttcr": ["ttcr", "ttcr__1b", "ttcr__2b", "ttcr__2e", "ttcr__2mu", "ttcr__emu"], "dl_dycr": ["dycr", "dycr__1b", "dycr__2b", "dycr__2e", "dycr__2mu", "dycr__emu"], "dl_sr": ["sr", "sr__1b", "sr__2b", "sr__2e", "sr__2mu", "sr__emu"], diff --git a/hbw/config/variables.py b/hbw/config/variables.py index c1f6fef..ad0cf22 100644 --- a/hbw/config/variables.py +++ b/hbw/config/variables.py @@ -560,9 +560,12 @@ def add_variables(config: od.Config) -> None: # Leptons for i in range(2): + # NOTE: inputs aux is only being used when the expression is a function and not a string; + # to define expression as a function, define as lambda function with passing i=i to avoid + # the late binding issue config.add_variable( name=f"lepton{i}_pt", - expression=f"Lepton[:, {i}].pt", + expression=lambda events, i=i: events.Lepton[:, i].pt, aux=dict( inputs={"{Electron,Muon}.{pt,eta,phi,mass}"}, ), @@ -573,29 +576,29 @@ def add_variables(config: od.Config) -> None: ) config.add_variable( name=f"lepton{i}_eta", - expression=f"Lepton[:, {i}].eta", + expression=lambda events, i=i: events.Lepton[:, i].eta, aux=dict( inputs={"{Electron,Muon}.{pt,eta,phi,mass}"}, ), 
binning=(40, -3.2, 3.2), unit="GeV", null_value=EMPTY_FLOAT, - x_title=f"Lepton {i} $\eta$", + x_title=f"Lepton {i} $\\eta$", ) config.add_variable( name=f"lepton{i}_phi", - expression=f"Lepton[:, {i}].phi", + expression=lambda events, i=i: events.Lepton[:, i].phi, aux=dict( inputs={"{Electron,Muon}.{pt,eta,phi,mass}"}, ), binning=(50, -2.5, 2.5), unit="GeV", null_value=EMPTY_FLOAT, - x_title=f"Lepton {i} $\phi$", + x_title=f"Lepton {i} $\\phi$", ) config.add_variable( name=f"lepton{i}_mass", - expression=f"Lepton[:, {i}].mass", + expression=lambda events, i=i: events.Lepton[:, i].mass, aux=dict( inputs={"{Electron,Muon}.{pt,eta,phi,mass}"}, ), diff --git a/hbw/production/weights.py b/hbw/production/weights.py index 6e297f2..fd431f3 100644 --- a/hbw/production/weights.py +++ b/hbw/production/weights.py @@ -132,7 +132,6 @@ def event_weights_to_normalize_init(self) -> None: }) - @producer( uses={muon_id_weights, muon_iso_weights}, produces={muon_id_weights, muon_iso_weights}, From 8c4f8f5672dcb2b3af5fc66d4f2c288aeb1000c1 Mon Sep 17 00:00:00 2001 From: Mathis Frahm Date: Fri, 17 Jan 2025 08:44:26 +0100 Subject: [PATCH 27/29] update columnflow + fixes --- hbw/production/dataset_normalization.py | 2 - hbw/production/normalized_btag.py | 2 - hbw/production/normalized_weights.py | 2 - hbw/production/process_ids.py | 2 +- hbw/tasks/corrections.py | 2 - hbw/tasks/inspection.py | 2 - hbw/tasks/resolve_dummy.py | 59 +++++++++++++++++++++++++ modules/columnflow | 2 +- 8 files changed, 61 insertions(+), 12 deletions(-) create mode 100644 hbw/tasks/resolve_dummy.py diff --git a/hbw/production/dataset_normalization.py b/hbw/production/dataset_normalization.py index eb92353..cb69dac 100644 --- a/hbw/production/dataset_normalization.py +++ b/hbw/production/dataset_normalization.py @@ -50,9 +50,7 @@ def dataset_normalization_weight_requires(self: Producer, reqs: dict) -> None: from columnflow.tasks.selection import MergeSelectionStats reqs["selection_stats"] = 
MergeSelectionStats.req( self.task, - tree_index=0, branch=-1, - _exclude=MergeSelectionStats.exclude_params_forest_merge, ) diff --git a/hbw/production/normalized_btag.py b/hbw/production/normalized_btag.py index 7c6bbad..6b3903f 100644 --- a/hbw/production/normalized_btag.py +++ b/hbw/production/normalized_btag.py @@ -151,9 +151,7 @@ def normalized_btag_weights_from_json_requires(self: Producer, reqs: dict) -> No from columnflow.tasks.selection import MergeSelectionStats reqs["selection_stats"] = MergeSelectionStats.req( self.task, - tree_index=0, branch=-1, - _exclude=MergeSelectionStats.exclude_params_forest_merge, ) diff --git a/hbw/production/normalized_weights.py b/hbw/production/normalized_weights.py index 324668e..074fc3b 100644 --- a/hbw/production/normalized_weights.py +++ b/hbw/production/normalized_weights.py @@ -89,9 +89,7 @@ def normalized_weight_requires(self: Producer, reqs: dict) -> None: from columnflow.tasks.selection import MergeSelectionStats reqs["selection_stats"] = MergeSelectionStats.req( self.task, - tree_index=0, branch=-1, - _exclude=MergeSelectionStats.exclude_params_forest_merge, ) @normalized_weight.setup diff --git a/hbw/production/process_ids.py b/hbw/production/process_ids.py index 25f78b8..c326bfb 100644 --- a/hbw/production/process_ids.py +++ b/hbw/production/process_ids.py @@ -213,7 +213,7 @@ def dy_nlo_process_producer(self: Producer, events: ak.Array, **kwargs) -> ak.Ar """ n_partons = events.LHE.NpNLO - genjet_mask = (events.GenJet.pt >= 20) & (abs(events.GenJet.eta) < 2.4) + genjet_mask = (events.GenJet["pt"] >= 20) & (abs(events.GenJet["eta"]) < 2.4) genjet = (events.GenJet[genjet_mask]) hf_genjet_mask = (genjet.hadronFlavour == 4) | (genjet.hadronFlavour == 5) is_hf = ak.any(hf_genjet_mask, axis=1) diff --git a/hbw/tasks/corrections.py b/hbw/tasks/corrections.py index 841da01..2ed4c5a 100644 --- a/hbw/tasks/corrections.py +++ b/hbw/tasks/corrections.py @@ -65,9 +65,7 @@ def requires(self): dataset.name: 
self.reqs.MergeSelectionStats.req( self, dataset=dataset.name, - tree_index=0, branch=-1, - _exclude=self.reqs.MergeSelectionStats.exclude_params_forest_merge, ) for dataset in self.dataset_insts } diff --git a/hbw/tasks/inspection.py b/hbw/tasks/inspection.py index be49ac7..df60235 100644 --- a/hbw/tasks/inspection.py +++ b/hbw/tasks/inspection.py @@ -76,9 +76,7 @@ def requires(self): reqs[dataset] = self.reqs.MergeSelectionStats.req( self, dataset=dataset, - tree_index=0, branch=-1, - _exclude=self.reqs.MergeSelectionStats.exclude_params_forest_merge, ) return reqs diff --git a/hbw/tasks/resolve_dummy.py b/hbw/tasks/resolve_dummy.py new file mode 100644 index 0000000..4e1788a --- /dev/null +++ b/hbw/tasks/resolve_dummy.py @@ -0,0 +1,59 @@ +import law + +from columnflow.util import DotDict + +from columnflow.tasks.framework.base import MultiConfigTask +from columnflow.tasks.framework.remote import RemoteWorkflow +from columnflow.tasks.framework.mixins import ( + CalibratorsMixin, SelectorStepsMixin, ProducersMixin, MLModelsMixin, WeightProducerMixin, + CategoriesMixin, HistHookMixin, MultiConfigDatasetsProcessesMixin, + # ShiftSourcesMixin, +) +from columnflow.tasks.framework.plotting import ( + ProcessPlotSettingMixin, VariablePlotSettingMixin, PlotBase, +) + +from hbw.util import timeit_multiple + + +class ResolveDummy( + HistHookMixin, + VariablePlotSettingMixin, + ProcessPlotSettingMixin, + MultiConfigDatasetsProcessesMixin, + CategoriesMixin, + MLModelsMixin, + WeightProducerMixin, + ProducersMixin, + SelectorStepsMixin, + CalibratorsMixin, + MultiConfigTask, + law.LocalWorkflow, + RemoteWorkflow, +): + plot_function = PlotBase.plot_function.copy( + default="columnflow.plotting.plot_functions_2d.plot_2d", + add_default_to_description=True, + ) + + def create_branch_map(self): + return [ + DotDict({"category": cat_name, "variable": var_name}) + for cat_name in sorted(self.categories) + for var_name in sorted(self.variables) + ] + + @classmethod + 
@timeit_multiple + def resolve_param_values(cls, params): + params = super().resolve_param_values(params) + return params + + def run(self): + pass + + def output(self): + output = { + "always_incomplete_dummy": self.target("dummy.txt"), + } + return output diff --git a/modules/columnflow b/modules/columnflow index 608ef91..cb362cf 160000 --- a/modules/columnflow +++ b/modules/columnflow @@ -1 +1 @@ -Subproject commit 608ef912fa7ba33a82fa1e0af868380030fb3e75 +Subproject commit cb362cf8b00b074847de4ff270836599230b9fbd From c8bd74f47a349ea42dba1feb175e6bfcdc5cf80f Mon Sep 17 00:00:00 2001 From: Mathis Frahm Date: Fri, 17 Jan 2025 10:51:03 +0100 Subject: [PATCH 28/29] cleanup variables --- hbw/config/defaults_and_groups.py | 2 +- hbw/config/variables.py | 21 ++++++++++----------- hbw/selection/common.py | 10 ++++++++++ hbw/selection/dl_remastered.py | 6 +++++- 4 files changed, 26 insertions(+), 13 deletions(-) diff --git a/hbw/config/defaults_and_groups.py b/hbw/config/defaults_and_groups.py index fc16a50..87d7cf1 100644 --- a/hbw/config/defaults_and_groups.py +++ b/hbw/config/defaults_and_groups.py @@ -310,7 +310,7 @@ def set_config_defaults_and_groups(config_inst): "met_{pt,phi}", "jet{0,1,2,3}_{pt,eta,phi,mass,btagpnetb}", "bjet{0,1}_{pt,eta,phi,mass,btagpnetb}", - "ht", "lt", "mll", "ptll", + "ht", "lt", "mll", "ptll", "npvs", ]), "dl_resolved": ["n_*", "electron_*", "muon_*", "met_*", "jet*", "bjet*", "ht", "lt", "mll", "ptll"], "dl_boosted": ["n_*", "electron_*", "muon_*", "met_*", "fatjet_*", "lt", "mll", "ptll"], diff --git a/hbw/config/variables.py b/hbw/config/variables.py index ad0cf22..98d42e8 100644 --- a/hbw/config/variables.py +++ b/hbw/config/variables.py @@ -191,7 +191,7 @@ def add_variables(config: od.Config) -> None: name="high_jet_pt_strcat", # NOTE: for some reason passing the string directly produces ValueError due to different shapes, e.g. 
# ValueError: cannot broadcast RegularArray of size 7 with RegularArray of size 264 - expression=lambda events: ak.where(events.Jet.pt > 50, ["high_pt"], ["low_pt"]), + expression=lambda events: ak.where(events.Jet["pt"] > 50, ["high_pt"], ["low_pt"]), aux={ "inputs": {"Jet.pt"}, "axis_type": "strcat", @@ -203,7 +203,7 @@ def add_variables(config: od.Config) -> None: # h[{"high_jet_pt_intcat": hist.loc(0)}] picks the bin with value 0 config.add_variable( name="high_jet_pt_intcat", - expression=lambda events: ak.where(events.Jet.pt > 50, 1, 0), + expression=lambda events: ak.where(events.Jet["pt"] > 50, 1, 0), aux={ "inputs": {"Jet.pt"}, "axis_type": "intcat", @@ -212,7 +212,7 @@ def add_variables(config: od.Config) -> None: ) config.add_variable( name="high_jet_pt_bool", - expression=lambda events: events.Jet.pt > 50, + expression=lambda events: events.Jet["pt"] > 50, aux={ "inputs": {"Jet.pt"}, "axis_type": "bool", @@ -258,19 +258,18 @@ def add_variables(config: od.Config) -> None: config.add_variable( name="n_jet", - expression=lambda events: ak.num(events.Jet.pt, axis=1), + expression=lambda events: ak.num(events.Jet["pt"], axis=1), aux={"inputs": {"Jet.pt"}}, binning=(12, -0.5, 11.5), x_title="Number of jets", discrete_x=True, ) - if config.x.run == 2: deepjet_wps = config.x.btag_working_points.deepjet config.add_variable( name="n_deepjet_loose", expression=lambda events: ak.sum(events.Jet.btagDeepFlavB > deepjet_wps.loose, axis=1), - aux={"inputs": {"Jet.pt", "Jet.btagDeepFlavB"}}, + aux={"inputs": {"Jet.btagDeepFlavB"}}, binning=(7, -0.5, 6.5), x_title="Number of deepjets (loose WP)", discrete_x=True, @@ -278,7 +277,7 @@ def add_variables(config: od.Config) -> None: config.add_variable( name="n_deepjet_medium", expression=lambda events: ak.sum(events.Jet.btagDeepFlavB > deepjet_wps.medium, axis=1), - aux={"inputs": {"Jet.pt", "Jet.btagDeepFlavB"}}, + aux={"inputs": {"Jet.btagDeepFlavB"}}, binning=(7, -0.5, 6.5), x_title="Number of deepjets (medium WP)", 
discrete_x=True, @@ -286,7 +285,7 @@ def add_variables(config: od.Config) -> None: config.add_variable( name="n_deepjet_tight", expression=lambda events: ak.sum(events.Jet.btagDeepFlavB > deepjet_wps.tight, axis=1), - aux={"inputs": {"Jet.pt", "Jet.btagDeepFlavB"}}, + aux={"inputs": {"Jet.btagDeepFlavB"}}, binning=(7, -0.5, 6.5), x_title="Number of deepjets (tight WP)", discrete_x=True, @@ -296,7 +295,7 @@ def add_variables(config: od.Config) -> None: config.add_variable( name="n_particlenet_loose", expression=lambda events: ak.sum(events.Jet.btagPNetB > particlenet_wps.loose, axis=1), - aux={"inputs": {"Jet.pt", "Jet.btagPNetB"}}, + aux={"inputs": {"Jet.btagPNetB"}}, binning=(7, -0.5, 6.5), x_title="Number of pnet jets (loose WP)", discrete_x=True, @@ -304,7 +303,7 @@ def add_variables(config: od.Config) -> None: config.add_variable( name="n_particlenet_medium", expression=lambda events: ak.sum(events.Jet.btagPNetB > particlenet_wps.medium, axis=1), - aux={"inputs": {"Jet.pt", "Jet.btagPNetB"}}, + aux={"inputs": {"Jet.btagPNetB"}}, binning=(7, -0.5, 6.5), x_title="Number of pnet jets (medium WP)", discrete_x=True, @@ -312,7 +311,7 @@ def add_variables(config: od.Config) -> None: config.add_variable( name="n_particlenet_tight", expression=lambda events: ak.sum(events.Jet.btagPNetB > particlenet_wps.tight, axis=1), - aux={"inputs": {"Jet.pt", "Jet.btagPNetB"}}, + aux={"inputs": {"Jet.btagPNetB"}}, binning=(7, -0.5, 6.5), x_title="Number of pnet jets (tight WP)", discrete_x=True, diff --git a/hbw/selection/common.py b/hbw/selection/common.py index abf90ea..af91ee4 100644 --- a/hbw/selection/common.py +++ b/hbw/selection/common.py @@ -312,3 +312,13 @@ def configure_selector(self: Selector): self.config_inst.x.btag_wp_score = ( self.config_inst.x.btag_working_points[self.config_inst.x.b_tagger][self.config_inst.x.btag_wp] ) + + btag_column = self.config_inst.x.btag_column + self.config_inst.add_variable( + name="n_btag", + expression=lambda events: 
ak.num(events.Jet[btag_column] > self.config_inst.x.btag_wp_score, axis=1), + aux={"inputs": {f"Jet.{btag_column}"}}, + binning=(7, -0.5, 6.5), + x_title=f"Number of b-tagged jets ({btag_column})", + discrete_x=True, + ) diff --git a/hbw/selection/dl_remastered.py b/hbw/selection/dl_remastered.py index 486038d..d3412d2 100644 --- a/hbw/selection/dl_remastered.py +++ b/hbw/selection/dl_remastered.py @@ -8,6 +8,8 @@ from collections import defaultdict +import law + from cmsdb.constants import m_z from columnflow.util import maybe_import, DotDict @@ -211,7 +213,7 @@ def dl_lepton_selection_init(self: Selector) -> None: b_tagger=None, btag_wp=None, n_btag=None, - version=1, + version=law.config.get_expanded("analysis", "dl1_version", 2), ) @timeit def dl1( @@ -294,6 +296,8 @@ def dl1_init(self: Selector) -> None: # by only adding the used selectors in the init configure_selector(self) + # NOTE: since we add these uses so late, init's of these Producers will not run + # e.g. during Plotting tasks self.uses = { pre_selection, vbf_jet_selection, dl_boosted_jet_selection, From 8399405f7f0452bf65d627bc5d4094ad6301cb7b Mon Sep 17 00:00:00 2001 From: Mathis Frahm Date: Fri, 17 Jan 2025 11:32:44 +0100 Subject: [PATCH 29/29] rename IF_DY deferred column --- hbw/production/weights.py | 6 +++--- hbw/util.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/hbw/production/weights.py b/hbw/production/weights.py index fd431f3..8b6f4d4 100644 --- a/hbw/production/weights.py +++ b/hbw/production/weights.py @@ -28,7 +28,7 @@ from hbw.production.normalized_btag import normalized_btag_weights from hbw.production.dataset_normalization import dataset_normalization_weight from hbw.production.trigger import sl_trigger_weights, dl_trigger_weights -from hbw.util import has_tag, IF_DY, IF_TOP +from hbw.util import has_tag, IF_VJETS, IF_TOP np = maybe_import("numpy") @@ -40,8 +40,8 @@ @producer( - uses={IF_TOP(gen_parton_top), IF_DY(gen_v_boson), pu_weight}, - 
produces={IF_TOP(gen_parton_top), IF_DY(gen_v_boson), pu_weight}, + uses={IF_TOP(gen_parton_top), IF_VJETS(gen_v_boson), pu_weight}, + produces={IF_TOP(gen_parton_top), IF_VJETS(gen_v_boson), pu_weight}, mc_only=True, ) def event_weights_to_normalize(self: Producer, events: ak.Array, results: SelectionResult, **kwargs) -> ak.Array: diff --git a/hbw/util.py b/hbw/util.py index 09ee8f6..598f3ec 100644 --- a/hbw/util.py +++ b/hbw/util.py @@ -666,7 +666,7 @@ def IF_MC(self: ArrayFunction.DeferredColumn, func: ArrayFunction) -> Any | set[ @deferred_column -def IF_DY(self: ArrayFunction.DeferredColumn, func: ArrayFunction) -> Any | set[Any]: +def IF_VJETS(self: ArrayFunction.DeferredColumn, func: ArrayFunction) -> Any | set[Any]: if getattr(func, "dataset_inst", None) is None: return self.get()