Adjustments for 2023, down to produced columns.
riga committed Dec 13, 2024
1 parent a952685 commit d4c9222
Showing 7 changed files with 96 additions and 55 deletions.
2 changes: 1 addition & 1 deletion hbt/config/configs_hbt.py
@@ -712,7 +712,7 @@ def if_era(
elif year == 2023:
cmpgn = "2023PromptC" if campaign.has_tag("preBPix") else "2023PromptD"
cfg.x.electron_sf_names = ElectronSFConfig(
correction="UL-Electron-ID-SF",
correction="Electron-ID-SF",
campaign=cmpgn,
working_point="wp80iso",
)
76 changes: 47 additions & 29 deletions hbt/production/btag.py
@@ -23,27 +23,28 @@

# custom btag weight producers for deepjet and pnet configs
btag_weights_deepjet = btag_weights.derive("btag_weights_deepjet", cls_dict={
"weight_name": "btag_weight_deepjet",
"tagger_name": "deepjet",
"get_btag_config": (lambda self: self.config_inst.x.btag_sf_deepjet),
})
btag_weights_pnet = btag_weights.derive("btag_weights_pnet", cls_dict={
"weight_name": "btag_weight_pnet",
"tagger_name": "pnet",
"get_btag_config": (lambda self: self.config_inst.x.btag_sf_pnet),
})
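# a minimal sketch, not part of this commit: the same derive pattern would extend to any further
# tagger, assuming a matching auxiliary config entry exists (the "part" names below are hypothetical)
# btag_weights_part = btag_weights.derive("btag_weights_part", cls_dict={
#     "weight_name": "btag_weight_part",
#     "tagger_name": "part",
#     "get_btag_config": (lambda self: self.config_inst.x.btag_sf_part),
# })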


@producer(
uses={
btag_weights.PRODUCES,
# custom columns created upstream, probably by a producer
"process_id",
# nano columns
"Jet.pt",
},
uses={"process_id", "Jet.pt"},
# only run on mc
mc_only=True,
# configurable weight producer class
btag_weights_cls=None,
)
def normalized_btag_weights(self: Producer, events: ak.Array, **kwargs) -> ak.Array:
for weight_name in self[btag_weights].produces:
if not weight_name.startswith("btag_weight"):
def _normalized_btag_weights(self: Producer, events: ak.Array, **kwargs) -> ak.Array:
for route in self[self.btag_weights_cls].produced_columns:
weight_name = str(route)
if not weight_name.startswith(self.weight_name):
continue

# create weight vectors starting with ones for both weight variations, i.e.,
Expand Down Expand Up @@ -71,20 +72,26 @@ def normalized_btag_weights(self: Producer, events: ak.Array, **kwargs) -> ak.Ar
return events


@normalized_btag_weights.init
def normalized_btag_weights_init(self: Producer) -> None:
@_normalized_btag_weights.init
def _normalized_btag_weights_init(self: Producer) -> None:
assert self.btag_weights_cls, "btag_weights_cls must be set"

if not getattr(self, "dataset_inst", None):
return

for weight_name in self[btag_weights].produces:
if not weight_name.startswith("btag_weight"):
continue
# reuse the weight and tagger names
self.weight_name = self.btag_weights_cls.weight_name
self.tagger_name = self.btag_weights_cls.tagger_name

self.produces |= {f"normalized_{weight_name}", f"normalized_njet_{weight_name}"}
# add produced columns
for route in self[self.btag_weights_cls].produced_columns:
name = str(route)
if name.startswith(self.weight_name):
self.produces.add(f"normalized_{{,njet_}}{name}")


@normalized_btag_weights.requires
def normalized_btag_weights_requires(self: Producer, reqs: dict) -> None:
@_normalized_btag_weights.requires
def _normalized_btag_weights_requires(self: Producer, reqs: dict) -> None:
from columnflow.tasks.selection import MergeSelectionStats
reqs["selection_stats"] = MergeSelectionStats.req(
self.task,
Expand All @@ -94,37 +101,37 @@ def normalized_btag_weights_requires(self: Producer, reqs: dict) -> None:
)


@normalized_btag_weights.setup
def normalized_btag_weights_setup(self: Producer, reqs: dict, inputs: dict, reader_targets: InsertableDict) -> None:
@_normalized_btag_weights.setup
def _normalized_btag_weights_setup(self: Producer, reqs: dict, inputs: dict, reader_targets: InsertableDict) -> None:
# load the selection stats
selection_stats = self.task.cached_value(
key="selection_stats",
func=lambda: inputs["selection_stats"]["collection"][0]["stats"].load(formatter="json"),
)

# get the unique process ids in that dataset
key = "sum_mc_weight_selected_nobjet_per_process_and_njet"
key = f"sum_mc_weight_selected_nobjet_{self.tagger_name}_per_process_and_njet"
self.unique_process_ids = list(map(int, selection_stats[key].keys()))

# get the maximum numbers of jets
max_n_jets = max(map(int, sum((list(d.keys()) for d in selection_stats[key].values()), [])))

# helper to get numerators and denominators
def numerator_per_pid(pid):
key = "sum_mc_weight_selected_nobjet_per_process"
key = f"sum_mc_weight_selected_nobjet_{self.tagger_name}_per_process"
return selection_stats[key].get(str(pid), 0.0)

def denominator_per_pid(weight_name, pid):
key = f"sum_mc_weight_{weight_name}_selected_nobjet_per_process"
key = f"sum_mc_weight_{weight_name}_selected_nobjet_{self.tagger_name}_per_process"
return selection_stats[key].get(str(pid), 0.0)

def numerator_per_pid_njet(pid, n_jets):
key = "sum_mc_weight_selected_nobjet_per_process_and_njet"
key = f"sum_mc_weight_selected_nobjet_{self.tagger_name}_per_process_and_njet"
d = selection_stats[key].get(str(pid), {})
return d.get(str(n_jets), 0.0)

def denominator_per_pid_njet(weight_name, pid, n_jets):
key = f"sum_mc_weight_{weight_name}_selected_nobjet_per_process_and_njet"
key = f"sum_mc_weight_{weight_name}_selected_nobjet_{self.tagger_name}_per_process_and_njet"
d = selection_stats[key].get(str(pid), {})
return d.get(str(n_jets), 0.0)

@@ -134,8 +141,8 @@ def denominator_per_pid_njet(weight_name, pid, n_jets):
pid: safe_div(numerator_per_pid(pid), denominator_per_pid(weight_name, pid))
for pid in self.unique_process_ids
}
for weight_name in self[btag_weights].produces
if weight_name.startswith("btag_weight")
for weight_name in (str(route) for route in self[self.btag_weights_cls].produced_columns)
if weight_name.startswith(self.btag_weights_cls.weight_name)
}

# extract the ratio per weight, pid and also the jet multiplicity, using the latter as an index
@@ -148,6 +155,17 @@ def denominator_per_pid_njet(weight_name, pid, n_jets):
])
for pid in self.unique_process_ids
}
for weight_name in self[btag_weights].produces
if weight_name.startswith("btag_weight")
for weight_name in (str(route) for route in self[self.btag_weights_cls].produced_columns)
if weight_name.startswith(self.btag_weights_cls.weight_name)
}


# derive for btaggers
normalized_btag_weights_deepjet = _normalized_btag_weights.derive("normalized_btag_weights_deepjet", cls_dict={
"btag_weights_cls": btag_weights_deepjet,
"uses": _normalized_btag_weights.uses | {btag_weights_deepjet.PRODUCES},
})
normalized_btag_weights_pnet = _normalized_btag_weights.derive("normalized_btag_weights_pnet", cls_dict={
"btag_weights_cls": btag_weights_pnet,
"uses": _normalized_btag_weights.uses | {btag_weights_pnet.PRODUCES},
})
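For context, the per-process ratios assembled in the setup hook above boil down to dividing the summed MC weights before and after applying the b-tag weight; a minimal sketch of that idea, assuming a selection_stats dictionary keyed as shown above (safe_div in the diff presumably guards against a zero denominator):

def ratio_per_process(selection_stats: dict, tagger_name: str, weight_name: str, pid: int) -> float:
    # sum of plain mc weights for this process, before the bjet selection step
    num = selection_stats[f"sum_mc_weight_selected_nobjet_{tagger_name}_per_process"].get(str(pid), 0.0)
    # the same sum, but with the per-event btag weight applied
    den = selection_stats[f"sum_mc_weight_{weight_name}_selected_nobjet_{tagger_name}_per_process"].get(str(pid), 0.0)
    # scale factor that restores the pre-btag-weight yield of this process
    return num / den if den else 0.0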
14 changes: 9 additions & 5 deletions hbt/production/default.py
Expand Up @@ -14,22 +14,24 @@
from hbt.production.weights import (
normalized_pu_weight, normalized_pdf_weight, normalized_murmuf_weight,
)
from hbt.production.btag import normalized_btag_weights
from hbt.production.btag import normalized_btag_weights_deepjet, normalized_btag_weights_pnet
from hbt.production.tau import tau_weights, trigger_weights
from hbt.util import IF_DATASET_HAS_LHE_WEIGHTS
from hbt.util import IF_DATASET_HAS_LHE_WEIGHTS, IF_RUN_3

ak = maybe_import("awkward")


@producer(
uses={
category_ids, stitched_normalization_weights, normalized_pu_weight,
normalized_btag_weights, tau_weights, electron_weights, muon_weights, trigger_weights,
normalized_btag_weights_deepjet, IF_RUN_3(normalized_btag_weights_pnet), tau_weights,
electron_weights, muon_weights, trigger_weights,
IF_DATASET_HAS_LHE_WEIGHTS(normalized_pdf_weight, normalized_murmuf_weight),
},
produces={
category_ids, stitched_normalization_weights, normalized_pu_weight,
normalized_btag_weights, tau_weights, electron_weights, muon_weights, trigger_weights,
normalized_btag_weights_deepjet, IF_RUN_3(normalized_btag_weights_pnet), tau_weights,
electron_weights, muon_weights, trigger_weights,
IF_DATASET_HAS_LHE_WEIGHTS(normalized_pdf_weight, normalized_murmuf_weight),
},
)
@@ -54,7 +56,9 @@ def default(self: Producer, events: ak.Array, **kwargs) -> ak.Array:
events = self[normalized_pu_weight](events, **kwargs)

# btag weights
events = self[normalized_btag_weights](events, **kwargs)
events = self[normalized_btag_weights_deepjet](events, **kwargs)
if self.has_dep(normalized_btag_weights_pnet):
events = self[normalized_btag_weights_pnet](events, **kwargs)

# tau weights
events = self[tau_weights](events, **kwargs)
2 changes: 1 addition & 1 deletion hbt/production/weights.py
Expand Up @@ -24,7 +24,7 @@
mc_only=True,
)
def normalized_pu_weight(self: Producer, events: ak.Array, **kwargs) -> ak.Array:
for route in self[pu_weight].produces:
for route in self[pu_weight].produced_columns:
weight_name = str(route)
if not weight_name.startswith("pu_weight"):
continue
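The rename from produces to produced_columns here (and in btag.py above) means these loops now yield columnflow Route objects rather than plain strings, hence the str(route) conversion; a minimal illustration of the pattern under that assumption (the column name in the comment is illustrative):

for route in self[pu_weight].produced_columns:
    weight_name = str(route)  # Route -> plain column name, e.g. "pu_weight_something_up"
    if not weight_name.startswith("pu_weight"):
        continue  # skip columns of the dependency that are not pu weight variations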
52 changes: 35 additions & 17 deletions hbt/selection/default.py
@@ -137,10 +137,18 @@ def default(
results.event = event_sel

# combined event selection after all but the bjet step
event_sel_nob = results.steps.all_but_bjet = reduce(
and_,
[mask for step_name, mask in results.steps.items() if step_name != "bjet"],
)
tagger_name = btag_weights_deepjet.tagger_name
event_sel_nob_deepjet = results.steps[f"all_but_bjet_{tagger_name}"] = reduce(and_, [
mask for step_name, mask in results.steps.items()
if step_name != f"bjet_{tagger_name}"
])
event_sel_nob_pnet = None
if self.has_dep(btag_weights_pnet):
tagger_name = btag_weights_pnet.tagger_name
event_sel_nob_pnet = results.steps[f"all_but_bjet_{tagger_name}"] = reduce(and_, [
mask for step_name, mask in results.steps.items()
if step_name != f"bjet_{tagger_name}"
])

# increment stats
events, results = setup_and_increment_stats(
@@ -149,7 +157,8 @@
results=results,
stats=stats,
event_sel=event_sel,
event_sel_nob=event_sel_nob,
event_sel_nob_deepjet=event_sel_nob_deepjet,
event_sel_nob_pnet=event_sel_nob_pnet,
njets=results.x.n_central_jets,
)

@@ -278,7 +287,8 @@ def empty_call(
results=results,
stats=stats,
event_sel=results.event,
event_sel_nob=results.event,
event_sel_nob_deepjet=results.event,
event_sel_nob_pnet=results.event if self.has_dep(btag_weights_pnet) else None,
njets=ak.num(events.Jet, axis=1),
)

@@ -292,7 +302,8 @@ def setup_and_increment_stats(
results: SelectionResult,
stats: defaultdict,
event_sel: np.ndarray | ak.Array,
event_sel_nob: np.ndarray | ak.Array | None = None,
event_sel_nob_deepjet: np.ndarray | ak.Array | None = None,
event_sel_nob_pnet: np.ndarray | ak.Array | None = None,
njets: np.ndarray | ak.Array | None = None,
**kwargs,
) -> tuple[ak.Array, SelectionResult]:
@@ -305,7 +316,8 @@
:param results: The current selection results.
:param stats: The stats dictionary.
:param event_sel: The general event selection mask.
:param event_sel_nob: The event selection mask without the bjet step.
:param event_sel_nob_deepjet: The event selection mask without the bjet step for deepjet.
:param event_sel_nob_pnet: The event selection mask without the bjet step for pnet.
:param njets: The number of central jets.
:return: The updated events and results objects in a tuple.
"""
@@ -314,17 +326,21 @@
"num_events": Ellipsis,
"num_events_selected": event_sel,
}
if event_sel_nob is not None:
weight_map["num_events_selected_nobjet"] = event_sel_nob
if event_sel_nob_deepjet is not None:
weight_map["num_events_selected_nobjet_deepjet"] = event_sel_nob_deepjet
if event_sel_nob_pnet is not None:
weight_map["num_events_selected_nobjet_pnet"] = event_sel_nob_pnet
group_map = {}
group_combinations = []

# add mc info
if self.dataset_inst.is_mc:
weight_map["sum_mc_weight"] = events.mc_weight
weight_map["sum_mc_weight_selected"] = (events.mc_weight, event_sel)
if event_sel_nob is not None:
weight_map["sum_mc_weight_selected_nobjet"] = (events.mc_weight, event_sel_nob)
if event_sel_nob_deepjet is not None:
weight_map["sum_mc_weight_selected_nobjet_deepjet"] = (events.mc_weight, event_sel_nob_deepjet)
if event_sel_nob_pnet is not None:
weight_map["sum_mc_weight_selected_nobjet_pnet"] = (events.mc_weight, event_sel_nob_pnet)

# pu weights with variations
for route in sorted(self[pu_weight].produced_columns):
@@ -349,14 +365,16 @@
continue
for route in sorted(self[prod].produced_columns):
name = str(route)
if not name.startswith("btag_weight"):
if not name.startswith(prod.weight_name):
continue
weight_map[f"sum_{name}"] = events[name]
weight_map[f"sum_{name}_selected"] = (events[name], event_sel)
if event_sel_nob is None:
continue
weight_map[f"sum_{name}_selected_nobjet"] = (events[name], event_sel_nob)
weight_map[f"sum_mc_weight_{name}_selected_nobjet"] = (events.mc_weight * events[name], event_sel_nob)
if event_sel_nob_deepjet is not None:
weight_map[f"sum_{name}_selected_nobjet_deepjet"] = (events[name], event_sel_nob_deepjet)
weight_map[f"sum_mc_weight_{name}_selected_nobjet_deepjet"] = (events.mc_weight * events[name], event_sel_nob_deepjet) # noqa: E501
if event_sel_nob_pnet is not None:
weight_map[f"sum_{name}_selected_nobjet_pnet"] = (events[name], event_sel_nob_pnet)
weight_map[f"sum_mc_weight_{name}_selected_nobjet_pnet"] = (events.mc_weight * events[name], event_sel_nob_pnet) # noqa: E501

# groups
group_map = {
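As a reading aid for the weight_map built above: each entry is either Ellipsis (a plain event count), a bare per-event array, or a (weights, mask) pair, and the stats step effectively reduces every entry to a single number per dataset; a minimal sketch of that reduction (a conceptual stand-in, not columnflow's actual implementation):

import awkward as ak

def reduce_weight_map_entry(entry, n_events: int) -> float:
    # Ellipsis marks a plain count of all events
    if entry is Ellipsis:
        return float(n_events)
    # (weights, mask) pair: sum the weights of selected events only
    if isinstance(entry, tuple):
        weights, mask = entry
        return float(ak.sum(weights[mask]))
    # bare array (e.g. a boolean selection mask or per-event weights): sum over all events
    return float(ak.sum(entry))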
3 changes: 2 additions & 1 deletion hbt/selection/jet.py
@@ -207,7 +207,8 @@ def jet_selection(
# the btag weight normalization requires a selection with everything but the bjet
# selection, so add this step here
# note: there is currently no b-tag discriminant cut at this point, so take jet_sel
"bjet": jet_sel,
"bjet_deepjet": jet_sel,
"bjet_pnet": jet_sel, # no need in run 2
},
objects={
"Jet": {
2 changes: 1 addition & 1 deletion modules/columnflow
