Adjustments for 2023, down to produced columns.
riga committed Dec 13, 2024
1 parent a952685 commit d4c9222
Showing 7 changed files with 96 additions and 55 deletions.
2 changes: 1 addition & 1 deletion hbt/config/configs_hbt.py
@@ -712,7 +712,7 @@ def if_era(
elif year == 2023:
cmpgn = "2023PromptC" if campaign.has_tag("preBPix") else "2023PromptD"
cfg.x.electron_sf_names = ElectronSFConfig(
correction="UL-Electron-ID-SF",
correction="Electron-ID-SF",
campaign=cmpgn,
working_point="wp80iso",
)
76 changes: 47 additions & 29 deletions hbt/production/btag.py
@@ -23,27 +23,28 @@

# custom btag weight producers for deepjet and pnet configs
btag_weights_deepjet = btag_weights.derive("btag_weights_deepjet", cls_dict={
"weight_name": "btag_weight_deepjet",
"tagger_name": "deepjet",
"get_btag_config": (lambda self: self.config_inst.x.btag_sf_deepjet),
})
btag_weights_pnet = btag_weights.derive("btag_weights_pnet", cls_dict={
"weight_name": "btag_weight_pnet",
"tagger_name": "pnet",
"get_btag_config": (lambda self: self.config_inst.x.btag_sf_pnet),
})
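# a minimal sketch, not part of this commit: the same derive pattern would extend to any further
# tagger, assuming a matching auxiliary config entry exists (the "part" names below are hypothetical)
# btag_weights_part = btag_weights.derive("btag_weights_part", cls_dict={
#     "weight_name": "btag_weight_part",
#     "tagger_name": "part",
#     "get_btag_config": (lambda self: self.config_inst.x.btag_sf_part),
# })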


@producer(
uses={
btag_weights.PRODUCES,
# custom columns created upstream, probably by a producer
"process_id",
# nano columns
"Jet.pt",
},
uses={"process_id", "Jet.pt"},
# only run on mc
mc_only=True,
# configurable weight producer class
btag_weights_cls=None,
)
def normalized_btag_weights(self: Producer, events: ak.Array, **kwargs) -> ak.Array:
for weight_name in self[btag_weights].produces:
if not weight_name.startswith("btag_weight"):
def _normalized_btag_weights(self: Producer, events: ak.Array, **kwargs) -> ak.Array:
for route in self[self.btag_weights_cls].produced_columns:
weight_name = str(route)
if not weight_name.startswith(self.weight_name):
continue

# create weight vectors starting with ones for both weight variations, i.e.,
Expand Down Expand Up @@ -71,20 +72,26 @@ def normalized_btag_weights(self: Producer, events: ak.Array, **kwargs) -> ak.Ar
return events


@normalized_btag_weights.init
def normalized_btag_weights_init(self: Producer) -> None:
@_normalized_btag_weights.init
def _normalized_btag_weights_init(self: Producer) -> None:
assert self.btag_weights_cls, "btag_weights_cls must be set"

if not getattr(self, "dataset_inst", None):
return

for weight_name in self[btag_weights].produces:
if not weight_name.startswith("btag_weight"):
continue
# reuse the weight and tagger names
self.weight_name = self.btag_weights_cls.weight_name
self.tagger_name = self.btag_weights_cls.tagger_name

self.produces |= {f"normalized_{weight_name}", f"normalized_njet_{weight_name}"}
# add produced columns
for route in self[self.btag_weights_cls].produced_columns:
name = str(route)
if name.startswith(self.weight_name):
self.produces.add(f"normalized_{{,njet_}}{name}")


@normalized_btag_weights.requires
def normalized_btag_weights_requires(self: Producer, reqs: dict) -> None:
@_normalized_btag_weights.requires
def _normalized_btag_weights_requires(self: Producer, reqs: dict) -> None:
from columnflow.tasks.selection import MergeSelectionStats
reqs["selection_stats"] = MergeSelectionStats.req(
self.task,
Expand All @@ -94,37 +101,37 @@ def normalized_btag_weights_requires(self: Producer, reqs: dict) -> None:
)


@normalized_btag_weights.setup
def normalized_btag_weights_setup(self: Producer, reqs: dict, inputs: dict, reader_targets: InsertableDict) -> None:
@_normalized_btag_weights.setup
def _normalized_btag_weights_setup(self: Producer, reqs: dict, inputs: dict, reader_targets: InsertableDict) -> None:
# load the selection stats
selection_stats = self.task.cached_value(
key="selection_stats",
func=lambda: inputs["selection_stats"]["collection"][0]["stats"].load(formatter="json"),
)

# get the unique process ids in that dataset
key = "sum_mc_weight_selected_nobjet_per_process_and_njet"
key = f"sum_mc_weight_selected_nobjet_{self.tagger_name}_per_process_and_njet"
self.unique_process_ids = list(map(int, selection_stats[key].keys()))

# get the maximum numbers of jets
max_n_jets = max(map(int, sum((list(d.keys()) for d in selection_stats[key].values()), [])))

# helper to get numerators and denominators
def numerator_per_pid(pid):
key = "sum_mc_weight_selected_nobjet_per_process"
key = f"sum_mc_weight_selected_nobjet_{self.tagger_name}_per_process"
return selection_stats[key].get(str(pid), 0.0)

def denominator_per_pid(weight_name, pid):
key = f"sum_mc_weight_{weight_name}_selected_nobjet_per_process"
key = f"sum_mc_weight_{weight_name}_selected_nobjet_{self.tagger_name}_per_process"
return selection_stats[key].get(str(pid), 0.0)

def numerator_per_pid_njet(pid, n_jets):
key = "sum_mc_weight_selected_nobjet_per_process_and_njet"
key = f"sum_mc_weight_selected_nobjet_{self.tagger_name}_per_process_and_njet"
d = selection_stats[key].get(str(pid), {})
return d.get(str(n_jets), 0.0)

def denominator_per_pid_njet(weight_name, pid, n_jets):
key = f"sum_mc_weight_{weight_name}_selected_nobjet_per_process_and_njet"
key = f"sum_mc_weight_{weight_name}_selected_nobjet_{self.tagger_name}_per_process_and_njet"
d = selection_stats[key].get(str(pid), {})
return d.get(str(n_jets), 0.0)

@@ -134,8 +141,8 @@ def denominator_per_pid_njet(weight_name, pid, n_jets):
pid: safe_div(numerator_per_pid(pid), denominator_per_pid(weight_name, pid))
for pid in self.unique_process_ids
}
for weight_name in self[btag_weights].produces
if weight_name.startswith("btag_weight")
for weight_name in (str(route) for route in self[self.btag_weights_cls].produced_columns)
if weight_name.startswith(self.btag_weights_cls.weight_name)
}

# extract the ratio per weight, pid and also the jet multiplicity, using the latter as an index
@@ -148,6 +155,17 @@ def denominator_per_pid_njet(weight_name, pid, n_jets):
])
for pid in self.unique_process_ids
}
for weight_name in self[btag_weights].produces
if weight_name.startswith("btag_weight")
for weight_name in (str(route) for route in self[self.btag_weights_cls].produced_columns)
if weight_name.startswith(self.btag_weights_cls.weight_name)
}


# derive for btaggers
normalized_btag_weights_deepjet = _normalized_btag_weights.derive("normalized_btag_weights_deepjet", cls_dict={
"btag_weights_cls": btag_weights_deepjet,
"uses": _normalized_btag_weights.uses | {btag_weights_deepjet.PRODUCES},
})
normalized_btag_weights_pnet = _normalized_btag_weights.derive("normalized_btag_weights_pnet", cls_dict={
"btag_weights_cls": btag_weights_pnet,
"uses": _normalized_btag_weights.uses | {btag_weights_pnet.PRODUCES},
})
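For context, the per-process ratios assembled in the setup hook above boil down to dividing the summed MC weights before and after applying the b-tag weight; a minimal sketch of that idea, assuming a selection_stats dictionary keyed as shown above (safe_div in the diff presumably guards against a zero denominator):

def ratio_per_process(selection_stats: dict, tagger_name: str, weight_name: str, pid: int) -> float:
    # sum of plain mc weights for this process, before the bjet selection step
    num = selection_stats[f"sum_mc_weight_selected_nobjet_{tagger_name}_per_process"].get(str(pid), 0.0)
    # the same sum, but with the per-event btag weight applied
    den = selection_stats[f"sum_mc_weight_{weight_name}_selected_nobjet_{tagger_name}_per_process"].get(str(pid), 0.0)
    # scale factor that restores the pre-btag-weight yield of this process
    return num / den if den else 0.0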
14 changes: 9 additions & 5 deletions hbt/production/default.py
Expand Up @@ -14,22 +14,24 @@
from hbt.production.weights import (
normalized_pu_weight, normalized_pdf_weight, normalized_murmuf_weight,
)
from hbt.production.btag import normalized_btag_weights
from hbt.production.btag import normalized_btag_weights_deepjet, normalized_btag_weights_pnet
from hbt.production.tau import tau_weights, trigger_weights
from hbt.util import IF_DATASET_HAS_LHE_WEIGHTS
from hbt.util import IF_DATASET_HAS_LHE_WEIGHTS, IF_RUN_3

ak = maybe_import("awkward")


@producer(
uses={
category_ids, stitched_normalization_weights, normalized_pu_weight,
normalized_btag_weights, tau_weights, electron_weights, muon_weights, trigger_weights,
normalized_btag_weights_deepjet, IF_RUN_3(normalized_btag_weights_pnet), tau_weights,
electron_weights, muon_weights, trigger_weights,
IF_DATASET_HAS_LHE_WEIGHTS(normalized_pdf_weight, normalized_murmuf_weight),
},
produces={
category_ids, stitched_normalization_weights, normalized_pu_weight,
normalized_btag_weights, tau_weights, electron_weights, muon_weights, trigger_weights,
normalized_btag_weights_deepjet, IF_RUN_3(normalized_btag_weights_pnet), tau_weights,
electron_weights, muon_weights, trigger_weights,
IF_DATASET_HAS_LHE_WEIGHTS(normalized_pdf_weight, normalized_murmuf_weight),
},
)
@@ -54,7 +56,9 @@ def default(self: Producer, events: ak.Array, **kwargs) -> ak.Array:
events = self[normalized_pu_weight](events, **kwargs)

# btag weights
events = self[normalized_btag_weights](events, **kwargs)
events = self[normalized_btag_weights_deepjet](events, **kwargs)
if self.has_dep(normalized_btag_weights_pnet):
events = self[normalized_btag_weights_pnet](events, **kwargs)

# tau weights
events = self[tau_weights](events, **kwargs)
2 changes: 1 addition & 1 deletion hbt/production/weights.py
Expand Up @@ -24,7 +24,7 @@
mc_only=True,
)
def normalized_pu_weight(self: Producer, events: ak.Array, **kwargs) -> ak.Array:
for route in self[pu_weight].produces:
for route in self[pu_weight].produced_columns:
weight_name = str(route)
if not weight_name.startswith("pu_weight"):
continue
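The rename from produces to produced_columns here (and in btag.py above) means these loops now yield columnflow Route objects rather than plain strings, hence the str(route) conversion; a minimal illustration of the pattern under that assumption (the column name in the comment is illustrative):

for route in self[pu_weight].produced_columns:
    weight_name = str(route)  # Route -> plain column name, e.g. "pu_weight_something_up"
    if not weight_name.startswith("pu_weight"):
        continue  # skip columns of the dependency that are not pu weight variations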
52 changes: 35 additions & 17 deletions hbt/selection/default.py
@@ -137,10 +137,18 @@ def default(
results.event = event_sel

# combined event selection after all but the bjet step
event_sel_nob = results.steps.all_but_bjet = reduce(
and_,
[mask for step_name, mask in results.steps.items() if step_name != "bjet"],
)
tagger_name = btag_weights_deepjet.tagger_name
event_sel_nob_deepjet = results.steps[f"all_but_bjet_{tagger_name}"] = reduce(and_, [
mask for step_name, mask in results.steps.items()
if step_name != f"bjet_{tagger_name}"
])
event_sel_nob_pnet = None
if self.has_dep(btag_weights_pnet):
tagger_name = btag_weights_pnet.tagger_name
event_sel_nob_pnet = results.steps[f"all_but_bjet_{tagger_name}"] = reduce(and_, [
mask for step_name, mask in results.steps.items()
if step_name != f"bjet_{tagger_name}"
])

# increment stats
events, results = setup_and_increment_stats(
@@ -149,7 +157,8 @@
results=results,
stats=stats,
event_sel=event_sel,
event_sel_nob=event_sel_nob,
event_sel_nob_deepjet=event_sel_nob_deepjet,
event_sel_nob_pnet=event_sel_nob_pnet,
njets=results.x.n_central_jets,
)

@@ -278,7 +287,8 @@ def empty_call(
results=results,
stats=stats,
event_sel=results.event,
event_sel_nob=results.event,
event_sel_nob_deepjet=results.event,
event_sel_nob_pnet=results.event if self.has_dep(btag_weights_pnet) else None,
njets=ak.num(events.Jet, axis=1),
)

@@ -292,7 +302,8 @@ def setup_and_increment_stats(
results: SelectionResult,
stats: defaultdict,
event_sel: np.ndarray | ak.Array,
event_sel_nob: np.ndarray | ak.Array | None = None,
event_sel_nob_deepjet: np.ndarray | ak.Array | None = None,
event_sel_nob_pnet: np.ndarray | ak.Array | None = None,
njets: np.ndarray | ak.Array | None = None,
**kwargs,
) -> tuple[ak.Array, SelectionResult]:
@@ -305,7 +316,8 @@
:param results: The current selection results.
:param stats: The stats dictionary.
:param event_sel: The general event selection mask.
:param event_sel_nob: The event selection mask without the bjet step.
:param event_sel_nob_deepjet: The event selection mask without the bjet step for deepjet.
:param event_sel_nob_pnet: The event selection mask without the bjet step for pnet.
:param njets: The number of central jets.
:return: The updated events and results objects in a tuple.
"""
@@ -314,17 +326,21 @@
"num_events": Ellipsis,
"num_events_selected": event_sel,
}
if event_sel_nob is not None:
weight_map["num_events_selected_nobjet"] = event_sel_nob
if event_sel_nob_deepjet is not None:
weight_map["num_events_selected_nobjet_deepjet"] = event_sel_nob_deepjet
if event_sel_nob_pnet is not None:
weight_map["num_events_selected_nobjet_pnet"] = event_sel_nob_pnet
group_map = {}
group_combinations = []

# add mc info
if self.dataset_inst.is_mc:
weight_map["sum_mc_weight"] = events.mc_weight
weight_map["sum_mc_weight_selected"] = (events.mc_weight, event_sel)
if event_sel_nob is not None:
weight_map["sum_mc_weight_selected_nobjet"] = (events.mc_weight, event_sel_nob)
if event_sel_nob_deepjet is not None:
weight_map["sum_mc_weight_selected_nobjet_deepjet"] = (events.mc_weight, event_sel_nob_deepjet)
if event_sel_nob_pnet is not None:
weight_map["sum_mc_weight_selected_nobjet_pnet"] = (events.mc_weight, event_sel_nob_pnet)

# pu weights with variations
for route in sorted(self[pu_weight].produced_columns):
@@ -349,14 +365,16 @@
continue
for route in sorted(self[prod].produced_columns):
name = str(route)
if not name.startswith("btag_weight"):
if not name.startswith(prod.weight_name):
continue
weight_map[f"sum_{name}"] = events[name]
weight_map[f"sum_{name}_selected"] = (events[name], event_sel)
if event_sel_nob is None:
continue
weight_map[f"sum_{name}_selected_nobjet"] = (events[name], event_sel_nob)
weight_map[f"sum_mc_weight_{name}_selected_nobjet"] = (events.mc_weight * events[name], event_sel_nob)
if event_sel_nob_deepjet is not None:
weight_map[f"sum_{name}_selected_nobjet_deepjet"] = (events[name], event_sel_nob_deepjet)
weight_map[f"sum_mc_weight_{name}_selected_nobjet_deepjet"] = (events.mc_weight * events[name], event_sel_nob_deepjet) # noqa: E501
if event_sel_nob_pnet is not None:
weight_map[f"sum_{name}_selected_nobjet_pnet"] = (events[name], event_sel_nob_pnet)
weight_map[f"sum_mc_weight_{name}_selected_nobjet_pnet"] = (events.mc_weight * events[name], event_sel_nob_pnet) # noqa: E501

# groups
group_map = {
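As a reading aid for the weight_map built above: each entry is either Ellipsis (a plain event count), a bare per-event array, or a (weights, mask) pair, and the stats step effectively reduces every entry to a single number per dataset; a minimal sketch of that reduction (a conceptual stand-in, not columnflow's actual implementation):

import awkward as ak

def reduce_weight_map_entry(entry, n_events: int) -> float:
    # Ellipsis marks a plain count of all events
    if entry is Ellipsis:
        return float(n_events)
    # (weights, mask) pair: sum the weights of selected events only
    if isinstance(entry, tuple):
        weights, mask = entry
        return float(ak.sum(weights[mask]))
    # bare array (e.g. a boolean selection mask or per-event weights): sum over all events
    return float(ak.sum(entry))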
3 changes: 2 additions & 1 deletion hbt/selection/jet.py
@@ -207,7 +207,8 @@ def jet_selection(
# the btag weight normalization requires a selection with everything but the bjet
# selection, so add this step here
# note: there is currently no b-tag discriminant cut at this point, so take jet_sel
"bjet": jet_sel,
"bjet_deepjet": jet_sel,
"bjet_pnet": jet_sel, # no need in run 2
},
objects={
"Jet": {
2 changes: 1 addition & 1 deletion modules/columnflow
