Skip to content

Commit e2c21a2

Browse files
committed
cleanup
1 parent f286313 commit e2c21a2

File tree

3 files changed

+2
-223
lines changed

3 files changed

+2
-223
lines changed

hbw/config/hist_hooks.py

Lines changed: 0 additions & 166 deletions
Original file line numberDiff line numberDiff line change
@@ -11,178 +11,12 @@
1111

1212
from columnflow.util import maybe_import, DotDict
1313

14-
1514
np = maybe_import("numpy")
1615
hist = maybe_import("hist")
1716

18-
1917
logger = law.logger.get_logger(__name__)
2018

2119

22-
# def integrate_hist(h, **kwargs):
23-
# """
24-
# Given a scikit-hist histogram object return a reduced histogram with specified
25-
# axes integrated out.
26-
27-
# For scikit-hist histograms, the integration should be performed in 3 steps:
28-
# - slicing the histogram to contain only the range of interest
29-
# - Setting overflow values to 0 (excluding the values from future calculations)
30-
# - Summing over the axes of interest.
31-
32-
# The latter 2 steps will only be carried out if the var_slice doesn't uniquely
33-
# identify a singular bin in the histogram axis
34-
# """
35-
# # Reduction in parallel.
36-
# r = h[kwargs]
37-
# for var, var_slice in kwargs.items():
38-
# # In the case that histogram has been reduced to singular value simple return
39-
# if not isinstance(r, hist.Hist):
40-
# return r
41-
# if var in [x.name for x in r.axes]:
42-
# ax = h.axes[var]
43-
# get_underflow = var_slice.start == None or var_slice.start == -1
44-
# get_overflow = var_slice.stop == None or var_slice.stop == len(ax)
45-
# if not get_underflow and ax.traits.underflow:
46-
# r[{var: hist.underflow}] = np.zeros_like(r[{var: hist.underflow}])
47-
# if not get_overflow and ax.traits.overflow:
48-
# r[{var: hist.overflow}] = np.zeros_like(r[{var: hist.overflow}])
49-
50-
# # Sum over all remaining elements on axis
51-
# r = r[{var: sum}]
52-
# return r
53-
54-
55-
# def rebin_hist(h, **kwargs):
56-
# """
57-
# Rebinning a scikit-hist histogram. 2 types of values can be accepted as the
58-
# argument values:
59-
# - Derivatives of the `hist.rebin` argument. In this case we directly use the
60-
# UHI facilities to perform the rebinning.
61-
# - A new axis object where all the bin edges lands on the old bin edges of the
62-
# given histogram. In this case a custom integration loop is performed to
63-
# extract the rebinning. Beware that this method is very slow, as it requires
64-
# a loop generation of all possible UHI values after the rebinning, so be sure
65-
# that rebinning is performed as the final step of the histogram reduction. See
66-
# `_rebin_single_scikit` for more information regarding this method.
67-
# """
68-
# h = h.copy()
69-
# for var, var_val in kwargs.items():
70-
# if isinstance(var_val, hist.rebin):
71-
# h = h[{var: var_val}]
72-
# else:
73-
# h = _rebin_single_scikit(h, var, var_val)
74-
# return h
75-
76-
77-
# def __check_scikit_axis_compat(axis1, axis2):
78-
# """
79-
# Checking that axis 2 is rebin-compatible with axis 1. This checks that:
80-
# 1. The two histograms share the same name.
81-
# 2. The edges of the second axis all land on the edges of the first axis.
82-
83-
# If the two axes are compatible the function will return an array of the bin
84-
# index of the axis 1 that the bin edges of axis 2 falls on.
85-
# """
86-
# assert axis1.name == axis2.name, \
87-
# 'Naming of the axis is required to match'
88-
# # Getting the new bin edges index for the old bin edges
89-
# try:
90-
# return [
91-
# np.argwhere(axis1.edges == new_edge)[0][0] for new_edge in axis2.edges
92-
# ]
93-
# except IndexError:
94-
# raise ValueError(
95-
# f"Bin edges of the axis {axis2} is incompatible with {axis1}")
96-
97-
98-
# def _get_all_indices(axis):
99-
# """
100-
# Getting all possible (integer) bin index values given a scikit-hep histogram.
101-
# The special indices of hist.underflow and hist.overflow will be included if the
102-
# axis in questions has those traits.
103-
# """
104-
# idxs = list(range(len(axis)))
105-
# if axis.traits.underflow: # Extension to include the under/overflow bins
106-
# idxs.insert(0, hist.underflow)
107-
# if axis.traits.overflow:
108-
# idxs.append(hist.overflow)
109-
# return idxs
110-
111-
112-
# def _rebin_single_scikit(h, old_axis, new_axis):
113-
# """
114-
# Rebinning a single axis of a scikit-hist histogram. This includes the following
115-
# routines:
116-
117-
# - Generating a new scikit hep instance that preserves axis ordering with the
118-
# exception of the rebinned axis (in place) replacement.
119-
# - Setting up the integration ranges required to calculate the bin values of the
120-
# new histogram.
121-
# - Looping over the UHI values of the new histogram and performing a
122-
# summation over the specified range on the old histogram to fill in the new
123-
# values.
124-
125-
# As here we have a variable number of axes each with a variable number of bins, this
126-
# method will require the use of more old fashioned python looping, which can be
127-
# very slow for large dimensional histograms with many bins for each axis. So be
128-
# sure to make rebinning be the final step in histogram reduction.
129-
# """
130-
# # assert isinstance(h, hist.NamedHist), "Can only process named histograms"
131-
# # Additional type casting
132-
# if isinstance(old_axis, str):
133-
# return _rebin_single_scikit(h, h.axes[old_axis], new_axis)
134-
# axis_name = old_axis.name
135-
136-
# # Creating the new histogram instance with identical axis ordering.
137-
# all_axes = list(h.axes)
138-
# all_axes[all_axes.index(old_axis)] = new_axis
139-
# h_rebinned = hist.Hist(*all_axes, storage=h._storage_type())
140-
141-
# # Getting the all possible bin indices for all axes in the old histogram
142-
# bin_idx_dict = {ax.name: _get_all_indices(ax) for ax in h.axes}
143-
144-
# # Getting the new bin edges index for the old bin edges
145-
# new_bin_edge_idx = __check_scikit_axis_compat(old_axis, new_axis)
146-
# if new_axis.traits.underflow: # Adding additional underflow/overflow
147-
# new_bin_edge_idx.insert(0, bin_idx_dict[axis_name][0])
148-
# if new_axis.traits.overflow:
149-
# new_bin_edge_idx.append(bin_idx_dict[axis_name][-1])
150-
151-
# # Generating a the int range pair. Additional parsing will be required for the
152-
# # under/overflow bins
153-
# def make_slice(index):
154-
# start = new_bin_edge_idx[index]
155-
# stop = new_bin_edge_idx[index + 1]
156-
# if start == hist.underflow:
157-
# start = -1
158-
# if stop == hist.overflow:
159-
# stop = len(old_axis)
160-
# return slice(int(start), int(stop))
161-
162-
# new_axis_idx = _get_all_indices(new_axis)
163-
# new_int_slice = [make_slice(i) for i in range(len(new_axis_idx))]
164-
# assert len(new_axis_idx) == len(new_bin_edge_idx) - 1
165-
166-
# new_idx_dict = bin_idx_dict.copy()
167-
# new_idx_dict[axis_name] = new_axis_idx
168-
# bin_idx_dict[axis_name] = new_int_slice
169-
170-
# name_list = list(bin_idx_dict.keys())
171-
# new_idx = [x for x in itertools.product(*[x for x in new_idx_dict.values()])]
172-
# old_int = [x for x in itertools.product(*[x for x in bin_idx_dict.values()])]
173-
174-
# print(new_idx)
175-
# print("Here")
176-
# print(old_int)
177-
# for o, n in zip(old_int, new_idx):
178-
# n_uhi = {name: n[name_idx] for name_idx, name in enumerate(name_list)}
179-
# o_uhi = {name: o[name_idx] for name_idx, name in enumerate(name_list)}
180-
# # Single variable histogram, with just the axis of interest
181-
# h_rebinned[n_uhi] = integrate_hist(h, **o_uhi)
182-
183-
# return h_rebinned
184-
185-
18620
def rebin_hist(h, axis_name, edges):
18721
if isinstance(edges, int):
18822
return h[{axis_name: hist.rebin(edges)}]

hbw/inference/constants.py

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -34,17 +34,6 @@
3434
# "w_lnu": "W",
3535
# "dy": "DY",
3636
# "vv": "VV",
37-
# di higgs (required name conventions)
38-
"hh_ggf_hbb_hvv2l2nu_kl0_kt1": "ggHH_kl_0_kt_1_hbb_hvv2l2nu",
39-
"hh_ggf_hbb_hvv2l2nu_kl1_kt1": "ggHH_kl_1_kt_1_hbb_hvv2l2nu",
40-
"hh_ggf_hbb_hvv2l2nu_kl2p45_kt1": "ggHH_kl_2p45_kt_1_hbb_hvv2l2nu",
41-
"hh_ggf_hbb_hvv2l2nu_kl5_kt1": "ggHH_kl_5_kt_1_hbb_hvv2l2nu",
42-
"hh_ggf_hbb_hvvqqlnu_kl0_kt1": "ggHH_kl_0_kt_1_hbb_hvvqqlnu",
43-
"hh_ggf_hbb_hvvqqlnu_kl1_kt1": "ggHH_kl_1_kt_1_hbb_hvvqqlnu",
44-
"hh_ggf_hbb_hvvqqlnu_kl2p45_kt1": "ggHH_kl_2p45_kt_1_hbb_hvvqqlnu",
45-
"hh_ggf_hbb_hvvqqlnu_kl5_kt1": "ggHH_kl_5_kt_1_hbb_hvvqqlnu",
46-
"hh_ggf_{decay}_{params}": "ggHH_{params}_{decay}",
47-
"hh_vbf_{decay}_{params}": "qqHH_{params}_{decay}",
4837
# single higgs (required name conventions)
4938
"h_ggf": "ggH",
5039
"h_vbf": "qqH",

hbw/tasks/ml.py

Lines changed: 2 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -218,30 +218,6 @@ def create_branch_map(self):
218218
def workflow_requires(self):
219219
reqs = super().workflow_requires()
220220

221-
# reqs["events"] = {
222-
# config_inst.name: {
223-
# dataset_inst.name: [
224-
# self.reqs.MergeMLEvents.req(
225-
# self,
226-
# config=config_inst.name,
227-
# dataset=dataset_inst.name,
228-
# calibrators=_calibrators,
229-
# selector=_selector,
230-
# producers=_producers,
231-
# fold=f,
232-
# tree_index=-1,
233-
# )
234-
# for f in range(self.ml_model_inst.folds)
235-
# ]
236-
# for dataset_inst in dataset_insts
237-
# }
238-
# for (config_inst, dataset_insts), _calibrators, _selector, _producers in zip(
239-
# self.ml_model_inst.used_datasets.items(),
240-
# self.calibrators,
241-
# self.selectors,
242-
# self.producers,
243-
# )
244-
# }
245221
reqs["events"] = {
246222
config_inst.name: {
247223
dataset_inst.name: self.reqs.SimpleMergeMLEvents.req_different_branching(
@@ -291,28 +267,8 @@ def requires(self):
291267
return reqs
292268

293269
process = self.branch_data["process"]
294-
# load events only for specified process and fold
295-
# reqs["events"] = {
296-
# config_inst.name: {
297-
# dataset_inst.name: self.reqs.MergeMLEvents.req(
298-
# self,
299-
# config=config_inst.name,
300-
# dataset=dataset_inst.name,
301-
# calibrators=_calibrators,
302-
# selector=_selector,
303-
# producers=_producers,
304-
# fold=self.fold,
305-
# )
306-
# for dataset_inst in dataset_insts
307-
# if dataset_inst.x.ml_process == process
308-
# }
309-
# for (config_inst, dataset_insts), _calibrators, _selector, _producers in zip(
310-
# self.ml_model_inst.used_datasets.items(),
311-
# self.calibrators,
312-
# self.selectors,
313-
# self.producers,
314-
# )
315-
# }
270+
271+
# load events
316272
reqs["events"] = {
317273
config_inst.name: {
318274
dataset_inst.name: self.reqs.SimpleMergeMLEvents.req_different_branching(

0 commit comments

Comments
 (0)