cleanup: remove commented-out code and di-Higgs process name mappings

mafrahm · mafrahm · commit e2c21a270cb9 · 2024-09-27T11:52:37.000+02:00
diff --git a/hbw/config/hist_hooks.py b/hbw/config/hist_hooks.py
@@ -11,178 +11,12 @@
 
 from columnflow.util import maybe_import, DotDict
 
-
 np = maybe_import("numpy")
 hist = maybe_import("hist")
 
-
 logger = law.logger.get_logger(__name__)
 
 
-# def integrate_hist(h, **kwargs):
-#     """
-#     Given a scikit-hist histogram object return a reduced histogram with specified
-#     axes integrated out.
-
-#     For scikit-hist histograms, the integration should be formed in 3 steps:
-#     - slicing the histogram to contain only the range of interest
-#     - Setting overflow values to 0 (excluding the values from future calculations)
-#     - Summing over the axes of interest.
-
-#     The latter 2 steps will only be carried out if the var_slice doesn't uniquely
-#     identify a singular bin in the histogram axis
-#     """
-#     # Reduction in parallel.
-#     r = h[kwargs]
-#     for var, var_slice in kwargs.items():
-#         # In the case that histogram has been reduced to singular value simple return
-#         if not isinstance(r, hist.Hist):
-#             return r
-#         if var in [x.name for x in r.axes]:
-#             ax = h.axes[var]
-#             get_underflow = var_slice.start == None or var_slice.start == -1
-#             get_overflow = var_slice.stop == None or var_slice.stop == len(ax)
-#             if not get_underflow and ax.traits.underflow:
-#                 r[{var: hist.underflow}] = np.zeros_like(r[{var: hist.underflow}])
-#             if not get_overflow and ax.traits.overflow:
-#                 r[{var: hist.overflow}] = np.zeros_like(r[{var: hist.overflow}])
-
-#             # Sum over all remaining elements on axis
-#             r = r[{var: sum}]
-#     return r
-
-
-# def rebin_hist(h, **kwargs):
-#     """
-#     Rebinning a scikit-hist histogram. 2 types of values can be accepted as the
-#     argument values:
-#     - Derivatives of the `hist.rebin` argument. In this case we directly use the
-#         UHI facilities to perform the rebinning.
-#     - A new axis object where all the bin edges lands on the old bin edges of the
-#         given histogram. In this case a custom intergration loop is performed to
-#         extract the rebinning. Beware that this methods is very slow, as it requires
-#         a loop generation of all possible UHI values after the rebinning, so be sure
-#         that rebinning is performed as the final step of the histogram reduction. See
-#         `_rebin_single_scikit` for more information regarding this method.
-#     """
-#     h = h.copy()
-#     for var, var_val in kwargs.items():
-#         if isinstance(var_val, hist.rebin):
-#             h = h[{var: var_val}]
-#         else:
-#             h = _rebin_single_scikit(h, var, var_val)
-#     return h
-
-
-# def __check_scikit_axis_compat(axis1, axis2):
-#     """
-#     Checking that axis 2 is rebin-compatible with axis 1. This checks that:
-#     1. The two histogram share the same name.
-#     2. The edges of the second axis all land on the edges of the first axis.
-
-#     If the two axis are compatible the function will return an array of the bin
-#     index of the axis 1 that the bin edges of axis 2 falls on.
-#     """
-#     assert axis1.name == axis2.name, \
-#         'Naming of the axis is required to match'
-#     # Getting the new bin edges index for the old bin edges
-#     try:
-#         return [
-#             np.argwhere(axis1.edges == new_edge)[0][0] for new_edge in axis2.edges
-#         ]
-#     except IndexError:
-#         raise ValueError(
-#             f"Bin edges of the axis {axis2} is incompatible with {axis1}")
-
-
-# def _get_all_indices(axis):
-#     """
-#     Getting all possible (integer) bin index values given a scikit-hep histogram.
-#     The special indices of hist.underflow and hist.overflow will be included if the
-#     axis in questions has those traits.
-#     """
-#     idxs = list(range(len(axis)))
-#     if axis.traits.underflow:    # Extension to include the under/overflow bins
-#         idxs.insert(0, hist.underflow)
-#     if axis.traits.overflow:
-#         idxs.append(hist.overflow)
-#     return idxs
-
-
-# def _rebin_single_scikit(h, old_axis, new_axis):
-#     """
-#     Rebinning a single axis of a scikit-hist histogram. This includes the following
-#     routines:
-
-#     - Generating a new scikit hep instance that perserves axis ordering with the
-#         exception of the rebinned axis (in place) replacement.
-#     - Setting up the integration ranges required to calculate the bin values of the
-#         new histogram.
-#     - Looping over the UHI values of the new histogram and performing the a
-#         summation over the specified range on the old histogram to fill in the new
-#         values.
-
-#     As here we have variable number of axis each with variable number of bins, this
-#     method will require the use of more old fashioned python looping, which can be
-#     very slow for large dimensional histograms with many bins for each axis. So be
-#     sure to make rebinning be the final step in histogram reduction.
-#     """
-#     # assert isinstance(h, hist.NamedHist), "Can only process named histograms"
-#     # Additional type casing
-#     if isinstance(old_axis, str):
-#         return _rebin_single_scikit(h, h.axes[old_axis], new_axis)
-#     axis_name = old_axis.name
-
-#     # Creating the new histogram instance with identical axis ordering.
-#     all_axes = list(h.axes)
-#     all_axes[all_axes.index(old_axis)] = new_axis
-#     h_rebinned = hist.Hist(*all_axes, storage=h._storage_type())
-
-#     # Getting the all possible bin indices for all axes in the old histogram
-#     bin_idx_dict = {ax.name: _get_all_indices(ax) for ax in h.axes}
-
-#     # Getting the new bin edges index for the old bin edges
-#     new_bin_edge_idx = __check_scikit_axis_compat(old_axis, new_axis)
-#     if new_axis.traits.underflow:    # Adding additional underflow/overflow
-#         new_bin_edge_idx.insert(0, bin_idx_dict[axis_name][0])
-#     if new_axis.traits.overflow:
-#         new_bin_edge_idx.append(bin_idx_dict[axis_name][-1])
-
-#     # Generating a the int range pair. Additional parsing will be required for the
-#     # under/overflow bins
-#     def make_slice(index):
-#         start = new_bin_edge_idx[index]
-#         stop = new_bin_edge_idx[index + 1]
-#         if start == hist.underflow:
-#             start = -1
-#         if stop == hist.overflow:
-#             stop = len(old_axis)
-#         return slice(int(start), int(stop))
-
-#     new_axis_idx = _get_all_indices(new_axis)
-#     new_int_slice = [make_slice(i) for i in range(len(new_axis_idx))]
-#     assert len(new_axis_idx) == len(new_bin_edge_idx) - 1
-
-#     new_idx_dict = bin_idx_dict.copy()
-#     new_idx_dict[axis_name] = new_axis_idx
-#     bin_idx_dict[axis_name] = new_int_slice
-
-#     name_list = list(bin_idx_dict.keys())
-#     new_idx = [x for x in itertools.product(*[x for x in new_idx_dict.values()])]
-#     old_int = [x for x in itertools.product(*[x for x in bin_idx_dict.values()])]
-
-#     print(new_idx)
-#     print("Here")
-#     print(old_int)
-#     for o, n in zip(old_int, new_idx):
-#         n_uhi = {name: n[name_idx] for name_idx, name in enumerate(name_list)}
-#         o_uhi = {name: o[name_idx] for name_idx, name in enumerate(name_list)}
-#         # Single variable histogram, with just the axis of interest
-#         h_rebinned[n_uhi] = integrate_hist(h, **o_uhi)
-
-#     return h_rebinned
-
-
 def rebin_hist(h, axis_name, edges):
     if isinstance(edges, int):
         return h[{axis_name: hist.rebin(edges)}]
diff --git a/hbw/inference/constants.py b/hbw/inference/constants.py
@@ -34,17 +34,6 @@
     # "w_lnu": "W",
     # "dy": "DY",
     # "vv": "VV",
-    # di higgs (required name conventions)
-    "hh_ggf_hbb_hvv2l2nu_kl0_kt1": "ggHH_kl_0_kt_1_hbb_hvv2l2nu",
-    "hh_ggf_hbb_hvv2l2nu_kl1_kt1": "ggHH_kl_1_kt_1_hbb_hvv2l2nu",
-    "hh_ggf_hbb_hvv2l2nu_kl2p45_kt1": "ggHH_kl_2p45_kt_1_hbb_hvv2l2nu",
-    "hh_ggf_hbb_hvv2l2nu_kl5_kt1": "ggHH_kl_5_kt_1_hbb_hvv2l2nu",
-    "hh_ggf_hbb_hvvqqlnu_kl0_kt1": "ggHH_kl_0_kt_1_hbb_hvvqqlnu",
-    "hh_ggf_hbb_hvvqqlnu_kl1_kt1": "ggHH_kl_1_kt_1_hbb_hvvqqlnu",
-    "hh_ggf_hbb_hvvqqlnu_kl2p45_kt1": "ggHH_kl_2p45_kt_1_hbb_hvvqqlnu",
-    "hh_ggf_hbb_hvvqqlnu_kl5_kt1": "ggHH_kl_5_kt_1_hbb_hvvqqlnu",
-    "hh_ggf_{decay}_{params}": "ggHH_{params}_{decay}",
-    "hh_vbf_{decay}_{params}": "qqHH_{params}_{decay}",
     # single higgs (required name conventions)
     "h_ggf": "ggH",
     "h_vbf": "qqH",
diff --git a/hbw/tasks/ml.py b/hbw/tasks/ml.py
@@ -218,30 +218,6 @@ def create_branch_map(self):
     def workflow_requires(self):
         reqs = super().workflow_requires()
 
-        # reqs["events"] = {
-        #     config_inst.name: {
-        #         dataset_inst.name: [
-        #             self.reqs.MergeMLEvents.req(
-        #                 self,
-        #                 config=config_inst.name,
-        #                 dataset=dataset_inst.name,
-        #                 calibrators=_calibrators,
-        #                 selector=_selector,
-        #                 producers=_producers,
-        #                 fold=f,
-        #                 tree_index=-1,
-        #             )
-        #             for f in range(self.ml_model_inst.folds)
-        #         ]
-        #         for dataset_inst in dataset_insts
-        #     }
-        #     for (config_inst, dataset_insts), _calibrators, _selector, _producers in zip(
-        #         self.ml_model_inst.used_datasets.items(),
-        #         self.calibrators,
-        #         self.selectors,
-        #         self.producers,
-        #     )
-        # }
         reqs["events"] = {
             config_inst.name: {
                 dataset_inst.name: self.reqs.SimpleMergeMLEvents.req_different_branching(
@@ -291,28 +267,8 @@ def requires(self):
             return reqs
 
         process = self.branch_data["process"]
-        # load events only for specified process and fold
-        # reqs["events"] = {
-        #     config_inst.name: {
-        #         dataset_inst.name: self.reqs.MergeMLEvents.req(
-        #             self,
-        #             config=config_inst.name,
-        #             dataset=dataset_inst.name,
-        #             calibrators=_calibrators,
-        #             selector=_selector,
-        #             producers=_producers,
-        #             fold=self.fold,
-        #         )
-        #         for dataset_inst in dataset_insts
-        #         if dataset_inst.x.ml_process == process
-        #     }
-        #     for (config_inst, dataset_insts), _calibrators, _selector, _producers in zip(
-        #         self.ml_model_inst.used_datasets.items(),
-        #         self.calibrators,
-        #         self.selectors,
-        #         self.producers,
-        #     )
-        # }
+
+        # load events
         reqs["events"] = {
             config_inst.name: {
                 dataset_inst.name: self.reqs.SimpleMergeMLEvents.req_different_branching(