|
11 | 11 |
|
12 | 12 | from columnflow.util import maybe_import, DotDict
|
13 | 13 |
|
14 |
| - |
15 | 14 | np = maybe_import("numpy")
|
16 | 15 | hist = maybe_import("hist")
|
17 | 16 |
|
18 |
| - |
19 | 17 | logger = law.logger.get_logger(__name__)
|
20 | 18 |
|
21 | 19 |
|
22 |
| -# def integrate_hist(h, **kwargs): |
23 |
| -# """ |
24 |
| -# Given a scikit-hist histogram object return a reduced histogram with specified |
25 |
| -# axes integrated out. |
26 |
| - |
27 |
| -# For scikit-hist histograms, the integration should be performed in 3 steps: |
28 |
| -# - slicing the histogram to contain only the range of interest |
29 |
| -# - Setting overflow values to 0 (excluding the values from future calculations) |
30 |
| -# - Summing over the axes of interest. |
31 |
| - |
32 |
| -# The latter 2 steps will only be carried out if the var_slice doesn't uniquely |
33 |
| -# identify a singular bin in the histogram axis |
34 |
| -# """ |
35 |
| -# # Reduction in parallel. |
36 |
| -# r = h[kwargs] |
37 |
| -# for var, var_slice in kwargs.items(): |
38 |
| -# # In the case that the histogram has been reduced to a singular value, simply return |
39 |
| -# if not isinstance(r, hist.Hist): |
40 |
| -# return r |
41 |
| -# if var in [x.name for x in r.axes]: |
42 |
| -# ax = h.axes[var] |
43 |
| -# get_underflow = var_slice.start == None or var_slice.start == -1 |
44 |
| -# get_overflow = var_slice.stop == None or var_slice.stop == len(ax) |
45 |
| -# if not get_underflow and ax.traits.underflow: |
46 |
| -# r[{var: hist.underflow}] = np.zeros_like(r[{var: hist.underflow}]) |
47 |
| -# if not get_overflow and ax.traits.overflow: |
48 |
| -# r[{var: hist.overflow}] = np.zeros_like(r[{var: hist.overflow}]) |
49 |
| - |
50 |
| -# # Sum over all remaining elements on axis |
51 |
| -# r = r[{var: sum}] |
52 |
| -# return r |
53 |
| - |
54 |
| - |
55 |
| -# def rebin_hist(h, **kwargs): |
56 |
| -# """ |
57 |
| -# Rebinning a scikit-hist histogram. 2 types of values can be accepted as the |
58 |
| -# argument values: |
59 |
| -# - Derivatives of the `hist.rebin` argument. In this case we directly use the |
60 |
| -# UHI facilities to perform the rebinning. |
61 |
| -# - A new axis object where all the bin edges land on the old bin edges of the |
62 |
| -# given histogram. In this case a custom integration loop is performed to |
63 |
| -# extract the rebinning. Beware that this method is very slow, as it requires |
64 |
| -# a loop generation of all possible UHI values after the rebinning, so be sure |
65 |
| -# that rebinning is performed as the final step of the histogram reduction. See |
66 |
| -# `_rebin_single_scikit` for more information regarding this method. |
67 |
| -# """ |
68 |
| -# h = h.copy() |
69 |
| -# for var, var_val in kwargs.items(): |
70 |
| -# if isinstance(var_val, hist.rebin): |
71 |
| -# h = h[{var: var_val}] |
72 |
| -# else: |
73 |
| -# h = _rebin_single_scikit(h, var, var_val) |
74 |
| -# return h |
75 |
| - |
76 |
| - |
77 |
| -# def __check_scikit_axis_compat(axis1, axis2): |
78 |
| -# """ |
79 |
| -# Checking that axis 2 is rebin-compatible with axis 1. This checks that: |
80 |
| -# 1. The two axes share the same name. |
81 |
| -# 2. The edges of the second axis all land on the edges of the first axis. |
82 |
| - |
83 |
| -# If the two axes are compatible the function will return an array of the bin |
84 |
| -# index of axis 1 that the bin edges of axis 2 fall on. |
85 |
| -# """ |
86 |
| -# assert axis1.name == axis2.name, \ |
87 |
| -# 'Naming of the axis is required to match' |
88 |
| -# # Getting the new bin edges index for the old bin edges |
89 |
| -# try: |
90 |
| -# return [ |
91 |
| -# np.argwhere(axis1.edges == new_edge)[0][0] for new_edge in axis2.edges |
92 |
| -# ] |
93 |
| -# except IndexError: |
94 |
| -# raise ValueError( |
95 |
| -# f"Bin edges of the axis {axis2} is incompatible with {axis1}") |
96 |
| - |
97 |
| - |
98 |
| -# def _get_all_indices(axis): |
99 |
| -# """ |
100 |
| -# Getting all possible (integer) bin index values given a scikit-hep histogram. |
101 |
| -# The special indices of hist.underflow and hist.overflow will be included if the |
102 |
| -# axis in question has those traits. |
103 |
| -# """ |
104 |
| -# idxs = list(range(len(axis))) |
105 |
| -# if axis.traits.underflow: # Extension to include the under/overflow bins |
106 |
| -# idxs.insert(0, hist.underflow) |
107 |
| -# if axis.traits.overflow: |
108 |
| -# idxs.append(hist.overflow) |
109 |
| -# return idxs |
110 |
| - |
111 |
| - |
112 |
| -# def _rebin_single_scikit(h, old_axis, new_axis): |
113 |
| -# """ |
114 |
| -# Rebinning a single axis of a scikit-hist histogram. This includes the following |
115 |
| -# routines: |
116 |
| - |
117 |
| -# - Generating a new scikit hep instance that preserves axis ordering with the |
118 |
| -# exception of the rebinned axis (in place) replacement. |
119 |
| -# - Setting up the integration ranges required to calculate the bin values of the |
120 |
| -# new histogram. |
121 |
| -# - Looping over the UHI values of the new histogram and performing a |
122 |
| -# summation over the specified range on the old histogram to fill in the new |
123 |
| -# values. |
124 |
| - |
125 |
| -# As here we have a variable number of axes, each with a variable number of bins, this |
126 |
| -# method will require the use of more old fashioned python looping, which can be |
127 |
| -# very slow for large dimensional histograms with many bins for each axis. So be |
128 |
| -# sure to make rebinning be the final step in histogram reduction. |
129 |
| -# """ |
130 |
| -# # assert isinstance(h, hist.NamedHist), "Can only process named histograms" |
131 |
| -# # Additional type casing |
132 |
| -# if isinstance(old_axis, str): |
133 |
| -# return _rebin_single_scikit(h, h.axes[old_axis], new_axis) |
134 |
| -# axis_name = old_axis.name |
135 |
| - |
136 |
| -# # Creating the new histogram instance with identical axis ordering. |
137 |
| -# all_axes = list(h.axes) |
138 |
| -# all_axes[all_axes.index(old_axis)] = new_axis |
139 |
| -# h_rebinned = hist.Hist(*all_axes, storage=h._storage_type()) |
140 |
| - |
141 |
| -# # Getting the all possible bin indices for all axes in the old histogram |
142 |
| -# bin_idx_dict = {ax.name: _get_all_indices(ax) for ax in h.axes} |
143 |
| - |
144 |
| -# # Getting the new bin edges index for the old bin edges |
145 |
| -# new_bin_edge_idx = __check_scikit_axis_compat(old_axis, new_axis) |
146 |
| -# if new_axis.traits.underflow: # Adding additional underflow/overflow |
147 |
| -# new_bin_edge_idx.insert(0, bin_idx_dict[axis_name][0]) |
148 |
| -# if new_axis.traits.overflow: |
149 |
| -# new_bin_edge_idx.append(bin_idx_dict[axis_name][-1]) |
150 |
| - |
151 |
| -# # Generating the int range pair. Additional parsing will be required for the |
152 |
| -# # under/overflow bins |
153 |
| -# def make_slice(index): |
154 |
| -# start = new_bin_edge_idx[index] |
155 |
| -# stop = new_bin_edge_idx[index + 1] |
156 |
| -# if start == hist.underflow: |
157 |
| -# start = -1 |
158 |
| -# if stop == hist.overflow: |
159 |
| -# stop = len(old_axis) |
160 |
| -# return slice(int(start), int(stop)) |
161 |
| - |
162 |
| -# new_axis_idx = _get_all_indices(new_axis) |
163 |
| -# new_int_slice = [make_slice(i) for i in range(len(new_axis_idx))] |
164 |
| -# assert len(new_axis_idx) == len(new_bin_edge_idx) - 1 |
165 |
| - |
166 |
| -# new_idx_dict = bin_idx_dict.copy() |
167 |
| -# new_idx_dict[axis_name] = new_axis_idx |
168 |
| -# bin_idx_dict[axis_name] = new_int_slice |
169 |
| - |
170 |
| -# name_list = list(bin_idx_dict.keys()) |
171 |
| -# new_idx = [x for x in itertools.product(*[x for x in new_idx_dict.values()])] |
172 |
| -# old_int = [x for x in itertools.product(*[x for x in bin_idx_dict.values()])] |
173 |
| - |
174 |
| -# print(new_idx) |
175 |
| -# print("Here") |
176 |
| -# print(old_int) |
177 |
| -# for o, n in zip(old_int, new_idx): |
178 |
| -# n_uhi = {name: n[name_idx] for name_idx, name in enumerate(name_list)} |
179 |
| -# o_uhi = {name: o[name_idx] for name_idx, name in enumerate(name_list)} |
180 |
| -# # Single variable histogram, with just the axis of interest |
181 |
| -# h_rebinned[n_uhi] = integrate_hist(h, **o_uhi) |
182 |
| - |
183 |
| -# return h_rebinned |
184 |
| - |
185 |
| - |
186 | 20 | def rebin_hist(h, axis_name, edges):
|
187 | 21 | if isinstance(edges, int):
|
188 | 22 | return h[{axis_name: hist.rebin(edges)}]
|
|
0 commit comments