Skip to content

Commit d97293a

Browse files
authored
fix: update I/O branches (#126)
* added script to get I/O branches and updated list of branches to hit various thresholds * moved some config options to a dedicated config file
1 parent 267f9cc commit d97293a

File tree

5 files changed

+275
-227
lines changed

5 files changed

+275
-227
lines changed
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
import json
2+
import numpy as np
3+
4+
### OPTIONS
# path of the JSON file with the per-branch ratios that main() loads
ratio_json_path = "nanoaod_branch_ratios.json"
# branches whose combined ratio main() reports as a separate entry
agc_original_branches = [
    "Jet_pt",
    "Jet_eta",
    "Jet_phi",
    "Jet_btagCSVV2",
    "Jet_mass",
    "Muon_pt",
    "Electron_pt",
]
# target percentages for which main() builds a list of branches
desired_percents = [15, 25, 50]
9+
10+
11+
def main(json_path=None, original_branches=None, percents=None):
    """Print, as JSON, lists of branches adding up to given percentages.

    The ratio file maps each branch name to a ratio (a fraction that is
    multiplied by 100 to obtain a percentage). Two kinds of entries are
    written to the resulting dictionary:

    * one entry keyed by the summed percentage (rounded to one decimal) of
      the ``original_branches`` that are present in the ratio file; its value
      is the ``original_branches`` list itself;
    * one entry per target percentage, holding the branches picked greedily
      from the largest ratio to the smallest.

    A summary line is printed for every target percentage, followed by the
    dictionary dumped as JSON with sorted keys.

    Args:
        json_path: path of the ratio JSON file; defaults to the module-level
            ``ratio_json_path``.
        original_branches: branch names forming the reference entry; defaults
            to the module-level ``agc_original_branches``.
        percents: target percentages; defaults to the module-level
            ``desired_percents``.
    """
    if json_path is None:
        json_path = ratio_json_path
    if original_branches is None:
        original_branches = agc_original_branches
    if percents is None:
        percents = desired_percents

    with open(json_path, encoding="utf-8") as json_file:
        branch_ratios = json.load(json_file)

    io_branch_dict = {}

    # calculate percentage associated with original AGC branches
    wanted = set(original_branches)
    original_sum = sum(
        ratio for name, ratio in branch_ratios.items() if name in wanted
    )
    # float(): np.round returns a numpy integer when nothing matched, and
    # json.dumps rejects numpy integers as dictionary keys
    io_branch_dict[float(np.round(100 * original_sum, 1))] = original_branches

    # order the branches from the largest ratio to the smallest
    ratios = np.array(list(branch_ratios.values()))
    sortind = np.flip(np.argsort(ratios))
    keys = np.array(list(branch_ratios.keys()))[sortind]
    values = ratios[sortind]

    for percent in percents:
        branch_names = []
        current_sum = 0
        for key, value in zip(keys, values):
            # skip branches exceeding the remaining budget by more than 2%
            if 100 * value > 1.02 * (percent - 100 * current_sum):
                continue
            # NOTE(review): the branch that would reach the target is not
            # added, so the selection always stays below `percent` --
            # confirm this is intended
            if 100 * (current_sum + value) >= percent:
                break
            branch_names.append(str(key))
            current_sum += value

        # reported whether the loop stopped early or ran out of branches
        print(
            f"Expected Percentage = {percent}, "
            f"Calculated Percentage = {np.round(100 * current_sum, 2)}, "
            f"Number of Branches = {len(branch_names)}"
        )
        io_branch_dict[percent] = branch_names

    print(json.dumps(io_branch_dict, sort_keys=True, indent=4))
43+
44+
45+
# run the branch selection only when the file is executed as a script
if __name__ == "__main__":
    main()
47+
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
global:
2+
3+
# ServiceX: ignore cache with repeated queries
4+
SERVICEX_IGNORE_CACHE: false
5+
6+
# analysis facility: set to "coffea_casa" for coffea-casa environments, "EAF" for FNAL, "local" for local setups
7+
AF: coffea_casa
8+
9+
benchmarking:
10+
11+
# chunk size to use
12+
CHUNKSIZE: 500000
13+
14+
# metadata to propagate through to metrics
15+
# "ssl-dev" allows for the switch to local data on /data
16+
AF_NAME: coffea_casa
17+
18+
# currently has no effect
19+
SYSTEMATICS: all
20+
21+
# has no effect on execution; only recorded for metric gathering (set to 2 for distributed coffea-casa)
22+
CORES_PER_WORKER: 2
23+
24+
# scaling for local setups with FuturesExecutor
25+
NUM_CORES: 4
26+
27+
# only I/O, all other processing disabled
28+
DISABLE_PROCESSING: false
29+
30+
# read additional branches (only with DISABLE_PROCESSING = True)
31+
# acceptable values are 4.1, 15, 25, 50 (the percentage of the file that is read); 4.1 corresponds to the standard branches used in the notebook
32+
IO_FILE_PERCENT: '4.1'
33+
34+
# nanoAOD branches that correspond to different values of IO_FILE_PERCENT
35+
IO_BRANCHES:
36+
'4.1':
37+
- Jet_pt
38+
- Jet_eta
39+
- Jet_phi
40+
- Jet_btagCSVV2
41+
- Jet_mass
42+
- Muon_pt
43+
- Electron_pt
44+
'15':
45+
- LHEPdfWeight
46+
- GenPart_pdgId
47+
- CorrT1METJet_phi
48+
'25':
49+
- LHEPdfWeight
50+
- GenPart_pt
51+
- GenPart_eta
52+
- GenPart_pdgId
53+
- LHEScaleWeight
54+
'50':
55+
- LHEPdfWeight
56+
- GenPart_pt
57+
- GenPart_eta
58+
- GenPart_phi
59+
- GenPart_pdgId
60+
- GenPart_genPartIdxMother
61+
- GenPart_statusFlags
62+
- GenPart_mass
63+
- LHEScaleWeight
64+
- GenJet_pt
65+
- GenPart_status
66+
- LHEPart_eta
67+
- LHEPart_phi
68+
- LHEPart_pt
69+
- GenJet_eta
70+
- GenJet_phi
71+
- Jet_eta
72+
- Jet_phi
73+
- SoftActivityJet_pt
74+
- SoftActivityJet_phi
75+
- SoftActivityJet_eta
76+
- GenJet_mass
77+
- Jet_pt
78+
- Jet_mass
79+
- LHEPart_mass
80+
- Jet_qgl
81+
- Jet_muonSubtrFactor
82+
- Jet_puIdDisc

analyses/cms-open-data-ttbar/nanoaod_branch_ratios.json

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)