forked from mbandrews/MLAnalyzer
-
Notifications
You must be signed in to change notification settings - Fork 1
/
convert_Tree2Dask_EBv3.py
113 lines (98 loc) · 4.57 KB
/
convert_Tree2Dask_EBv3.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
import numpy as np
import ROOT
from root_numpy import tree2array
from dask.delayed import delayed
import dask.array as da
#eosDir='/eos/uscms/store/user/mba2012/IMGs/HighLumi_ROOTv2'
eosDir='/eos/uscms/store/user/mba2012/IMGs'
#decays = ["H125GGgluonfusion_Pt25_Eta23_13TeV_TuneCUETP8M1_HighLumiPileUpv3", "PromptDiPhoton_MGG80toInf_Pt25_Eta23_13TeV_TuneCUETP8M1_HighLumiPileUp"]
#decays = ["H125GGgluonfusion_Pt25_Eta14_13TeV_TuneCUETP8M1_HighLumiPileUpv3", "PromptDiPhotonAll_MGG80toInf_Pt25_Eta14_13TeV_TuneCUETP8M1_HighLumiPileUp"]
#decays = ["H125GGgluonfusion_Pt25_Eta14_13TeV_TuneCUETP8M1_HighLumiPileUpv3", "PromptDiPhotonAll_PtHat45_MGG80toInf_Pt25_Eta14_13TeV_TuneCUETP8M1_HighLumiPileUp"]
#decays = ['H125GGgluonfusion_Pt25_Eta14_13TeV_TuneCUETP8M1_HighLumiPileUpv3']
#decays = ['H100GGgluonfusion_Pt25_Eta14_13TeV_TuneCUETP8M1_HighLumiPileUpv3']
#decays = ['H112GGgluonfusion_Pt25_Eta14_13TeV_TuneCUETP8M1_HighLumiPileUpv3']
#decays = ['H137GGgluonfusion_Pt25_Eta14_13TeV_TuneCUETP8M1_HighLumiPileUpv3']
#decays = ['H150GGgluonfusion_Pt25_Eta14_13TeV_TuneCUETP8M1_HighLumiPileUpv3']
#decays = ['H175GGgluonfusion_Pt25_Eta14_13TeV_TuneCUETP8M1_HighLumiPileUpv3']
#decays = ['H162GGgluonfusion_Pt25_Eta14_13TeV_TuneCUETP8M1_HighLumiPileUpv3']
decays = ['H087GGgluonfusion_Pt25_Eta14_13TeV_TuneCUETP8M1_HighLumiPileUpv3']
chunk_size = 250
scale = 100.
@delayed
def load_X(tree, start_, stop_, branches_, readouts, scale):
X = tree2array(tree, start=start_, stop=stop_, branches=branches_)
# Convert the object array X to a multidim array:
# 1: for each event x in X, concatenate the object columns (branches) into a flat array of shape (readouts*branches)
# 2: reshape the flat array into a stacked array: (branches, readouts)
# 3: embed each stacked array as a single row entry in a list via list comprehension
# 4: convert this list into an array with shape (events, branches, readouts)
X = np.array([np.concatenate(x).reshape(len(branches_),readouts[0]*readouts[1]) for x in X])
#print "X.shape:",X.shape
X = X.reshape((-1,len(branches_),readouts[0],readouts[1]))
X = np.transpose(X, [0,2,3,1])
# Rescale
X /= scale
return X
@delayed
def load_single(tree, start_, stop_, branches_):
X = tree2array(tree, start=start_, stop=stop_, branches=branches_)
X = np.array([x[0] for x in X])
return X
for j,decay in enumerate(decays):
if j == 0:
pass
#continue
tfile_str = '%s/%s_FEVTDEBUG_IMG.root'%(eosDir,decay)
#tfile_str = '%s/%s_FEVTDEBUG_nXXX_IMG.root'%(eosDir,decay)
tfile = ROOT.TFile(tfile_str)
tree = tfile.Get('fevt/RHTree')
nevts = tree.GetEntries()
neff = (nevts//1000)*1000
#neff = 1000
#neff = 233000
print " >> Doing decay:", decay
print " >> Input file:", tfile_str
print " >> Total events:", nevts
print " >> Effective events:", neff
# EB
readouts = [170,360]
branches = ["EB_energy"]
X = da.concatenate([\
da.from_delayed(\
load_X(tree,i,i+chunk_size, branches, readouts, scale),\
shape=(chunk_size, readouts[0], readouts[1], len(branches)),\
dtype=np.float32)\
for i in range(0,neff,chunk_size)])
print " >> Expected shape:", X.shape
# eventId
branches = ["eventId"]
eventId = da.concatenate([\
da.from_delayed(\
load_single(tree,i,i+chunk_size, branches),\
shape=(chunk_size,),\
dtype=np.int32)\
for i in range(0,neff,chunk_size)])
print " >> Expected shape:", eventId.shape
# m0
branches = ["m0"]
m0 = da.concatenate([\
da.from_delayed(\
load_single(tree,i,i+chunk_size, branches),\
shape=(chunk_size,),\
dtype=np.float32)\
for i in range(0,neff,chunk_size)])
print " >> Expected shape:", m0.shape
# Class label
label = j
#label = 1
print " >> Class label:",label
y = da.from_array(\
np.full(X.shape[0], label, dtype=np.float32),\
chunks=(chunk_size,))
file_out_str = "%s/%s_IMG_RH%d_n%dk.hdf5"%(eosDir,decay,int(scale),neff//1000.)
#file_out_str = "%s/%s_IMG_RH%d-%d_n%dk.hdf5"%(eosDir,decay,int(scale[0]),int(scale[1]),neff//1000.)
#file_out_str = "test.hdf5"
print " >> Writing to:", file_out_str
#da.to_hdf5(file_out_str, {'/X': X, '/y': y}, chunks=(chunk_size,s,s,2), compression='lzf')
da.to_hdf5(file_out_str, {'/X': X, '/y': y, 'eventId': eventId, 'm0': m0}, compression='lzf')
print " >> Done.\n"