15
15
"""A module for fetching records from multiple-level preprocessed data."""
16
16
17
17
import utils
18
-
18
+ import time
19
19
from level_slices_reader import LevelSlices
20
20
from metadata import Metadata
21
21
@@ -77,9 +77,17 @@ def fetch(self, strategy, number_records, timespan_start, timespan_end):
77
77
}
78
78
]
79
79
"""
80
+
81
+ prevTime = time .time ()
82
+ print ("fetch data starts" , prevTime )
83
+
80
84
self ._metadata = Metadata (
81
85
self ._preprocess_dir , bucket = self ._preprocess_bucket )
82
86
self ._metadata .load ()
87
+
88
+ diff = time .time () - prevTime
89
+ prevTime = time .time ()
90
+ print ("meta data done" , diff )
83
91
84
92
if timespan_start is None :
85
93
timespan_start = self ._metadata ['start' ]
@@ -100,6 +108,10 @@ def fetch(self, strategy, number_records, timespan_start, timespan_end):
100
108
target_level = self ._metadata ['levels' ][self ._metadata ['levels' ]
101
109
['names' ][target_level_index ]]
102
110
111
+ diff = time .time () - prevTime
112
+ prevTime = time .time ()
113
+ print ("target level located" ,diff )
114
+
103
115
level_metadata = Metadata (
104
116
self ._preprocess_dir , strategy , utils .get_level_name (
105
117
target_level_index ), bucket = self ._preprocess_bucket )
@@ -115,15 +127,62 @@ def fetch(self, strategy, number_records, timespan_start, timespan_end):
115
127
self ._preprocess_dir ,
116
128
utils .get_level_name (target_level_index ),
117
129
single_slice , strategy ) for single_slice in target_slices_names ]
130
+
131
+ diff = time .time () - prevTime
132
+ prevTime = time .time ()
133
+ print ("all slice found" , diff )
134
+
135
+ target_slice_paths_min = [utils .get_slice_path (
136
+ self ._preprocess_dir ,
137
+ utils .get_level_name (target_level_index ),
138
+ single_slice , 'min' ) for single_slice in target_slices_names ]
139
+
140
+ target_slice_paths_max = [utils .get_slice_path (
141
+ self ._preprocess_dir ,
142
+ utils .get_level_name (target_level_index ),
143
+ single_slice , 'max' ) for single_slice in target_slices_names ]
144
+
145
+ diff = time .time () - prevTime
146
+ prevTime = time .time ()
147
+ print ("min max slice found" , diff )
118
148
119
149
# Reads records and downsamples.
120
150
target_slices = LevelSlices (
121
151
target_slice_paths , self ._preprocess_bucket )
152
+
153
+
154
+
122
155
target_slices .read (timespan_start , timespan_end )
123
- number_target_records = target_slices .get_records_count ()
124
156
157
+ diff = time .time () - prevTime
158
+ prevTime = time .time ()
159
+ print ("main file read" , diff )
160
+
161
+ target_slices_min = LevelSlices (
162
+ target_slice_paths_min , self ._preprocess_bucket )
163
+
164
+ target_slices_max = LevelSlices (
165
+ target_slice_paths_max , self ._preprocess_bucket )
166
+ target_slices_min .read (timespan_start , timespan_end )
167
+ target_slices_max .read (timespan_start , timespan_end )
168
+
169
+ diff = time .time () - prevTime
170
+ prevTime = time .time ()
171
+ print ("min max file read" , diff )
172
+
173
+ minList = target_slices_min .get_min ()
174
+ maxList = target_slices_max .get_max ()
175
+
176
+ diff = time .time () - prevTime
177
+ prevTime = time .time ()
178
+ print ("min max get" , diff )
179
+ number_target_records = target_slices .get_records_count ()
125
180
target_slices .downsample (strategy , max_records = number_records )
126
- downsampled_data = target_slices .format_response ()
181
+ downsampled_data = target_slices .format_response (minList , maxList )
182
+
183
+ diff = time .time () - prevTime
184
+ prevTime = time .time ()
185
+ print ("dowmsample finished" , diff )
127
186
number_result_records = target_slices .get_records_count ()
128
187
129
188
if number_target_records == 0 :
@@ -146,6 +205,8 @@ def _binary_search(self, data_list, value, reverse=False):
146
205
Returns:
147
206
An int of index for the result.
148
207
"""
208
+ print (data_list )
209
+
149
210
if not data_list :
150
211
return - 1
151
212
0 commit comments