import requests

from grafoleancollector import Collector, send_results_to_grafolean
-from dbutils import get_db_cursor, DB_PREFIX, LEAVE_N_PAST_DAYS
+from dbutils import get_db_cursor, DB_PREFIX, LEAVE_N_PAST_DAYS, DBConnectionError
from lookup import PROTOCOLS, DIRECTION_INGRESS, DIRECTION_EGRESS

logging.basicConfig(format='%(asctime)s.%(msecs)03d | %(levelname)s | %(message)s',
@@ -78,57 +78,62 @@ def _save_current_max_ts(job_id, max_ts):

def job_maint_remove_old_data(*args, **kwargs):
    log.info("MAINT: Maintenance started - removing old data")
-    with get_db_cursor() as c:
-        c.execute(f"SELECT drop_chunks(INTERVAL '{LEAVE_N_PAST_DAYS} days', '{DB_PREFIX}flows2');")
-    log.info("MAINT: Maintenance finished (removing old data).")
+    try:
+        with get_db_cursor() as c:
+            c.execute(f"SELECT drop_chunks(INTERVAL '{LEAVE_N_PAST_DAYS} days', '{DB_PREFIX}flows2');")
+        log.info("MAINT: Maintenance finished (removing old data).")
+    except DBConnectionError:
+        log.error("MAINT: Maintenance job (removing old data) failed due to DB connection issues.")


def job_maint_suggest_entities(*args, **job_params):
    log.info("MAINT: Maintenance started - making suggestions for device entities")
-
-    backend_url = job_params['backend_url']
-    bot_token = job_params['bot_token']
-    requests_session = requests.Session()
-
-    # for each account, add any new netflow exporters (entities) that might not exist yet:
-    # find all the accounts we have access to:
-    r = requests_session.get(f'{backend_url}/accounts/?b={bot_token}')
-    if r.status_code != 200:
-        raise Exception("Invalid bot token or network error, got status {} while retrieving {}/accounts".format(r.status_code, backend_url))
-    j = r.json()
-    accounts_ids = [a["id"] for a in j["list"]]
-
-    # find all entities for each of the accounts:
-    for account_id in accounts_ids:
-        r = requests_session.get('{}/accounts/{}/entities/?b={}'.format(backend_url, account_id, bot_token))
+    try:
+        backend_url = job_params['backend_url']
+        bot_token = job_params['bot_token']
+        requests_session = requests.Session()
+
+        # for each account, add any new netflow exporters (entities) that might not exist yet:
+        # find all the accounts we have access to:
+        r = requests_session.get(f'{backend_url}/accounts/?b={bot_token}')
        if r.status_code != 200:
-            raise Exception("Network error, got status {} while retrieving {}/accounts/{}/entities".format(r.status_code, backend_url, account_id))
+            raise Exception("Invalid bot token or network error, got status {} while retrieving {}/accounts".format(r.status_code, backend_url))
        j = r.json()
-        entities_ips = [e["details"]["ipv4"] for e in j["list"] if e["entity_type"] == "device"]
-
-        with get_db_cursor() as c:
-            # Ideally, we would just run "select distinct(client_ip) from netflow_flows;", but unfortunately
-            # I was unable to find a performant way to run this query. So we are using netflow_exporters:
-            c.execute(f"SELECT ip FROM {DB_PREFIX}exporters;")
-            for client_ip, in c.fetchall():
-                if client_ip in entities_ips:
-                    log.info(f"MAINT: We already know exporter [{client_ip}]")
-                    continue
-
-                log.info(f"MAINT: Unknown exporter found, inserting [{client_ip}] to account [{account_id}]")
-                url = f'{backend_url}/accounts/{account_id}/entities/?b={bot_token}'
-                params = {
-                    "name": f'{client_ip} (NetFlow exporter)',
-                    "entity_type": "device",
-                    "details": {
-                        "ipv4": client_ip,
-                    },
-                }
-                r = requests_session.post(url, json=params)
-                if r.status_code > 299:
-                    raise Exception("Network error, got status {} while posting to {}/accounts/{}/entities: {}".format(r.status_code, backend_url, account_id, r.content))
-
-    log.info("MAINT: Maintenance finished (device entities suggestions).")
+        accounts_ids = [a["id"] for a in j["list"]]
+
+        # find all entities for each of the accounts:
+        for account_id in accounts_ids:
+            r = requests_session.get('{}/accounts/{}/entities/?b={}'.format(backend_url, account_id, bot_token))
+            if r.status_code != 200:
+                raise Exception("Network error, got status {} while retrieving {}/accounts/{}/entities".format(r.status_code, backend_url, account_id))
+            j = r.json()
+            entities_ips = [e["details"]["ipv4"] for e in j["list"] if e["entity_type"] == "device"]
+
+            with get_db_cursor() as c:
+                # Ideally, we would just run "select distinct(client_ip) from netflow_flows;", but unfortunately
+                # I was unable to find a performant way to run this query. So we are using netflow_exporters:
+                c.execute(f"SELECT ip FROM {DB_PREFIX}exporters;")
+                for client_ip, in c.fetchall():
+                    if client_ip in entities_ips:
+                        log.info(f"MAINT: We already know exporter [{client_ip}]")
+                        continue
+
+                    log.info(f"MAINT: Unknown exporter found, inserting [{client_ip}] to account [{account_id}]")
+                    url = f'{backend_url}/accounts/{account_id}/entities/?b={bot_token}'
+                    params = {
+                        "name": f'{client_ip} (NetFlow exporter)',
+                        "entity_type": "device",
+                        "details": {
+                            "ipv4": client_ip,
+                        },
+                    }
+                    r = requests_session.post(url, json=params)
+                    if r.status_code > 299:
+                        raise Exception("Network error, got status {} while posting to {}/accounts/{}/entities: {}".format(r.status_code, backend_url, account_id, r.content))
+
+        log.info("MAINT: Maintenance finished (device entities suggestions).")
+    except DBConnectionError:
+        log.error("MAINT: Maintenance job (device entities suggestions) failed due to DB connection issues.")


class NetFlowBot(Collector):
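Note: `get_db_cursor` and the newly imported `DBConnectionError` live in `dbutils`, which this diff does not show, and `drop_chunks()` is TimescaleDB's data-retention function (pre-2.0 argument order), so the maintenance job above deletes hypertable chunks older than LEAVE_N_PAST_DAYS days. A minimal sketch of what the `dbutils` side of this change might look like — the connection parameters and helper bodies below are assumptions for illustration, not the repository's actual code:

# Hypothetical sketch of the dbutils helpers referenced above (assumed, not shown in this diff):
import os
from contextlib import contextmanager

import psycopg2

DB_PREFIX = 'netflow_'
LEAVE_N_PAST_DAYS = int(os.environ.get('LEAVE_N_PAST_DAYS', '30'))


class DBConnectionError(Exception):
    """Assumed: raised in place of psycopg2's OperationalError so callers can
    catch connection problems without depending on psycopg2 directly."""


@contextmanager
def get_db_cursor():
    # Assumed connection settings; the real dbutils presumably reads its own env vars.
    try:
        conn = psycopg2.connect(
            host=os.environ.get('DB_HOST', 'localhost'),
            dbname=os.environ.get('DB_DATABASE', 'netflow'),
            user=os.environ.get('DB_USER', 'netflow'),
            password=os.environ.get('DB_PASSWORD', ''),
        )
    except psycopg2.OperationalError as ex:
        # translate the driver error into the exception the jobs above catch:
        raise DBConnectionError(str(ex)) from ex
    try:
        with conn.cursor() as c:
            yield c
        conn.commit()
    finally:
        conn.close()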
@@ -163,11 +168,11 @@ def jobs(self):
                "bot_token": self.bot_token,
            }
            start_ts = int(time.time()) + first_run_ts - interval  # start_ts must be in the past
-            yield job_id, [interval], NetFlowBot.perform_account_aggr_job, job_params, start_ts
+            yield job_id, [interval], NetFlowBot.job_perform_account_aggr, job_params, start_ts


    @staticmethod
-    def perform_account_aggr_job(*args, **job_params):
+    def job_perform_account_aggr(*args, **job_params):
        # \d netflow_flows2
        # Column | Type | Description
        # ---------------+---------------+------------
@@ -189,68 +194,72 @@ def perform_account_aggr_job(*args, **job_params):
        entities = [(entity_info["entity_id"], entity_info["details"]["ipv4"],) for entity_info in job_params["entities_infos"]]
        log.info(f"Starting {interval_label} aggregation job for account {account_id}...")

-        last_used_ts = _get_last_used_ts(job_id)
-        max_ts = _get_current_max_ts()
-        if max_ts is None or last_used_ts == max_ts:
-            log.info(f"No netflow data found for job {job_id}, skipping.")
-            return
-        _save_current_max_ts(job_id, max_ts)
-        if last_used_ts is None:
-            log.info(f"Counter was not yet initialized for job {job_id}, skipping.")
-            return
-
-        # WATCH OUT! This hack changes all of the units from Bps to B! (should be cleaned up)
-        #time_between = float(max_ts - last_used_ts)
-        time_between = 1  # we want to use bytes as unit, not bytes per second
-
-        # traffic:
-        values = []
-        sum_traffic_egress = 0
-        sum_traffic_ingress = 0
-        for entity_id, entity_ip in entities:
-            v, s = NetFlowBot.get_traffic_for_entity(interval_label, last_used_ts, max_ts, time_between, DIRECTION_EGRESS, entity_id, entity_ip)
-            values.extend(v)
-            sum_traffic_egress += s
-            v, s = NetFlowBot.get_traffic_for_entity(interval_label, last_used_ts, max_ts, time_between, DIRECTION_INGRESS, entity_id, entity_ip)
-            values.extend(v)
-            sum_traffic_ingress += s
-
-        # cumulative sum for the whole account:
-        output_path = NetFlowBot.construct_output_path_prefix(interval_label, DIRECTION_EGRESS, entity_id=None, interface=None)
-        values.append({
-            'p': output_path,
-            'v': sum_traffic_egress / time_between,
-        })
-        output_path = NetFlowBot.construct_output_path_prefix(interval_label, DIRECTION_INGRESS, entity_id=None, interface=None)
-        values.append({
-            'p': output_path,
-            'v': sum_traffic_ingress / time_between,
-        })
-
-        # top N IPs:
-        for entity_id, entity_ip in entities:
-            for direction in [DIRECTION_EGRESS, DIRECTION_INGRESS]:
-                values.extend(NetFlowBot.get_top_N_IPs_for_entity(interval_label, last_used_ts, max_ts, time_between, direction, entity_id, entity_ip))
-                values.extend(NetFlowBot.get_top_N_IPs_for_entity_interfaces(interval_label, last_used_ts, max_ts, time_between, direction, entity_id, entity_ip))
-                values.extend(NetFlowBot.get_top_N_protocols_for_entity(interval_label, last_used_ts, max_ts, time_between, direction, entity_id, entity_ip))
-                values.extend(NetFlowBot.get_top_N_protocols_for_entity_interfaces(interval_label, last_used_ts, max_ts, time_between, direction, entity_id, entity_ip))
-                values.extend(NetFlowBot.get_top_N_connections_for_entity(interval_label, last_used_ts, max_ts, time_between, direction, entity_id, entity_ip))
-
-        if not values:
-            log.warning("No values found to be sent to Grafolean")
-            return
-
-        # the values are Decimals because they come from BIGINT column, so we must transform
-        # them to strings before encoding to JSON:
-        values = [{'p': v['p'], 'v': str(v['v'])} for v in values]
-
-        # send the data to Grafolean:
-        send_results_to_grafolean(
-            job_params['backend_url'],
-            job_params['bot_token'],
-            account_id,
-            values,
-        )
+        try:
+            last_used_ts = _get_last_used_ts(job_id)
+            max_ts = _get_current_max_ts()
+            if max_ts is None or last_used_ts == max_ts:
+                log.info(f"No netflow data found for job {job_id}, skipping.")
+                return
+            _save_current_max_ts(job_id, max_ts)
+            if last_used_ts is None:
+                log.info(f"Counter was not yet initialized for job {job_id}, skipping.")
+                return
+
+            # WATCH OUT! This hack changes all of the units from Bps to B! (should be cleaned up)
+            #time_between = float(max_ts - last_used_ts)
+            time_between = 1  # we want to use bytes as unit, not bytes per second
+
+            # traffic:
+            values = []
+            sum_traffic_egress = 0
+            sum_traffic_ingress = 0
+            for entity_id, entity_ip in entities:
+                v, s = NetFlowBot.get_traffic_for_entity(interval_label, last_used_ts, max_ts, time_between, DIRECTION_EGRESS, entity_id, entity_ip)
+                values.extend(v)
+                sum_traffic_egress += s
+                v, s = NetFlowBot.get_traffic_for_entity(interval_label, last_used_ts, max_ts, time_between, DIRECTION_INGRESS, entity_id, entity_ip)
+                values.extend(v)
+                sum_traffic_ingress += s
+
+            # cumulative sum for the whole account:
+            output_path = NetFlowBot.construct_output_path_prefix(interval_label, DIRECTION_EGRESS, entity_id=None, interface=None)
+            values.append({
+                'p': output_path,
+                'v': sum_traffic_egress / time_between,
+            })
+            output_path = NetFlowBot.construct_output_path_prefix(interval_label, DIRECTION_INGRESS, entity_id=None, interface=None)
+            values.append({
+                'p': output_path,
+                'v': sum_traffic_ingress / time_between,
+            })
+
+            # top N IPs:
+            for entity_id, entity_ip in entities:
+                for direction in [DIRECTION_EGRESS, DIRECTION_INGRESS]:
+                    values.extend(NetFlowBot.get_top_N_IPs_for_entity(interval_label, last_used_ts, max_ts, time_between, direction, entity_id, entity_ip))
+                    values.extend(NetFlowBot.get_top_N_IPs_for_entity_interfaces(interval_label, last_used_ts, max_ts, time_between, direction, entity_id, entity_ip))
+                    values.extend(NetFlowBot.get_top_N_protocols_for_entity(interval_label, last_used_ts, max_ts, time_between, direction, entity_id, entity_ip))
+                    values.extend(NetFlowBot.get_top_N_protocols_for_entity_interfaces(interval_label, last_used_ts, max_ts, time_between, direction, entity_id, entity_ip))
+                    values.extend(NetFlowBot.get_top_N_connections_for_entity(interval_label, last_used_ts, max_ts, time_between, direction, entity_id, entity_ip))
+
+            if not values:
+                log.warning("No values found to be sent to Grafolean")
+                return
+
+            # the values are Decimals because they come from BIGINT column, so we must transform
+            # them to strings before encoding to JSON:
+            values = [{'p': v['p'], 'v': str(v['v'])} for v in values]
+
+            # send the data to Grafolean:
+            send_results_to_grafolean(
+                job_params['backend_url'],
+                job_params['bot_token'],
+                account_id,
+                values,
+            )
+
+        except DBConnectionError:
+            log.error(f"{interval_label} aggregation job for account {account_id} failed due to DB connection issues.")


    @staticmethod
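A side note on the `str(v['v'])` conversion kept in the rewritten aggregation job: the traffic values are summed in SQL (in the `get_*` helpers, which this diff does not show), and since PostgreSQL's `SUM()` over a `BIGINT` column yields `NUMERIC`, psycopg2 hands them back as `decimal.Decimal`, which the standard `json` encoder refuses. A small standalone demonstration of why the conversion is needed:

import json
from decimal import Decimal

# example record shaped like the ones the aggregation job builds;
# the path string here is made up for illustration:
values = [{'p': 'entity.123.netflow.1min.ingress', 'v': Decimal('12345')}]

try:
    json.dumps(values)
except TypeError as ex:
    print(ex)  # "Object of type Decimal is not JSON serializable"

# converting the value to str first makes the payload JSON-encodable:
print(json.dumps([{'p': v['p'], 'v': str(v['v'])} for v in values]))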