1
1
import argparse
2
+ import base64
2
3
import gzip
3
4
import json
4
5
import logging
5
6
import os
6
7
import errno
7
8
import sys
9
+ import socket
10
+ import struct
8
11
import time
9
12
from collections import defaultdict
10
13
from datetime import datetime
14
17
15
18
from lookup import PROTOCOLS
16
19
from dbutils import migrate_if_needed , get_db_cursor , DB_PREFIX
20
+ from lookup import DIRECTION_INGRESS
21
+
22
+
23
+ # python-netflow-v9-softflowd expects main.py to be the main entrypoint, but we only need
24
+ # parse_packet():
25
+ sys .path .append (os .path .dirname (os .path .realpath (__file__ )) + '/pynetflow' )
26
+ from pynetflow .netflow import parse_packet , UnknownNetFlowVersion , TemplateNotRecognized
17
27
18
28
19
29
IS_DEBUG = os .environ .get ('DEBUG' , 'false' ) in ['true' , 'yes' , '1' ]
26
36
log = logging .getLogger ("{}.{}" .format (__name__ , "writer" ))
27
37
28
38
39
+ # Amount of time to wait before dropping an undecodable ExportPacket (presumably in seconds, i.e. one hour - confirm against the code that uses it)
40
+ PACKET_TIMEOUT = 60 * 60
41
+
29
42
def process_named_pipe (named_pipe_filename ):
30
43
try :
31
44
os .mkfifo (named_pipe_filename )
32
45
except OSError as ex :
33
46
if ex .errno != errno .EEXIST :
34
47
raise
35
48
49
+ templates = {}
36
50
while True :
37
51
with open (named_pipe_filename , "rb" ) as fp :
38
52
log .info (f"Opened named pipe { named_pipe_filename } " )
@@ -42,12 +56,28 @@ def process_named_pipe(named_pipe_filename):
42
56
break
43
57
44
58
try :
45
- write_record (json .loads (line ))
59
+ data_b64 , ts , client = json .loads (line )
60
+ data = base64 .b64decode (data_b64 )
61
+
62
+ try :
63
+ export = parse_packet (data , templates )
64
+ write_record (ts , client , export )
65
+ except UnknownNetFlowVersion :
66
+ log .warning ("Unknown NetFlow version" )
67
+ continue
68
+ except TemplateNotRecognized :
69
+ log .warning ("Failed to decode a v9 ExportPacket, template not "
70
+ "recognized (if this happens at the start, it's ok)" )
71
+ continue
72
+
46
73
except Exception as ex :
47
74
log .exception ("Error writing line, skipping..." )
48
75
49
76
50
- def write_record (j ):
77
+ last_record_seqs = {}
78
+
79
+
80
+ def write_record (ts , client , export ):
51
81
# {
52
82
# "DST_AS": 0,
53
83
# "SRC_AS": 0,
@@ -73,30 +103,77 @@ def write_record(j):
73
103
# }
74
104
# https://www.cisco.com/en/US/technologies/tk648/tk362/technologies_white_paper09186a00800a3db9.html#wp9001622
75
105
106
+ client_ip , _ = client
107
+
108
+ # check for missing records:
109
+ last_record_seq = last_record_seqs .get (client_ip )
110
+ if last_record_seq is None :
111
+ log .warning (f"Last record sequence number is not known, starting with { export .header .sequence } " )
112
+ elif export .header .sequence != last_record_seq + 1 :
113
+ log .error (f"Sequence number ({ export .header .sequence } ) does not follow ({ last_record_seq } ), some records might have been skipped" )
114
+ last_record_seqs [client_ip ] = export .header .sequence
115
+
76
116
with get_db_cursor () as c :
77
117
# first save the flow record:
78
- ts = j ['ts' ]
79
- log .debug (f"Received record [{ j ['seq' ]} ]: { datetime .utcfromtimestamp (ts )} from { j ['client' ]} " )
80
- c .execute (f"INSERT INTO { DB_PREFIX } records (ts, client_ip) VALUES (%s, %s) RETURNING seq;" , (ts , j ['client' ],))
118
+ log .debug (f"Received record [{ export .header .sequence } ]: { datetime .utcfromtimestamp (ts )} from { client_ip } " )
119
+ c .execute (f"INSERT INTO { DB_PREFIX } records (ts, client_ip) VALUES (%s, %s) RETURNING seq;" , (ts , client_ip ,))
81
120
record_db_seq = c .fetchone ()[0 ]
82
121
83
122
# then save each of the flows within the record, but use execute_values() to perform bulk insert:
84
- def _get_data (record_db_seq , flows ):
85
- for flow in flows :
86
- yield (
87
- record_db_seq ,
88
- * flow ,
89
- # flow.get('IN_BYTES'),
90
- # flow.get('PROTOCOL'),
91
- # flow.get('DIRECTION'),
92
- # flow.get('L4_DST_PORT'),
93
- # flow.get('L4_SRC_PORT'),
94
- # flow.get('INPUT_SNMP'),
95
- # flow.get('OUTPUT_SNMP'),
96
- # flow.get('IPV4_DST_ADDR'),
97
- # flow.get('IPV4_SRC_ADDR'),
98
- )
99
- data_iterator = _get_data (record_db_seq , j ['flows' ])
123
+ def _get_data (netflow_version , record_db_seq , flows ):
124
+ if netflow_version == 9 :
125
+ for f in flows :
126
+ yield (
127
+ record_db_seq ,
128
+ # "IN_BYTES":
129
+ f .data ["IN_BYTES" ],
130
+ # "PROTOCOL":
131
+ f .data ["PROTOCOL" ],
132
+ # "DIRECTION":
133
+ f .data ["DIRECTION" ],
134
+ # "L4_DST_PORT":
135
+ f .data ["L4_DST_PORT" ],
136
+ # "L4_SRC_PORT":
137
+ f .data ["L4_SRC_PORT" ],
138
+ # "INPUT_SNMP":
139
+ f .data ["INPUT_SNMP" ],
140
+ # "OUTPUT_SNMP":
141
+ f .data ["OUTPUT_SNMP" ],
142
+ # "IPV4_DST_ADDR":
143
+ f .data ["IPV4_DST_ADDR" ],
144
+ # "IPV4_SRC_ADDR":
145
+ f .data ["IPV4_SRC_ADDR" ],
146
+ )
147
+ elif netflow_version == 5 :
148
+ for f in flows :
149
+ yield (
150
+ record_db_seq ,
151
+ # "IN_BYTES":
152
+ f .data ["IN_OCTETS" ],
153
+ # "PROTOCOL":
154
+ f .data ["PROTO" ],
155
+ # "DIRECTION":
156
+ DIRECTION_INGRESS ,
157
+ # "L4_DST_PORT":
158
+ f .data ["DST_PORT" ],
159
+ # "L4_SRC_PORT":
160
+ f .data ["SRC_PORT" ],
161
+ # "INPUT_SNMP":
162
+ f .data ["INPUT" ],
163
+ # "OUTPUT_SNMP":
164
+ f .data ["OUTPUT" ],
165
+ # netflow v5 IP addresses are decoded to integers, which is less suitable for us - pack
166
+ # them back to bytes and transform them to strings:
167
+ # "IPV4_DST_ADDR":
168
+ socket .inet_ntoa (struct .pack ('!I' , f .data ["IPV4_DST_ADDR" ])),
169
+ # "IPV4_SRC_ADDR":
170
+ socket .inet_ntoa (struct .pack ('!I' , f .data ["IPV4_SRC_ADDR" ])),
171
+ )
172
+ else :
173
+ log .error (f"Only Netflow v5 and v9 currently supported, ignoring record (version: [{ export .header .version } ])" )
174
+ return
175
+
176
+ data_iterator = _get_data (export .header .version , record_db_seq , export .flows )
100
177
psycopg2 .extras .execute_values (
101
178
c ,
102
179
f"INSERT INTO { DB_PREFIX } flows (record, IN_BYTES, PROTOCOL, DIRECTION, L4_DST_PORT, L4_SRC_PORT, INPUT_SNMP, OUTPUT_SNMP, IPV4_DST_ADDR, IPV4_SRC_ADDR) VALUES %s" ,
0 commit comments