11import argparse
2+ import base64
23import gzip
34import json
45import logging
56import os
67import errno
78import sys
9+ import socket
10+ import struct
811import time
912from collections import defaultdict
1013from datetime import datetime
1417
1518from lookup import PROTOCOLS
1619from dbutils import migrate_if_needed , get_db_cursor , DB_PREFIX
20+ from lookup import DIRECTION_INGRESS
21+
22+
23+ # python-netflow-v9-softflowd expects main.py to be the main entrypoint, but we only need
24+ # parse_packet():
25+ sys .path .append (os .path .dirname (os .path .realpath (__file__ )) + '/pynetflow' )
26+ from pynetflow .netflow import parse_packet , UnknownNetFlowVersion , TemplateNotRecognized
1727
1828
1929IS_DEBUG = os .environ .get ('DEBUG' , 'false' ) in ['true' , 'yes' , '1' ]
2636log = logging .getLogger ("{}.{}" .format (__name__ , "writer" ))
2737
2838
39+ # Amount of time to wait before dropping an undecodable ExportPacket
40+ PACKET_TIMEOUT = 60 * 60
41+
2942def process_named_pipe (named_pipe_filename ):
3043 try :
3144 os .mkfifo (named_pipe_filename )
3245 except OSError as ex :
3346 if ex .errno != errno .EEXIST :
3447 raise
3548
49+ templates = {}
3650 while True :
3751 with open (named_pipe_filename , "rb" ) as fp :
3852 log .info (f"Opened named pipe { named_pipe_filename } " )
@@ -42,12 +56,28 @@ def process_named_pipe(named_pipe_filename):
4256 break
4357
4458 try :
45- write_record (json .loads (line ))
59+ data_b64 , ts , client = json .loads (line )
60+ data = base64 .b64decode (data_b64 )
61+
62+ try :
63+ export = parse_packet (data , templates )
64+ write_record (ts , client , export )
65+ except UnknownNetFlowVersion :
66+ log .warning ("Unknown NetFlow version" )
67+ continue
68+ except TemplateNotRecognized :
69+ log .warning ("Failed to decode a v9 ExportPacket, template not "
70+ "recognized (if this happens at the start, it's ok)" )
71+ continue
72+
4673 except Exception as ex :
4774 log .exception ("Error writing line, skipping..." )
4875
4976
50- def write_record (j ):
77+ last_record_seqs = {}
78+
79+
80+ def write_record (ts , client , export ):
5181 # {
5282 # "DST_AS": 0,
5383 # "SRC_AS": 0,
@@ -73,30 +103,77 @@ def write_record(j):
73103 # }
74104 # https://www.cisco.com/en/US/technologies/tk648/tk362/technologies_white_paper09186a00800a3db9.html#wp9001622
75105
106+ client_ip , _ = client
107+
108+ # check for missing records:
109+ last_record_seq = last_record_seqs .get (client_ip )
110+ if last_record_seq is None :
111+ log .warning (f"Last record sequence number is not known, starting with { export .header .sequence } " )
112+ elif export .header .sequence != last_record_seq + 1 :
113+ log .error (f"Sequence number ({ export .header .sequence } ) does not follow ({ last_record_seq } ), some records might have been skipped" )
114+ last_record_seqs [client_ip ] = export .header .sequence
115+
76116 with get_db_cursor () as c :
77117 # first save the flow record:
78- ts = j ['ts' ]
79- log .debug (f"Received record [{ j ['seq' ]} ]: { datetime .utcfromtimestamp (ts )} from { j ['client' ]} " )
80- c .execute (f"INSERT INTO { DB_PREFIX } records (ts, client_ip) VALUES (%s, %s) RETURNING seq;" , (ts , j ['client' ],))
118+ log .debug (f"Received record [{ export .header .sequence } ]: { datetime .utcfromtimestamp (ts )} from { client_ip } " )
119+ c .execute (f"INSERT INTO { DB_PREFIX } records (ts, client_ip) VALUES (%s, %s) RETURNING seq;" , (ts , client_ip ,))
81120 record_db_seq = c .fetchone ()[0 ]
82121
83122 # then save each of the flows within the record, but use execute_values() to perform bulk insert:
84- def _get_data (record_db_seq , flows ):
85- for flow in flows :
86- yield (
87- record_db_seq ,
88- * flow ,
89- # flow.get('IN_BYTES'),
90- # flow.get('PROTOCOL'),
91- # flow.get('DIRECTION'),
92- # flow.get('L4_DST_PORT'),
93- # flow.get('L4_SRC_PORT'),
94- # flow.get('INPUT_SNMP'),
95- # flow.get('OUTPUT_SNMP'),
96- # flow.get('IPV4_DST_ADDR'),
97- # flow.get('IPV4_SRC_ADDR'),
98- )
99- data_iterator = _get_data (record_db_seq , j ['flows' ])
123+ def _get_data (netflow_version , record_db_seq , flows ):
124+ if netflow_version == 9 :
125+ for f in flows :
126+ yield (
127+ record_db_seq ,
128+ # "IN_BYTES":
129+ f .data ["IN_BYTES" ],
130+ # "PROTOCOL":
131+ f .data ["PROTOCOL" ],
132+ # "DIRECTION":
133+ f .data ["DIRECTION" ],
134+ # "L4_DST_PORT":
135+ f .data ["L4_DST_PORT" ],
136+ # "L4_SRC_PORT":
137+ f .data ["L4_SRC_PORT" ],
138+ # "INPUT_SNMP":
139+ f .data ["INPUT_SNMP" ],
140+ # "OUTPUT_SNMP":
141+ f .data ["OUTPUT_SNMP" ],
142+ # "IPV4_DST_ADDR":
143+ f .data ["IPV4_DST_ADDR" ],
144+ # "IPV4_SRC_ADDR":
145+ f .data ["IPV4_SRC_ADDR" ],
146+ )
147+ elif netflow_version == 5 :
148+ for f in flows :
149+ yield (
150+ record_db_seq ,
151+ # "IN_BYTES":
152+ f .data ["IN_OCTETS" ],
153+ # "PROTOCOL":
154+ f .data ["PROTO" ],
155+ # "DIRECTION":
156+ DIRECTION_INGRESS ,
157+ # "L4_DST_PORT":
158+ f .data ["DST_PORT" ],
159+ # "L4_SRC_PORT":
160+ f .data ["SRC_PORT" ],
161+ # "INPUT_SNMP":
162+ f .data ["INPUT" ],
163+ # "OUTPUT_SNMP":
164+ f .data ["OUTPUT" ],
165+ # netflow v5 IP addresses are decoded to integers, which is less suitable for us - pack
166+ # them back to bytes and transform them to strings:
167+ # "IPV4_DST_ADDR":
168+ socket .inet_ntoa (struct .pack ('!I' , f .data ["IPV4_DST_ADDR" ])),
169+ # "IPV4_SRC_ADDR":
170+ socket .inet_ntoa (struct .pack ('!I' , f .data ["IPV4_SRC_ADDR" ])),
171+ )
172+ else :
173+ log .error (f"Only Netflow v5 and v9 currently supported, ignoring record (version: [{ export .header .version } ])" )
174+ return
175+
176+ data_iterator = _get_data (export .header .version , record_db_seq , export .flows )
100177 psycopg2 .extras .execute_values (
101178 c ,
102179 f"INSERT INTO { DB_PREFIX } flows (record, IN_BYTES, PROTOCOL, DIRECTION, L4_DST_PORT, L4_SRC_PORT, INPUT_SNMP, OUTPUT_SNMP, IPV4_DST_ADDR, IPV4_SRC_ADDR) VALUES %s" ,
0 commit comments