From e68bd2e020292f5fdb5fd0dd76dac7a2a9e783c6 Mon Sep 17 00:00:00 2001 From: Aditya Pahuja Date: Wed, 29 Jul 2020 20:41:21 +0100 Subject: [PATCH] Add "--use-http-x-forwarded-for" option for NGINX Updated the script to pick up the IP address from the `http_x_forwarded_for` field instead of the `ip` field in an NGINX JSON-formatted log file, provided that the "--use-http-x-forwarded-for" option is used. If the `http_x_forwarded_for` field contains "-" or is missing, the script will pick up the IP address from the `ip` field. This option is useful if the `ip` field does not contain the client's IP address (e.g. when NGINX is used as a reverse proxy). Updated the README file to include the `http_x_forwarded_for` field in the NGINX log format and to describe the usage of the "--use-http-x-forwarded-for" option. Added two test cases for the "--use-http-x-forwarded-for" option to ensure that the script works properly. --- README.md | 5 +++- import_logs.py | 14 ++++++++++ tests/logs/nginx_json.log | 5 ++++ tests/tests.py | 59 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 82 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 816f908..edd2024 100644 --- a/README.md +++ b/README.md @@ -177,12 +177,15 @@ As a syslog central server you could use rsyslog or syslog-ng, use relevant part You can use any log format that this script can handle, like Apache Combined, and Json format which needs less processing. +The script has an option "--use-http-x-forwarded-for" which causes the script to pick up the IP address from the `http_x_forwarded_for` field instead of the `ip` field in an Nginx JSON-formatted log file. If the `http_x_forwarded_for` field lists several addresses, the first one is used. If the `http_x_forwarded_for` field contains "-" or is missing, the script will pick up the IP address from the `ip` field. This option is useful if the `ip` field does not contain the client's IP address (e.g. when Nginx is used as a reverse proxy). + ##### Setup Nginx logs ``` http { ... 
-log_format matomo '{"ip": "$remote_addr",' +log_format matomo '{"ip": "$remote_addr",' + '"http_x_forwarded_for": "$http_x_forwarded_for",' '"host": "$host",' '"path": "$request_uri",' '"status": "$status",' diff --git a/import_logs.py b/import_logs.py index c704d67..528d11a 100755 --- a/import_logs.py +++ b/import_logs.py @@ -176,6 +176,15 @@ def get(self, key): # Some ugly patchs ... if key == 'generation_time_milli': self.json[key] = int(float(self.json[key]) * 1000) + elif key == 'ip': + try: + if config.options.use_http_x_forwarded_for and 'http_x_forwarded_for' in self.json: + ips = self.json['http_x_forwarded_for'] + if ips != '-': + return re.search(r'(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}|[0-9A-Fa-f]{1,4}:[0-9A-Fa-f]{1,4}:[0-9A-Fa-f]{1,4}:[0-9A-Fa-f]{1,4}:[0-9A-Fa-f]{1,4}:[0-9A-Fa-f]{1,4}:[0-9A-Fa-f]{1,4}:[0-9A-Fa-f]{1,4})', ips).group() + return self.json[key] + except KeyError: + raise BaseFormatException() # Patch date format ISO 8601 elif key == 'date': tz = self.json[key][19:] @@ -891,6 +900,11 @@ def _create_parser(self): default=False, help="Do not verify the SSL / TLS certificate when contacting the Matomo server. This is the default when running on Python 2.7.8 or older." ) + option_parser.add_option( + '--use-http-x-forwarded-for', dest='use_http_x_forwarded_for', + action='store_true', default=False, + help="Use http_x_forwarded_for for ip in Nginx JSON formatted log file. If http_x_forwarded_for is not present, it will use ip." 
+ ) return option_parser def _set_date(self, option_attr_name, option, opt_str, value, parser): diff --git a/tests/logs/nginx_json.log b/tests/logs/nginx_json.log index 63dc3a1..8a70bb0 100644 --- a/tests/logs/nginx_json.log +++ b/tests/logs/nginx_json.log @@ -1,2 +1,7 @@ {"idsite":1,"ip": "1.2.3.4","host": "www.piwik.org","path": "/piwik.php?action_name=Clearcode%20-%20Web%20and%20Mobile%20Development%20%7C%20Technology%20With%20Passion&idsite=1&rec=1&r=983420&h=17&m=31&s=25&url=http%3A%2F%2Fclearcode.cc%2F&urlref=http%3A%2F%2Fclearcode.cc%2Fwelcome&_id=1da79fc743e8bcc4&_idts=1360047661&_idvc=1&_idn=0&_refts=1360047661&_viewts=1360047661&_ref=http%3A%2F%2Fpiwik.org%2Fthank-you-all%2F&pdf=1&qt=1&realp=0&wma=1&dir=1&fla=1&java=1&gears=0&ag=1&cookie=1&res=1680x1050","status": "200","referrer": "http://clearcode.cc/","user_agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.57 Safari/537.17","length": 192,"generation_time_milli": 0.008,"date": "2013-10-10T16:52:00+02:00"} {"idsite":1,"ip": "1.2.3.4","host": "www.piwik.org","path": "/piwik.php?action_name=AdviserBrief%20-%20Track%20Your%20Investments%20and%20Plan%20Financial%20Future%20%7C%20Clearcode&idsite=1&rec=1&r=109464&h=17&m=31&s=40&url=http%3A%2F%2Fclearcode.cc%2Fcase%2Fadviserbrief-track-your-investments-and-plan-financial-future%2F&urlref=http%3A%2F%2Fclearcode.cc%2Fwelcome&_id=1da79fc743e8bcc4&_idts=1360047661&_idvc=1&_idn=0&_refts=1360047661&_viewts=1360047661&_ref=http%3A%2F%2Fpiwik.org%2Fthank-you-all%2F&pdf=1&qt=1&realp=0&wma=1&dir=1&fla=1&java=1&gears=0&ag=1&cookie=1&res=1680x1050","status": "200","referrer": "http://clearcode.cc/","user_agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.57 Safari/537.17","length": 192,"generation_time_milli": 0.008,"date": "2013-10-10T16:52:00+02:00"} +{"idsite":1,"ip": "0:0:0:0:0:ffff:7b2d:4350","host": "www.piwik.org","path": 
"/piwik.php?action_name=Clearcode%20-%20Web%20and%20Mobile%20Development%20%7C%20Technology%20With%20Passion&idsite=1&rec=1&r=983420&h=17&m=31&s=25&url=http%3A%2F%2Fclearcode.cc%2F&urlref=http%3A%2F%2Fclearcode.cc%2Fwelcome&_id=1da79fc743e8bcc4&_idts=1360047661&_idvc=1&_idn=0&_refts=1360047661&_viewts=1360047661&_ref=http%3A%2F%2Fpiwik.org%2Fthank-you-all%2F&pdf=1&qt=1&realp=0&wma=1&dir=1&fla=1&java=1&gears=0&ag=1&cookie=1&res=1680x1050","status": "200","referrer": "http://clearcode.cc/","user_agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.57 Safari/537.17","length": 192,"generation_time_milli": 0.008,"date": "2013-10-10T16:52:00+02:00"} +{"idsite":1,"ip": "1.2.3.5","http_x_forwarded_for": "-","host": "www.piwik.org","path": "/piwik.php?action_name=Clearcode%20-%20Web%20and%20Mobile%20Development%20%7C%20Technology%20With%20Passion&idsite=1&rec=1&r=983420&h=17&m=31&s=25&url=http%3A%2F%2Fclearcode.cc%2F&urlref=http%3A%2F%2Fclearcode.cc%2Fwelcome&_id=1da79fc743e8bcc4&_idts=1360047661&_idvc=1&_idn=0&_refts=1360047661&_viewts=1360047661&_ref=http%3A%2F%2Fpiwik.org%2Fthank-you-all%2F&pdf=1&qt=1&realp=0&wma=1&dir=1&fla=1&java=1&gears=0&ag=1&cookie=1&res=1680x1050","status": "200","referrer": "http://clearcode.cc/","user_agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.57 Safari/537.17","length": 192,"generation_time_milli": 0.008,"date": "2013-10-10T16:52:00+02:00"} +{"idsite":1,"ip": "0:0:0:0:0:ffff:7b2d:4351","http_x_forwarded_for": "-","host": "www.piwik.org","path": 
"/piwik.php?action_name=Clearcode%20-%20Web%20and%20Mobile%20Development%20%7C%20Technology%20With%20Passion&idsite=1&rec=1&r=983420&h=17&m=31&s=25&url=http%3A%2F%2Fclearcode.cc%2F&urlref=http%3A%2F%2Fclearcode.cc%2Fwelcome&_id=1da79fc743e8bcc4&_idts=1360047661&_idvc=1&_idn=0&_refts=1360047661&_viewts=1360047661&_ref=http%3A%2F%2Fpiwik.org%2Fthank-you-all%2F&pdf=1&qt=1&realp=0&wma=1&dir=1&fla=1&java=1&gears=0&ag=1&cookie=1&res=1680x1050","status": "200","referrer": "http://clearcode.cc/","user_agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.57 Safari/537.17","length": 192,"generation_time_milli": 0.008,"date": "2013-10-10T16:52:00+02:00"} +{"idsite":1,"ip": "1.2.3.6","http_x_forwarded_for": "4.3.2.1 1.1.1.1 2.2.2.2","host": "www.piwik.org","path": "/piwik.php?action_name=Clearcode%20-%20Web%20and%20Mobile%20Development%20%7C%20Technology%20With%20Passion&idsite=1&rec=1&r=983420&h=17&m=31&s=25&url=http%3A%2F%2Fclearcode.cc%2F&urlref=http%3A%2F%2Fclearcode.cc%2Fwelcome&_id=1da79fc743e8bcc4&_idts=1360047661&_idvc=1&_idn=0&_refts=1360047661&_viewts=1360047661&_ref=http%3A%2F%2Fpiwik.org%2Fthank-you-all%2F&pdf=1&qt=1&realp=0&wma=1&dir=1&fla=1&java=1&gears=0&ag=1&cookie=1&res=1680x1050","status": "200","referrer": "http://clearcode.cc/","user_agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.57 Safari/537.17","length": 192,"generation_time_milli": 0.008,"date": "2013-10-10T16:52:00+02:00"} +{"idsite":1,"ip": "ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff","http_x_forwarded_for": "0:0:0:0:0:ffff:7b2d:4359 0:0:0:0:0:0:7b2d:4359","host": "www.piwik.org","path": 
"/piwik.php?action_name=Clearcode%20-%20Web%20and%20Mobile%20Development%20%7C%20Technology%20With%20Passion&idsite=1&rec=1&r=983420&h=17&m=31&s=25&url=http%3A%2F%2Fclearcode.cc%2F&urlref=http%3A%2F%2Fclearcode.cc%2Fwelcome&_id=1da79fc743e8bcc4&_idts=1360047661&_idvc=1&_idn=0&_refts=1360047661&_viewts=1360047661&_ref=http%3A%2F%2Fpiwik.org%2Fthank-you-all%2F&pdf=1&qt=1&realp=0&wma=1&dir=1&fla=1&java=1&gears=0&ag=1&cookie=1&res=1680x1050","status": "200","referrer": "http://clearcode.cc/","user_agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.57 Safari/537.17","length": 192,"generation_time_milli": 0.008,"date": "2013-10-10T16:52:00+02:00"} diff --git a/tests/tests.py b/tests/tests.py index e25f3aa..b26248d 100644 --- a/tests/tests.py +++ b/tests/tests.py @@ -196,6 +196,7 @@ def __init__(self): self.track_http_method = True self.seconds_to_add_to_date = 0 self.request_suffix = None + self.use_http_x_forwarded_for = False class Config(object): """Mock configuration.""" @@ -548,6 +549,64 @@ def test_iis_custom_format(): assert hits[2]['full_path'] == u'/hello/world/6,681965' assert hits[2]['user_agent'] == u'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2062.124 Safari/537.36' +def test_nginx_parsing_using_use_http_x_forwarded_for_option_set_to_true(): + """test parsing of nginx_json log with use_http_x_forwarded_for set to true""" + + file_ = 'logs/nginx_json.log' + + # have to override previous globals override for this test + import_logs.config.options.custom_w3c_fields = {} + Recorder.recorders = [] + import_logs.parser = import_logs.Parser() + import_logs.config.format = None + import_logs.config.options.enable_http_redirects = True + import_logs.config.options.enable_http_errors = True + import_logs.config.options.replay_tracking = True + import_logs.config.options.use_http_x_forwarded_for = True + + import_logs.parser.parse(file_) + + hits = [hit.__dict__ 
for hit in Recorder.recorders] + + assert hits[0]['ip'] == u'1.2.3.4' + assert hits[1]['ip'] == u'1.2.3.4' + assert hits[2]['ip'] == u'0:0:0:0:0:ffff:7b2d:4350' + assert hits[3]['ip'] == u'1.2.3.5' + assert hits[4]['ip'] == u'0:0:0:0:0:ffff:7b2d:4351' + assert hits[5]['ip'] == u'4.3.2.1' + assert hits[6]['ip'] == u'0:0:0:0:0:ffff:7b2d:4359' + + assert len(hits) == 7 + +def test_nginx_parsing_using_use_http_x_forwarded_for_option_set_to_false(): + """test parsing of nginx_json log with use_http_x_forwarded_for set to false""" + + file_ = 'logs/nginx_json.log' + + # have to override previous globals override for this test + import_logs.config.options.custom_w3c_fields = {} + Recorder.recorders = [] + import_logs.parser = import_logs.Parser() + import_logs.config.format = None + import_logs.config.options.enable_http_redirects = True + import_logs.config.options.enable_http_errors = True + import_logs.config.options.replay_tracking = True + import_logs.config.options.use_http_x_forwarded_for = False + import_logs.parser.parse(file_) + + print(import_logs.config.options.use_http_x_forwarded_for) + hits = [hit.__dict__ for hit in Recorder.recorders] + + assert hits[0]['ip'] == u'1.2.3.4' + assert hits[1]['ip'] == u'1.2.3.4' + assert hits[2]['ip'] == u'0:0:0:0:0:ffff:7b2d:4350' + assert hits[3]['ip'] == u'1.2.3.5' + assert hits[4]['ip'] == u'0:0:0:0:0:ffff:7b2d:4351' + assert hits[5]['ip'] == u'1.2.3.6' + assert hits[6]['ip'] == u'ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff' + + assert len(hits) == 7 + def test_netscaler_parsing(): """test parsing of netscaler logs (which use extended W3C log format)"""