Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add "--use-http-x-forwarded-for" option for NGINX #277

Open
wants to merge 1 commit into
base: 3.x-dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -177,12 +177,15 @@ As a syslog central server you could use rsyslog or syslog-ng, use relevant part

You can use any log format that this script can handle, like Apache Combined, and Json format which needs less processing.

The script has an option "--use-http-x-forwarded-for" which makes it read the client IP address from the `http_x_forwarded_for` field instead of the `ip` field of an Nginx JSON-formatted log file. If the `http_x_forwarded_for` field is missing or contains "-", the script falls back to the IP address in the `ip` field. This option is useful when the `ip` field does not contain the client's IP address (e.g. when using Nginx as a reverse proxy).

##### Setup Nginx logs

```
http {
...
log_format matomo '{"ip": "$remote_addr",'
log_format matomo '{"ip": "$remote_addr",'
'"http_x_forwarded_for": "$http_x_forwarded_for",'
'"host": "$host",'
'"path": "$request_uri",'
'"status": "$status",'
Expand Down
14 changes: 14 additions & 0 deletions import_logs.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,15 @@ def get(self, key):
# Some ugly patches ...
if key == 'generation_time_milli':
self.json[key] = int(float(self.json[key]) * 1000)
elif key == 'ip':
try:
if config.options.use_http_x_forwarded_for and 'http_x_forwarded_for' in self.json:
ips = self.json['http_x_forwarded_for']
if ips != '-':
return re.search(r'(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}|[0-9A-Fa-f]{1,4}:[0-9A-Fa-f]{1,4}:[0-9A-Fa-f]{1,4}:[0-9A-Fa-f]{1,4}:[0-9A-Fa-f]{1,4}:[0-9A-Fa-f]{1,4}:[0-9A-Fa-f]{1,4}:[0-9A-Fa-f]{1,4})', ips).group()
return self.json[key]
except KeyError:
raise BaseFormatException()
# Patch date format ISO 8601
elif key == 'date':
tz = self.json[key][19:]
Expand Down Expand Up @@ -891,6 +900,11 @@ def _create_parser(self):
default=False,
help="Do not verify the SSL / TLS certificate when contacting the Matomo server. This is the default when running on Python 2.7.8 or older."
)
option_parser.add_option(

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For new versions it will be
parser.add_argument(

'--use-http-x-forwarded-for', dest='use_http_x_forwarded_for',
action='store_true', default=False,
help="Use http_x_forwarded_for for ip in Nginx JSON formatted log file. If http_x_forwarded_for is not present, it will use ip."
)
return option_parser

def _set_date(self, option_attr_name, option, opt_str, value, parser):
Expand Down
5 changes: 5 additions & 0 deletions tests/logs/nginx_json.log
Original file line number Diff line number Diff line change
@@ -1,2 +1,7 @@
{"idsite":1,"ip": "1.2.3.4","host": "www.piwik.org","path": "/piwik.php?action_name=Clearcode%20-%20Web%20and%20Mobile%20Development%20%7C%20Technology%20With%20Passion&idsite=1&rec=1&r=983420&h=17&m=31&s=25&url=http%3A%2F%2Fclearcode.cc%2F&urlref=http%3A%2F%2Fclearcode.cc%2Fwelcome&_id=1da79fc743e8bcc4&_idts=1360047661&_idvc=1&_idn=0&_refts=1360047661&_viewts=1360047661&_ref=http%3A%2F%2Fpiwik.org%2Fthank-you-all%2F&pdf=1&qt=1&realp=0&wma=1&dir=1&fla=1&java=1&gears=0&ag=1&cookie=1&res=1680x1050","status": "200","referrer": "http://clearcode.cc/","user_agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.57 Safari/537.17","length": 192,"generation_time_milli": 0.008,"date": "2013-10-10T16:52:00+02:00"}
{"idsite":1,"ip": "1.2.3.4","host": "www.piwik.org","path": "/piwik.php?action_name=AdviserBrief%20-%20Track%20Your%20Investments%20and%20Plan%20Financial%20Future%20%7C%20Clearcode&idsite=1&rec=1&r=109464&h=17&m=31&s=40&url=http%3A%2F%2Fclearcode.cc%2Fcase%2Fadviserbrief-track-your-investments-and-plan-financial-future%2F&urlref=http%3A%2F%2Fclearcode.cc%2Fwelcome&_id=1da79fc743e8bcc4&_idts=1360047661&_idvc=1&_idn=0&_refts=1360047661&_viewts=1360047661&_ref=http%3A%2F%2Fpiwik.org%2Fthank-you-all%2F&pdf=1&qt=1&realp=0&wma=1&dir=1&fla=1&java=1&gears=0&ag=1&cookie=1&res=1680x1050","status": "200","referrer": "http://clearcode.cc/","user_agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.57 Safari/537.17","length": 192,"generation_time_milli": 0.008,"date": "2013-10-10T16:52:00+02:00"}
{"idsite":1,"ip": "0:0:0:0:0:ffff:7b2d:4350","host": "www.piwik.org","path": "/piwik.php?action_name=Clearcode%20-%20Web%20and%20Mobile%20Development%20%7C%20Technology%20With%20Passion&idsite=1&rec=1&r=983420&h=17&m=31&s=25&url=http%3A%2F%2Fclearcode.cc%2F&urlref=http%3A%2F%2Fclearcode.cc%2Fwelcome&_id=1da79fc743e8bcc4&_idts=1360047661&_idvc=1&_idn=0&_refts=1360047661&_viewts=1360047661&_ref=http%3A%2F%2Fpiwik.org%2Fthank-you-all%2F&pdf=1&qt=1&realp=0&wma=1&dir=1&fla=1&java=1&gears=0&ag=1&cookie=1&res=1680x1050","status": "200","referrer": "http://clearcode.cc/","user_agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.57 Safari/537.17","length": 192,"generation_time_milli": 0.008,"date": "2013-10-10T16:52:00+02:00"}
{"idsite":1,"ip": "1.2.3.5","http_x_forwarded_for": "-","host": "www.piwik.org","path": "/piwik.php?action_name=Clearcode%20-%20Web%20and%20Mobile%20Development%20%7C%20Technology%20With%20Passion&idsite=1&rec=1&r=983420&h=17&m=31&s=25&url=http%3A%2F%2Fclearcode.cc%2F&urlref=http%3A%2F%2Fclearcode.cc%2Fwelcome&_id=1da79fc743e8bcc4&_idts=1360047661&_idvc=1&_idn=0&_refts=1360047661&_viewts=1360047661&_ref=http%3A%2F%2Fpiwik.org%2Fthank-you-all%2F&pdf=1&qt=1&realp=0&wma=1&dir=1&fla=1&java=1&gears=0&ag=1&cookie=1&res=1680x1050","status": "200","referrer": "http://clearcode.cc/","user_agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.57 Safari/537.17","length": 192,"generation_time_milli": 0.008,"date": "2013-10-10T16:52:00+02:00"}
{"idsite":1,"ip": "0:0:0:0:0:ffff:7b2d:4351","http_x_forwarded_for": "-","host": "www.piwik.org","path": "/piwik.php?action_name=Clearcode%20-%20Web%20and%20Mobile%20Development%20%7C%20Technology%20With%20Passion&idsite=1&rec=1&r=983420&h=17&m=31&s=25&url=http%3A%2F%2Fclearcode.cc%2F&urlref=http%3A%2F%2Fclearcode.cc%2Fwelcome&_id=1da79fc743e8bcc4&_idts=1360047661&_idvc=1&_idn=0&_refts=1360047661&_viewts=1360047661&_ref=http%3A%2F%2Fpiwik.org%2Fthank-you-all%2F&pdf=1&qt=1&realp=0&wma=1&dir=1&fla=1&java=1&gears=0&ag=1&cookie=1&res=1680x1050","status": "200","referrer": "http://clearcode.cc/","user_agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.57 Safari/537.17","length": 192,"generation_time_milli": 0.008,"date": "2013-10-10T16:52:00+02:00"}
{"idsite":1,"ip": "1.2.3.6","http_x_forwarded_for": "4.3.2.1 1.1.1.1 2.2.2.2","host": "www.piwik.org","path": "/piwik.php?action_name=Clearcode%20-%20Web%20and%20Mobile%20Development%20%7C%20Technology%20With%20Passion&idsite=1&rec=1&r=983420&h=17&m=31&s=25&url=http%3A%2F%2Fclearcode.cc%2F&urlref=http%3A%2F%2Fclearcode.cc%2Fwelcome&_id=1da79fc743e8bcc4&_idts=1360047661&_idvc=1&_idn=0&_refts=1360047661&_viewts=1360047661&_ref=http%3A%2F%2Fpiwik.org%2Fthank-you-all%2F&pdf=1&qt=1&realp=0&wma=1&dir=1&fla=1&java=1&gears=0&ag=1&cookie=1&res=1680x1050","status": "200","referrer": "http://clearcode.cc/","user_agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.57 Safari/537.17","length": 192,"generation_time_milli": 0.008,"date": "2013-10-10T16:52:00+02:00"}
{"idsite":1,"ip": "ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff","http_x_forwarded_for": "0:0:0:0:0:ffff:7b2d:4359 0:0:0:0:0:0:7b2d:4359","host": "www.piwik.org","path": "/piwik.php?action_name=Clearcode%20-%20Web%20and%20Mobile%20Development%20%7C%20Technology%20With%20Passion&idsite=1&rec=1&r=983420&h=17&m=31&s=25&url=http%3A%2F%2Fclearcode.cc%2F&urlref=http%3A%2F%2Fclearcode.cc%2Fwelcome&_id=1da79fc743e8bcc4&_idts=1360047661&_idvc=1&_idn=0&_refts=1360047661&_viewts=1360047661&_ref=http%3A%2F%2Fpiwik.org%2Fthank-you-all%2F&pdf=1&qt=1&realp=0&wma=1&dir=1&fla=1&java=1&gears=0&ag=1&cookie=1&res=1680x1050","status": "200","referrer": "http://clearcode.cc/","user_agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.57 Safari/537.17","length": 192,"generation_time_milli": 0.008,"date": "2013-10-10T16:52:00+02:00"}
59 changes: 59 additions & 0 deletions tests/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,7 @@ def __init__(self):
self.track_http_method = True
self.seconds_to_add_to_date = 0
self.request_suffix = None
self.use_http_x_forwarded_for = False

class Config(object):
"""Mock configuration."""
Expand Down Expand Up @@ -548,6 +549,64 @@ def test_iis_custom_format():
assert hits[2]['full_path'] == u'/hello/world/6,681965'
assert hits[2]['user_agent'] == u'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2062.124 Safari/537.36'

def test_nginx_parsing_using_use_http_x_forwarded_for_option_set_to_true():
    """Test parsing of nginx_json log with use_http_x_forwarded_for set to true.

    With the option enabled, entries whose `http_x_forwarded_for` field holds
    one or more addresses are expected to report the first of them as `ip`;
    entries where the field is missing or "-" are expected to keep their own
    `ip` value.
    """
    file_ = 'logs/nginx_json.log'

    # Remember the current value so that enabling the option here does not
    # leak into tests that run afterwards.
    previous_value = import_logs.config.options.use_http_x_forwarded_for

    # have to override previous globals override for this test
    import_logs.config.options.custom_w3c_fields = {}
    Recorder.recorders = []
    import_logs.parser = import_logs.Parser()
    import_logs.config.format = None
    import_logs.config.options.enable_http_redirects = True
    import_logs.config.options.enable_http_errors = True
    import_logs.config.options.replay_tracking = True
    import_logs.config.options.use_http_x_forwarded_for = True

    try:
        import_logs.parser.parse(file_)
        hits = [hit.__dict__ for hit in Recorder.recorders]
    finally:
        # Restore the global option even if parsing fails.
        import_logs.config.options.use_http_x_forwarded_for = previous_value

    expected_ips = [
        u'1.2.3.4',
        u'1.2.3.4',
        u'0:0:0:0:0:ffff:7b2d:4350',
        u'1.2.3.5',
        u'0:0:0:0:0:ffff:7b2d:4351',
        u'4.3.2.1',
        u'0:0:0:0:0:ffff:7b2d:4359',
    ]

    # A single list comparison checks both the number of hits and every ip,
    # so a missing hit fails as an assertion instead of an IndexError.
    assert [hit['ip'] for hit in hits] == expected_ips

def test_nginx_parsing_using_use_http_x_forwarded_for_option_set_to_false():
    """Test parsing of nginx_json log with use_http_x_forwarded_for set to false.

    With the option disabled, every entry is expected to report the value of
    its own `ip` field, regardless of what `http_x_forwarded_for` contains.
    """
    file_ = 'logs/nginx_json.log'

    # have to override previous globals override for this test
    import_logs.config.options.custom_w3c_fields = {}
    Recorder.recorders = []
    import_logs.parser = import_logs.Parser()
    import_logs.config.format = None
    import_logs.config.options.enable_http_redirects = True
    import_logs.config.options.enable_http_errors = True
    import_logs.config.options.replay_tracking = True
    import_logs.config.options.use_http_x_forwarded_for = False

    import_logs.parser.parse(file_)

    hits = [hit.__dict__ for hit in Recorder.recorders]

    expected_ips = [
        u'1.2.3.4',
        u'1.2.3.4',
        u'0:0:0:0:0:ffff:7b2d:4350',
        u'1.2.3.5',
        u'0:0:0:0:0:ffff:7b2d:4351',
        u'1.2.3.6',
        u'ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff',
    ]

    # A single list comparison checks both the number of hits and every ip,
    # so a missing hit fails as an assertion instead of an IndexError.
    assert [hit['ip'] for hit in hits] == expected_ips

def test_netscaler_parsing():
"""test parsing of netscaler logs (which use extended W3C log format)"""

Expand Down