Skip to content

Commit

Permalink
Store all requests to pages hosted by BugHog and minor fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
GJFR committed Oct 24, 2024
1 parent 26f822e commit 7338ada
Show file tree
Hide file tree
Showing 6 changed files with 93 additions and 47 deletions.
21 changes: 5 additions & 16 deletions bci/evaluations/collectors/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,39 +3,28 @@


class BaseCollector:

def __init__(self) -> None:
self.data = {}

@abstractmethod
def start():
def start(self):
pass

@abstractmethod
def stop():
def stop(self):
pass

@staticmethod
def _parse_bughog_variables(raw_log_lines: list[str], regex) -> list[tuple[str, str]]:
'''
"""
Parses the given `raw_log_lines` for matches against the given `regex`.
'''
"""
data = []
regex_match_lists = [re.findall(regex, line) for line in raw_log_lines if re.search(regex, line)]
# Flatten list
regex_matches = [regex_match for regex_match_list in regex_match_lists for regex_match in regex_match_list]
for match in regex_matches:
var = match[0]
val = match[1]
BaseCollector._add_val_var_pair(var, val, data)
data.append({'var': var, 'val': val})
return data

@staticmethod
def _add_val_var_pair(var: str, val: str, data: list) -> list:
for entry in data:
if entry['var'] == var and entry['val'] == val:
return data
data.append({
'var': var,
'val': val
})
41 changes: 30 additions & 11 deletions bci/evaluations/collectors/requests.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import http.server
import json
import logging
import socket
import socketserver
from threading import Thread

Expand All @@ -12,32 +13,49 @@


class RequestHandler(http.server.BaseHTTPRequestHandler):
"""
Handles requests sent to the collector.
"""

def __init__(self, collector, request, client_address, server) -> None:
self.collector = collector
self.request_body = None
super().__init__(request, client_address, server)

def log_message(self, *_):
def log_message(self, format: str, *args) -> None:
"""
Handle and store the received body.
"""
if not self.request_body:
logger.debug('Received request without body')
return

logger.debug(f'Received request with body: {self.request_body}')
request_body = json.loads(self.request_body)
logger.debug(f'Received request information with {len(request_body.keys())} attributes.')
self.collector.data['requests'].append(request_body)

def do_POST(self):
content_length = int(self.headers['Content-Length'])
body = self.rfile.read(content_length)
self.request_body = body.decode('utf-8')
self.send_response(200)
self.end_headers()
self.wfile.write(b'Post request received')
"""
This function is called upon receiving a POST request.
It sets `self.request_body`, which will be parsed later by `self.log_message`.
"""
# We have to read the body before it is discarded once the connection closure is confirmed.
if self.headers['Content-Length'] is not None:
content_length = int(self.headers['Content-Length'])
body = self.rfile.read(content_length)
self.request_body = body.decode('utf-8')

# Because of our hacky NGINX methodology, we have to allow premature socket closings.
try:
self.send_response(200)
self.send_header('Content-Type', 'text/plain; charset=utf-8')
self.end_headers()
self.wfile.write('Post request received!\n'.encode('utf-8'))
except socket.error:
logger.debug('Socket closed by NGINX (expected)')

class RequestCollector(BaseCollector):

class RequestCollector(BaseCollector):
def __init__(self):
super().__init__()
self.__httpd = None
Expand All @@ -48,7 +66,7 @@ def __init__(self):
def start(self):
logger.debug('Starting collector...')
socketserver.TCPServer.allow_reuse_address = True
self.__httpd = socketserver.TCPServer(("", PORT), lambda *args, **kwargs: RequestHandler(self, *args, **kwargs))
self.__httpd = socketserver.TCPServer(('', PORT), lambda *args, **kwargs: RequestHandler(self, *args, **kwargs))
# self.__httpd.allow_reuse_address = True
self.__thread = Thread(target=self.__httpd.serve_forever)
self.__thread.start()
Expand All @@ -58,7 +76,8 @@ def stop(self):
regex = r'bughog_(.+)=(.+)'
if self.__httpd:
self.__httpd.shutdown()
self.__thread.join()
if self.__thread:
self.__thread.join()
self.__httpd.server_close()
request_urls = [request['url'] for request in self.data['requests'] if 'url' in request]
data = self._parse_bughog_variables(request_urls, regex)
Expand Down
25 changes: 9 additions & 16 deletions bci/web/blueprints/experiments.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,17 +39,10 @@ def index():

@exp.route("/report/", methods=["GET", "POST"])
def report():
leak = request.args.get("leak")
if leak is not None:
resp = make_response(
render_template("cookies.html", title="Report", to_report=leak)
)
else:
resp = make_response(
render_template(
"cookies.html", title="Report", to_report="Nothing to report"
)
)
get_params = [item for item in get_all_GET_parameters(request).items()]
resp = make_response(
render_template("cookies.html", title="Report", get_params=get_params)
)

cookie_exp_date = datetime.datetime.now() + datetime.timedelta(weeks=4)
resp.set_cookie("generic", "1", expires=cookie_exp_date)
Expand Down Expand Up @@ -86,7 +79,7 @@ def report_leak_if_using_http(target_scheme):
Triggers a redirect to /report/ if the request was received over the specified `target_scheme`.
"""
used_scheme = request.headers.get("X-Forwarded-Proto")
params = get_all_bughog_GET_parameters(request)
params = get_all_GET_parameters(request)
if used_scheme == target_scheme:
return "Redirect", 307, {"Location": url_for("experiments.report", **params)}
else:
Expand All @@ -101,7 +94,7 @@ def report_leak_if_present(expected_header_name: str):
if expected_header_name not in request.headers:
return f"Header {expected_header_name} not found", 200, {"Allow-CSP-From": "*"}

params = get_all_bughog_GET_parameters(request)
params = get_all_GET_parameters(request)
return (
"Redirect",
307,
Expand All @@ -126,7 +119,7 @@ def report_leak_if_contains(expected_header_name: str, expected_header_value: st
{"Allow-CSP-From": "*"},
)

params = get_all_bughog_GET_parameters(request)
params = get_all_GET_parameters(request)
return (
"Redirect",
307,
Expand All @@ -137,5 +130,5 @@ def report_leak_if_contains(expected_header_name: str, expected_header_value: st
)


def get_all_bughog_GET_parameters(request):
return {k: v for k, v in request.args.items() if k.startswith("bughog_")}
def get_all_GET_parameters(request):
return {k: v for k, v in request.args.items()}
13 changes: 9 additions & 4 deletions bci/web/templates/cookies.html
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,15 @@

{% block content %}

{% if to_report %}
{% if get_params %}
<h2>Reported: </h2>

<p>{{ to_report }}</p>
<ul>
{% for get_param in get_params %}
<li>{{ get_param[0] }}: {{ get_param[1] }}</li>
{% endfor %}
</ul>
{% else %}
No GET parameters.
{% endif %}

<h2>Cookies</h2>
Expand All @@ -20,4 +25,4 @@ <h2>Cookies</h2>
elem.appendChild(cookie);
}
</script>
{% endblock %}
{% endblock %}
10 changes: 10 additions & 0 deletions nginx/config/experiments.conf
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
access_log /logs/nginx-access-poc.log default_format;

location /res/ {
include /etc/nginx/config/notify_collector.conf;
error_page 504 @resources;
}

location @resources {
root /www/data;
}

Expand Down Expand Up @@ -48,6 +53,11 @@ location ~ ^/(.+)/(.+)/(.+)/$ {
}

location ~ ^/(.+)/(.+)/(.+)$ {
include /etc/nginx/config/notify_collector.conf;
error_page 504 @experiment;
}

location @experiment {
rewrite ^/(.+)$ /$1/;
}

Expand Down
30 changes: 30 additions & 0 deletions nginx/config/notify_collector.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# We want to notify worker-specific request collectors of every request to our experiment server.
# Hacky solution: force the proxied notification to time out (504) and rely on the including location's `error_page 504` fallback to serve the experiment page.
# So:
# 1. NGINX will send request to request collector of worker.
# 2. NGINX will serve the experiment page.

proxy_pass http://$remote_addr:5001/report/;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;

proxy_method POST;
proxy_set_header Content-Type "application/json";

set $request_body_data '';
if ($request_body) {
set $request_body_data "$request_body";
}

set $url '"url": "${scheme}://${host}${request_uri}"';
set $method '"method": "$request_method"';
set $content '"content": "${request_body_data}"';
set $report '{${url}, ${method}, ${content}}';
proxy_set_body $report;

# We don't need any response; we merely want to notify the collector.
proxy_connect_timeout 2s;
proxy_send_timeout 2s;
proxy_read_timeout 0s; # Force a 504 by setting timeout to 0

0 comments on commit 7338ada

Please sign in to comment.