Improve reproducibility reporting and small fixes.

- Reproducibility reporting improvements - Docker compose improvements - Attempt at log collection
DistriNet · Mar 21, 2024 · 127669c · 127669c
1 parent 8efbb4c
commit 127669c
Show file tree

Hide file tree

Showing 20 changed files with 234 additions and 51 deletions.
diff --git a/analysis/plot_factory.py b/analysis/plot_factory.py
@@ -105,17 +105,24 @@ def __add_outcome_info(params: PlotParameters, docs: dict):
         target_mech_id = params.target_mech_id if params.target_mech_id else params.mech_group
 
         for doc in docs:
+            # Backwards compatibility
             requests_to_target = list(filter(lambda x: f'/report/?leak={target_mech_id}' in x['url'], doc['results']['requests']))
-            requests_to_baseline = list(filter(lambda x: '/report/?leak=baseline' in x['url'], doc['results']['requests']))
+            # New way
+            if  [req_var for req_var in doc['results']['req_vars'] if req_var['var'] == 'reproduced' and req_var['val'] == 'OK'] or \
+                [log_var for log_var in doc['results']['log_vars'] if log_var['var'] == 'reproduced' and log_var['val'] == 'OK']:
+                reproduced = True
+            else:
+                reproduced = False
+
             new_doc = {
                 'revision_number': doc['revision_number'],
                 'browser_version': int(doc['browser_version'].split('.')[0]),
                 'browser_version_str': doc['browser_version'].split('.')[0]
             }
-            if doc['dirty'] or len(requests_to_baseline) == 0:
+            if doc['dirty']:
                 new_doc['outcome'] = 'Error'
                 docs_with_outcome.append(new_doc)
-            elif len(requests_to_target) > 0:
+            elif len(requests_to_target) > 0 or reproduced:
                 new_doc['outcome'] = 'Reproduced'
                 docs_with_outcome.append(new_doc)
             else:

diff --git a/bci/browser/automation/terminal.py b/bci/browser/automation/terminal.py
@@ -3,8 +3,7 @@
 import subprocess
 import time
 
-
-logger = logging.getLogger('bci')
+logger = logging.getLogger(__name__)
 
 
 class TerminalAutomation:
@@ -14,11 +13,12 @@ def run(url: str, args: list[str], seconds_per_visit: int):
         logger.debug("Starting browser process...")
         args.append(url)
         logger.debug(f'Command string: \'{" ".join(args)}\'')
-        proc = subprocess.Popen(
-            args,
-            stdout=subprocess.PIPE,
-            stderr=subprocess.PIPE
-        )
+        with open('/tmp/browser.log', 'a') as file:
+            proc = subprocess.Popen(
+                args,
+                stdout=file,
+                stderr=file
+            )
 
         time.sleep(seconds_per_visit)
 

diff --git a/bci/browser/configuration/chromium.py b/bci/browser/configuration/chromium.py
@@ -11,7 +11,6 @@
 SELENIUM_USED_FLAGS = [
     '--use-fake-ui-for-media-stream',
     '--ignore-certificate-errors',
-    '--use-fake-ui-for-media-stream',
     '--disable-background-networking',
     '--disable-client-side-phishing-detection',
     '--disable-component-update',
@@ -22,15 +21,12 @@
     '--disable-prompt-on-repost',
     '--disable-sync',
     '--disable-web-resources',
-    '--enable-logging',
-    '--log-level=0',
     '--metrics-recording-only',
     '--no-first-run',
     '--password-store=basic',
     '--safebrowsing-disable-auto-update',
     '--use-mock-keychain',
     '--no-sandbox',
-    '--ignore-certificate-errors'
 ]
 
 
@@ -41,6 +37,14 @@ def _get_terminal_args(self) -> list[str]:
 
         args = [self._get_executable_file_path()]
         args.append(f'--user-data-dir={self._profile_path}')
+        # Enable logging
+        args.append('--enable-logging')
+        args.append('--v=1')
+        args.append('--log-level=0')
+        # Headless changed from version +/- 110 onwards: https://developer.chrome.com/docs/chromium/new-headless
+        # Using the `--headless` flag will crash the browser for these later versions.
+        # Also see: https://github.com/DistriNet/BugHog/issues/12
+        # args.append('--headless=new')  # From Chrome
 
         if 'btpc' in self.browser_config.browser_setting:
             # This is handled in the profile folder

diff --git a/bci/database/mongo/mongodb.py b/bci/database/mongo/mongodb.py
@@ -98,7 +98,7 @@ def store_result(self, result: TestResult):
             'revision_id': result.params.state.revision_id,
             'revision_number': result.params.state.revision_number,
             'mech_group': result.params.mech_group,
-            'results': result.requests,
+            'results': result.data,
             'dirty': result.is_dirty,
             'ts': str(datetime.now(timezone.utc).replace(microsecond=0))
         }
@@ -119,13 +119,15 @@ def get_result(self, params: TestParameters) -> TestResult:
         collection = self.__get_data_collection(params)
         query = self.__to_query(params)
         document = collection.find_one(query)
-        return TestResult(
-            params,
-            document['browser_version'],
-            document['binary_origin'],
-            requests=document['results']['requests'] if 'requests' in document['results'] else None,
-            is_dirty=document['dirty']
-        )
+        if document:
+            return params.create_test_result_with(
+                document['browser_version'],
+                document['binary_origin'],
+                document['results'],
+                document['dirty']
+            )
+        else:
+            logger.error(f'Could not find document for query {query}')
 
     def has_result(self, params: TestParameters) -> bool:
         collection = self.__get_data_collection(params)

diff --git a/bci/evaluations/collector.py b/bci/evaluations/collector.py
@@ -0,0 +1,48 @@
+from abc import abstractmethod
+from enum import Enum
+import logging
+
+from bci.evaluations.collectors.base import BaseCollector
+
+from .collectors.requests import RequestCollector
+from .collectors.logs import LogCollector
+
+logger = logging.getLogger(__name__)
+
+
+class Type(Enum):  # Kinds of result collector that `Collector` can be asked to set up.
+    REQUESTS = 1  # Selects RequestCollector
+    LOGS = 2  # Selects LogCollector
+
+
+class Collector:
+    '''Facade that starts, stops and merges the data of the selected result collectors.'''
+
+    def __init__(self, types: list[Type]) -> None:
+        '''Instantiates one collector per requested `Type` (requests first, then logs).'''
+        self.collectors: list[BaseCollector] = []
+        if Type.REQUESTS in types:
+            self.collectors.append(RequestCollector())
+        if Type.LOGS in types:
+            self.collectors.append(LogCollector())
+        logger.debug(f'Using {len(self.collectors)} result collectors')
+
+    def start(self):
+        for collector in self.collectors:
+            collector.start()
+
+    def stop(self):
+        for collector in self.collectors:
+            collector.stop()
+
+    def collect_results(self) -> dict:
+        '''Merges the `data` dict of every collector; on key clashes the later collector wins.'''
+        all_data = {}
+        for collector in self.collectors:
+            all_data.update(collector.data)
+        logger.debug(f'Collected data: {all_data}')
+        return all_data
+
+
+
+
diff --git a/bci/evaluations/collectors/base.py b/bci/evaluations/collectors/base.py
@@ -0,0 +1,42 @@
+import re
+from abc import abstractmethod
+
+
+class BaseCollector:
+    '''Common base for result collectors; collected results are exposed through `self.data`.'''
+
+    def __init__(self) -> None:
+        self.data = {}
+
+    @abstractmethod
+    def start(self):
+        '''Begins collecting; subclasses are expected to override this.'''
+        pass
+
+    @abstractmethod
+    def stop(self):
+        '''Finishes collecting; subclasses are expected to override this.'''
+        pass
+
+    @staticmethod
+    def _parse_bughog_variables(raw_log_lines: list[str], regex) -> list[dict[str, str]]:
+        '''
+        Parses the given `raw_log_lines` for matches against the given `regex`.
+
+        `regex` is expected to define two capture groups: the variable name, then its value.
+        Returns the unique `{'var': ..., 'val': ...}` entries in order of first appearance.
+        '''
+        data = []
+        for line in raw_log_lines:
+            # `findall` yields nothing for non-matching lines, so no separate `search` is needed.
+            for match in re.findall(regex, line):
+                BaseCollector._add_val_var_pair(match[0], match[1], data)
+        return data
+
+    @staticmethod
+    def _add_val_var_pair(var: str, val: str, data: list) -> list:
+        '''Appends a var/val entry to `data` unless the same pair is present; returns `data`.'''
+        # `data` is returned on both paths so the `-> list` annotation actually holds.
+        if not any(entry['var'] == var and entry['val'] == val for entry in data):
+            data.append({'var': var, 'val': val})
+        return data
diff --git a/bci/evaluations/collectors/logs.py b/bci/evaluations/collectors/logs.py
@@ -0,0 +1,21 @@
+from .base import BaseCollector
+
+
+class LogCollector(BaseCollector):
+    '''Collects `+++bughog_<var>=<val>+++` markers from the log file at /tmp/browser.log.'''
+
+    def __init__(self) -> None:
+        super().__init__()
+        self.data['log_vars'] = []
+
+    def start(self):
+        # Opening in 'w' mode creates the file and truncates any existing log content.
+        with open('/tmp/browser.log', 'w'):
+            pass
+
+    def stop(self):
+        regex = r'\+\+\+bughog_(.+)=(.+)\+\+\+'
+        # Read-only (the old trailing `write('')` was a no-op); undecodable bytes are replaced.
+        with open('/tmp/browser.log', 'r', errors='replace') as log_file:
+            log_lines = log_file.readlines()
+        self.data['log_vars'] = self._parse_bughog_variables(log_lines, regex)
diff --git a/bci/http/collector.py → bci/evaluations/collectors/requests.py b/bci/http/collector.py → bci/evaluations/collectors/requests.py
@@ -1,10 +1,11 @@
-
 import http.server
 import json
 import logging
 import socketserver
 from threading import Thread
 
+from .base import BaseCollector
+
 logger = logging.getLogger(__name__)
 
 PORT = 5001
@@ -24,7 +25,7 @@ def log_message(self, *_):
 
         logger.debug(f'Received request with body: {self.request_body}')
         request_body = json.loads(self.request_body)
-        self.collector.requests.append(request_body)
+        self.collector.data['requests'].append(request_body)
 
     def do_POST(self):
         content_length = int(self.headers['Content-Length'])
@@ -35,12 +36,14 @@ def do_POST(self):
         self.wfile.write(b'Post request received')
 
 
-class Collector:
+class RequestCollector(BaseCollector):
 
     def __init__(self):
+        super().__init__()
         self.__httpd = None
         self.__thread = None
-        self.requests = []
+        self.data['requests'] = []
+        self.data['req_vars'] = []
 
     def start(self):
         logger.debug('Starting collector...')
@@ -51,8 +54,12 @@ def start(self):
         self.__thread.start()
 
     def stop(self):
-        logger.debug('Stopping collector...')
+        data = []
+        regex = r'bughog_(.+)=(.+)'
         if self.__httpd:
             self.__httpd.shutdown()
             self.__thread.join()
             self.__httpd.server_close()
+        request_urls = [request['url'] for request in self.data['requests']]
+        data = self._parse_bughog_variables(request_urls, regex)
+        self.data['req_vars'] = data
diff --git a/bci/evaluations/custom/custom_evaluation.py b/bci/evaluations/custom/custom_evaluation.py
@@ -1,14 +1,14 @@
 import logging
 import os
 from unittest import TestResult
-from bci.browser.configuration.browser import Browser
 
+from bci.browser.configuration.browser import Browser
 from bci.configuration import Global
+from bci.evaluations.collector import Collector
+from bci.evaluations.collector import Type
 from bci.evaluations.custom.custom_mongodb import CustomMongoDB
 from bci.evaluations.evaluation_framework import EvaluationFramework
 from bci.evaluations.logic import TestParameters
-from bci.http.collector import Collector
-
 
 logger = logging.getLogger(__name__)
 
@@ -45,7 +45,7 @@ def initialize_tests_and_url_queues(self):
                         if os.path.exists(main_folder_path):
                             self.tests_per_project[project_name][test_name] = [
                                 f'https://{domain}/{project_name}/{test_name}/main',
-                                'https://a.test/report/?leak=baseline'
+                                'https://a.test/report/?bughog_sanity_check=OK'
                             ]
                             self.tests[test_name] = self.tests_per_project[project_name][test_name]
 
@@ -54,7 +54,7 @@ def perform_specific_evaluation(self, browser: Browser, params: TestParameters)
         browser_version = browser.version
         binary_origin = browser.get_binary_origin()
 
-        collector = Collector()
+        collector = Collector([Type.REQUESTS, Type.LOGS])
         collector.start()
 
         is_dirty = False
@@ -70,13 +70,17 @@ def perform_specific_evaluation(self, browser: Browser, params: TestParameters)
             is_dirty = True
         finally:
             collector.stop()
+            data = collector.collect_results()
             if not is_dirty:
-                if len([request for request in collector.requests if 'report/?leak=baseline' in request['url']]) == 0:
+                # New way to perform sanity check
+                if [var_entry for var_entry in data['req_vars'] if var_entry['var'] == 'sanity_check' and var_entry['val'] == 'OK']:
+                    pass
+                # Old way for backwards compatibility
+                elif [request for request in data['requests'] if 'report/?leak=baseline' in request['url']]:
+                    pass
+                else:
                     is_dirty = True
-            result = {
-                'requests': collector.requests
-            }
-        return params.create_test_result_with(browser_version, binary_origin, result, is_dirty)
+        return params.create_test_result_with(browser_version, binary_origin, data, is_dirty)
 
     def get_mech_groups(self, project=None):
         if project:

diff --git a/bci/evaluations/evaluation_framework.py b/bci/evaluations/evaluation_framework.py
@@ -8,7 +8,7 @@
 from bci.database.mongo.mongodb import MongoDB
 from bci.evaluations.logic import TestParameters, TestResult, WorkerParameters
 
-logger = logging.getLogger('bci')
+logger = logging.getLogger(__name__)
 
 
 class EvaluationFramework(ABC):

diff --git a/bci/evaluations/logic.py b/bci/evaluations/logic.py
@@ -223,12 +223,12 @@ class TestParameters:
     mech_group: str
     database_collection: str
 
-    def create_test_result_with(self, browser_version: str, binary_origin: str, result: dict, dirty: bool) -> TestResult:
+    def create_test_result_with(self, browser_version: str, binary_origin: str, data: dict, dirty: bool) -> TestResult:
         return TestResult(
             self,
             browser_version,
             binary_origin,
-            result,
+            data,
             dirty
         )
 
@@ -238,7 +238,7 @@ class TestResult:
     params: TestParameters
     browser_version: str
     binary_origin: str
-    requests: list | None = None
+    data: dict
     is_dirty: bool = False
     driver_version: str | None = None
 
@@ -252,6 +252,13 @@ def padded_browser_version(self):
             padded_version.append('0' * (padding_target - len(sub)) + sub)
         return ".".join(padded_version)
 
+    @property
+    def reproduced(self):
+        '''Whether a request or log variable `reproduced` with value `OK` was collected.'''
+        entry_if_reproduced = {'var': 'reproduced', 'val': 'OK'}  # 'var' is the name, 'val' the value
+        collected = self.data['req_vars'] + self.data['log_vars']
+        return entry_if_reproduced in collected
+
 
 @dataclass(frozen=True)
 class PlotParameters: