Skip to content

Commit

Permalink
Browser binary caching and evaluation queueing (#33)
Browse files (browse the repository at this point in the history)
- Users can indicate whether and how many binaries should be cached
locally in MongoDB.
- Multiple evaluations can be queued, where each evaluation sequence is
decided independently.
- Removed unused code and dependencies.
- Updated dependencies and images.
- Various bug fixes and improvements.
  • Loading branch information
GJFR authored Oct 23, 2024
2 parents 2fa28b5 + 78cc00c commit 26f822e
Show file tree
Hide file tree
Showing 73 changed files with 2,143 additions and 2,974 deletions.
9 changes: 9 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,13 @@
"test_*.py"
],
"python.testing.unittestEnabled": true,
"[python]": {
"editor.defaultFormatter": "charliermarsh.ruff",
"editor.rulers": [100, 120],
"editor.formatOnSave": true,
"editor.codeActionsOnSave": {
"source.organizeImports": "explicit"
},
"editor.formatOnType": true
},
}
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ COPY /bci/web/vue ./
RUN npm run build


FROM openresty/openresty:1.25.3.1-3-bullseye AS nginx
FROM openresty/openresty:1.27.1.1-bullseye AS nginx
COPY ./nginx/start.sh /usr/local/bin/
COPY ./nginx/config /etc/nginx/config
COPY --from=ui-build-stage /app/dist /www/data
Expand Down
2 changes: 1 addition & 1 deletion bci/browser/binary/artisanal_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def get_artisanal_binaries_list(self) -> list:
return sorted(self.meta_info, key=lambda i: int(i["id"]))

def has_artisanal_binary_for(self, state: State) -> bool:
return len(list(filter(lambda x: x['id'] == state.revision_number, self.meta_info))) > 0
return len(list(filter(lambda x: x['id'] == state.revision_nb, self.meta_info))) > 0

def add_new_subfolders(self, subfolders):
logger.info("Adding new subfolders to metadata")
Expand Down
49 changes: 28 additions & 21 deletions bci/browser/binary/binary.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,17 @@
import logging
import os
from abc import abstractmethod
from typing import Optional

from bci import util
from bci.browser.binary.artisanal_manager import ArtisanalBuildManager
from bci.database.mongo.binary_cache import BinaryCache
from bci.version_control.states.state import State

logger = logging.getLogger(__name__)


class Binary:

def __init__(self, state: State):
self.state = state
self.__version = None
Expand Down Expand Up @@ -40,19 +41,23 @@ def bin_folder_path(self) -> str:

@property
def origin(self) -> str:
if 'artisanal' in self.get_bin_path():
bin_path = self.get_bin_path()
if bin_path is None:
raise AttributeError('Binary path is not available')

if 'artisanal' in bin_path:
return 'artisanal'
elif 'downloaded' in self.get_bin_path():
elif 'downloaded' in bin_path:
return 'downloaded'
else:
raise ValueError(f'Unknown binary origin for path \'{self.get_bin_path()}\'')
raise AttributeError(f"Unknown binary origin for path '{self.get_bin_path()}'")

@staticmethod
def list_downloaded_binaries(bin_folder_path: str) -> list[dict[str, str]]:
binaries = []
for subfolder_path in os.listdir(os.path.join(bin_folder_path, "downloaded")):
for subfolder_path in os.listdir(os.path.join(bin_folder_path, 'downloaded')):
bin_entry = {}
bin_entry["id"] = subfolder_path
bin_entry['id'] = subfolder_path
binaries.append(bin_entry)
return binaries

Expand All @@ -67,17 +72,24 @@ def get_artisanal_manager(bin_folder_path: str, executable_name: str) -> Artisan
def fetch_binary(self):
    """
    Make sure the binary for this state is in place.

    Sources are tried in a fixed order: a binary that is already built
    locally, then the binary cache, then an online download. After a
    download, the binary files are passed to the binary cache for storage.

    Raises BuildNotAvailableError when the binary is neither built, nor
    cached, nor available online.
    """
    # 1. Already built locally: nothing to fetch.
    if self.is_built():
        logger.info(f'Binary for {self.state.index} is already in place')
        return

    # 2. The binary cache was able to provide the files.
    if BinaryCache.fetch_binary_files(self.get_potential_bin_path(), self.state):
        logger.info(f'Binary for {self.state.index} fetched from cache')
        return

    # 3. Only a download is left; give up if that is not possible either.
    if not self.is_available_online():
        raise BuildNotAvailableError(self.browser_name, self.state)

    self.download_binary()
    logger.info(f'Binary for {self.state.index} downloaded')
    # Hand the freshly downloaded files to the binary cache.
    BinaryCache.store_binary_files(self.get_potential_bin_path(), self.state)

def is_available(self):
'''
"""
Returns True if the binary is available either locally or online.
'''
"""
return self.is_available_locally() or self.is_available_online()

def is_available_locally(self):
Expand All @@ -95,7 +107,7 @@ def is_built(self):
bin_path = self.get_bin_path()
return bin_path is not None

def get_bin_path(self):
def get_bin_path(self) -> Optional[str]:
"""
Returns path to binary, only if the binary is available locally. Otherwise it returns None.
"""
Expand All @@ -112,8 +124,8 @@ def get_potential_bin_path(self, artisanal=False):
Returns path to potential binary. It does not guarantee whether the binary is available locally.
"""
if artisanal:
return os.path.join(self.bin_folder_path, "artisanal", self.state.name, self.executable_name)
return os.path.join(self.bin_folder_path, "downloaded", self.state.name, self.executable_name)
return os.path.join(self.bin_folder_path, 'artisanal', self.state.name, self.executable_name)
return os.path.join(self.bin_folder_path, 'downloaded', self.state.name, self.executable_name)

def get_bin_folder_path(self):
path_downloaded = self.get_potential_bin_folder_path()
Expand All @@ -126,25 +138,20 @@ def get_bin_folder_path(self):

def get_potential_bin_folder_path(self, artisanal=False):
if artisanal:
return os.path.join(self.bin_folder_path, "artisanal", self.state.name)
return os.path.join(self.bin_folder_path, "downloaded", self.state.name)
return os.path.join(self.bin_folder_path, 'artisanal', self.state.name)
return os.path.join(self.bin_folder_path, 'downloaded', self.state.name)

def remove_bin_folder(self):
path = self.get_bin_folder_path()
if path and "artisanal" not in path:
if path and 'artisanal' not in path:
if not util.rmtree(path):
logger.error("Could not remove folder '%s'" % path)

@abstractmethod
def get_driver_version(self, browser_version):
pass

@abstractmethod
def _get_version(self):
def _get_version(self) -> str:
pass


class BuildNotAvailableError(Exception):

def __init__(self, browser_name, build_state):
super().__init__("Browser build not available: %s (%s)" % (browser_name, build_state))
super().__init__('Browser build not available: %s (%s)' % (browser_name, build_state))
4 changes: 3 additions & 1 deletion bci/browser/binary/factory.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from typing import Type

from bci.browser.binary.binary import Binary
from bci.browser.binary.vendors.chromium import ChromiumBinary
from bci.browser.binary.vendors.firefox import FirefoxBinary
Expand Down Expand Up @@ -36,7 +38,7 @@ def get_binary(state: State) -> Binary:
return __get_object(state)


def __get_class(browser_name: str) -> Binary.__class__:
def __get_class(browser_name: str) -> Type[Binary]:
match browser_name:
case 'chromium':
return ChromiumBinary
Expand Down
21 changes: 5 additions & 16 deletions bci/browser/binary/vendors/chromium.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from bci.browser.binary.binary import Binary
from bci.version_control.states.state import State

logger = logging.getLogger('bci')
logger = logging.getLogger(__name__)

EXECUTABLE_NAME = 'chrome'
BIN_FOLDER_PATH = '/app/browser/binaries/chromium'
Expand Down Expand Up @@ -74,27 +74,16 @@ def download_binary(self):
shutil.rmtree(os.path.dirname(zip_file_path))

def _get_version(self) -> str:
bin_path = self.get_bin_path()
command = "./chrome --version"
output = cli.execute_and_return_output(command, cwd=os.path.dirname(bin_path))
if bin_path := self.get_bin_path():
output = cli.execute_and_return_output(command, cwd=os.path.dirname(bin_path))
else:
raise AttributeError(f'Could not get binary path for {self.state}')
match = re.match(r'Chromium (?P<version>[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+)', output)
if match:
return match.group("version")
raise AttributeError("Could not determine version of binary at '%s'. Version output: %s" % (bin_path, output))

def get_driver_path(self, full_browser_version):
driver_version = self.get_driver_version(full_browser_version)
driver_path = os.path.join(DRIVER_FOLDER_PATH, driver_version)
if os.path.exists(driver_path):
return driver_path
raise AttributeError("Could not find appropriate driver for Chromium %s" % full_browser_version)

def get_driver_version(self, browser_version):
short_browser_version = browser_version.split('.')[0]
if short_browser_version not in self.browser_version_to_driver_version.keys():
raise AttributeError("Could not determine driver version associated with Chromium version %s" % browser_version)
return self.browser_version_to_driver_version[short_browser_version]

@staticmethod
def list_downloaded_binaries() -> list[dict[str, str]]:
    """
    List the downloaded binaries for Chromium.

    Delegates to Binary.list_downloaded_binaries, pointing it at the
    Chromium binary folder (BIN_FOLDER_PATH), and returns its result as is.
    """
    downloaded_entries = Binary.list_downloaded_binaries(BIN_FOLDER_PATH)
    return downloaded_entries
Expand Down
2 changes: 0 additions & 2 deletions bci/browser/binary/vendors/firefox.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,6 @@
from bci import cli, util
from bci.browser.binary.artisanal_manager import ArtisanalBuildManager
from bci.browser.binary.binary import Binary
from bci.version_control.states.revisions.firefox import (BINARY_AVAILABILITY_MAPPING,
REVISION_NUMBER_MAPPING)
from bci.version_control.states.state import State

logger = logging.getLogger('bci')
Expand Down
49 changes: 23 additions & 26 deletions bci/configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,12 @@
import sys

import bci.database.mongo.container as container
from bci.evaluations.logic import DatabaseConnectionParameters
from bci.evaluations.logic import DatabaseParameters

logger = logging.getLogger(__name__)


class Global:

custom_page_folder = '/app/experiments/pages'

@staticmethod
Expand All @@ -25,7 +24,7 @@ def get_browser_config_class(browser: str):
case 'firefox':
return Firefox
case _:
raise ValueError(f'Invalid browser \'{browser}\'')
raise ValueError(f"Invalid browser '{browser}'")

@staticmethod
def get_available_domains() -> list[str]:
Expand All @@ -44,7 +43,9 @@ def check_required_env_parameters() -> bool:
fatal = False
# HOST_PWD
if (host_pwd := os.getenv('HOST_PWD')) in ['', None]:
logger.fatal('The "HOST_PWD" variable is not set. If you\'re using sudo, you might have to pass it explicitly, for example "sudo HOST_PWD=$PWD docker compose up".')
logger.fatal(
'The "HOST_PWD" variable is not set. If you\'re using sudo, you might have to pass it explicitly, for example "sudo HOST_PWD=$PWD docker compose up".'
)
fatal = True
else:
logger.debug(f'HOST_PWD={host_pwd}')
Expand All @@ -66,54 +67,49 @@ def initialize_folders():
file.write('{}')

@staticmethod
def get_database_connection_params() -> DatabaseConnectionParameters:
required_database_params = [
'BCI_MONGO_HOST',
'BCI_MONGO_USERNAME',
'BCI_MONGO_DATABASE',
'BCI_MONGO_PASSWORD'
]
missing_database_params = [
param for param in required_database_params
if os.getenv(param) in ['', None]]
def get_database_params() -> DatabaseParameters:
required_database_params = ['BCI_MONGO_HOST', 'BCI_MONGO_USERNAME', 'BCI_MONGO_DATABASE', 'BCI_MONGO_PASSWORD']
missing_database_params = [param for param in required_database_params if os.getenv(param) in ['', None]]
if missing_database_params:
logger.info(f'Could not find database parameters {missing_database_params}, using database container...')
return container.run()
else:
database_params = DatabaseConnectionParameters(
database_params = DatabaseParameters(
os.getenv('BCI_MONGO_HOST'),
os.getenv('BCI_MONGO_USERNAME'),
os.getenv('BCI_MONGO_PASSWORD'),
os.getenv('BCI_MONGO_DATABASE')
os.getenv('BCI_MONGO_DATABASE'),
int(os.getenv('BCI_BINARY_CACHE_LIMIT', 0)),
)
logger.info(f'Found database environment variables \'{database_params}\'')
logger.info(f"Found database environment variables '{database_params}'")
return database_params

@staticmethod
def get_tag() -> str:
'''
"""
Returns the Docker image tag of BugHog.
This should never be empty.
'''
assert (bughog_version := os.getenv('BUGHOG_VERSION')) not in ['', None]
"""
bughog_version = os.getenv('BUGHOG_VERSION', None)
if bughog_version is None or bughog_version == '':
raise ValueError('BUGHOG_VERSION is not set')
return bughog_version


class Chromium:

extension_folder = '/app/browser/extensions/chromium'
repo_to_use = 'online'


class Firefox:

extension_folder = '/app/browser/extensions/firefox'
repo_to_use = 'online'


class CustomHTTPHandler(logging.handlers.HTTPHandler):

def __init__(self, host: str, url: str, method: str = 'GET', secure: bool = False, credentials=None, context=None) -> None:
def __init__(
self, host: str, url: str, method: str = 'GET', secure: bool = False, credentials=None, context=None
) -> None:
super().__init__(host, url, method=method, secure=secure, credentials=credentials, context=context)
self.hostname = os.getenv('HOSTNAME')

Expand All @@ -124,8 +120,9 @@ def mapLogRecord(self, record):


class Loggers:

formatter = logging.Formatter(fmt='[%(asctime)s] [%(levelname)s] %(name)s: %(message)s', datefmt='%d-%m-%Y %H:%M:%S')
formatter = logging.Formatter(
fmt='[%(asctime)s] [%(levelname)s] %(name)s: %(message)s', datefmt='%d-%m-%Y %H:%M:%S'
)
memory_handler = logging.handlers.MemoryHandler(capacity=100, flushLevel=logging.ERROR)

@staticmethod
Expand Down
Loading

0 comments on commit 26f822e

Please sign in to comment.