Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bug fixes and small improvements #40

Merged
merged 15 commits into from
Dec 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .devcontainer/devcontainer.json
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@
"vscode": {
"extensions": [
"charliermarsh.ruff",
"ms-python.debugpy",
"ms-python.python",
"Vue.volar"
]
}
Expand Down
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@ nginx/ssl/keys/*
**/node_modules
**/junit.xml

# Screenshots
# Screenshots and logs
logs/*.log.*
logs/screenshots/*
!logs/screenshots/.gitkeep

Expand Down
2 changes: 1 addition & 1 deletion .vscode/launch.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
"configurations": [
{
"name": "BugHog",
"type": "python",
"type": "debugpy",
"request": "launch",
"program": "/app/bci/app.py",
"purpose": [
Expand Down
9 changes: 8 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM node:lts-alpine as ui-build-stage
FROM node:22.12-alpine as ui-build-stage
WORKDIR /app
COPY /bci/web/vue/package*.json ./
RUN npm install
Expand All @@ -7,9 +7,16 @@ RUN npm run build


FROM openresty/openresty:1.27.1.1-bullseye AS nginx
RUN apt update -y && \
apt install -y curl && \
rm -rf /var/lib/apt/lists/*
RUN mkdir -p /www/data/js && \
curl https://cdn.bokeh.org/bokeh/release/bokeh-3.6.1.min.js -o /www/data/js/bokeh.min.js && \
curl https://cdn.bokeh.org/bokeh/release/bokeh-api-3.6.1.min.js -o /www/data/js/bokeh-api.min.js
COPY ./nginx/start.sh /usr/local/bin/
COPY ./nginx/config /etc/nginx/config
COPY --from=ui-build-stage /app/dist /www/data
COPY --from=ui-build-stage /app/node_modules/ace-builds/src-min-noconflict /www/data/node_modules/ace-builds/src-min-noconflict
CMD ["start.sh"]


Expand Down
11 changes: 7 additions & 4 deletions bci/browser/binary/binary.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import logging
import os
import time
from abc import abstractmethod
from typing import Optional

Expand Down Expand Up @@ -53,7 +54,7 @@ def origin(self) -> str:
raise AttributeError(f"Unknown binary origin for path '{self.get_bin_path()}'")

@staticmethod
def list_downloaded_binaries(bin_folder_path: str) -> list[dict[str, str]]:
def _list_downloaded_binaries(bin_folder_path: str) -> list[dict[str, str]]:
binaries = []
for subfolder_path in os.listdir(os.path.join(bin_folder_path, 'downloaded')):
bin_entry = {}
Expand All @@ -63,10 +64,10 @@ def list_downloaded_binaries(bin_folder_path: str) -> list[dict[str, str]]:

@staticmethod
def list_artisanal_binaries(bin_folder_path: str, executable_name: str):
return Binary.get_artisanal_manager(bin_folder_path, executable_name).get_artisanal_binaries_list()
return Binary._get_artisanal_manager(bin_folder_path, executable_name).get_artisanal_binaries_list()

@staticmethod
def get_artisanal_manager(bin_folder_path: str, executable_name: str) -> ArtisanalBuildManager:
def _get_artisanal_manager(bin_folder_path: str, executable_name: str) -> ArtisanalBuildManager:
return ArtisanalBuildManager(bin_folder_path, executable_name)

def fetch_binary(self):
Expand All @@ -80,8 +81,10 @@ def fetch_binary(self):
return
# Try to download binary
elif self.is_available_online():
start = time.time()
self.download_binary()
logger.info(f'Binary for {self.state.index} downloaded')
elapsed_time = time.time() - start
logger.info(f'Binary for {self.state.index} downloaded in {elapsed_time:.2f}s')
BinaryCache.store_binary_files(self.get_potential_bin_path(), self.state)
else:
raise BuildNotAvailableError(self.browser_name, self.state)
Expand Down
40 changes: 0 additions & 40 deletions bci/browser/binary/factory.py
Original file line number Diff line number Diff line change
@@ -1,53 +1,13 @@
from typing import Type

from bci.browser.binary.binary import Binary
from bci.browser.binary.vendors.chromium import ChromiumBinary
from bci.browser.binary.vendors.firefox import FirefoxBinary
from bci.version_control.states.state import State


def list_downloaded_binaries(browser):
return __get_class(browser).list_downloaded_binaries()


def list_artisanal_binaries(browser):
return __get_class(browser).list_artisanal_binaries()


def update_artisanal_binaries(browser):
return __get_class(browser).get_artisanal_manager().update()


def download_online_binary(browser, revision_number):
return __get_class(browser).download_online_binary(revision_number)


def binary_is_available(state: State) -> bool:
return __has_available_binary_online(state) or __has_available_binary_artisanal(state)


def __has_available_binary_online(state: State) -> bool:
return __get_class(state.browser_name).has_available_binary_online()


def __has_available_binary_artisanal(state: State) -> bool:
return __get_class(state.browser_name).get_artisanal_manager().has_artisanal_binary_for(state)


def get_binary(state: State) -> Binary:
return __get_object(state)


def __get_class(browser_name: str) -> Type[Binary]:
match browser_name:
case 'chromium':
return ChromiumBinary
case 'firefox':
return FirefoxBinary
case _:
raise ValueError(f'Unknown browser {browser_name}')


def __get_object(state: State) -> Binary:
match state.browser_name:
case 'chromium':
Expand Down
16 changes: 14 additions & 2 deletions bci/browser/binary/vendors/chromium.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,11 +68,23 @@ def download_binary(self):
bin_path = self.get_potential_bin_path()
os.makedirs(os.path.dirname(bin_path), exist_ok=True)
unzipped_folder_path = os.path.join(os.path.dirname(zip_file_path), "chrome-linux")
self.__remove_unnecessary_files(unzipped_folder_path)
util.safe_move_dir(unzipped_folder_path, os.path.dirname(bin_path))
cli.execute_and_return_status("chmod -R a+x %s" % os.path.dirname(bin_path))
# Remove temporary files in /tmp/COMMIT_POS
shutil.rmtree(os.path.dirname(zip_file_path))

def __remove_unnecessary_files(self, binary_folder_path: str) -> None:
"""
Remove binary files that are not necessary for default usage of the browser.
This is to improve performance, especially when caching binary files.

:param binary_folder_path: Path to the folder where the binary files are stored.
"""
locales_folder_path = os.path.join(binary_folder_path, 'locales')
if os.path.isdir(locales_folder_path):
util.remove_all_in_folder(locales_folder_path, except_files=['en-GB.pak', 'en-US.pak'])

def _get_version(self) -> str:
command = "./chrome --version"
if bin_path := self.get_bin_path():
Expand All @@ -86,11 +98,11 @@ def _get_version(self) -> str:

@staticmethod
def list_downloaded_binaries() -> list[dict[str, str]]:
return Binary.list_downloaded_binaries(BIN_FOLDER_PATH)
return Binary._list_downloaded_binaries(BIN_FOLDER_PATH)

@staticmethod
def get_artisanal_manager() -> ArtisanalBuildManager:
return Binary.get_artisanal_manager(BIN_FOLDER_PATH, EXECUTABLE_NAME)
return Binary._get_artisanal_manager(BIN_FOLDER_PATH, EXECUTABLE_NAME)

browser_version_to_driver_version = {
'88': "88.0.4324.96",
Expand Down
4 changes: 2 additions & 2 deletions bci/browser/binary/vendors/firefox.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,11 +95,11 @@ def get_driver_version(self, browser_version):

@staticmethod
def list_downloaded_binaries() -> list[dict[str, str]]:
return Binary.list_downloaded_binaries(BIN_FOLDER_PATH)
return Binary._list_downloaded_binaries(BIN_FOLDER_PATH)

@staticmethod
def get_artisanal_manager() -> ArtisanalBuildManager:
return Binary.get_artisanal_manager(BIN_FOLDER_PATH, EXECUTABLE_NAME)
return Binary._get_artisanal_manager(BIN_FOLDER_PATH, EXECUTABLE_NAME)

browser_version_to_driver_version = {
'84': "0.28.0",
Expand Down
10 changes: 5 additions & 5 deletions bci/configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,16 +70,17 @@ def initialize_folders():
def get_database_params() -> DatabaseParameters:
required_database_params = ['BCI_MONGO_HOST', 'BCI_MONGO_USERNAME', 'BCI_MONGO_DATABASE', 'BCI_MONGO_PASSWORD']
missing_database_params = [param for param in required_database_params if os.getenv(param) in ['', None]]
binary_cache_limit = int(os.getenv('BCI_BINARY_CACHE_LIMIT', 0))
if missing_database_params:
logger.info(f'Could not find database parameters {missing_database_params}, using database container...')
return container.run()
return container.run(binary_cache_limit)
else:
database_params = DatabaseParameters(
os.getenv('BCI_MONGO_HOST'),
os.getenv('BCI_MONGO_USERNAME'),
os.getenv('BCI_MONGO_PASSWORD'),
os.getenv('BCI_MONGO_DATABASE'),
int(os.getenv('BCI_BINARY_CACHE_LIMIT', 0)),
binary_cache_limit,
)
logger.info(f"Found database environment variables '{database_params}'")
return database_params
Expand Down Expand Up @@ -140,7 +141,7 @@ def configure_loggers():
bci_logger.addHandler(stream_handler)

# Configure file handler
file_handler = logging.handlers.RotatingFileHandler(f'/app/logs/{hostname}.log', mode='a', backupCount=2)
file_handler = logging.handlers.RotatingFileHandler(f'/app/logs/{hostname}.log', mode='a', backupCount=3, maxBytes=8*1024*1024)
file_handler.setLevel(logging.DEBUG)
file_handler.setFormatter(Loggers.formatter)
bci_logger.addHandler(file_handler)
Expand All @@ -154,8 +155,7 @@ def configure_loggers():

# Configure memory handler
Loggers.memory_handler.setLevel(logging.INFO)
buffer_formatter = logging.handlers.BufferingHandler(Loggers.formatter)
Loggers.memory_handler.setFormatter(buffer_formatter)
Loggers.memory_handler.setFormatter(Loggers.formatter)
bci_logger.addHandler(Loggers.memory_handler)

# Log uncaught exceptions
Expand Down
76 changes: 55 additions & 21 deletions bci/database/mongo/binary_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ def write_from_db(file_path: str, grid_file_id: str) -> None:
return True

@staticmethod
def store_binary_files(binary_executable_path: str, state: State) -> bool:
def store_binary_files(binary_executable_path: str, state: State):
"""
Stores the files in the folder of the given path in the database.

Expand All @@ -80,35 +80,60 @@ def store_binary_files(binary_executable_path: str, state: State) -> bool:
if MongoDB().binary_cache_limit <= 0:
return False

while BinaryCache.__count_cached_binaries() >= MongoDB.binary_cache_limit:
while BinaryCache.__count_cached_binaries() >= MongoDB().binary_cache_limit:
if BinaryCache.__count_cached_binaries(state_type='revision') <= 0:
# There are only version binaries in the cache, which will never be removed
return False
BinaryCache.__remove_least_used_revision_binary_files()

logger.debug(f"Caching binary files for {state}...")
fs = MongoDB().gridfs

binary_folder_path = os.path.dirname(binary_executable_path)
last_access_ts = datetime.datetime.now()
def store_file(file_path: str) -> None:
# Max chunk size is 16 MB (meta-data included)
chunk_size = 1024 * 1024 * 15
with open(file_path, 'rb') as file:
file_id = fs.new_file(
file_type='binary',
browser_name=state.browser_name,
state_type=state.type,
state_index=state.index,
relative_file_path=os.path.relpath(file_path, binary_folder_path),
access_count=0,
last_access_ts=last_access_ts,
chunk_size=chunk_size
)
while chunk := file.read(chunk_size):
file_id.write(chunk)
file_id.close()

start_time = time.time()
with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
futures = []
for root, _, files in os.walk(binary_folder_path):
for file in files:
file_path = os.path.join(root, file)
with open(file_path, 'rb') as file:
executor.submit(
fs.put,
file.read(),
file_type='binary',
browser_name=state.browser_name,
state_type=state.type,
state_index=state.index,
relative_file_path=os.path.relpath(file_path, binary_folder_path),
access_count=0,
last_access_ts=datetime.datetime.now(),
)
future = executor.submit(store_file, file_path)
futures.append(future)
logger.debug(f"Number of files to cache: {len(futures)}")
executor.shutdown(wait=True)
elapsed_time = time.time() - start_time
logger.debug(f'Stored binary in {elapsed_time:.2f}s')
return True

futures_with_exception = [future for future in futures if future.exception() is not None]
if futures_with_exception:
logger.error(
(
f"Something went wrong caching binary files for {state}. "
"Removing possibly incomplete binary files from cache."
),
exc_info=futures_with_exception[0].exception()
)
BinaryCache.__remove_revision_binary_files(state.type, state.index)
logger.debug(f"Removed possibly incomplete cached binary files for {state}.")
else:
elapsed_time = time.time() - start_time
logger.debug(f'Stored binary in {elapsed_time:.2f}s')

@staticmethod
def __count_cached_binaries(state_type: Optional[str] = None) -> int:
Expand All @@ -130,7 +155,6 @@ def __remove_least_used_revision_binary_files() -> None:
"""
Removes the least used revision binary files from the database.
"""
fs = MongoDB().gridfs
files_collection = MongoDB().get_collection('fs.files')

grid_cursor = files_collection.find(
Expand All @@ -139,6 +163,16 @@ def __remove_least_used_revision_binary_files() -> None:
)
for state_doc in grid_cursor:
state_index = state_doc['state_index']
for grid_doc in files_collection.find({'state_index': state_index, 'state_type': 'revision'}):
fs.delete(grid_doc['_id'])
BinaryCache.__remove_revision_binary_files('revision', state_index)
break

@staticmethod
def __remove_revision_binary_files(state_type: str, state_index: int) -> None:
"""
Removes the binary files associated with the parameters.
"""
fs = MongoDB().gridfs
files_collection = MongoDB().get_collection('fs.files')

for grid_doc in files_collection.find({'state_index': state_index, 'state_type': state_type}):
fs.delete(grid_doc['_id'])
6 changes: 3 additions & 3 deletions bci/database/mongo/container.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
DEFAULT_HOST = 'bh_db'


def run() -> DatabaseParameters:
def run(binary_cache_limit: int) -> DatabaseParameters:
docker_client = docker.from_env()
try:
mongo_container = docker_client.containers.get(DEFAULT_HOST)
Expand All @@ -33,7 +33,7 @@ def run() -> DatabaseParameters:
LOGGER.debug('MongoDB container not found, creating a new one...')
__create_new_container(DEFAULT_USER, DEFAULT_PW, DEFAULT_DB_NAME, DEFAULT_HOST)
LOGGER.debug('MongoDB container has started!')
return DatabaseParameters(DEFAULT_HOST, DEFAULT_USER, DEFAULT_PW, DEFAULT_DB_NAME, 0)
return DatabaseParameters(DEFAULT_HOST, DEFAULT_USER, DEFAULT_PW, DEFAULT_DB_NAME, binary_cache_limit)


def stop():
Expand All @@ -51,7 +51,7 @@ def __create_new_container(user: str, pw: str, db_name, db_host):
raise AttributeError("Could not create container because of missing HOST_PWD environment variable")
docker_client = docker.from_env()
docker_client.containers.run(
'mongo:5.0.17',
'mongo:6',
name=db_host,
hostname=db_host,
network=NETWORK_NAME,
Expand Down
1 change: 1 addition & 0 deletions bci/database/mongo/mongodb.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ def connect(self, db_params: DatabaseParameters) -> None:
retryWrites=False,
serverSelectionTimeoutMS=10000,
)
self.binary_cache_limit = db_params.binary_cache_limit
logger.info(f'Binary cache limit set to {db_params.binary_cache_limit}')
# Force connection to check whether MongoDB server is reachable
try:
Expand Down
2 changes: 1 addition & 1 deletion bci/database/mongo/revision_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def store_firefox_binary_availability(data: dict) -> None:

collection.delete_many({})
collection.insert_many(values)
logger.info(f'Revision Cache was updates ({len(values)} documents).')
logger.info(f'Revision Cache was updated ({len(values)} documents).')

@staticmethod
def firefox_get_revision_number(revision_id: str) -> int:
Expand Down
Loading
Loading