Skip to content

Commit

Permalink
Bug fixes and small improvements (#40)
Browse files Browse the repository at this point in the history
Fixes:
- Experiment status codes are now respected.
- SHIFT key for removing data points is not sticky anymore.
- Big PoC files are now handled correctly.
- Updated tailwind config due to breaking node image upgrade.
- Increase RAM allowance for workers.
- Ignore .DS_Store files in experiments folder.

Improvements:
- Upgrade MongoDB image to v6
- Add log rotation.
- Additional logging.
- Allow binary logging without external database.
  • Loading branch information
GJFR authored Dec 9, 2024
2 parents 9e0e8f7 + 9cc24be commit 76e2efe
Show file tree
Hide file tree
Showing 29 changed files with 279 additions and 175 deletions.
2 changes: 2 additions & 0 deletions .devcontainer/devcontainer.json
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@
"vscode": {
"extensions": [
"charliermarsh.ruff",
"ms-python.debugpy",
"ms-python.python",
"Vue.volar"
]
}
Expand Down
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@ nginx/ssl/keys/*
**/node_modules
**/junit.xml

# Screenshots
# Screenshots and logs
logs/*.log.*
logs/screenshots/*
!logs/screenshots/.gitkeep

Expand Down
2 changes: 1 addition & 1 deletion .vscode/launch.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
"configurations": [
{
"name": "BugHog",
"type": "python",
"type": "debugpy",
"request": "launch",
"program": "/app/bci/app.py",
"purpose": [
Expand Down
9 changes: 8 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM node:lts-alpine as ui-build-stage
FROM node:22.12-alpine as ui-build-stage
WORKDIR /app
COPY /bci/web/vue/package*.json ./
RUN npm install
Expand All @@ -7,9 +7,16 @@ RUN npm run build


FROM openresty/openresty:1.27.1.1-bullseye AS nginx
RUN apt update -y && \
apt install -y curl && \
rm -rf /var/lib/apt/lists/*
RUN mkdir -p /www/data/js && \
curl https://cdn.bokeh.org/bokeh/release/bokeh-3.6.1.min.js -o /www/data/js/bokeh.min.js && \
curl https://cdn.bokeh.org/bokeh/release/bokeh-api-3.6.1.min.js -o /www/data/js/bokeh-api.min.js
COPY ./nginx/start.sh /usr/local/bin/
COPY ./nginx/config /etc/nginx/config
COPY --from=ui-build-stage /app/dist /www/data
COPY --from=ui-build-stage /app/node_modules/ace-builds/src-min-noconflict /www/data/node_modules/ace-builds/src-min-noconflict
CMD ["start.sh"]


Expand Down
11 changes: 7 additions & 4 deletions bci/browser/binary/binary.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import logging
import os
import time
from abc import abstractmethod
from typing import Optional

Expand Down Expand Up @@ -53,7 +54,7 @@ def origin(self) -> str:
raise AttributeError(f"Unknown binary origin for path '{self.get_bin_path()}'")

@staticmethod
def list_downloaded_binaries(bin_folder_path: str) -> list[dict[str, str]]:
def _list_downloaded_binaries(bin_folder_path: str) -> list[dict[str, str]]:
binaries = []
for subfolder_path in os.listdir(os.path.join(bin_folder_path, 'downloaded')):
bin_entry = {}
Expand All @@ -63,10 +64,10 @@ def list_downloaded_binaries(bin_folder_path: str) -> list[dict[str, str]]:

@staticmethod
def list_artisanal_binaries(bin_folder_path: str, executable_name: str):
return Binary.get_artisanal_manager(bin_folder_path, executable_name).get_artisanal_binaries_list()
return Binary._get_artisanal_manager(bin_folder_path, executable_name).get_artisanal_binaries_list()

@staticmethod
def get_artisanal_manager(bin_folder_path: str, executable_name: str) -> ArtisanalBuildManager:
def _get_artisanal_manager(bin_folder_path: str, executable_name: str) -> ArtisanalBuildManager:
return ArtisanalBuildManager(bin_folder_path, executable_name)

def fetch_binary(self):
Expand All @@ -80,8 +81,10 @@ def fetch_binary(self):
return
# Try to download binary
elif self.is_available_online():
start = time.time()
self.download_binary()
logger.info(f'Binary for {self.state.index} downloaded')
elapsed_time = time.time() - start
logger.info(f'Binary for {self.state.index} downloaded in {elapsed_time:.2f}s')
BinaryCache.store_binary_files(self.get_potential_bin_path(), self.state)
else:
raise BuildNotAvailableError(self.browser_name, self.state)
Expand Down
40 changes: 0 additions & 40 deletions bci/browser/binary/factory.py
Original file line number Diff line number Diff line change
@@ -1,53 +1,13 @@
from typing import Type

from bci.browser.binary.binary import Binary
from bci.browser.binary.vendors.chromium import ChromiumBinary
from bci.browser.binary.vendors.firefox import FirefoxBinary
from bci.version_control.states.state import State


def list_downloaded_binaries(browser):
return __get_class(browser).list_downloaded_binaries()


def list_artisanal_binaries(browser):
return __get_class(browser).list_artisanal_binaries()


def update_artisanal_binaries(browser):
return __get_class(browser).get_artisanal_manager().update()


def download_online_binary(browser, revision_number):
return __get_class(browser).download_online_binary(revision_number)


def binary_is_available(state: State) -> bool:
return __has_available_binary_online(state) or __has_available_binary_artisanal(state)


def __has_available_binary_online(state: State) -> bool:
return __get_class(state.browser_name).has_available_binary_online()


def __has_available_binary_artisanal(state: State) -> bool:
return __get_class(state.browser_name).get_artisanal_manager().has_artisanal_binary_for(state)


def get_binary(state: State) -> Binary:
return __get_object(state)


def __get_class(browser_name: str) -> Type[Binary]:
match browser_name:
case 'chromium':
return ChromiumBinary
case 'firefox':
return FirefoxBinary
case _:
raise ValueError(f'Unknown browser {browser_name}')


def __get_object(state: State) -> Binary:
match state.browser_name:
case 'chromium':
Expand Down
16 changes: 14 additions & 2 deletions bci/browser/binary/vendors/chromium.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,11 +68,23 @@ def download_binary(self):
bin_path = self.get_potential_bin_path()
os.makedirs(os.path.dirname(bin_path), exist_ok=True)
unzipped_folder_path = os.path.join(os.path.dirname(zip_file_path), "chrome-linux")
self.__remove_unnecessary_files(unzipped_folder_path)
util.safe_move_dir(unzipped_folder_path, os.path.dirname(bin_path))
cli.execute_and_return_status("chmod -R a+x %s" % os.path.dirname(bin_path))
# Remove temporary files in /tmp/COMMIT_POS
shutil.rmtree(os.path.dirname(zip_file_path))

def __remove_unnecessary_files(self, binary_folder_path: str) -> None:
"""
Remove binary files that are not necessary for default usage of the browser.
This is to improve performance, especially when caching binary files.
:param binary_folder_path: Path to the folder where the binary files are stored.
"""
locales_folder_path = os.path.join(binary_folder_path, 'locales')
if os.path.isdir(locales_folder_path):
util.remove_all_in_folder(locales_folder_path, except_files=['en-GB.pak', 'en-US.pak'])

def _get_version(self) -> str:
command = "./chrome --version"
if bin_path := self.get_bin_path():
Expand All @@ -86,11 +98,11 @@ def _get_version(self) -> str:

@staticmethod
def list_downloaded_binaries() -> list[dict[str, str]]:
return Binary.list_downloaded_binaries(BIN_FOLDER_PATH)
return Binary._list_downloaded_binaries(BIN_FOLDER_PATH)

@staticmethod
def get_artisanal_manager() -> ArtisanalBuildManager:
return Binary.get_artisanal_manager(BIN_FOLDER_PATH, EXECUTABLE_NAME)
return Binary._get_artisanal_manager(BIN_FOLDER_PATH, EXECUTABLE_NAME)

browser_version_to_driver_version = {
'88': "88.0.4324.96",
Expand Down
4 changes: 2 additions & 2 deletions bci/browser/binary/vendors/firefox.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,11 +95,11 @@ def get_driver_version(self, browser_version):

@staticmethod
def list_downloaded_binaries() -> list[dict[str, str]]:
return Binary.list_downloaded_binaries(BIN_FOLDER_PATH)
return Binary._list_downloaded_binaries(BIN_FOLDER_PATH)

@staticmethod
def get_artisanal_manager() -> ArtisanalBuildManager:
return Binary.get_artisanal_manager(BIN_FOLDER_PATH, EXECUTABLE_NAME)
return Binary._get_artisanal_manager(BIN_FOLDER_PATH, EXECUTABLE_NAME)

browser_version_to_driver_version = {
'84': "0.28.0",
Expand Down
10 changes: 5 additions & 5 deletions bci/configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,16 +70,17 @@ def initialize_folders():
def get_database_params() -> DatabaseParameters:
required_database_params = ['BCI_MONGO_HOST', 'BCI_MONGO_USERNAME', 'BCI_MONGO_DATABASE', 'BCI_MONGO_PASSWORD']
missing_database_params = [param for param in required_database_params if os.getenv(param) in ['', None]]
binary_cache_limit = int(os.getenv('BCI_BINARY_CACHE_LIMIT', 0))
if missing_database_params:
logger.info(f'Could not find database parameters {missing_database_params}, using database container...')
return container.run()
return container.run(binary_cache_limit)
else:
database_params = DatabaseParameters(
os.getenv('BCI_MONGO_HOST'),
os.getenv('BCI_MONGO_USERNAME'),
os.getenv('BCI_MONGO_PASSWORD'),
os.getenv('BCI_MONGO_DATABASE'),
int(os.getenv('BCI_BINARY_CACHE_LIMIT', 0)),
binary_cache_limit,
)
logger.info(f"Found database environment variables '{database_params}'")
return database_params
Expand Down Expand Up @@ -140,7 +141,7 @@ def configure_loggers():
bci_logger.addHandler(stream_handler)

# Configure file handler
file_handler = logging.handlers.RotatingFileHandler(f'/app/logs/{hostname}.log', mode='a', backupCount=2)
file_handler = logging.handlers.RotatingFileHandler(f'/app/logs/{hostname}.log', mode='a', backupCount=3, maxBytes=8*1024*1024)
file_handler.setLevel(logging.DEBUG)
file_handler.setFormatter(Loggers.formatter)
bci_logger.addHandler(file_handler)
Expand All @@ -154,8 +155,7 @@ def configure_loggers():

# Configure memory handler
Loggers.memory_handler.setLevel(logging.INFO)
buffer_formatter = logging.handlers.BufferingHandler(Loggers.formatter)
Loggers.memory_handler.setFormatter(buffer_formatter)
Loggers.memory_handler.setFormatter(Loggers.formatter)
bci_logger.addHandler(Loggers.memory_handler)

# Log uncaught exceptions
Expand Down
76 changes: 55 additions & 21 deletions bci/database/mongo/binary_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ def write_from_db(file_path: str, grid_file_id: str) -> None:
return True

@staticmethod
def store_binary_files(binary_executable_path: str, state: State) -> bool:
def store_binary_files(binary_executable_path: str, state: State):
"""
Stores the files in the folder of the given path in the database.
Expand All @@ -80,35 +80,60 @@ def store_binary_files(binary_executable_path: str, state: State) -> bool:
if MongoDB().binary_cache_limit <= 0:
return False

while BinaryCache.__count_cached_binaries() >= MongoDB.binary_cache_limit:
while BinaryCache.__count_cached_binaries() >= MongoDB().binary_cache_limit:
if BinaryCache.__count_cached_binaries(state_type='revision') <= 0:
# There are only version binaries in the cache, which will never be removed
return False
BinaryCache.__remove_least_used_revision_binary_files()

logger.debug(f"Caching binary files for {state}...")
fs = MongoDB().gridfs

binary_folder_path = os.path.dirname(binary_executable_path)
last_access_ts = datetime.datetime.now()
def store_file(file_path: str) -> None:
# Max chunk size is 16 MB (meta-data included)
chunk_size = 1024 * 1024 * 15
with open(file_path, 'rb') as file:
file_id = fs.new_file(
file_type='binary',
browser_name=state.browser_name,
state_type=state.type,
state_index=state.index,
relative_file_path=os.path.relpath(file_path, binary_folder_path),
access_count=0,
last_access_ts=last_access_ts,
chunk_size=chunk_size
)
while chunk := file.read(chunk_size):
file_id.write(chunk)
file_id.close()

start_time = time.time()
with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
futures = []
for root, _, files in os.walk(binary_folder_path):
for file in files:
file_path = os.path.join(root, file)
with open(file_path, 'rb') as file:
executor.submit(
fs.put,
file.read(),
file_type='binary',
browser_name=state.browser_name,
state_type=state.type,
state_index=state.index,
relative_file_path=os.path.relpath(file_path, binary_folder_path),
access_count=0,
last_access_ts=datetime.datetime.now(),
)
future = executor.submit(store_file, file_path)
futures.append(future)
logger.debug(f"Number of files to cache: {len(futures)}")
executor.shutdown(wait=True)
elapsed_time = time.time() - start_time
logger.debug(f'Stored binary in {elapsed_time:.2f}s')
return True

futures_with_exception = [future for future in futures if future.exception() is not None]
if futures_with_exception:
logger.error(
(
f"Something went wrong caching binary files for {state}, "
"Removing possibly imcomplete binary files from cache."
),
exc_info=futures_with_exception[0].exception()
)
BinaryCache.__remove_revision_binary_files(state.type, state.index)
logger.debug(f"Removed possibly incomplete cached binary files for {state}.")
else:
elapsed_time = time.time() - start_time
logger.debug(f'Stored binary in {elapsed_time:.2f}s')

@staticmethod
def __count_cached_binaries(state_type: Optional[str] = None) -> int:
Expand All @@ -130,7 +155,6 @@ def __remove_least_used_revision_binary_files() -> None:
"""
Removes the least used revision binary files from the database.
"""
fs = MongoDB().gridfs
files_collection = MongoDB().get_collection('fs.files')

grid_cursor = files_collection.find(
Expand All @@ -139,6 +163,16 @@ def __remove_least_used_revision_binary_files() -> None:
)
for state_doc in grid_cursor:
state_index = state_doc['state_index']
for grid_doc in files_collection.find({'state_index': state_index, 'state_type': 'revision'}):
fs.delete(grid_doc['_id'])
BinaryCache.__remove_revision_binary_files('revision', state_index)
break

@staticmethod
def __remove_revision_binary_files(state_type: str, state_index: int) -> None:
"""
Removes the binary files associated with the parameters.
"""
fs = MongoDB().gridfs
files_collection = MongoDB().get_collection('fs.files')

for grid_doc in files_collection.find({'state_index': state_index, 'state_type': state_type}):
fs.delete(grid_doc['_id'])
6 changes: 3 additions & 3 deletions bci/database/mongo/container.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
DEFAULT_HOST = 'bh_db'


def run() -> DatabaseParameters:
def run(binary_cache_limit: int) -> DatabaseParameters:
docker_client = docker.from_env()
try:
mongo_container = docker_client.containers.get(DEFAULT_HOST)
Expand All @@ -33,7 +33,7 @@ def run() -> DatabaseParameters:
LOGGER.debug('MongoDB container not found, creating a new one...')
__create_new_container(DEFAULT_USER, DEFAULT_PW, DEFAULT_DB_NAME, DEFAULT_HOST)
LOGGER.debug('MongoDB container has started!')
return DatabaseParameters(DEFAULT_HOST, DEFAULT_USER, DEFAULT_PW, DEFAULT_DB_NAME, 0)
return DatabaseParameters(DEFAULT_HOST, DEFAULT_USER, DEFAULT_PW, DEFAULT_DB_NAME, binary_cache_limit)


def stop():
Expand All @@ -51,7 +51,7 @@ def __create_new_container(user: str, pw: str, db_name, db_host):
raise AttributeError("Could not create container because of missing HOST_PWD environment variable")
docker_client = docker.from_env()
docker_client.containers.run(
'mongo:5.0.17',
'mongo:6',
name=db_host,
hostname=db_host,
network=NETWORK_NAME,
Expand Down
1 change: 1 addition & 0 deletions bci/database/mongo/mongodb.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ def connect(self, db_params: DatabaseParameters) -> None:
retryWrites=False,
serverSelectionTimeoutMS=10000,
)
self.binary_cache_limit = db_params.binary_cache_limit
logger.info(f'Binary cache limit set to {db_params.binary_cache_limit}')
# Force connection to check whether MongoDB server is reachable
try:
Expand Down
2 changes: 1 addition & 1 deletion bci/database/mongo/revision_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def store_firefox_binary_availability(data: dict) -> None:

collection.delete_many({})
collection.insert_many(values)
logger.info(f'Revision Cache was updates ({len(values)} documents).')
logger.info(f'Revision Cache was updated ({len(values)} documents).')

@staticmethod
def firefox_get_revision_number(revision_id: str) -> int:
Expand Down
Loading

0 comments on commit 76e2efe

Please sign in to comment.