Skip to content

Commit

Permalink
Improve binary sequence algorithm (#31)
Browse files Browse the repository at this point in the history
* WIP: start implementation of more efficient sequence algorithm

* WIP: add fixes and improvements

* Fix tests

* Fix issue where bgb search would not stop

* Fix out of bounds

* Update tooltip
  • Loading branch information
GJFR authored Oct 13, 2024
1 parent 358ed3e commit 65a2e92
Show file tree
Hide file tree
Showing 35 changed files with 1,106 additions and 1,249 deletions.
194 changes: 101 additions & 93 deletions bci/database/mongo/mongodb.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,17 @@
from pymongo.collection import Collection
from pymongo.errors import ServerSelectionTimeoutError

from bci.evaluations.logic import (DatabaseConnectionParameters,
PlotParameters, TestParameters, TestResult,
WorkerParameters)
from bci.version_control.states.state import State
from bci.evaluations.logic import (
DatabaseConnectionParameters,
EvaluationParameters,
PlotParameters,
StateResult,
TestParameters,
TestResult,
WorkerParameters,
)
from bci.evaluations.outcome_checker import OutcomeChecker
from bci.version_control.states.state import State, StateCondition

logger = logging.getLogger(__name__)

Expand All @@ -25,8 +32,8 @@ class MongoDB(ABC):
instance = None

binary_availability_collection_names = {
"chromium": "chromium_binary_availability",
"firefox": "firefox_central_binary_availability"
'chromium': 'chromium_binary_availability',
'firefox': 'firefox_central_binary_availability',
}

def __init__(self):
Expand All @@ -51,14 +58,15 @@ def connect(db_connection_params: DatabaseConnectionParameters):
password=db_connection_params.password,
authsource=db_connection_params.database_name,
retryWrites=False,
serverSelectionTimeoutMS=10000)
serverSelectionTimeoutMS=10000,
)
# Force connection to check whether MongoDB server is reachable
try:
CLIENT.server_info()
DB = CLIENT[db_connection_params.database_name]
logger.info("Connected to database!")
logger.info('Connected to database!')
except ServerSelectionTimeoutError as e:
logger.info("A timeout occurred while attempting to establish connection.", exc_info=True)
logger.info('A timeout occurred while attempting to establish connection.', exc_info=True)
raise ServerException from e

# Initialize collections
Expand All @@ -73,16 +81,13 @@ def disconnect():

@staticmethod
def __initialize_collections():
    """Create the binary availability collections that are not present in the database yet."""
    required_collections = ('chromium_binary_availability', 'firefox_central_binary_availability')
    for required_name in required_collections:
        # Skip collections that already exist; only the missing ones are created.
        if required_name in DB.list_collection_names():
            continue
        DB.create_collection(required_name)

def get_collection(self, name: str):
    """
    Return the collection called `name`, creating it first when the database does not list it.

    :param name: Name of the collection to fetch.
    :return: The collection object looked up on the connected database.
    """
    existing_names = DB.list_collection_names()
    if name not in existing_names:
        logger.info(f"Collection '{name}' does not exist, creating it...")
        DB.create_collection(name)
    return DB[name]

Expand All @@ -102,18 +107,18 @@ def store_result(self, result: TestResult):
'mech_group': result.params.mech_group,
'results': result.data,
'dirty': result.is_dirty,
'ts': str(datetime.now(timezone.utc).replace(microsecond=0))
'ts': str(datetime.now(timezone.utc).replace(microsecond=0)),
}
if result.driver_version:
document["driver_version"] = result.driver_version
document['driver_version'] = result.driver_version

if browser_config.browser_name == "firefox":
if browser_config.browser_name == 'firefox':
build_id = self.get_build_id_firefox(result.params.state)
if build_id is None:
document["artisanal"] = True
document["build_id"] = "artisanal"
document['artisanal'] = True
document['build_id'] = 'artisanal'
else:
document["build_id"] = build_id
document['build_id'] = build_id

collection.insert_one(document)

Expand All @@ -123,10 +128,7 @@ def get_result(self, params: TestParameters) -> TestResult:
document = collection.find_one(query)
if document:
return params.create_test_result_with(
document['browser_version'],
document['binary_origin'],
document['results'],
document['dirty']
document['browser_version'], document['binary_origin'], document['results'], document['dirty']
)
else:
logger.error(f'Could not find document for query {query}')
Expand All @@ -143,24 +145,66 @@ def has_all_results(self, params: WorkerParameters) -> bool:
return False
return True

def get_evaluated_states(
    self, params: EvaluationParameters, boundary_states: tuple[State, State], outcome_checker: OutcomeChecker
) -> list[State]:
    """
    Collect the states between two boundary states for which a result document is stored.

    :param params: Evaluation parameters; supply the collection name, browser configuration and evaluation range.
    :param boundary_states: Lower and upper state; their revision numbers bound the query (both inclusive).
    :param outcome_checker: Used to derive the outcome of every state from its stored result.
    :return: One state per matching document, with `result`, `outcome` and `condition` filled in.
    """

    def exact_list_filter(values):
        # A non-empty list must match arrays of the same size that contain all given values;
        # otherwise the stored array has to be empty.
        if values:
            return {'$size': len(values), '$all': values}
        return []

    collection = self.get_collection(params.database_collection)
    browser_configuration = params.browser_configuration
    evaluation_range = params.evaluation_range

    state_type = 'version' if evaluation_range.only_release_revisions else 'revision'
    query = {
        'browser_config': browser_configuration.browser_setting,
        'mech_group': evaluation_range.mech_groups[0],  # TODO: fix this
        'state.browser_name': browser_configuration.browser_name,
        'results': {'$exists': True},
        'state.type': state_type,
        'state.revision_number': {
            '$gte': boundary_states[0].revision_nb,
            '$lte': boundary_states[1].revision_nb,
        },
    }
    query['extensions'] = exact_list_filter(browser_configuration.extensions)
    query['cli_options'] = exact_list_filter(browser_configuration.cli_options)

    evaluated_states = []
    for document in collection.find(query):
        state = State.from_dict(document['state'])
        state.result = StateResult.from_dict(document['results'], is_dirty=document['dirty'])
        state.outcome = outcome_checker.get_outcome(state.result)
        # Documents flagged as dirty are marked as failed, all others as completed.
        state.condition = StateCondition.FAILED if document['dirty'] else StateCondition.COMPLETED
        evaluated_states.append(state)
    return evaluated_states

def __to_query(self, params: TestParameters) -> dict:
query = {
'state': params.state.to_dict(),
'browser_automation': params.evaluation_configuration.automation,
'browser_config': params.browser_configuration.browser_setting,
'mech_group': params.mech_group
'mech_group': params.mech_group,
}
if len(params.browser_configuration.extensions) > 0:
query['extensions'] = {
'$size': len(params.browser_configuration.extensions),
'$all': params.browser_configuration.extensions
'$all': params.browser_configuration.extensions,
}
else:
query['extensions'] = []
if len(params.browser_configuration.cli_options) > 0:
query['cli_options'] = {
'$size': len(params.browser_configuration.cli_options),
'$all': params.browser_configuration.cli_options
'$all': params.browser_configuration.cli_options,
}
else:
query['cli_options'] = []
Expand All @@ -184,34 +228,30 @@ def get_binary_availability_collection(browser_name: str):
@staticmethod
def has_binary_available_online(browser: str, state: State):
    """
    Look up the cached `binary_online` value for the given state.

    :param browser: Browser name, used to select the binary availability collection.
    :param state: State whose dictionary form is matched against the cached `state` field.
    :return: The stored `binary_online` value, or None when no document matches.
    """
    availability = MongoDB.get_binary_availability_collection(browser)
    cached_entry = availability.find_one({'state': state.to_dict()})
    return None if cached_entry is None else cached_entry['binary_online']

@staticmethod
def get_stored_binary_availability(browser):
    """
    Return a cursor over the cached states whose binary is marked as available online.

    Only the `state` field of each document is projected. For Firefox, a descending sort
    on `build_id` is applied to the cursor.
    """
    availability = MongoDB.get_binary_availability_collection(browser)
    only_online = {'binary_online': True}
    state_only = {
        '_id': False,
        'state': True,
    }
    cursor = availability.find(only_online, state_only)
    if browser == 'firefox':
        cursor.sort('build_id', -1)
    return cursor

@staticmethod
def get_complete_state_dict_from_binary_availability_cache(state: State):
def get_complete_state_dict_from_binary_availability_cache(state: State) -> dict:
collection = MongoDB.get_binary_availability_collection(state.browser_name)
# We have to flatten the state dictionary to ignore missing attributes.
state_dict = {
'state': state.to_dict(make_complete=False)
}
state_dict = {'state': state.to_dict()}
query = flatten(state_dict, reducer='dot')
document = collection.find_one(query)
if document is None:
Expand All @@ -222,100 +262,68 @@ def get_complete_state_dict_from_binary_availability_cache(state: State):
def store_binary_availability_online_cache(browser: str, state: State, binary_online: bool, url: str = None):
    """
    Insert or update the cached binary availability entry of a state.

    :param browser: Browser name, used to select the binary availability collection.
    :param state: State the entry belongs to; its dictionary form identifies the document.
    :param binary_online: Whether the binary is available online.
    :param url: Value stored in the `url` field (None by default).
    """
    availability = MongoDB.get_binary_availability_collection(browser)
    matching_state = {'state': state.to_dict()}
    new_values = {
        'state': state.to_dict(),
        'binary_online': binary_online,
        'url': url,
        'ts': str(datetime.now(timezone.utc).replace(microsecond=0)),
    }
    # upsert=True creates the document when no entry for this state exists yet.
    availability.update_one(matching_state, {'$set': new_values}, upsert=True)

@staticmethod
def get_build_id_firefox(state: State):
    """
    Return the `build_id` cached for the given Firefox state.

    :param state: State whose dictionary form is matched against the cached `state` field.
    :return: The stored build id, or None when no document matches or the match has no `build_id`.
    """
    firefox_availability = MongoDB.get_binary_availability_collection('firefox')
    build_id_only = {'_id': False, 'build_id': 1}
    cached_entry = firefox_availability.find_one({'state': state.to_dict()}, build_id_only)
    # The entry can only be missing if the binary associated with the state is artisanal:
    # such a state is not included in the binary availability collection and has no build_id.
    if not cached_entry:
        return None
    return cached_entry['build_id']

def get_documents_for_plotting(self, params: PlotParameters, releases: bool = False):
    """
    Fetch the stored result documents needed to plot an evaluation.

    :param params: Plot parameters; supply the collection name, mech group, browser config,
        extensions, CLI options and an optional revision number or major version range.
    :param releases: When True, documents of type 'version' are selected, otherwise 'revision'.
    :return: List of documents reduced to `state`, `browser_version`, `dirty` and `results`,
        sorted by ascending `state.revision_number`.
    """
    collection = self.get_collection(params.database_collection)
    query = {
        'mech_group': params.mech_group,
        'browser_config': params.browser_config,
        'state.type': 'version' if releases else 'revision',
    }
    # The stored arrays must have exactly the requested size and, if any values are
    # requested, contain all of them.
    query['extensions'] = {'$size': len(params.extensions) if params.extensions else 0}
    if params.extensions:
        query['extensions']['$all'] = params.extensions
    query['cli_options'] = {'$size': len(params.cli_options) if params.cli_options else 0}
    if params.cli_options:
        query['cli_options']['$all'] = params.cli_options
    # A revision number range takes precedence over a major version range.
    if params.revision_number_range:
        query['state.revision_number'] = {
            '$gte': params.revision_number_range[0],
            '$lte': params.revision_number_range[1],
        }
    elif params.major_version_range:
        # Versions are compared as zero-padded strings of width four.
        query['padded_browser_version'] = {
            '$gte': str(params.major_version_range[0]).zfill(4),
            '$lte': str(params.major_version_range[1] + 1).zfill(4),
        }

    docs = collection.aggregate(
        [
            {'$match': query},
            {'$project': {'_id': False, 'state': True, 'browser_version': True, 'dirty': True, 'results': True}},
            # Sort on a field that survives the projection above. The previous key 'rev_nb'
            # is not part of the projected documents, which made the sort a no-op.
            {'$sort': {'state.revision_number': 1}},
        ]
    )
    return list(docs)

@staticmethod
def get_info() -> dict:
    """
    Describe the current database connection.

    :return: Dictionary with the database type, the host of the client address (None when
        there is no client or address) and whether a connection is considered established.
    """
    is_connected = bool(CLIENT and CLIENT.address)
    host = CLIENT.address[0] if is_connected else None
    return {'type': 'mongo', 'host': host, 'connected': is_connected}


class ServerException(Exception):
Expand Down
Loading

0 comments on commit 65a2e92

Please sign in to comment.