Skip to content

Commit

Permalink
Merge pull request #345 from EGA-archive/mateName
Browse files: browse the repository at this point in the history
mateName parameter added and implemented request parameters for all e…
  • Loading branch information
costero-e authored Jul 2, 2024
2 parents d70f274 + 00c82e8 commit 9ee1246
Show file tree
Hide file tree
Showing 7 changed files with 134 additions and 369 deletions.
40 changes: 15 additions & 25 deletions beacon/db/analyses.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from beacon.request.model import AlphanumericFilter, Operator, RequestParams
from beacon.db.schemas import DefaultSchemas
from beacon.db.utils import get_documents, query_id, get_count, get_filtering_documents, get_docs_by_response_type
from beacon.db.g_variants import apply_request_parameters
from beacon.request.model import RequestParams

LOG = logging.getLogger(__name__)
Expand All @@ -16,31 +17,22 @@ def include_resultset_responses(query: Dict[str, List[dict]], qparams: RequestPa
include = qparams.query.include_resultset_responses
return query

def apply_request_parameters(query: Dict[str, List[dict]], qparams: RequestParams):
    """Write a ``$text``/``$search`` clause into ``query`` from the request parameters.

    Every parameter value replaces ``query["$text"]``, so only the value
    iterated last remains in the returned query. A value containing a comma
    is split on commas and each piece is wrapped in double quotes, the quoted
    pieces being concatenated with no separator; any other value is used
    unchanged as the ``$search`` string.

    Args:
        query: Query dict, updated in place.
        qparams: Request whose ``query.request_parameters`` mapping is read.

    Returns:
        The same ``query`` object that was passed in.
    """
    LOG.debug("Request parameters len = {}".format(len(qparams.query.request_parameters)))
    for _name, raw_value in qparams.query.request_parameters.items():
        # The clause is reset before the value is inspected: each parameter
        # overwrites whatever the previous one stored.
        text_clause = {}
        query["$text"] = text_clause
        if ',' not in raw_value:
            text_clause["$search"] = raw_value
            continue
        text_clause["$search"] = ''.join(f'"{term}"' for term in raw_value.split(','))
    return query

def get_analyses(entry_id: Optional[str], qparams: RequestParams, dataset: str):
collection = 'analyses'
mongo_collection = client.beacon.analyses
match_list=[]
query = apply_request_parameters({}, qparams)
matching = apply_request_parameters({}, qparams)
match_list.append(matching)
match_big={}
match_big["$match"]=match_list[0]
LOG.debug(qparams.query.filters)
query = apply_filters(query, qparams.query.filters, collection, {})
parameters_as_filters=False
query_parameters, parameters_as_filters = apply_request_parameters({}, qparams)
LOG.debug(query_parameters)
LOG.debug(parameters_as_filters)
if parameters_as_filters == True and query_parameters != {'$and': []}:
query, parameters_as_filters = apply_request_parameters({}, qparams)
query_parameters={}
elif query_parameters != {'$and': []}:
query=query_parameters
elif query_parameters == {'$and': []}:
query_parameters = {}
query={}
query = apply_filters(query, qparams.query.filters, collection, query_parameters)
query = include_resultset_responses(query, qparams)
schema = DefaultSchemas.ANALYSES
#with open("beacon/request/datasets.yml", 'r') as datasets_file:
Expand All @@ -59,8 +51,7 @@ def get_analysis_with_id(entry_id: Optional[str], qparams: RequestParams, datase
collection = 'analyses'
idq="biosampleId"
mongo_collection = client.beacon.analyses
query = apply_request_parameters({}, qparams)
query = apply_filters(query, qparams.query.filters, collection, {})
query = apply_filters({}, qparams.query.filters, collection, {})
query = query_id(query, entry_id)
query = include_resultset_responses(query, qparams)
schema = DefaultSchemas.ANALYSES
Expand All @@ -78,7 +69,6 @@ def get_variants_of_analysis(entry_id: Optional[str], qparams: RequestParams, da
collection = 'analyses'
mongo_collection = client.beacon.genomicVariations
query = {"$and": [{"id": entry_id}]}
query = apply_request_parameters(query, qparams)
query = apply_filters(query, qparams.query.filters, collection, {})
analysis_ids = client.beacon.analyses \
.find_one(query, {"biosampleId": 1, "_id": 0})
Expand Down
43 changes: 15 additions & 28 deletions beacon/db/biosamples.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from beacon.db.filters import *
from beacon.db.schemas import DefaultSchemas
from beacon.db.utils import *
from beacon.db.g_variants import apply_request_parameters
from beacon.request.model import RequestParams

LOG = logging.getLogger(__name__)
Expand All @@ -16,32 +17,22 @@ def include_resultset_responses(query: Dict[str, List[dict]], qparams: RequestPa
LOG.debug("Include Resultset Responses = {}".format(qparams.query.include_resultset_responses))
return query

def apply_request_parameters(query: Dict[str, List[dict]], qparams: RequestParams):
    """Set ``query["$text"]["$search"]`` from the request parameters.

    The ``$text`` entry is rebuilt for every parameter, so the last value
    iterated is the one left in the query. Comma-separated values become a
    sequence of double-quoted terms joined without any separator; values
    without a comma are stored as they are.

    Args:
        query: Query dict, modified in place.
        qparams: Request whose ``query.request_parameters`` mapping is read.

    Returns:
        The ``query`` dict that was passed in.
    """
    LOG.debug("Request parameters len = {}".format(len(qparams.query.request_parameters)))
    for _key, value in qparams.query.request_parameters.items():
        # Start from an empty clause on each pass; earlier parameters are discarded.
        query["$text"] = {}
        if ',' in value:
            quoted_terms = ['"' + piece + '"' for piece in value.split(',')]
            search_string = ''.join(quoted_terms)
        else:
            search_string = value
        query["$text"]["$search"] = search_string
    return query


def get_biosamples(entry_id: Optional[str], qparams: RequestParams, dataset: str):
collection = 'biosamples'
mongo_collection = client.beacon.biosamples
query = apply_request_parameters({}, qparams)
match_list=[]
matching = apply_request_parameters({}, qparams)
match_list.append(matching)
match_big={}
match_big["$match"]=match_list[0]
LOG.debug(qparams.query.filters)
query = apply_filters(query, qparams.query.filters, collection, {})
parameters_as_filters=False
query_parameters, parameters_as_filters = apply_request_parameters({}, qparams)
LOG.debug(query_parameters)
LOG.debug(parameters_as_filters)
if parameters_as_filters == True and query_parameters != {'$and': []}:
query, parameters_as_filters = apply_request_parameters({}, qparams)
query_parameters={}
elif query_parameters != {'$and': []}:
query=query_parameters
elif query_parameters == {'$and': []}:
query_parameters = {}
query={}
query = apply_filters(query, qparams.query.filters, collection, query_parameters)
query = include_resultset_responses(query, qparams)
schema = DefaultSchemas.BIOSAMPLES
#with open("beacon/request/datasets.yml", 'r') as datasets_file:
Expand All @@ -60,8 +51,7 @@ def get_biosamples(entry_id: Optional[str], qparams: RequestParams, dataset: str
def get_biosample_with_id(entry_id: Optional[str], qparams: RequestParams, dataset: str):
collection = 'biosamples'
mongo_collection = client.beacon.biosamples
query = apply_request_parameters({}, qparams)
query = apply_filters(query, qparams.query.filters, collection, {})
query = apply_filters({}, qparams.query.filters, collection, {})
query = query_id(query, entry_id)
query = include_resultset_responses(query, qparams)
schema = DefaultSchemas.BIOSAMPLES
Expand All @@ -80,7 +70,6 @@ def get_variants_of_biosample(entry_id: Optional[str], qparams: RequestParams, d
collection = 'g_variants'
mongo_collection = client.beacon.genomicVariations
query = {"caseLevelData.biosampleId": entry_id}
query = apply_request_parameters(query, qparams)
query = apply_filters(query, qparams.query.filters, collection, {})
query = include_resultset_responses(query, qparams)
schema = DefaultSchemas.GENOMICVARIATIONS
Expand All @@ -100,7 +89,6 @@ def get_analyses_of_biosample(entry_id: Optional[str], qparams: RequestParams, d
collection = 'biosamples'
mongo_collection = client.beacon.analyses
query = {"biosampleId": entry_id}
query = apply_request_parameters(query, qparams)
query = apply_filters(query, qparams.query.filters, collection, {})
query = include_resultset_responses(query, qparams)
LOG.debug(query)
Expand All @@ -120,7 +108,6 @@ def get_runs_of_biosample(entry_id: Optional[str], qparams: RequestParams, datas
collection = 'biosamples'
mongo_collection = client.beacon.runs
query = {"individualId": entry_id}
query = apply_request_parameters(query, qparams)
query = apply_filters(query, qparams.query.filters, collection, {})
query = include_resultset_responses(query, qparams)
schema = DefaultSchemas.RUNS
Expand Down
62 changes: 60 additions & 2 deletions beacon/db/filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -284,6 +284,7 @@ def apply_filters(query: dict, filters: List[dict], collection: str, query_param
LOG.debug(total_query)
try:
if len(request_parameters["$or"]) >= 1:
LOG.debug('heeey')
array_of_biosamples2=[]
array_of_biosamples=[]
for reqpam in request_parameters["$or"]:
Expand Down Expand Up @@ -328,7 +329,7 @@ def apply_filters(query: dict, filters: List[dict], collection: str, query_param
total_query["$and"]=[]
total_query["$and"].append(partial_query)
except Exception:
if collection != 'g_variants':
if collection == 'individuals':
partial_query = {}
LOG.debug(request_parameters)
biosample_ids = client.beacon.genomicVariations.find(request_parameters, {"caseLevelData.biosampleId": 1, "_id": 0})
Expand Down Expand Up @@ -364,7 +365,7 @@ def apply_filters(query: dict, filters: List[dict], collection: str, query_param
partial_query['$or']=def_list
if def_list != []:
try:
partial_query['$or'].def_list
partial_query['$or']=def_list
except Exception:
partial_query={}
partial_query['$or']=def_list
Expand All @@ -374,6 +375,62 @@ def apply_filters(query: dict, filters: List[dict], collection: str, query_param
total_query["$and"]=[]
total_query["$and"].append(partial_query)
#LOG.debug(query)
elif collection == 'biosamples':
partial_query = {}
LOG.debug(request_parameters)
biosample_ids = client.beacon.genomicVariations.find(request_parameters, {"caseLevelData.biosampleId": 1, "_id": 0})
LOG.debug(biosample_ids)
final_id='id'
original_id="biosampleId"
def_list=[]
partial_query['$or']=[]
for iditem in biosample_ids:
if isinstance(iditem, dict):
if iditem != {}:
for id_item in iditem['caseLevelData']:
if id_item != {}:
new_id={}
new_id[final_id] = id_item[original_id]
try:
#LOG.debug(new_id)
partial_query['$or'].append(new_id)
except Exception:
def_list.append(new_id)
LOG.debug(partial_query)
try:
total_query["$and"].append(partial_query)
except Exception:
total_query["$and"]=[]
total_query["$and"].append(partial_query)
#LOG.debug(query)
elif collection == 'analyses' or collection == 'runs':
partial_query = {}
LOG.debug(request_parameters)
biosample_ids = client.beacon.genomicVariations.find(request_parameters, {"caseLevelData.biosampleId": 1, "_id": 0})
LOG.debug(biosample_ids)
final_id='biosampleId'
original_id="biosampleId"
def_list=[]
partial_query['$or']=[]
for iditem in biosample_ids:
if isinstance(iditem, dict):
if iditem != {}:
for id_item in iditem['caseLevelData']:
if id_item != {}:
new_id={}
new_id[final_id] = id_item[original_id]
try:
#LOG.debug(new_id)
partial_query['$or'].append(new_id)
except Exception:
def_list.append(new_id)
LOG.debug(partial_query)
try:
total_query["$and"].append(partial_query)
except Exception:
total_query["$and"]=[]
total_query["$and"].append(partial_query)
#LOG.debug(query)
else:
try:
total_query["$and"].append(request_parameters)
Expand Down Expand Up @@ -672,6 +729,7 @@ def apply_alphanumeric_filter(query: dict, filter: AlphanumericFilter, collectio
#LOG.debug(filter.id)
if collection == 'g_variants' and scope != 'individual' and scope != 'run':
if filter.id == "identifiers.genomicHGVSId":
LOG.debug('hoaaaa')
list_chromosomes = ['1','2','3','4','5','6','7','8','9','10','11','12','13','14','15','16','17','18','19','20','21','22']
dict_regex={}
if filter.value == 'GRCh38':
Expand Down
41 changes: 39 additions & 2 deletions beacon/db/g_variants.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@
"geneId": "molecularAttributes.geneIds",
"genomicAlleleShortForm": "identifiers.genomicHGVSId",
"aminoacidChange": "molecularAttributes.aminoacidChanges",
"clinicalRelevance": "caseLevelData.clinicalInterpretations.clinicalRelevance"
"clinicalRelevance": "caseLevelData.clinicalInterpretations.clinicalRelevance",
"mateName": "identifiers.genomicHGVSId"
}

def include_resultset_responses(query: Dict[str, List[dict]], qparams: RequestParams):
Expand Down Expand Up @@ -89,6 +90,8 @@ def apply_request_parameters(query: Dict[str, List[dict]], qparams: RequestParam
for reqparam in qparams.query.request_parameters:
subquery={}
subquery["$and"] = []
subqueryor={}
subqueryor["$or"] = []
for k, v in reqparam.items():
if k == "start":
if isinstance(v, str):
Expand Down Expand Up @@ -120,6 +123,14 @@ def apply_request_parameters(query: Dict[str, List[dict]], qparams: RequestParam
), collection))
except KeyError:
raise web.HTTPNotFound
elif k == "mateName" or k == 'referenceName':
try:
subqueryor["$or"].append(apply_alphanumeric_filter({}, AlphanumericFilter(
id=VARIANTS_PROPERTY_MAP[k],
value='max'+v
), collection))
except KeyError:
raise web.HTTPNotFound
elif k != 'filters':
try:
subquery["$and"].append(apply_alphanumeric_filter({}, AlphanumericFilter(
Expand All @@ -141,8 +152,18 @@ def apply_request_parameters(query: Dict[str, List[dict]], qparams: RequestParam
v_dict['id']=id
qparams.query.filters.append(v_dict)
return query, True
query["$or"].append(subquery)
try:
LOG.debug(subqueryor)
if subqueryor["$or"] != []:
subquery["$and"].append(subqueryor)
except Exception:
pass
query["$or"].append(subquery)
else:
subquery={}
subquery["$and"] = []
subqueryor={}
subqueryor["$or"] = []
for k, v in qparams.query.request_parameters.items():
if k == "start":
if isinstance(v, str):
Expand Down Expand Up @@ -174,6 +195,14 @@ def apply_request_parameters(query: Dict[str, List[dict]], qparams: RequestParam
), collection))
except KeyError:
raise web.HTTPNotFound
elif k == "mateName" or k == 'referenceName':
try:
subqueryor["$or"].append(apply_alphanumeric_filter({}, AlphanumericFilter(
id=VARIANTS_PROPERTY_MAP[k],
value=v
), collection))
except KeyError:
raise web.HTTPNotFound
elif k != 'filters':
try:
query["$and"].append(apply_alphanumeric_filter({}, AlphanumericFilter(
Expand All @@ -195,6 +224,14 @@ def apply_request_parameters(query: Dict[str, List[dict]], qparams: RequestParam
v_dict['id']=id
qparams.query.filters.append(v_dict)
return query, True
try:
LOG.debug(subqueryor)
if subqueryor["$or"] != []:
subquery["$and"].append(subqueryor)
except Exception:
pass
if subquery["$and"] != []:
query["$and"].append(subquery)


return query, False
Expand Down
Loading

0 comments on commit 9ee1246

Please sign in to comment.