Skip to content

Commit

Permalink
Merge pull request #17 from EGA-archive/caseLevelData
Browse files Browse the repository at this point in the history
caseLevelData reimplemented
  • Loading branch information
costero-e authored Nov 29, 2024
2 parents 639a1b5 + e90f157 commit ce4ecdc
Show file tree
Hide file tree
Showing 7 changed files with 45 additions and 60 deletions.
14 changes: 5 additions & 9 deletions beacon/connections/mongo/analyses.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,17 +67,13 @@ def get_variants_of_analysis(self, entry_id: Optional[str], qparams: RequestPara
if bioid == analysis_ids["biosampleId"]:
break
position+=1
if position == len(bioids):
schema = DefaultSchemas.GENOMICVARIATIONS
return schema, 0, -1, None, dataset
position=str(position)
position1="^"+position+","
position2=","+position+","
position3=","+position+"$"
query_cl={ "$or": [
{"biosampleIds": {"$regex": position1}},
{"biosampleIds": {"$regex": position2}},
{"biosampleIds": {"$regex": position3}}
]}
query_cl={ position: "y", "datasetId": dataset}
string_of_ids = client.beacon.caseLevelData \
.find(query_cl, {"id": 1, "_id": 0})
.find(query_cl, {"id": 1, "_id": 0}).limit(qparams.query.pagination.limit).skip(qparams.query.pagination.skip)
HGVSIds=list(string_of_ids)
query={}
queryHGVS={}
Expand Down
14 changes: 5 additions & 9 deletions beacon/connections/mongo/biosamples.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,17 +62,13 @@ def get_variants_of_biosample(self, entry_id: Optional[str], qparams: RequestPar
if bioid == entry_id:
break
position+=1
if position == len(bioids):
schema = DefaultSchemas.GENOMICVARIATIONS
return schema, 0, -1, None, dataset
position=str(position)
position1="^"+position+","
position2=","+position+","
position3=","+position+"$"
query_cl={ "$or": [
{"biosampleIds": {"$regex": position1}},
{"biosampleIds": {"$regex": position2}},
{"biosampleIds": {"$regex": position3}}
]}
query_cl={ position: "y", "datasetId": dataset}
string_of_ids = client.beacon.caseLevelData \
.find(query_cl, {"id": 1, "_id": 0})
.find(query_cl, {"id": 1, "_id": 0}).limit(qparams.query.pagination.limit).skip(qparams.query.pagination.skip)
HGVSIds=list(string_of_ids)
query={}
queryHGVS={}
Expand Down
46 changes: 23 additions & 23 deletions beacon/connections/mongo/g_variants.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,16 +80,16 @@ def get_biosamples_of_variant(self, entry_id: Optional[str], qparams: RequestPar
HGVSId=HGVSIds[0]["identifiers"]["genomicHGVSId"]
queryHGVSId={"datasetId": dataset, "id": HGVSId}
string_of_ids = client.beacon.caseLevelData \
.find(queryHGVSId, {"biosampleIds": 1, "_id": 0})
.find(queryHGVSId)
targets = client.beacon.targets \
.find({"datasetId": dataset}, {"biosampleIds": 1, "_id": 0})
.find({"datasetId": HGVSDataset}, {"biosampleIds": 1, "_id": 0})
targets=list(targets)
list_of_targets=targets[0]["biosampleIds"]
list_of_positions_strings= string_of_ids[0]['biosampleIds'].split(',')
list_of_positions_strings= string_of_ids[0]
biosampleIds=[]
for position in list_of_positions_strings:
if position != '':
biosampleIds.append(list_of_targets[int(position)])
for key, value in list_of_positions_strings.items():
if key != 'datasetId' and key != 'id' and key != '_id':
biosampleIds.append(list_of_targets[int(key)])
finalids=biosampleIds
try:
finalids=[]
Expand Down Expand Up @@ -131,16 +131,16 @@ def get_runs_of_variant(self, entry_id: Optional[str], qparams: RequestParams, d
HGVSId=HGVSIds[0]["identifiers"]["genomicHGVSId"]
queryHGVSId={"datasetId": dataset, "id": HGVSId}
string_of_ids = client.beacon.caseLevelData \
.find(queryHGVSId, {"biosampleIds": 1, "_id": 0})
.find(queryHGVSId)
targets = client.beacon.targets \
.find({"datasetId": dataset}, {"biosampleIds": 1, "_id": 0})
.find({"datasetId": HGVSDataset}, {"biosampleIds": 1, "_id": 0})
targets=list(targets)
list_of_targets=targets[0]["biosampleIds"]
list_of_positions_strings= string_of_ids[0]['biosampleIds'].split(',')
list_of_positions_strings= string_of_ids[0]
biosampleIds=[]
for position in list_of_positions_strings:
if position != '':
biosampleIds.append(list_of_targets[int(position)])
for key, value in list_of_positions_strings.items():
if key != 'datasetId' and key != 'id' and key != '_id':
biosampleIds.append(list_of_targets[int(key)])
try:
finalids=[]
for bioid in biosampleIds:
Expand Down Expand Up @@ -181,16 +181,16 @@ def get_analyses_of_variant(self, entry_id: Optional[str], qparams: RequestParam
HGVSId=HGVSIds[0]["identifiers"]["genomicHGVSId"]
queryHGVSId={"datasetId": dataset, "id": HGVSId}
string_of_ids = client.beacon.caseLevelData \
.find(queryHGVSId, {"biosampleIds": 1, "_id": 0})
.find(queryHGVSId)
targets = client.beacon.targets \
.find({"datasetId": dataset}, {"biosampleIds": 1, "_id": 0})
.find({"datasetId": HGVSDataset}, {"biosampleIds": 1, "_id": 0})
targets=list(targets)
list_of_targets=targets[0]["biosampleIds"]
list_of_positions_strings= string_of_ids[0]['biosampleIds'].split(',')
list_of_positions_strings= string_of_ids[0]
biosampleIds=[]
for position in list_of_positions_strings:
if position != '':
biosampleIds.append(list_of_targets[int(position)])
for key, value in list_of_positions_strings.items():
if key != 'datasetId' and key != 'id' and key != '_id':
biosampleIds.append(list_of_targets[int(key)])
try:
finalids=[]
for bioid in biosampleIds:
Expand Down Expand Up @@ -231,16 +231,16 @@ def get_individuals_of_variant(self, entry_id: Optional[str], qparams: RequestPa
HGVSId=HGVSIds[0]["identifiers"]["genomicHGVSId"]
queryHGVSId={"datasetId": HGVSDataset, "id": HGVSId}
string_of_ids = client.beacon.caseLevelData \
.find(queryHGVSId, {"biosampleIds": 1, "_id": 0})
.find(queryHGVSId)
targets = client.beacon.targets \
.find({"datasetId": HGVSDataset}, {"biosampleIds": 1, "_id": 0})
targets=list(targets)
list_of_targets=targets[0]["biosampleIds"]
list_of_positions_strings= string_of_ids[0]['biosampleIds'].split(',')
list_of_positions_strings= string_of_ids[0]
biosampleIds=[]
for position in list_of_positions_strings:
if position != '':
biosampleIds.append(list_of_targets[int(position)])
for key, value in list_of_positions_strings.items():
if key != 'datasetId' and key != 'id' and key != '_id':
biosampleIds.append(list_of_targets[int(key)])
try:
finalquery={}
finalquery["$or"]=[]
Expand Down
14 changes: 5 additions & 9 deletions beacon/connections/mongo/individuals.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,17 +62,13 @@ def get_variants_of_individual(self, entry_id: Optional[str], qparams: RequestPa
if bioid == entry_id:
break
position+=1
if position == len(bioids):
schema = DefaultSchemas.GENOMICVARIATIONS
return schema, 0, -1, None, dataset
position=str(position)
position1="^"+position+","
position2=","+position+","
position3=","+position+"$"
query_cl={ "$or": [
{"biosampleIds": {"$regex": position1}},
{"biosampleIds": {"$regex": position2}},
{"biosampleIds": {"$regex": position3}}
]}
query_cl={ position: "y", "datasetId": dataset}
string_of_ids = client.beacon.caseLevelData \
.find(query_cl, {"id": 1, "_id": 0})
.find(query_cl, {"id": 1, "_id": 0}).limit(qparams.query.pagination.limit).skip(qparams.query.pagination.skip)
HGVSIds=list(string_of_ids)
query={}
queryHGVS={}
Expand Down
1 change: 1 addition & 0 deletions beacon/connections/mongo/reindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@
#client.beacon.genomicVariations.create_index([("datasetId", 1), ("variation.location.interval.start.value", 1), ("variation.referenceBases", 1), ("variation.alternateBases", 1)])
client.beacon.genomicVariations.create_index([("molecularAttributes.geneIds", 1), ("variation.variantType", 1)])
client.beacon.caseLevelData.create_index([("id", 1), ("datasetId", 1)])
client.beacon.caseLevelData.create_index([("datasetId", 1)])
#client.beacon.individuals.create_index([("$**", "text")])
#client.beacon.runs.create_index([("$**", "text")])
#collection_name = client.beacon.analyses
Expand Down
14 changes: 5 additions & 9 deletions beacon/connections/mongo/runs.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,17 +63,13 @@ def get_variants_of_run(self, entry_id: Optional[str], qparams: RequestParams, d
if bioid == run_ids["biosampleId"]:
break
position+=1
if position == len(bioids):
schema = DefaultSchemas.GENOMICVARIATIONS
return schema, 0, -1, None, dataset
position=str(position)
position1="^"+position+","
position2=","+position+","
position3=","+position+"$"
query_cl={ "$or": [
{"biosampleIds": {"$regex": position1}},
{"biosampleIds": {"$regex": position2}},
{"biosampleIds": {"$regex": position3}}
]}
query_cl={ position: "y", "datasetId": dataset}
string_of_ids = client.beacon.caseLevelData \
.find(query_cl, {"id": 1, "_id": 0})
.find(query_cl, {"id": 1, "_id": 0}).limit(qparams.query.pagination.limit).skip(qparams.query.pagination.skip)
HGVSIds=list(string_of_ids)
query={}
queryHGVS={}
Expand Down
2 changes: 1 addition & 1 deletion ri-tools/conf/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
#### VCF Conversion config parameters ####
allele_frequency=1 # enter a float number; leave it at 1 if you want to convert all the variants
reference_genome='GRCh37' # Choose one of: NCBI36, GRCh37, GRCh38
datasetId='COVID_pop11_fin_2'
datasetId='CINECA_synthetic_cohort_EUROPE_UK1'
case_level_data=True
num_rows=7000000

Expand Down

0 comments on commit ce4ecdc

Please sign in to comment.