From 928173c9144393dee2d2b8d22fab51ffab024f2b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Koray=20K=C4=B1rl=C4=B1?= Date: Tue, 23 Jun 2020 21:11:46 -0400 Subject: [PATCH 01/54] increase line limit --- setup.cfg | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.cfg b/setup.cfg index bdd62b1db5..93fe587234 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,8 +1,8 @@ [flake8] -max-line-length = 100 +max-line-length = 120 [coverage:run] branch = True -omit = +omit = */encode_schemas/* */encode_types/* */tests/* From d35a36bb29fc1f462e82ccb0f4c28bd7c6549c8c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Koray=20K=C4=B1rl=C4=B1?= Date: Tue, 23 Jun 2020 21:12:05 -0400 Subject: [PATCH 02/54] new relationship calc prop --- src/encoded/types/family.py | 384 ++++++++++++++++++++++++++++++++++++ 1 file changed, 384 insertions(+) diff --git a/src/encoded/types/family.py b/src/encoded/types/family.py index a11897eaf0..39cfc705ff 100644 --- a/src/encoded/types/family.py +++ b/src/encoded/types/family.py @@ -104,6 +104,388 @@ class Family(Item): "analysis_groups.completed_processes", ] + @calculated_property(schema={ + "title": "Relationships", + "description": "Relationships to proband.", + "type": "array", + "items": { + "title": "Relation", + "type": "object", + "properties": { + "individual": { + "title": "Individual", + "type": "string" + }, + "association": { + "title": "Individual", + "type": "string", + "enum": [ + "paternal", + "maternal" + ] + }, + "sex": { + "title": "Individual", + "type": "string", + "enum": [ + "F", + "M", + "U" + ] + }, + "relationship": { + "title": "Relationship", + "type": "string", + "enum": ['proband', + 'father', + 'mother', + 'brother', + 'sister', + 'sibling', + 'half-brother', + 'half-sister', + 'half-sibling', + 'wife', + 'husband', + 'grandson', + 'granddaughter', + 'grandchild', + 'grandmother', + 'grandfather', + 'great-grandmother', + 'great-grandfather', + 'great-great-grandmother', + 'great-great-grandfather', + 'nephew', + 'niece', + 'nibling', + 'grandnephew', + 'grandniece', + 'grandnibling', + 'uncle', + 'aunt', + 'auncle', + 'granduncle', + 'grandaunt', + 'grandauncle', + 'cousin', + 'cousin once removed (descendant)', + 'cousin twice removed (descendant)', + 'cousin once removed (ascendant)', + 'second cousin', + 'second cousin once removed (descendant)', + 'second cousin twice removed (descendant)', + 'family-in-law', + 'extended-family' + ] + } + } + } + }) + def relationships(self, request, proband=None, members=None): + """Calculate relationships""" + def generate_ped(all_props, proband, family_id): + """Format family information into ped file + https://gatk.broadinstitute.org/hc/en-us/articles/360035531972 + This might be useful in the future for compatibility + Ped file columns are + *Family ID + *Individual ID + *Paternal ID + *Maternal ID + *Sex (1=male; 2=female; U=unknown) + *Phenotype (-9 missing 0 missing 1 unaffected 2 affected) + (at the moment only on proband has 2 on phenotype) + """ + ped_content = """""" + gender_map = {'M': '1', 'F': '2', 'U': '3'} + for props in all_props: + # all members have unknown phenotype by default + phenotype = '0' + member_id = props['accession'] + + def parent_id(properties, field): + 'extract parent accession from member info' + id = properties.get(field) + if id: + return id.split('/')[2] + else: + return '' + paternal_id = parent_id(props, 'father') + maternal_id = parent_id(props, 'mother') + sex = props.get('sex', 'U') + ped_sex = gender_map.get(sex, 'U') + # if member is proband, add 
phenotupe + if props['@id'] == proband: + phenotype = '2' + line_ele = [family_id, member_id, paternal_id, maternal_id, ped_sex, phenotype] + ped_content += '\t'.join(line_ele) + '\n' + return ped_content + + def extract_vectors(ped_content): + """given a ped file content, extract all primary relationship pairs + keys are listed in primary_vectors""" + fathers = [] + mothers = [] + daughters = [] + sons = [] + children = [] # when the gender of the kid is not known + for a_line in ped_content.split('\n'): + if not a_line: + continue + fam, ind, father, mother, sex, ph = a_line.split('\t') + if father: + fathers.append([father, ind]) + if sex == '1': + sons.append([ind, father]) + elif sex == '2': + daughters.append([ind, father]) + else: + children.append([ind, father]) + if mother: + mothers.append([mother, ind]) + if sex == '1': + sons.append([ind, mother]) + elif sex == '2': + daughters.append([ind, mother]) + else: + children.append([ind, mother]) + primary_vectors = { + 'fathers': fathers, + 'mothers': mothers, + 'daughters': daughters, + 'sons': sons, + 'children': children # when the gender of the kid is not known + } + return primary_vectors + + def construct_links(primary_vectors, seed): + """Given the primary vectors, constructs linkages for each individual + and filters for the shortest link + Use first letter of primary vector keys to construct these links + This linkages are calcualted from the seed, often starts with proband + seed should be accession""" + # starting pack + needs_analysis = [[seed, 'p'], ] + analyzed = [] + all_links = {seed: ['p', ]} + # loop overy every set of new collected individuals + while needs_analysis: + collect_connected = [] + for an_ind, starting_tag in needs_analysis: + if an_ind in analyzed: + continue + analyzed.append(an_ind) + if an_ind not in all_links: + print('should not happen') + for a_key in primary_vectors: + # extend the link list with this letter + extend_tag = a_key[0] + my_links = [i for i in primary_vectors[a_key] if i[1] == an_ind] + for a_link in my_links: + linked_ind = a_link[0] + new_tag = starting_tag + '-' + extend_tag + if linked_ind not in all_links: + all_links[linked_ind] = [new_tag, ] + else: + all_links[linked_ind].append(new_tag) + if linked_ind not in analyzed: + collect_connected.append([linked_ind, new_tag]) + needs_analysis = collect_connected + + # return the shortest links + def return_shortest(a_list): + """filter list for shortest items.""" + a_list = list(set(a_list)) + minimum = min(map(len, a_list)) + return [i for i in a_list if len(i) == minimum] + filtered_links = {} + for individual in all_links: + filtered_links[individual] = return_shortest(all_links[individual]) + return filtered_links + + def relationships_vocabulary(links): + """Convert links to relationships. 
+ Nomenclature guided by + https://www.devonfhs.org.uk/pdfs/tools/eichhorn-rlationship-chart.pdf""" + # return a nested list of [acc, calculated_relation, association] + Converter = { + "p": "proband", + "p-f": "father", "p-m": "mother", "p-d": "daughter", "p-s": "son", "p-c": "child", + "p-f-s": "brother", "p-m-s": "brother", + "p-f-d": "sister", "p-m-d": "sister", + "p-f-c": "sibling", "p-m-c": "sibling", + "p-d-m": "wife", "p-s-m": "wife", "p-c-m": "wife", + "p-d-f": "husband", "p-s-f": "husband", "p-c-f": "husband", + } + # add grandchildren + all_children = [i for i in Converter if Converter[i] in ['daughter', 'son', 'child']] + for child in all_children: + Converter[child + '-s'] = 'grandson' + Converter[child + '-d'] = 'granddaughter' + Converter[child + '-c'] = 'grandchild' + # add niece nephew nibling (we can also add sister brother in law here but will skip non blood relatives) + all_siblings = [i for i in Converter if Converter[i] in ['brother', 'sister', 'sibling']] + for sib in all_siblings: + Converter[sib + '-s'] = 'nephew' + Converter[sib + '-d'] = 'niece' + Converter[sib + '-c'] = 'nibling' + # add grand niece nephew nibling + all_niblings = [i for i in Converter if Converter[i] in ['nephew', 'niece', 'nibling']] + for nib in all_niblings: + Converter[nib + '-s'] = 'grandnephew' + Converter[nib + '-d'] = 'grandniece' + Converter[nib + '-c'] = 'grandnibling' + # add Grandparents + all_parents = [i for i in Converter if Converter[i] in ['mother', 'father']] + for parent in all_parents: + Converter[parent + '-m'] = 'grandmother' + Converter[parent + '-f'] = 'grandfather' + # add Great-grandparents Uncle Aunt Auncle + all_g_parents = [i for i in Converter if Converter[i] in ['grandmother', 'grandfather']] + for g_parent in all_g_parents: + Converter[g_parent + '-m'] = 'great-grandmother' + Converter[g_parent + '-f'] = 'great-grandfather' + Converter[g_parent + '-s'] = 'uncle' + Converter[g_parent + '-d'] = 'aunt' + Converter[g_parent + '-c'] = 'auncle' + # add Great-great-grandparents granduncle grandaunt grandauncle + all_gg_parents = [i for i in Converter if Converter[i] in ['great-grandmother', 'great-grandfather']] + for gg_parent in all_gg_parents: + Converter[gg_parent + '-m'] = 'great-great-grandmother' + Converter[gg_parent + '-f'] = 'great-great-grandfather' + Converter[gg_parent + '-s'] = 'granduncle' + Converter[gg_parent + '-d'] = 'grandaunt' + Converter[gg_parent + '-c'] = 'grandauncle' + # add Cousin + all_auncle = [i for i in Converter if Converter[i] in ['uncle', 'aunt', 'auncle']] + for auncle in all_auncle: + Converter[auncle + '-s'] = 'cousin' + Converter[auncle + '-d'] = 'cousin' + Converter[auncle + '-c'] = 'cousin' + # add Cousin once removed (descendant) + all_cousins = [i for i in Converter if Converter[i] in ['cousin']] + for cousin in all_cousins: + Converter[cousin + '-s'] = 'cousin once removed (descendant)' + Converter[cousin + '-d'] = 'cousin once removed (descendant)' + Converter[cousin + '-c'] = 'cousin once removed (descendant)' + # add Cousin twice removed (descendant) + all_cousins_o_r = [i for i in Converter if Converter[i] in ['cousin once removed (descendant)']] + for cousin in all_cousins_o_r: + Converter[cousin + '-s'] = 'cousin twice removed (descendant)' + Converter[cousin + '-d'] = 'cousin twice removed (descendant)' + Converter[cousin + '-c'] = 'cousin twice removed (descendant)' + # add First cousin once removed (ascendant) + all_g_auncle = [i for i in Converter if Converter[i] in ['granduncle', 'grandaunt', 'grandauncle']] + for 
g_auncle in all_g_auncle: + Converter[g_auncle + '-s'] = 'cousin once removed (ascendant)' + Converter[g_auncle + '-d'] = 'cousin once removed (ascendant)' + Converter[g_auncle + '-c'] = 'cousin once removed (ascendant)' + # add Second Cousin + all_cora = [i for i in Converter if Converter[i] in ['cousin once removed (ascendant)']] + for cora in all_cora: + Converter[cora + '-s'] = 'second cousin' + Converter[cora + '-d'] = 'second cousin' + Converter[cora + '-c'] = 'second cousin' + # add Second Cousin once removed + all_s_cousins = [i for i in Converter if Converter[i] in ['second cousin']] + for s_cousin in all_s_cousins: + Converter[s_cousin + '-s'] = 'second cousin once removed (descendant)' + Converter[s_cousin + '-d'] = 'second cousin once removed (descendant)' + Converter[s_cousin + '-c'] = 'second cousin once removed (descendant)' + # add Second Cousin twice removed + all_s_cousins_o_r = [i for i in Converter if Converter[i] in ['second cousin once removed (descendant)']] + for s_cousin_o_r in all_s_cousins_o_r: + Converter[s_cousin_o_r + '-s'] = 'second cousin twice removed (descendant)' + Converter[s_cousin_o_r + '-d'] = 'second cousin twice removed (descendant)' + Converter[s_cousin_o_r + '-c'] = 'second cousin twice removed (descendant)' + + # calculate direction change (if more then 2, not blood relative) + def count_direction_change(relation_tag): + """If you are going down from proband, you need to keep going down + If you are going up from proband, you can change direction once + If you are out of these cases, you are not blood relative + We make an exception for the Husband and Wife""" + up = ['f', 'm'] + down = ['d', 's', 'c'] + state = 1 + changes = 0 + for a_letter in relation_tag.split('-'): + if a_letter in up: + new_state = 1 + elif a_letter in down: + new_state = -1 + else: # p + continue + if state == new_state: + continue + else: + state = new_state + changes += 1 + return changes + + relations = [] + for i in links: + association = '' + val = links[i][0] + if val in Converter: + relation = Converter[val] + # calculate half relation for siblings + if relation in ['sister', 'brother', 'sibling']: + # if they are full siblings, they should carry two link of same size + # if not, they are half + if len(links[i]) == 1: + relation = 'half-' + relation + # for extended family calculate paternal/maternal + if len(val) > 4: + # calculate for family that starts by going above 2 levels + if val[2:5] in ['m-m', 'm-f', 'f-f', 'f-m']: + association_pointer = val[2] + if association_pointer == 'f': + association = 'paternal' + elif association_pointer == 'm': + association = 'maternal' + else: + dir_change = count_direction_change(val) + if dir_change > 1: + relation = 'family-in-law' + else: + relation = 'extended-family' + relations.append([i, relation, association]) + return relations + + # Start of the function + # empty list to accumulate results + relations = [] + # we need both the proband and the members to calculate + if not proband or not members: + return relations + family_id = self.properties['accession'] + # collect members properties + all_props = [] + for a_member in members: + props = get_item_or_none(request, a_member, 'individuals') + all_props.append(props) + # convert to ped_file format + ped_text = generate_ped(all_props, proband, family_id) + primary_vectors = extract_vectors(ped_text) + proband_acc = proband.split('/')[2] + links = construct_links(primary_vectors, proband_acc) + relations = relationships_vocabulary(links) + results = [] + for rel in 
relations: + temp = {"individual": '', + "sex": '', + "relationship": ''} + temp['individual'] = rel[0] + temp['relationship'] = rel[1] + if rel[2]: + temp['association'] = rel[2] + sex = [i for i in all_props if i['accession'] == rel[0]][0].get('sex', 'U') + temp['sex'] = sex + results.append(temp) + return results + def get_parents(self, request, proband=None, members=None): parents = [] if proband and members: @@ -325,6 +707,8 @@ def cousins(self, request, proband=None, members=None): return csns + + @view_config(name='process-pedigree', context=Family, request_method='PATCH', permission='edit') @debug_log From 86959d309762c5b0d17863df6000c2f5511f75ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Koray=20K=C4=B1rl=C4=B1?= Date: Tue, 23 Jun 2020 21:37:50 -0400 Subject: [PATCH 03/54] add family to case schema --- src/encoded/schemas/case.json | 6 ++++++ src/encoded/tests/data/inserts/case.json | 1 + 2 files changed, 7 insertions(+) diff --git a/src/encoded/schemas/case.json b/src/encoded/schemas/case.json index a3036b9e04..cc6263a29a 100644 --- a/src/encoded/schemas/case.json +++ b/src/encoded/schemas/case.json @@ -33,6 +33,12 @@ "description": "Description of the case", "type": "string" }, + "family": { + "title": "Family", + "description": "Primary family this case is associated with", + "type": "string", + "linkTo": "Family" + }, "individual": { "title": "Individual", "description": "The individual who comprises the case", diff --git a/src/encoded/tests/data/inserts/case.json b/src/encoded/tests/data/inserts/case.json index 79a4be069c..bbeec1859e 100644 --- a/src/encoded/tests/data/inserts/case.json +++ b/src/encoded/tests/data/inserts/case.json @@ -6,6 +6,7 @@ "status": "released", "uuid": "ad9883a2-6886-4ca0-9402-7c49734c12fa", "sample_processing": "4fdb481a-fbdb-4c0f-a68d-aac87f847a67", + "family": "aa7d83a2-6886-4ca0-9402-7c49734ce3d4", "individual": "5ec91041-78a0-4758-abef-21c7f5fd9f12" } ] From eac8e989dfb42952abfbdd6ef28d7f820fc312dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Koray=20K=C4=B1rl=C4=B1?= Date: Tue, 23 Jun 2020 21:43:54 -0400 Subject: [PATCH 04/54] revlink for cases on family --- src/encoded/types/family.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/encoded/types/family.py b/src/encoded/types/family.py index 39cfc705ff..089d5dd80b 100644 --- a/src/encoded/types/family.py +++ b/src/encoded/types/family.py @@ -104,6 +104,21 @@ class Family(Item): "analysis_groups.completed_processes", ] + @calculated_property(schema={ + "title": "Cases", + "description": "Cases for this family", + "type": "array", + "items": { + "title": "Case", + "type": "string", + "linkTo": "Case" + } + }) + def case(self, request): + rs = self.rev_link_atids(request, "case") + if rs: + return rs + @calculated_property(schema={ "title": "Relationships", "description": "Relationships to proband.", From f31a13b1ee1a6372fca9e9164b7db742e026ca1a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Koray=20K=C4=B1rl=C4=B1?= Date: Tue, 23 Jun 2020 22:27:21 -0400 Subject: [PATCH 05/54] embed family in individual --- src/encoded/types/case.py | 1 + src/encoded/types/family.py | 6 ++++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/encoded/types/case.py b/src/encoded/types/case.py index 13f222d989..9e3be7ff41 100644 --- a/src/encoded/types/case.py +++ b/src/encoded/types/case.py @@ -69,6 +69,7 @@ class Case(Item): "individual.samples.files.quality_metric.url", "individual.samples.files.quality_metric.status", "individual.samples.completed_processes", + 
"individual.families.uuid", "sample_processing.analysis_type", "sample_processing.last_modified.*", "sample_processing.families.family_id", diff --git a/src/encoded/types/family.py b/src/encoded/types/family.py index 089d5dd80b..d54cbbee47 100644 --- a/src/encoded/types/family.py +++ b/src/encoded/types/family.py @@ -32,7 +32,9 @@ class Family(Item): item_type = 'family' name_key = 'accession' schema = load_schema('encoded:schemas/family.json') - rev = {'sample_procs': ('SampleProcessing', 'families')} + rev = {'sample_procs': ('SampleProcessing', 'families'), + 'case': ('Case', 'family')} + embedded_list = [ "members.accession", "members.father", @@ -140,7 +142,7 @@ def case(self, request): ] }, "sex": { - "title": "Individual", + "title": "Sex", "type": "string", "enum": [ "F", From 6a0657f2dd1629a800d08fe379c1df2f1f04797b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Koray=20K=C4=B1rl=C4=B1?= Date: Wed, 24 Jun 2020 15:36:21 -0400 Subject: [PATCH 06/54] embed family accession in case --- src/encoded/types/case.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/encoded/types/case.py b/src/encoded/types/case.py index 9e3be7ff41..6e9ee01157 100644 --- a/src/encoded/types/case.py +++ b/src/encoded/types/case.py @@ -107,7 +107,8 @@ class Case(Item): "sample_processing.sample_processed_files.processed_files.quality_metric.status", "sample_processing.completed_processes", "report.last_modified.*", - "report.status" + "report.status", + "family.accession" ] @calculated_property(schema={ From 5b729e95085a7f74a4a3b1ba99a876c79e23eb58 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Koray=20K=C4=B1rl=C4=B1?= Date: Mon, 29 Jun 2020 19:46:51 -0400 Subject: [PATCH 07/54] add comment --- src/encoded/types/family.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/encoded/types/family.py b/src/encoded/types/family.py index d54cbbee47..3efefd5a3c 100644 --- a/src/encoded/types/family.py +++ b/src/encoded/types/family.py @@ -481,6 +481,7 @@ def count_direction_change(relation_tag): # collect members properties all_props = [] for a_member in members: + # This might be a step to optimize if families get larger props = get_item_or_none(request, a_member, 'individuals') all_props.append(props) # convert to ped_file format From 7e1ed8e18f1f060e69d6852b0b31aef814ab2b70 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Koray=20K=C4=B1rl=C4=B1?= Date: Mon, 29 Jun 2020 21:26:07 -0400 Subject: [PATCH 08/54] add nexus (cohort) --- setup.cfg | 2 +- src/encoded/schemas/family.json | 6 +++++ src/encoded/schemas/nexus.json | 48 +++++++++++++++++++++++++++++++++ src/encoded/types/__init__.py | 23 ++++++++++++++++ src/encoded/types/cohort.py | 10 +++---- src/encoded/types/family.py | 10 +++---- 6 files changed, 87 insertions(+), 12 deletions(-) create mode 100644 src/encoded/schemas/nexus.json diff --git a/setup.cfg b/setup.cfg index 93fe587234..a250917006 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [flake8] -max-line-length = 120 +max-line-length = 160 [coverage:run] branch = True omit = diff --git a/src/encoded/schemas/family.json b/src/encoded/schemas/family.json index 50a6184d7b..4896ce8f3f 100644 --- a/src/encoded/schemas/family.json +++ b/src/encoded/schemas/family.json @@ -88,6 +88,12 @@ "title": "Description", "description": "Description of the family", "type": "string" + }, + "cohort": { + "title" : "Cohort", + "description": "Cohort of the family", + "type" : "string", + "linkTo": "Nexus" } } } diff --git a/src/encoded/schemas/nexus.json b/src/encoded/schemas/nexus.json new file mode 100644 
index 0000000000..c10dc8d8ce --- /dev/null +++ b/src/encoded/schemas/nexus.json @@ -0,0 +1,48 @@ +{ + "title": "Cohort Case Collection", + "id": "/profiles/nexus.json", + "$schema": "http://json-schema.org/draft-04/schema#", + "type": "object", + "required": ["title", "project", "institution"], + "identifyingProperties": ["uuid", "accession", "aliases"], + "additionalProperties": false, + "mixinProperties": [ + { "$ref": "mixins.json#/schema_version" }, + { "$ref": "mixins.json#/accession" }, + { "$ref": "mixins.json#/aliases" }, + { "$ref": "mixins.json#/uuid" }, + { "$ref": "mixins.json#/submitted" }, + { "$ref": "mixins.json#/modified" }, + { "$ref": "mixins.json#/status" }, + { "$ref": "mixins.json#/attribution" } + ], + "properties": { + "schema_version": { + "default": "1" + }, + "accession" : { + "accessionType" : "NE" + }, + "title": { + "title": "Title", + "description": "Title for this Cohort", + "type": "string" + }, + "description": { + "title": "Description", + "description": "Description of the cohort", + "type": "string" + }, + "pipeline": { + "title": "Pipeline", + "description": "Which pipeline to run on this cohort families", + "type": "string" + }, + "filter_set": { + "title": "Default Filter Set", + "description": "Default filter set that will be applied to cases of this cohort", + "type": "string", + "linkTo": "FilterSet" + } + } +} diff --git a/src/encoded/types/__init__.py b/src/encoded/types/__init__.py index 5c7b6b755b..f25d3496b2 100644 --- a/src/encoded/types/__init__.py +++ b/src/encoded/types/__init__.py @@ -308,3 +308,26 @@ class AnnotationField(Item): }) def display_title(self, field_name): return field_name + + +@collection( + name='nexuses', + unique_key='accession', + properties={ + 'title': 'Cohorts', + 'description': 'List of Cohorts', + }) +class Nexus(Item): + """Class for Cohorts.""" + item_type = 'nexus' + name_key = 'accession' + schema = load_schema('encoded:schemas/nexus.json') + embedded_list = [] + + @calculated_property(schema={ + "title": "Display Title", + "description": "A calculated title for every object in 4DN", + "type": "string" + }) + def display_title(self, title): + return title diff --git a/src/encoded/types/cohort.py b/src/encoded/types/cohort.py index d48069ff86..ee7edc501a 100644 --- a/src/encoded/types/cohort.py +++ b/src/encoded/types/cohort.py @@ -113,7 +113,7 @@ def display_title(self, title): @calculated_property(schema={ "title": "Phenotypic features", "description": "Phenotypic features that define the cohort", - "type" : "array", + "type": "array", "items": { "title": "Phenotypic feature", "type": "string", @@ -226,7 +226,7 @@ def process_pedigree(context, request): xml_extra = {'ped_datetime': ped_datetime} family_uuids = create_family_proband(testapp, xml_data, refs, 'managedObjectID', - cohort, post_extra, xml_extra) + cohort, post_extra, xml_extra) # create Document for input pedigree file # pbxml files are not handled by default. 
Do some mimetype processing @@ -291,7 +291,7 @@ def process_pedigree(context, request): ##################################### -### Pedigree processing functions ### +# ## Pedigree processing functions ### ##################################### @@ -748,7 +748,7 @@ def create_family_proband(testapp, xml_data, refs, ref_field, cohort, if ref_val is not None and 'xml_ref_fxn' in converted_dict: # will update data in place converted_dict['xml_ref_fxn'](testapp, ref_val, refs, data, - cohort, uuids_by_ref) + cohort, uuids_by_ref) elif ref_val is not None: data[converted_dict['corresponds_to']] = uuids_by_ref[ref_val] @@ -797,7 +797,7 @@ def create_family_proband(testapp, xml_data, refs, ref_field, cohort, # invert uuids_by_ref to sort family members by managedObjectID (xml ref) refs_by_uuid = {v: k for k, v in uuids_by_ref.items()} family = {'members': sorted([m['uuid'] for m in family_members.values()], - key=lambda v: int(refs_by_uuid[v]))} + key=lambda v: int(refs_by_uuid[v]))} if proband and proband in family_members: family['proband'] = family_members[proband]['uuid'] else: diff --git a/src/encoded/types/family.py b/src/encoded/types/family.py index 3efefd5a3c..1fd844341b 100644 --- a/src/encoded/types/family.py +++ b/src/encoded/types/family.py @@ -725,8 +725,6 @@ def cousins(self, request, proband=None, members=None): return csns - - @view_config(name='process-pedigree', context=Family, request_method='PATCH', permission='edit') @debug_log @@ -818,7 +816,7 @@ def process_pedigree(context, request): xml_extra = {'ped_datetime': ped_datetime} family_uuids = create_family_proband(testapp, xml_data, refs, 'managedObjectID', - family_item, post_extra, xml_extra) + family_item, post_extra, xml_extra) # create Document for input pedigree file # pbxml files are not handled by default. 
Do some mimetype processing @@ -883,7 +881,7 @@ def process_pedigree(context, request): ##################################### -### Pedigree processing functions ### +# ## Pedigree processing functions ### ##################################### @@ -1340,7 +1338,7 @@ def create_family_proband(testapp, xml_data, refs, ref_field, family_item, if ref_val is not None and 'xml_ref_fxn' in converted_dict: # will update data in place converted_dict['xml_ref_fxn'](testapp, ref_val, refs, data, - family_item, uuids_by_ref) + family_item, uuids_by_ref) elif ref_val is not None: data[converted_dict['corresponds_to']] = uuids_by_ref[ref_val] @@ -1389,7 +1387,7 @@ def create_family_proband(testapp, xml_data, refs, ref_field, family_item, # invert uuids_by_ref to sort family members by managedObjectID (xml ref) refs_by_uuid = {v: k for k, v in uuids_by_ref.items()} family = {'members': sorted([m['uuid'] for m in family_members.values()], - key=lambda v: int(refs_by_uuid[v]))} + key=lambda v: int(refs_by_uuid[v]))} if proband and proband in family_members: family['proband'] = family_members[proband]['uuid'] else: From 21119e2729d82d970d8bb80f69879cfec7bed6c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Koray=20K=C4=B1rl=C4=B1?= Date: Mon, 29 Jun 2020 21:33:36 -0400 Subject: [PATCH 09/54] cohort links from cases --- src/encoded/schemas/case.json | 6 ++++++ src/encoded/schemas/family.json | 6 ------ src/encoded/tests/data/inserts/nexus.json | 12 ++++++++++++ 3 files changed, 18 insertions(+), 6 deletions(-) create mode 100644 src/encoded/tests/data/inserts/nexus.json diff --git a/src/encoded/schemas/case.json b/src/encoded/schemas/case.json index cc6263a29a..69fa873081 100644 --- a/src/encoded/schemas/case.json +++ b/src/encoded/schemas/case.json @@ -56,6 +56,12 @@ "description": "The report generated from this case", "type": "string", "linkTo": "Report" + }, + "cohort": { + "title" : "Cohort", + "description": "Cohort of the family", + "type" : "string", + "linkTo": "Nexus" } } } diff --git a/src/encoded/schemas/family.json b/src/encoded/schemas/family.json index 4896ce8f3f..50a6184d7b 100644 --- a/src/encoded/schemas/family.json +++ b/src/encoded/schemas/family.json @@ -88,12 +88,6 @@ "title": "Description", "description": "Description of the family", "type": "string" - }, - "cohort": { - "title" : "Cohort", - "description": "Cohort of the family", - "type" : "string", - "linkTo": "Nexus" } } } diff --git a/src/encoded/tests/data/inserts/nexus.json b/src/encoded/tests/data/inserts/nexus.json new file mode 100644 index 0000000000..781a40e559 --- /dev/null +++ b/src/encoded/tests/data/inserts/nexus.json @@ -0,0 +1,12 @@ +[ + { + "title": "Rare Disease Trio Cohort", + "project": "12a92962-8265-4fc0-b2f8-cf14f05db58b", + "institution": "hms-dbmi", + "status": "released", + "uuid": "cc7d83a2-6886-4ca0-9402-7c49734cf111", + "description": "Cohort for test institute Trio cases of rare diseases", + "filter_set": "dcf15d5e-40aa-43bc-b81c-32c70c9afa99", + "pipeline": "Efficient Trio Analysis V10" + } +] From 3a2eabdea8a73b2f6c80ba7e6f431fefd4fabc75 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Koray=20K=C4=B1rl=C4=B1?= Date: Mon, 29 Jun 2020 21:35:47 -0400 Subject: [PATCH 10/54] embed filter_set in case --- src/encoded/tests/data/inserts/case.json | 3 ++- src/encoded/types/case.py | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/encoded/tests/data/inserts/case.json b/src/encoded/tests/data/inserts/case.json index bbeec1859e..3f51ed187e 100644 --- a/src/encoded/tests/data/inserts/case.json +++ 
b/src/encoded/tests/data/inserts/case.json @@ -7,6 +7,7 @@ "uuid": "ad9883a2-6886-4ca0-9402-7c49734c12fa", "sample_processing": "4fdb481a-fbdb-4c0f-a68d-aac87f847a67", "family": "aa7d83a2-6886-4ca0-9402-7c49734ce3d4", - "individual": "5ec91041-78a0-4758-abef-21c7f5fd9f12" + "individual": "5ec91041-78a0-4758-abef-21c7f5fd9f12", + "cohort": "cc7d83a2-6886-4ca0-9402-7c49734cf111" } ] diff --git a/src/encoded/types/case.py b/src/encoded/types/case.py index 6e9ee01157..a1b92e61e5 100644 --- a/src/encoded/types/case.py +++ b/src/encoded/types/case.py @@ -108,7 +108,8 @@ class Case(Item): "sample_processing.completed_processes", "report.last_modified.*", "report.status", - "family.accession" + "family.accession", + "cohort.filter_set.title" ] @calculated_property(schema={ From 83762e9230b344039df34ac6cd0cd7e35dffcdcc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Koray=20K=C4=B1rl=C4=B1?= Date: Mon, 29 Jun 2020 21:40:51 -0400 Subject: [PATCH 11/54] data fixtures --- src/encoded/tests/datafixtures.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/encoded/tests/datafixtures.py b/src/encoded/tests/datafixtures.py index 232207c036..8643ed7be6 100644 --- a/src/encoded/tests/datafixtures.py +++ b/src/encoded/tests/datafixtures.py @@ -3,10 +3,11 @@ ORDER = [ - 'user', 'project', 'institution', 'file_format', 'variant_consequence', 'phenotype', + 'user', 'project', 'institution', 'filter_set', 'nexus', 'file_format', + 'variant_consequence', 'phenotype', 'cohort', 'family', 'individual', 'sample', 'workflow', 'access_key', 'disorder', 'document', 'file_fastq', 'file_processed', 'file_reference', 'gene', 'sample_processing', - 'case', 'filter_set', 'report', 'page', 'quality_metric_fastqc', 'evidence_dis_pheno', + 'case', 'report', 'page', 'quality_metric_fastqc', 'evidence_dis_pheno', 'quality_metric_bamcheck', 'quality_metric_qclist', 'quality_metric_wgs_bamqc', 'quality_metric_cmphet', 'quality_metric_vcfcheck', 'quality_metric_workflowrun', 'software', 'static_section', From 22534d6f2ad6dfb29da201a4d92b05d5d66edac7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Koray=20K=C4=B1rl=C4=B1?= Date: Mon, 29 Jun 2020 21:44:04 -0400 Subject: [PATCH 12/54] deploy mapping --- src/encoded/commands/create_mapping_on_deploy.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/encoded/commands/create_mapping_on_deploy.py b/src/encoded/commands/create_mapping_on_deploy.py index 4179adfad0..4a2f7f3e20 100644 --- a/src/encoded/commands/create_mapping_on_deploy.py +++ b/src/encoded/commands/create_mapping_on_deploy.py @@ -19,6 +19,8 @@ 'AccessKey', 'Cohort', 'Family', + 'FilterSet', + 'Nexus', 'User', 'Workflow', 'WorkflowMapping', @@ -36,7 +38,6 @@ 'Disorder', 'Individual', 'Case', - 'FilterSet', 'Report', 'Document', 'QualityMetricBamcheck', From 9a348418ea27615799901b9635d8edf533a29deb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Koray=20K=C4=B1rl=C4=B1?= Date: Mon, 29 Jun 2020 21:49:50 -0400 Subject: [PATCH 13/54] add unique title to filter sets --- src/encoded/schemas/filter_set.json | 7 ++++++- src/encoded/tests/data/inserts/filter_set.json | 1 + src/encoded/types/filter_set.py | 3 ++- 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/src/encoded/schemas/filter_set.json b/src/encoded/schemas/filter_set.json index 8965ea62c1..5b1ea0d306 100644 --- a/src/encoded/schemas/filter_set.json +++ b/src/encoded/schemas/filter_set.json @@ -4,7 +4,7 @@ "id": "/profiles/filter_set.json", "$schema": "http://json-schema.org/draft-04/schema#", "type": "object", - "required": 
["institution", "project"], + "required": ["institution", "project", "title"], "identifyingProperties": ["uuid", "aliases"], "additionalProperties": false, "mixinProperties": [ @@ -22,6 +22,11 @@ "schema_version": { "default": "1" }, + "title": { + "title": "Title", + "description": "Title for this filter set", + "type": "string" + }, "search_type": { "title": "Item Type", "description": "Item type that the filter set will work on.", diff --git a/src/encoded/tests/data/inserts/filter_set.json b/src/encoded/tests/data/inserts/filter_set.json index a0b95cc33a..54adb3df53 100644 --- a/src/encoded/tests/data/inserts/filter_set.json +++ b/src/encoded/tests/data/inserts/filter_set.json @@ -7,6 +7,7 @@ "status": "released", "uuid": "dcf15d5e-40aa-43bc-b81c-32c70c9afa99", "search_type": "VariantSample", + "title": "Test filter set", "filter_blocks": [ { "query": "&variant.CHROM=1&variant.genes.genes_most_severe_consequence.impact=MODERATE", diff --git a/src/encoded/types/filter_set.py b/src/encoded/types/filter_set.py index 490be91274..fc020d3af7 100644 --- a/src/encoded/types/filter_set.py +++ b/src/encoded/types/filter_set.py @@ -10,6 +10,7 @@ @collection( name='filter-sets', + unique_key='nexus:title', properties={ 'title': 'Filter Sets', 'description': 'Filter Set for combining multiple queries' @@ -19,5 +20,5 @@ class FilterSet(Item): """The class to store information about 4DN file formats""" item_type = 'filter_set' schema = load_schema('encoded:schemas/filter_set.json') - name_key = 'filter_set' + name_key = 'title' embedded_list = [] From 7d8e13f770d5c2bfa76728f87f66fffaeb0bb71d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Koray=20K=C4=B1rl=C4=B1?= Date: Mon, 29 Jun 2020 21:58:48 -0400 Subject: [PATCH 14/54] account for schemas ending with s --- src/encoded/tests/test_schemas.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/encoded/tests/test_schemas.py b/src/encoded/tests/test_schemas.py index dbf7d33fb5..6b0448b5c0 100644 --- a/src/encoded/tests/test_schemas.py +++ b/src/encoded/tests/test_schemas.py @@ -123,6 +123,8 @@ def pluralize(name): # otherwise just add 's' if name.endswith('ly'): return name[:-1] + 'ies' + if name.endswith('s'): + return name + 'es' return name + 's' From 97fec7b0964106d7293f58df4f6c1865a3c273c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Koray=20K=C4=B1rl=C4=B1?= Date: Mon, 29 Jun 2020 22:09:32 -0400 Subject: [PATCH 15/54] fix filter set tests --- src/encoded/tests/test_schemas.py | 2 +- src/encoded/tests/test_types_filter_set.py | 10 ++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/src/encoded/tests/test_schemas.py b/src/encoded/tests/test_schemas.py index 6b0448b5c0..a9b76a8103 100644 --- a/src/encoded/tests/test_schemas.py +++ b/src/encoded/tests/test_schemas.py @@ -120,7 +120,7 @@ def pluralize(name): return name.replace(sp, sp + 's') elif name.startswith(sp) and re.search('setting', name): return name.replace(sp, sp + 's') - # otherwise just add 's' + # otherwise just add 's/es/ies' if name.endswith('ly'): return name[:-1] + 'ies' if name.endswith('s'): diff --git a/src/encoded/tests/test_types_filter_set.py b/src/encoded/tests/test_types_filter_set.py index c48293ed8a..e3e2831aad 100644 --- a/src/encoded/tests/test_types_filter_set.py +++ b/src/encoded/tests/test_types_filter_set.py @@ -14,6 +14,7 @@ def barebones_filter_set(): """ A filter set with only the flag that designates the type """ return { + 'title': 'Test filter set', 'search_type': 'Variant', 'flags': '?type=Variant', 'project': 'hms-dbmi', @@ -25,6 +26,7 @@ def 
barebones_filter_set(): def simple_filter_set(): """ A filter set with only the flag that designates the type """ return { + 'title': 'Test filter set', 'search_type': 'Cohort', 'filter_blocks': [ { @@ -42,6 +44,7 @@ def simple_filter_set(): def typical_filter_set(): """ A filter set with two filter blocks and a flag """ return { + 'title': 'Test filter set', 'search_type': 'Cohort', 'filter_blocks': [ { @@ -63,6 +66,7 @@ def typical_filter_set(): def complex_filter_set(): """ A filter set with 3 filter_blocks and a flag """ return { + 'title': 'Test filter set', 'search_type': 'Variant', 'filter_blocks': [ { @@ -183,6 +187,7 @@ def test_filter_set_complex(workbook, testapp, complex_filter_set): # toggle off all the blocks filter_set = { + 'title': 'Test filter set', 'search_type': t, 'filter_blocks': filter_blocks, 'flags': flags @@ -220,6 +225,7 @@ def test_filter_set_intersection(workbook, testapp, complex_filter_set): filter_blocks = complex_filter_set['filter_blocks'] flags = complex_filter_set['flags'] filter_set = { + 'title': 'Test filter set', 'search_type': t, 'filter_blocks': filter_blocks, 'flags': flags, @@ -264,6 +270,7 @@ def execute_and_verify_generator_search(testapp, filter_set, expected): @pytest.fixture def filter_set_with_only_flags(): return { + 'title': 'Test filter set', 'search_type': 'Variant', 'flags': 'CHROM=1' } @@ -296,6 +303,7 @@ def test_compound_search_only_flags(workbook, testapp, filter_set_with_only_flag @pytest.fixture def filter_set_with_single_filter_block(): return { + 'title': 'Test filter set', 'search_type': 'Variant', 'filter_blocks': [{ 'query': '?type=Variant&POS.from=0&POS.to=10000000', @@ -319,6 +327,7 @@ def test_compound_search_single_filter_block(workbook, testapp, filter_set_with_ @pytest.fixture def filter_set_with_single_filter_block_and_flags(): return { + 'title': 'Test filter set', 'search_type': 'Variant', 'filter_blocks': [{ 'query': '?type=Variant&POS.from=0&POS.to=10000000', @@ -350,6 +359,7 @@ def test_compound_search_filter_and_flags(workbook, testapp, filter_set_with_sin @pytest.fixture def filter_set_with_multiple_disabled_flags(): return { + 'title': 'Test filter set', 'search_type': 'Variant', 'filter_blocks': [{ 'query': '?type=Variant&POS.from=0&POS.to=10000000', From b5fbbe1aa621f02621db9034865cea90461c6284 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Koray=20K=C4=B1rl=C4=B1?= Date: Mon, 29 Jun 2020 22:27:10 -0400 Subject: [PATCH 16/54] remove name key --- src/encoded/types/filter_set.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/encoded/types/filter_set.py b/src/encoded/types/filter_set.py index fc020d3af7..1a6c4e7bea 100644 --- a/src/encoded/types/filter_set.py +++ b/src/encoded/types/filter_set.py @@ -20,5 +20,4 @@ class FilterSet(Item): """The class to store information about 4DN file formats""" item_type = 'filter_set' schema = load_schema('encoded:schemas/filter_set.json') - name_key = 'title' embedded_list = [] From 455f1b4f77dadf8f93147143a6c49cefb522985e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Koray=20K=C4=B1rl=C4=B1?= Date: Mon, 29 Jun 2020 22:53:19 -0400 Subject: [PATCH 17/54] use static methods per Will's recommendation It is hard to test for nested class functions, static functions are an easy way to deal with them --- src/encoded/schemas/nexus.json | 2 +- src/encoded/types/family.py | 549 +++++++++++++++++---------------- 2 files changed, 276 insertions(+), 275 deletions(-) diff --git a/src/encoded/schemas/nexus.json b/src/encoded/schemas/nexus.json index c10dc8d8ce..6c7d21ea31 100644 --- 
a/src/encoded/schemas/nexus.json +++ b/src/encoded/schemas/nexus.json @@ -21,7 +21,7 @@ "default": "1" }, "accession" : { - "accessionType" : "NE" + "accessionType" : "CO" }, "title": { "title": "Title", diff --git a/src/encoded/types/family.py b/src/encoded/types/family.py index 1fd844341b..ec1b12dd71 100644 --- a/src/encoded/types/family.py +++ b/src/encoded/types/family.py @@ -121,6 +121,277 @@ def case(self, request): if rs: return rs + @staticmethod + def generate_ped(all_props, proband, family_id): + """Format family information into ped file + https://gatk.broadinstitute.org/hc/en-us/articles/360035531972 + This might be useful in the future for compatibility + Ped file columns are + *Family ID + *Individual ID + *Paternal ID + *Maternal ID + *Sex (1=male; 2=female; U=unknown) + *Phenotype (-9 missing 0 missing 1 unaffected 2 affected) + (at the moment only on proband has 2 on phenotype) + """ + ped_content = """""" + gender_map = {'M': '1', 'F': '2', 'U': '3'} + for props in all_props: + # all members have unknown phenotype by default + phenotype = '0' + member_id = props['accession'] + + def parent_id(properties, field): + 'extract parent accession from member info' + id = properties.get(field) + if id: + return id.split('/')[2] + else: + return '' + paternal_id = parent_id(props, 'father') + maternal_id = parent_id(props, 'mother') + sex = props.get('sex', 'U') + ped_sex = gender_map.get(sex, 'U') + # if member is proband, add phenotupe + if props['@id'] == proband: + phenotype = '2' + line_ele = [family_id, member_id, paternal_id, maternal_id, ped_sex, phenotype] + ped_content += '\t'.join(line_ele) + '\n' + return ped_content + + @staticmethod + def extract_vectors(ped_content): + """given a ped file content, extract all primary relationship pairs + keys are listed in primary_vectors""" + fathers = [] + mothers = [] + daughters = [] + sons = [] + children = [] # when the gender of the kid is not known + for a_line in ped_content.split('\n'): + if not a_line: + continue + fam, ind, father, mother, sex, ph = a_line.split('\t') + if father: + fathers.append([father, ind]) + if sex == '1': + sons.append([ind, father]) + elif sex == '2': + daughters.append([ind, father]) + else: + children.append([ind, father]) + if mother: + mothers.append([mother, ind]) + if sex == '1': + sons.append([ind, mother]) + elif sex == '2': + daughters.append([ind, mother]) + else: + children.append([ind, mother]) + primary_vectors = { + 'fathers': fathers, + 'mothers': mothers, + 'daughters': daughters, + 'sons': sons, + 'children': children # when the gender of the kid is not known + } + return primary_vectors + + @staticmethod + def construct_links(primary_vectors, seed): + """Given the primary vectors, constructs linkages for each individual + and filters for the shortest link + Use first letter of primary vector keys to construct these links + This linkages are calcualted from the seed, often starts with proband + seed should be accession""" + # starting pack + needs_analysis = [[seed, 'p'], ] + analyzed = [] + all_links = {seed: ['p', ]} + # loop overy every set of new collected individuals + while needs_analysis: + collect_connected = [] + for an_ind, starting_tag in needs_analysis: + if an_ind in analyzed: + continue + analyzed.append(an_ind) + if an_ind not in all_links: + print('should not happen') + for a_key in primary_vectors: + # extend the link list with this letter + extend_tag = a_key[0] + my_links = [i for i in primary_vectors[a_key] if i[1] == an_ind] + for a_link in my_links: + 
linked_ind = a_link[0] + new_tag = starting_tag + '-' + extend_tag + if linked_ind not in all_links: + all_links[linked_ind] = [new_tag, ] + else: + all_links[linked_ind].append(new_tag) + if linked_ind not in analyzed: + collect_connected.append([linked_ind, new_tag]) + needs_analysis = collect_connected + filtered_links = {} + for individual in all_links: + # Return shorts links + a_list = list(set(all_links[individual])) + minimum = min(map(len, a_list)) + a_list = [i for i in a_list if len(i) == minimum] + filtered_links[individual] = a_list + return filtered_links + + @staticmethod + def relationships_vocabulary(links): + """Convert links to relationships. + Nomenclature guided by + https://www.devonfhs.org.uk/pdfs/tools/eichhorn-rlationship-chart.pdf""" + # return a nested list of [acc, calculated_relation, association] + Converter = { + "p": "proband", + "p-f": "father", "p-m": "mother", "p-d": "daughter", "p-s": "son", "p-c": "child", + "p-f-s": "brother", "p-m-s": "brother", + "p-f-d": "sister", "p-m-d": "sister", + "p-f-c": "sibling", "p-m-c": "sibling", + "p-d-m": "wife", "p-s-m": "wife", "p-c-m": "wife", + "p-d-f": "husband", "p-s-f": "husband", "p-c-f": "husband", + } + # add grandchildren + all_children = [i for i in Converter if Converter[i] in ['daughter', 'son', 'child']] + for child in all_children: + Converter[child + '-s'] = 'grandson' + Converter[child + '-d'] = 'granddaughter' + Converter[child + '-c'] = 'grandchild' + # add niece nephew nibling (we can also add sister brother in law here but will skip non blood relatives) + all_siblings = [i for i in Converter if Converter[i] in ['brother', 'sister', 'sibling']] + for sib in all_siblings: + Converter[sib + '-s'] = 'nephew' + Converter[sib + '-d'] = 'niece' + Converter[sib + '-c'] = 'nibling' + # add grand niece nephew nibling + all_niblings = [i for i in Converter if Converter[i] in ['nephew', 'niece', 'nibling']] + for nib in all_niblings: + Converter[nib + '-s'] = 'grandnephew' + Converter[nib + '-d'] = 'grandniece' + Converter[nib + '-c'] = 'grandnibling' + # add Grandparents + all_parents = [i for i in Converter if Converter[i] in ['mother', 'father']] + for parent in all_parents: + Converter[parent + '-m'] = 'grandmother' + Converter[parent + '-f'] = 'grandfather' + # add Great-grandparents Uncle Aunt Auncle + all_g_parents = [i for i in Converter if Converter[i] in ['grandmother', 'grandfather']] + for g_parent in all_g_parents: + Converter[g_parent + '-m'] = 'great-grandmother' + Converter[g_parent + '-f'] = 'great-grandfather' + Converter[g_parent + '-s'] = 'uncle' + Converter[g_parent + '-d'] = 'aunt' + Converter[g_parent + '-c'] = 'auncle' + # add Great-great-grandparents granduncle grandaunt grandauncle + all_gg_parents = [i for i in Converter if Converter[i] in ['great-grandmother', 'great-grandfather']] + for gg_parent in all_gg_parents: + Converter[gg_parent + '-m'] = 'great-great-grandmother' + Converter[gg_parent + '-f'] = 'great-great-grandfather' + Converter[gg_parent + '-s'] = 'granduncle' + Converter[gg_parent + '-d'] = 'grandaunt' + Converter[gg_parent + '-c'] = 'grandauncle' + # add Cousin + all_auncle = [i for i in Converter if Converter[i] in ['uncle', 'aunt', 'auncle']] + for auncle in all_auncle: + Converter[auncle + '-s'] = 'cousin' + Converter[auncle + '-d'] = 'cousin' + Converter[auncle + '-c'] = 'cousin' + # add Cousin once removed (descendant) + all_cousins = [i for i in Converter if Converter[i] in ['cousin']] + for cousin in all_cousins: + Converter[cousin + '-s'] = 'cousin once 
removed (descendant)' + Converter[cousin + '-d'] = 'cousin once removed (descendant)' + Converter[cousin + '-c'] = 'cousin once removed (descendant)' + # add Cousin twice removed (descendant) + all_cousins_o_r = [i for i in Converter if Converter[i] in ['cousin once removed (descendant)']] + for cousin in all_cousins_o_r: + Converter[cousin + '-s'] = 'cousin twice removed (descendant)' + Converter[cousin + '-d'] = 'cousin twice removed (descendant)' + Converter[cousin + '-c'] = 'cousin twice removed (descendant)' + # add First cousin once removed (ascendant) + all_g_auncle = [i for i in Converter if Converter[i] in ['granduncle', 'grandaunt', 'grandauncle']] + for g_auncle in all_g_auncle: + Converter[g_auncle + '-s'] = 'cousin once removed (ascendant)' + Converter[g_auncle + '-d'] = 'cousin once removed (ascendant)' + Converter[g_auncle + '-c'] = 'cousin once removed (ascendant)' + # add Second Cousin + all_cora = [i for i in Converter if Converter[i] in ['cousin once removed (ascendant)']] + for cora in all_cora: + Converter[cora + '-s'] = 'second cousin' + Converter[cora + '-d'] = 'second cousin' + Converter[cora + '-c'] = 'second cousin' + # add Second Cousin once removed + all_s_cousins = [i for i in Converter if Converter[i] in ['second cousin']] + for s_cousin in all_s_cousins: + Converter[s_cousin + '-s'] = 'second cousin once removed (descendant)' + Converter[s_cousin + '-d'] = 'second cousin once removed (descendant)' + Converter[s_cousin + '-c'] = 'second cousin once removed (descendant)' + # add Second Cousin twice removed + all_s_cousins_o_r = [i for i in Converter if Converter[i] in ['second cousin once removed (descendant)']] + for s_cousin_o_r in all_s_cousins_o_r: + Converter[s_cousin_o_r + '-s'] = 'second cousin twice removed (descendant)' + Converter[s_cousin_o_r + '-d'] = 'second cousin twice removed (descendant)' + Converter[s_cousin_o_r + '-c'] = 'second cousin twice removed (descendant)' + + # calculate direction change (if more then 2, not blood relative) + def count_direction_change(relation_tag): + """If you are going down from proband, you need to keep going down + If you are going up from proband, you can change direction once + If you are out of these cases, you are not blood relative + We make an exception for the Husband and Wife""" + up = ['f', 'm'] + down = ['d', 's', 'c'] + state = 1 + changes = 0 + for a_letter in relation_tag.split('-'): + if a_letter in up: + new_state = 1 + elif a_letter in down: + new_state = -1 + else: # p + continue + if state == new_state: + continue + else: + state = new_state + changes += 1 + return changes + + relations = [] + for i in links: + association = '' + val = links[i][0] + if val in Converter: + relation = Converter[val] + # calculate half relation for siblings + if relation in ['sister', 'brother', 'sibling']: + # if they are full siblings, they should carry two link of same size + # if not, they are half + if len(links[i]) == 1: + relation = 'half-' + relation + # for extended family calculate paternal/maternal + if len(val) > 4: + # calculate for family that starts by going above 2 levels + if val[2:5] in ['m-m', 'm-f', 'f-f', 'f-m']: + association_pointer = val[2] + if association_pointer == 'f': + association = 'paternal' + elif association_pointer == 'm': + association = 'maternal' + else: + dir_change = count_direction_change(val) + if dir_change > 1: + relation = 'family-in-law' + else: + relation = 'extended-family' + relations.append([i, relation, association]) + return relations + 
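# Illustration (a sketch, not part of this patch): how the static helpers
# above compose for a simple trio. The accessions GAPID001-GAPID003 and the
# family id GAPFA0001 are invented for this example only.
#
#     trio = [
#         {'@id': '/individuals/GAPID001/', 'accession': 'GAPID001',
#          'father': '/individuals/GAPID002/',
#          'mother': '/individuals/GAPID003/', 'sex': 'M'},
#         {'@id': '/individuals/GAPID002/', 'accession': 'GAPID002', 'sex': 'M'},
#         {'@id': '/individuals/GAPID003/', 'accession': 'GAPID003', 'sex': 'F'},
#     ]
#     ped = Family.generate_ped(trio, '/individuals/GAPID001/', 'GAPFA0001')
#     vectors = Family.extract_vectors(ped)
#     links = Family.construct_links(vectors, 'GAPID001')
#     # links == {'GAPID001': ['p'], 'GAPID002': ['p-f'], 'GAPID003': ['p-m']}
#     # (the proband's longer self-links 'p-f-s' and 'p-m-s' are discarded,
#     # since only the shortest tag per individual is kept)
#     relations = Family.relationships_vocabulary(links)
#     # relations == [['GAPID001', 'proband', ''],
#     #               ['GAPID002', 'father', ''],
#     #               ['GAPID003', 'mother', '']]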
@calculated_property(schema={ "title": "Relationships", "description": "Relationships to proband.", @@ -201,276 +472,6 @@ def case(self, request): }) def relationships(self, request, proband=None, members=None): """Calculate relationships""" - def generate_ped(all_props, proband, family_id): - """Format family information into ped file - https://gatk.broadinstitute.org/hc/en-us/articles/360035531972 - This might be useful in the future for compatibility - Ped file columns are - *Family ID - *Individual ID - *Paternal ID - *Maternal ID - *Sex (1=male; 2=female; U=unknown) - *Phenotype (-9 missing 0 missing 1 unaffected 2 affected) - (at the moment only on proband has 2 on phenotype) - """ - ped_content = """""" - gender_map = {'M': '1', 'F': '2', 'U': '3'} - for props in all_props: - # all members have unknown phenotype by default - phenotype = '0' - member_id = props['accession'] - - def parent_id(properties, field): - 'extract parent accession from member info' - id = properties.get(field) - if id: - return id.split('/')[2] - else: - return '' - paternal_id = parent_id(props, 'father') - maternal_id = parent_id(props, 'mother') - sex = props.get('sex', 'U') - ped_sex = gender_map.get(sex, 'U') - # if member is proband, add phenotupe - if props['@id'] == proband: - phenotype = '2' - line_ele = [family_id, member_id, paternal_id, maternal_id, ped_sex, phenotype] - ped_content += '\t'.join(line_ele) + '\n' - return ped_content - - def extract_vectors(ped_content): - """given a ped file content, extract all primary relationship pairs - keys are listed in primary_vectors""" - fathers = [] - mothers = [] - daughters = [] - sons = [] - children = [] # when the gender of the kid is not known - for a_line in ped_content.split('\n'): - if not a_line: - continue - fam, ind, father, mother, sex, ph = a_line.split('\t') - if father: - fathers.append([father, ind]) - if sex == '1': - sons.append([ind, father]) - elif sex == '2': - daughters.append([ind, father]) - else: - children.append([ind, father]) - if mother: - mothers.append([mother, ind]) - if sex == '1': - sons.append([ind, mother]) - elif sex == '2': - daughters.append([ind, mother]) - else: - children.append([ind, mother]) - primary_vectors = { - 'fathers': fathers, - 'mothers': mothers, - 'daughters': daughters, - 'sons': sons, - 'children': children # when the gender of the kid is not known - } - return primary_vectors - - def construct_links(primary_vectors, seed): - """Given the primary vectors, constructs linkages for each individual - and filters for the shortest link - Use first letter of primary vector keys to construct these links - This linkages are calcualted from the seed, often starts with proband - seed should be accession""" - # starting pack - needs_analysis = [[seed, 'p'], ] - analyzed = [] - all_links = {seed: ['p', ]} - # loop overy every set of new collected individuals - while needs_analysis: - collect_connected = [] - for an_ind, starting_tag in needs_analysis: - if an_ind in analyzed: - continue - analyzed.append(an_ind) - if an_ind not in all_links: - print('should not happen') - for a_key in primary_vectors: - # extend the link list with this letter - extend_tag = a_key[0] - my_links = [i for i in primary_vectors[a_key] if i[1] == an_ind] - for a_link in my_links: - linked_ind = a_link[0] - new_tag = starting_tag + '-' + extend_tag - if linked_ind not in all_links: - all_links[linked_ind] = [new_tag, ] - else: - all_links[linked_ind].append(new_tag) - if linked_ind not in analyzed: - 
collect_connected.append([linked_ind, new_tag]) - needs_analysis = collect_connected - - # return the shortest links - def return_shortest(a_list): - """filter list for shortest items.""" - a_list = list(set(a_list)) - minimum = min(map(len, a_list)) - return [i for i in a_list if len(i) == minimum] - filtered_links = {} - for individual in all_links: - filtered_links[individual] = return_shortest(all_links[individual]) - return filtered_links - - def relationships_vocabulary(links): - """Convert links to relationships. - Nomenclature guided by - https://www.devonfhs.org.uk/pdfs/tools/eichhorn-rlationship-chart.pdf""" - # return a nested list of [acc, calculated_relation, association] - Converter = { - "p": "proband", - "p-f": "father", "p-m": "mother", "p-d": "daughter", "p-s": "son", "p-c": "child", - "p-f-s": "brother", "p-m-s": "brother", - "p-f-d": "sister", "p-m-d": "sister", - "p-f-c": "sibling", "p-m-c": "sibling", - "p-d-m": "wife", "p-s-m": "wife", "p-c-m": "wife", - "p-d-f": "husband", "p-s-f": "husband", "p-c-f": "husband", - } - # add grandchildren - all_children = [i for i in Converter if Converter[i] in ['daughter', 'son', 'child']] - for child in all_children: - Converter[child + '-s'] = 'grandson' - Converter[child + '-d'] = 'granddaughter' - Converter[child + '-c'] = 'grandchild' - # add niece nephew nibling (we can also add sister brother in law here but will skip non blood relatives) - all_siblings = [i for i in Converter if Converter[i] in ['brother', 'sister', 'sibling']] - for sib in all_siblings: - Converter[sib + '-s'] = 'nephew' - Converter[sib + '-d'] = 'niece' - Converter[sib + '-c'] = 'nibling' - # add grand niece nephew nibling - all_niblings = [i for i in Converter if Converter[i] in ['nephew', 'niece', 'nibling']] - for nib in all_niblings: - Converter[nib + '-s'] = 'grandnephew' - Converter[nib + '-d'] = 'grandniece' - Converter[nib + '-c'] = 'grandnibling' - # add Grandparents - all_parents = [i for i in Converter if Converter[i] in ['mother', 'father']] - for parent in all_parents: - Converter[parent + '-m'] = 'grandmother' - Converter[parent + '-f'] = 'grandfather' - # add Great-grandparents Uncle Aunt Auncle - all_g_parents = [i for i in Converter if Converter[i] in ['grandmother', 'grandfather']] - for g_parent in all_g_parents: - Converter[g_parent + '-m'] = 'great-grandmother' - Converter[g_parent + '-f'] = 'great-grandfather' - Converter[g_parent + '-s'] = 'uncle' - Converter[g_parent + '-d'] = 'aunt' - Converter[g_parent + '-c'] = 'auncle' - # add Great-great-grandparents granduncle grandaunt grandauncle - all_gg_parents = [i for i in Converter if Converter[i] in ['great-grandmother', 'great-grandfather']] - for gg_parent in all_gg_parents: - Converter[gg_parent + '-m'] = 'great-great-grandmother' - Converter[gg_parent + '-f'] = 'great-great-grandfather' - Converter[gg_parent + '-s'] = 'granduncle' - Converter[gg_parent + '-d'] = 'grandaunt' - Converter[gg_parent + '-c'] = 'grandauncle' - # add Cousin - all_auncle = [i for i in Converter if Converter[i] in ['uncle', 'aunt', 'auncle']] - for auncle in all_auncle: - Converter[auncle + '-s'] = 'cousin' - Converter[auncle + '-d'] = 'cousin' - Converter[auncle + '-c'] = 'cousin' - # add Cousin once removed (descendant) - all_cousins = [i for i in Converter if Converter[i] in ['cousin']] - for cousin in all_cousins: - Converter[cousin + '-s'] = 'cousin once removed (descendant)' - Converter[cousin + '-d'] = 'cousin once removed (descendant)' - Converter[cousin + '-c'] = 'cousin once removed (descendant)' 
- # add Cousin twice removed (descendant) - all_cousins_o_r = [i for i in Converter if Converter[i] in ['cousin once removed (descendant)']] - for cousin in all_cousins_o_r: - Converter[cousin + '-s'] = 'cousin twice removed (descendant)' - Converter[cousin + '-d'] = 'cousin twice removed (descendant)' - Converter[cousin + '-c'] = 'cousin twice removed (descendant)' - # add First cousin once removed (ascendant) - all_g_auncle = [i for i in Converter if Converter[i] in ['granduncle', 'grandaunt', 'grandauncle']] - for g_auncle in all_g_auncle: - Converter[g_auncle + '-s'] = 'cousin once removed (ascendant)' - Converter[g_auncle + '-d'] = 'cousin once removed (ascendant)' - Converter[g_auncle + '-c'] = 'cousin once removed (ascendant)' - # add Second Cousin - all_cora = [i for i in Converter if Converter[i] in ['cousin once removed (ascendant)']] - for cora in all_cora: - Converter[cora + '-s'] = 'second cousin' - Converter[cora + '-d'] = 'second cousin' - Converter[cora + '-c'] = 'second cousin' - # add Second Cousin once removed - all_s_cousins = [i for i in Converter if Converter[i] in ['second cousin']] - for s_cousin in all_s_cousins: - Converter[s_cousin + '-s'] = 'second cousin once removed (descendant)' - Converter[s_cousin + '-d'] = 'second cousin once removed (descendant)' - Converter[s_cousin + '-c'] = 'second cousin once removed (descendant)' - # add Second Cousin twice removed - all_s_cousins_o_r = [i for i in Converter if Converter[i] in ['second cousin once removed (descendant)']] - for s_cousin_o_r in all_s_cousins_o_r: - Converter[s_cousin_o_r + '-s'] = 'second cousin twice removed (descendant)' - Converter[s_cousin_o_r + '-d'] = 'second cousin twice removed (descendant)' - Converter[s_cousin_o_r + '-c'] = 'second cousin twice removed (descendant)' - - # calculate direction change (if more then 2, not blood relative) - def count_direction_change(relation_tag): - """If you are going down from proband, you need to keep going down - If you are going up from proband, you can change direction once - If you are out of these cases, you are not blood relative - We make an exception for the Husband and Wife""" - up = ['f', 'm'] - down = ['d', 's', 'c'] - state = 1 - changes = 0 - for a_letter in relation_tag.split('-'): - if a_letter in up: - new_state = 1 - elif a_letter in down: - new_state = -1 - else: # p - continue - if state == new_state: - continue - else: - state = new_state - changes += 1 - return changes - - relations = [] - for i in links: - association = '' - val = links[i][0] - if val in Converter: - relation = Converter[val] - # calculate half relation for siblings - if relation in ['sister', 'brother', 'sibling']: - # if they are full siblings, they should carry two link of same size - # if not, they are half - if len(links[i]) == 1: - relation = 'half-' + relation - # for extended family calculate paternal/maternal - if len(val) > 4: - # calculate for family that starts by going above 2 levels - if val[2:5] in ['m-m', 'm-f', 'f-f', 'f-m']: - association_pointer = val[2] - if association_pointer == 'f': - association = 'paternal' - elif association_pointer == 'm': - association = 'maternal' - else: - dir_change = count_direction_change(val) - if dir_change > 1: - relation = 'family-in-law' - else: - relation = 'extended-family' - relations.append([i, relation, association]) - return relations - # Start of the function # empty list to accumulate results relations = [] @@ -485,11 +486,11 @@ def count_direction_change(relation_tag): props = get_item_or_none(request, 
a_member, 'individuals') all_props.append(props) # convert to ped_file format - ped_text = generate_ped(all_props, proband, family_id) - primary_vectors = extract_vectors(ped_text) + ped_text = self.generate_ped(all_props, proband, family_id) + primary_vectors = self.extract_vectors(ped_text) proband_acc = proband.split('/')[2] - links = construct_links(primary_vectors, proband_acc) - relations = relationships_vocabulary(links) + links = self.construct_links(primary_vectors, proband_acc) + relations = self.relationships_vocabulary(links) results = [] for rel in relations: temp = {"individual": '', From e4a8fde7cab4f3f3fe4d9643b1ab7d6680a0d77d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Koray=20K=C4=B1rl=C4=B1?= Date: Mon, 29 Jun 2020 22:59:34 -0400 Subject: [PATCH 18/54] remove redundant calc props --- src/encoded/tests/test_types_family.py | 31 +---- src/encoded/types/family.py | 170 +------------------------ 2 files changed, 2 insertions(+), 199 deletions(-) diff --git a/src/encoded/tests/test_types_family.py b/src/encoded/tests/test_types_family.py index 1ebce8c8b7..313790d809 100644 --- a/src/encoded/tests/test_types_family.py +++ b/src/encoded/tests/test_types_family.py @@ -26,7 +26,7 @@ def family_empty(testapp, project, institution): def test_family_analysis_groups(testapp, fam, sample_proc): - assert fam.get('analysis_groups') == None + assert fam.get('analysis_groups') is None family = testapp.get(fam['@id']).json assert [ag['@id'] for ag in family.get('analysis_groups', [])] == [sample_proc['@id']] @@ -39,35 +39,6 @@ def test_family_father(testapp, fam, father): assert fam.get('father') == father['@id'] -def test_family_siblings(testapp, fam, sister, father): - assert fam.get('half_siblings') == [sister['@id']] - res = testapp.patch_json(sister['@id'], {'father': father['@id']}, status=200) - result = testapp.get(fam['@id']).json - assert not result.get('half_siblings') - assert result.get('siblings') == [sister['@id']] - - -def test_family_grandparents(testapp, fam, grandpa, female_individual): - assert len(fam.get('grandparents')) == 2 - assert grandpa['@id'] in fam.get('grandparents') - assert female_individual['@id'] in fam.get('grandparents') - - -def test_family_aunts_and_uncles(testapp, fam, uncle): - assert fam.get('aunts_and_uncles') == [uncle['@id']] - - -def test_family_cousins(testapp, fam, cousin): - assert fam.get('cousins') == [cousin['@id']] - - -def test_family_children(testapp, fam, mother, child, sister): - res = testapp.patch_json(fam['@id'], {'proband': mother['@id']}, status=200).json['@graph'][0] - assert len(res.get('children')) == 2 - assert child['@id'] in res.get('children') - assert sister['@id'] in res.get('children') - - ########################## # PROCESS PEDIGREE TESTS # ########################## diff --git a/src/encoded/types/family.py b/src/encoded/types/family.py index ec1b12dd71..fd9084b9a8 100644 --- a/src/encoded/types/family.py +++ b/src/encoded/types/family.py @@ -203,7 +203,7 @@ def construct_links(primary_vectors, seed): """Given the primary vectors, constructs linkages for each individual and filters for the shortest link Use first letter of primary vector keys to construct these links - This linkages are calcualted from the seed, often starts with proband + This linkages are calcualted from the seed, often starts with proband, seed should be accession""" # starting pack needs_analysis = [[seed, 'p'], ] @@ -505,27 +505,6 @@ def relationships(self, request, proband=None, members=None): results.append(temp) return results - def 
get_parents(self, request, proband=None, members=None): - parents = [] - if proband and members: - props = get_item_or_none(request, proband, 'individuals') - if props: - for p in ['mother', 'father']: - if props.get(p): - parents.append(props[p]) - return parents - - def get_grandparents(self, request, proband=None, members=None, parents=[]): - gp = [] - if proband and members and parents: - for item in parents: - p_props = get_item_or_none(request, item, 'individuals') - if p_props: - for p in ['mother', 'father']: - if p_props.get(p) and p_props[p] in members: - gp.append(p_props[p]) - return gp - @calculated_property(schema={ "title": "Display Title", "description": "A calculated title for every object in 4DN", @@ -578,153 +557,6 @@ def father(self, request, proband=None, members=[]): if props and props.get('father') and props['father'] in members: return props['father'] - @calculated_property(schema={ - "title": "Siblings", - "description": "Full siblings of proband", - "type": "string", - "items": { - "title": "Sibling", - "description": "Full sibling of proband", - "type": "string", - "linkTo": "Individual" - } - }) - def siblings(self, request, proband=None, members=None): - if proband and members: - sibs = [] - parents = self.get_parents(request, proband, members) - for member in members: - member_props = get_item_or_none(request, member, 'individuals') - if member_props and member != proband: - if member_props.get('mother') in parents and member_props.get('father') in parents: - sibs.append(member) - if sibs: - return sibs - - @calculated_property(schema={ - "title": "Half-siblings", - "description": "Half-siblings of proband", - "type": "string", - "items": { - "title": "Sibling", - "description": "Half sibling of proband", - "type": "string", - "linkTo": "Individual" - } - }) - def half_siblings(self, request, proband=None, members=None): - if proband and members: - sibs = [] - parents = self.get_parents(request, proband, members) - for member in members: - member_props = get_item_or_none(request, member, 'individuals') - if member_props and member != proband: - if member_props.get('mother') in parents and member_props.get('father') not in parents: - sibs.append(member) - elif member_props.get('mother') not in parents and member_props.get('father') in parents: - sibs.append(member) - if sibs: - return sibs - - @calculated_property(schema={ - "title": "Children", - "description": "Children of proband", - "type": "array", - "items": { - "title": "Child", - "description": "Child of proband", - "type": "string", - "linkTo": "Individual" - } - }) - def children(self, request, proband=None, members=[]): - if proband and members: - ch = [] - for member in members: - props = get_item_or_none(request, member, 'individuals') - if props and any(props.get(p) == proband for p in ['mother', 'father']): - ch.append(member) - if ch: - return ch - - @calculated_property(schema={ - "title": "Grandparents", - "description": "Grandparents of proband", - "type": "array", - "items": { - "title": "Grandparent", - "description": "Grandparent of proband", - "type": "string", - "linkTo": "Individual" - } - }) - def grandparents(self, request, proband=None, members=None): - if proband and members: - parents = self.get_parents(request, proband, members) - gp = self.get_grandparents(request, proband, members, parents=parents) - if gp: - return gp - - @calculated_property(schema={ - "title": "Aunts and Uncles", - "description": "Aunts and Uncles of proband", - "type": "array", - "items": { - "title": "Aunt 
or Uncle", - "description": "Aunt or Uncle of Proband", - "type": "string", - "linkTo": "Individual" - } - }) - def aunts_and_uncles(self, request, proband=None, members=None): - if proband and members: - parents = self.get_parents(request, proband, members) - gp = self.get_grandparents(request, proband, members, parents) - aunts_uncles = [] - for member in members: - member_props = get_item_or_none(request, member, 'individuals') - if member_props and member not in parents: - for p in ['mother', 'father']: - if member_props.get(p) and member_props[p] in gp: - aunts_uncles.append(member) - break - if aunts_uncles: - return aunts_uncles - - @calculated_property(schema={ - "title": "Cousins", - "description": "Cousins of proband", - "type": "string", - "items": { - "title": "Cousin", - "description": "Cousin of proband", - "type": "string", - "linkTo": "Individual" - } - }) - def cousins(self, request, proband=None, members=None): - if proband and members: - csns = [] - member_props = {} - parents = self.get_parents(request, proband, members) - gp = self.get_grandparents(request, proband, members, parents) - aunts_uncles = [] - for member in members: - member_props[member] = get_item_or_none(request, member, 'individuals') - if member_props[member] and member not in parents: - for p in ['mother', 'father']: - if member_props[member].get(p) and member_props[member][p] in gp: - aunts_uncles.append(member) - break - for member in members: - if member_props[member]: - for p in ['mother', 'father']: - if member_props[member].get(p) and member_props[member][p] in aunts_uncles: - csns.append(member) - break - if csns: - return csns - @view_config(name='process-pedigree', context=Family, request_method='PATCH', permission='edit') From 6608283f66a5ca70cb4a55bbd2bf486be906b263 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Koray=20K=C4=B1rl=C4=B1?= Date: Mon, 29 Jun 2020 23:24:32 -0400 Subject: [PATCH 19/54] add end to end tests for relationship calculations --- src/encoded/tests/datafixtures.py | 29 ++++++++++++++++++-- src/encoded/tests/test_types_family.py | 38 ++++++++++++++++++++++++++ 2 files changed, 65 insertions(+), 2 deletions(-) diff --git a/src/encoded/tests/datafixtures.py b/src/encoded/tests/datafixtures.py index 8643ed7be6..4bcb555fbc 100644 --- a/src/encoded/tests/datafixtures.py +++ b/src/encoded/tests/datafixtures.py @@ -145,7 +145,7 @@ def access_key(testapp, submitter): @pytest.fixture def female_individual(testapp, project, institution): item = { - "accession": "GAPINOOOAAQ1", + "accession": "GAPINGRANDMA", "age": 53, "age_units": "year", 'project': project['@id'], @@ -160,6 +160,7 @@ def female_individual(testapp, project, institution): @pytest.fixture def grandpa(testapp, project, institution): item = { + "accession": "GAPIDGRANDPA", "age": 53, "age_units": "year", 'project': project['@id'], @@ -173,6 +174,7 @@ def grandpa(testapp, project, institution): @pytest.fixture def mother(testapp, project, institution, grandpa, female_individual): item = { + "accession": "GAPIDMOTHER1", "age": 33, "age_units": "year", 'project': project['@id'], @@ -187,6 +189,7 @@ def mother(testapp, project, institution, grandpa, female_individual): @pytest.fixture def father(testapp, project, institution): item = { + "accession": "GAPIDFATHER1", "age": 33, "age_units": "year", 'project': project['@id'], @@ -199,6 +202,7 @@ def father(testapp, project, institution): @pytest.fixture def uncle(testapp, project, institution, grandpa): item = { + "accession": "GAPIDUNCLE01", "age": 35, "age_units": "year", 
'project': project['@id'], @@ -212,6 +216,7 @@ def uncle(testapp, project, institution, grandpa): @pytest.fixture def child(testapp, project, institution, mother, father): item = { + "accession": "GAPIDPROBAND", "age": 7, "age_units": "year", 'project': project['@id'], @@ -226,6 +231,7 @@ def child(testapp, project, institution, mother, father): @pytest.fixture def cousin(testapp, project, institution, uncle): item = { + "accession": "GAPIDCOUSIN1", "age": 11, "age_units": "year", 'project': project['@id'], @@ -239,6 +245,7 @@ def cousin(testapp, project, institution, uncle): @pytest.fixture def sister(testapp, project, institution, mother): item = { + "accession": "GAPIDHALFSIS", "age": 11, "age_units": "year", 'project': project['@id'], @@ -250,7 +257,23 @@ def sister(testapp, project, institution, mother): @pytest.fixture -def fam(testapp, project, female_individual, institution, grandpa, mother, father, uncle, child, cousin, sister): +def brother(testapp, project, institution, mother, father): + item = { + "accession": "GAPIDBROTHER", + "age": 13, + "age_units": "year", + 'project': project['@id'], + 'institution': institution['@id'], + "sex": "M", + "mother": mother['@id'], + "father": father['@id'] + } + return testapp.post_json('/individual', item).json['@graph'][0] + + +@pytest.fixture +def fam(testapp, project, female_individual, institution, grandpa, mother, father, uncle, + child, cousin, sister, brother): item = { "project": project['@id'], "institution": institution['@id'], @@ -259,6 +282,7 @@ def fam(testapp, project, female_individual, institution, grandpa, mother, fathe "members": [ child['@id'], sister['@id'], + brother['@id'], mother['@id'], father['@id'], uncle['@id'], @@ -280,6 +304,7 @@ def sample_f(testapp, project, institution, female_individual): } return testapp.post_json('/sample', data).json['@graph'][0] + @pytest.fixture def sample_proc(testapp, project, institution, sample_f, fam): data = { diff --git a/src/encoded/tests/test_types_family.py b/src/encoded/tests/test_types_family.py index 313790d809..5ce2af6b53 100644 --- a/src/encoded/tests/test_types_family.py +++ b/src/encoded/tests/test_types_family.py @@ -39,6 +39,43 @@ def test_family_father(testapp, fam, father): assert fam.get('father') == father['@id'] +def test_relationships_roles(testapp, fam): + """This is an end to end test for calculating relationships + Test for roles""" + calculated_relations = fam.get('relationships', []) + expected_values = { + "GAPIDPROBAND": "proband", + "GAPIDFATHER1": "father", + "GAPIDMOTHER1": "mother", + "GAPIDBROTHER": "brother", + "GAPIDGRANDPA": "grandfather", + "GAPINGRANDMA": "grandmother", + "GAPIDHALFSIS": "half-sister", + "GAPIDUNCLE01": "uncle", + "GAPIDCOUSIN1": "cousin" + } + for a_relation in calculated_relations: + assert a_relation['relationship'] == expected_values[a_relation['individual']] + + +def test_relationships_assosiation(testapp, fam): + """This is an end to end test for calculating relationships + Test for paternal maternal associations""" + calculated_relations = fam.get('relationships', []) + expected_values = { + "GAPIDPROBAND": "", + "GAPIDFATHER1": "", + "GAPIDMOTHER1": "", + "GAPIDBROTHER": "", + "GAPIDGRANDPA": "maternal", + "GAPINGRANDMA": "maternal", + "GAPIDHALFSIS": "", + "GAPIDUNCLE01": "maternal", + "GAPIDCOUSIN1": "maternal" + } + for a_relation in calculated_relations: + assert a_relation.get('association', "") == expected_values[a_relation['individual']] + ########################## # PROCESS PEDIGREE TESTS # 
########################## @@ -147,6 +184,7 @@ def test_affected_xml_to_phenotypic_features_nonaffected(testapp, family_empty, assert not data.get('clinic_notes') assert not data.get('phenotypic_features') + def test_cause_of_death_xml_to_phenotype_term_not_found(testapp, family_empty, pedigree_ref_data, death_info): data = {} cause_of_death_xml_to_phenotype(testapp, death_info, {}, data, family_empty['@id'], {}) From 06bd3e48b2a969b180816fed093b6ea234c35570 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Koray=20K=C4=B1rl=C4=B1?= Date: Tue, 30 Jun 2020 01:55:17 -0400 Subject: [PATCH 20/54] add secondary families and sample calc prop on case --- src/encoded/tests/conftest.py | 2 +- src/encoded/tests/datafixtures.py | 19 +++++----- src/encoded/tests/test_types_family.py | 2 +- src/encoded/types/case.py | 48 +++++++++++++++++++++++++- src/encoded/types/family.py | 29 +++++++++++----- 5 files changed, 79 insertions(+), 21 deletions(-) diff --git a/src/encoded/tests/conftest.py b/src/encoded/tests/conftest.py index 5510542633..08573b6f34 100644 --- a/src/encoded/tests/conftest.py +++ b/src/encoded/tests/conftest.py @@ -48,7 +48,7 @@ def es_app_settings(wsgi_server_host_port, elasticsearch_server, postgresql_serv settings['collection_datastore'] = 'elasticsearch' settings['item_datastore'] = 'elasticsearch' settings['indexer'] = True - settings['indexer.namespace'] = os.environ.get('TRAVIS_JOB_ID', '') # set namespace for tests + settings['indexer.namespace'] = os.environ.get('TRAVIS_JOB_ID', '') # set namespace for tests # use aws auth to access elasticsearch if aws_auth: diff --git a/src/encoded/tests/datafixtures.py b/src/encoded/tests/datafixtures.py index 4bcb555fbc..fc6d6b3fea 100644 --- a/src/encoded/tests/datafixtures.py +++ b/src/encoded/tests/datafixtures.py @@ -1,19 +1,18 @@ import pytest -import copy ORDER = [ - 'user', 'project', 'institution', 'filter_set', 'nexus', 'file_format', - 'variant_consequence', 'phenotype', - 'cohort', 'family', 'individual', 'sample', 'workflow', 'access_key', 'disorder', - 'document', 'file_fastq', 'file_processed', 'file_reference', 'gene', 'sample_processing', + 'user', 'project', 'institution', 'filter_set', 'nexus', + 'file_format', 'variant_consequence', 'phenotype', + 'cohort', 'family', 'individual', 'sample', 'workflow', + 'access_key', 'disorder', 'document', 'file_fastq', + 'file_processed', 'file_reference', 'gene', 'sample_processing', 'case', 'report', 'page', 'quality_metric_fastqc', 'evidence_dis_pheno', 'quality_metric_bamcheck', 'quality_metric_qclist', 'quality_metric_wgs_bamqc', - 'quality_metric_cmphet', - 'quality_metric_vcfcheck', 'quality_metric_workflowrun', 'software', 'static_section', - 'tracking_item', 'workflow_mapping', 'workflow_run_awsem', - 'workflow_run', 'annotation_field', 'variant_sample', 'variant', 'gene_annotation_field', - 'gene', + 'quality_metric_cmphet', 'quality_metric_vcfcheck', 'quality_metric_workflowrun', + 'software', 'static_section', 'tracking_item', 'workflow_mapping', + 'workflow_run_awsem', 'workflow_run', 'annotation_field', 'variant_sample', + 'variant', 'gene_annotation_field', 'gene', ] diff --git a/src/encoded/tests/test_types_family.py b/src/encoded/tests/test_types_family.py index 5ce2af6b53..78d368a207 100644 --- a/src/encoded/tests/test_types_family.py +++ b/src/encoded/tests/test_types_family.py @@ -76,10 +76,10 @@ def test_relationships_assosiation(testapp, fam): for a_relation in calculated_relations: assert a_relation.get('association', "") == expected_values[a_relation['individual']] + 
########################## # PROCESS PEDIGREE TESTS # ########################## - @pytest.fixture def pedigree_ref_data(testapp): return { diff --git a/src/encoded/types/case.py b/src/encoded/types/case.py index a1b92e61e5..092b346faa 100644 --- a/src/encoded/types/case.py +++ b/src/encoded/types/case.py @@ -4,7 +4,10 @@ load_schema, display_title_schema ) -from .base import Item +from .base import ( + Item, + get_item_or_none +) @collection( @@ -122,3 +125,46 @@ def display_title(self, accession, title=None): return title + ' ({})'.format(accession) else: return accession + + @calculated_property(schema={ + "title": "Sample", + "description": "Primary sample used for this case", + "type": "string", + "linkTo": 'Sample' + }) + def sample(self, request, individual=None, sample_processing=None): + if not individual or not sample_processing: + return {} + ind_data = get_item_or_none(request, individual, 'individuals') + sp_data = get_item_or_none(request, sample_processing, 'sample-processings') + ind_samples = ind_data.get('samples', []) + sp_samples = sp_data.get('samples', []) + intersection = [i for i in ind_samples if i in sp_samples] + if not intersection: + return {} + if len(intersection) != 1: + # To Do we need to invoke a validation error + return {} + return intersection[0] + + @calculated_property(schema={ + "title": "Secondary Families", + "description": "Secondary families associated with the case", + "type": "array", + "items": { + "title": "Secondary Family", + "type": "string", + "linkTo": "Family" + } + }) + def secondary_families(self, request, individual=None, family=None): + if not individual or not family: + return [] + ind_data = get_item_or_none(request, individual, 'individuals', frame='embedded') + if not ind_data: + return [] + individual_families = ind_data.get('families', []) + secondary_families = [i['@id'] for i in individual_families if i['@id'] != family] + print(self.properties) + print() + return secondary_families diff --git a/src/encoded/types/family.py b/src/encoded/types/family.py index fd9084b9a8..2fdab512f8 100644 --- a/src/encoded/types/family.py +++ b/src/encoded/types/family.py @@ -464,7 +464,8 @@ def count_direction_change(relation_tag): 'second cousin once removed (descendant)', 'second cousin twice removed (descendant)', 'family-in-law', - 'extended-family' + 'extended-family', + 'not linked' ] } } @@ -483,6 +484,8 @@ def relationships(self, request, proband=None, members=None): all_props = [] for a_member in members: # This might be a step to optimize if families get larger + # TODO: make sure all mother fathers are in member list, if not fetch them too + # for complete connection tracing props = get_item_or_none(request, a_member, 'individuals') all_props.append(props) # convert to ped_file format @@ -492,16 +495,26 @@ def relationships(self, request, proband=None, members=None): links = self.construct_links(primary_vectors, proband_acc) relations = self.relationships_vocabulary(links) results = [] - for rel in relations: + for a_member in members: + a_member_resp = [i for i in all_props if i['@id'] == a_member][0] temp = {"individual": '', "sex": '', - "relationship": ''} - temp['individual'] = rel[0] - temp['relationship'] = rel[1] - if rel[2]: - temp['association'] = rel[2] - sex = [i for i in all_props if i['accession'] == rel[0]][0].get('sex', 'U') + "relationship": '', + "association": ''} + mem_acc = a_member_resp['accession'] + temp['individual'] = mem_acc + sex = a_member_resp.get('sex', 'U') temp['sex'] = sex + relation_dic = [i for 
i in relations if i[0] == mem_acc]
+        if not relation_dic:
+            temp['relationship'] = 'not linked'
+            # the individual is not linked to proband through individuals listed in members
+            results.append(temp)
+            continue
+        relation = relation_dic[0]
+        temp['relationship'] = relation[1]
+        if relation[2]:
+            temp['association'] = relation[2]
         results.append(temp)
         return results
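For orientation, a sketch (not part of the patch series): after this change every member listed on the family gets an entry in the calculated relationships array, shaped roughly as below. The accessions GAPIDPROBAND and GAPIDGRANDPA come from the test fixtures; GAPIDSTRAYXX is a made-up accession standing in for a member whose parents are missing from the members list.

    # Approximate shape of the `relationships` calculated property output;
    # an untraceable member now still gets an entry, marked 'not linked'.
    example_relationships = [
        {"individual": "GAPIDPROBAND", "sex": "M", "relationship": "proband", "association": ""},
        {"individual": "GAPIDGRANDPA", "sex": "M", "relationship": "grandfather", "association": "maternal"},
        {"individual": "GAPIDSTRAYXX", "sex": "U", "relationship": "not linked", "association": ""},
    ]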
From a10a12e0ba4abce7b046894cd4b4b9cdaa455171 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Koray=20K=C4=B1rl=C4=B1?=
Date: Tue, 30 Jun 2020 01:56:11 -0400
Subject: [PATCH 21/54] del print

---
 src/encoded/types/case.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/encoded/types/case.py b/src/encoded/types/case.py
index 092b346faa..e45e34eb88 100644
--- a/src/encoded/types/case.py
+++ b/src/encoded/types/case.py
@@ -165,6 +165,4 @@ def secondary_families(self, request, individual=None, family=None):
             return []
         individual_families = ind_data.get('families', [])
         secondary_families = [i['@id'] for i in individual_families if i['@id'] != family]
-        print(self.properties)
-        print()
         return secondary_families

From 5f359b8157530c8517c2e3b83b95cfded40d8734 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Koray=20K=C4%B1rl=C4=B1?=
Date: Tue, 30 Jun 2020 02:28:56 -0400
Subject: [PATCH 22/54] duplicate disorder on mapping list

---
 src/encoded/commands/create_mapping_on_deploy.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/encoded/commands/create_mapping_on_deploy.py b/src/encoded/commands/create_mapping_on_deploy.py
index 4a2f7f3e20..1778ab0347 100644
--- a/src/encoded/commands/create_mapping_on_deploy.py
+++ b/src/encoded/commands/create_mapping_on_deploy.py
@@ -27,7 +27,6 @@
     'WorkflowRun',
     'WorkflowRunAwsem',
     'VariantConsequence',
-    'Disorder',
     'FileFormat',
     'FileFastq',
     'FileProcessed',

From c5b9a736207a15a2790507769fe8dc239b5fdea4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Koray=20K=C4=B1rl=C4=B1?=
Date: Tue, 30 Jun 2020 02:30:11 -0400
Subject: [PATCH 23/54] duplicate document

---
 src/encoded/commands/create_mapping_on_deploy.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/encoded/commands/create_mapping_on_deploy.py b/src/encoded/commands/create_mapping_on_deploy.py
index 1778ab0347..3dda43842b 100644
--- a/src/encoded/commands/create_mapping_on_deploy.py
+++ b/src/encoded/commands/create_mapping_on_deploy.py
@@ -51,7 +51,6 @@
     'Sample',
     'SampleProcessing',
     'StaticSection',
-    'Document',
     'Page',
     'AnnotationField',
     'Variant',

From 69d22286ee07db39622506b5d30138b0679ac6dd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Koray=20K=C4=B1rl=C4=B1?=
Date: Tue, 30 Jun 2020 20:36:37 -0400
Subject: [PATCH 24/54] add calculated vcf file

---
 src/encoded/types/case.py | 22 +++++++++++++++++++++-
 1 file changed, 21 insertions(+), 1 deletion(-)

diff --git a/src/encoded/types/case.py b/src/encoded/types/case.py
index e45e34eb88..c902d4881b 100644
--- a/src/encoded/types/case.py
+++ b/src/encoded/types/case.py
@@ -112,7 +112,7 @@ class Case(Item):
         "report.last_modified.*",
         "report.status",
         "family.accession",
-        "cohort.filter_set.title"
+        "cohort.filter_set.*"
     ]

     @calculated_property(schema={
@@ -166,3 +166,23 @@ def secondary_families(self, request, individual=None, family=None):
         individual_families = ind_data.get('families', [])
         secondary_families = [i['@id'] for i in individual_families if i['@id'] != family]
         return secondary_families
+
+    @calculated_property(schema={
+        "title": "VCF File",
+        "description": "VCF file that will be used in variant digestion",
+        "type": "string",
+        "LinkTo": "FileProcessed"
+    })
+    def vcf_file(self, request, sample_processing=None):
+        """Map the vcf file to be digested
+        Currently we have a single file on processed_files field of sample processing"""
+        if not sample_processing:
+            return {}
+        sp_data = get_item_or_none(request, sample_processing, 'sample-processings')
+        if not sp_data:
+            return {}
+        files = sp_data.get('processed_files', [])
+        if not files:
+            return {}
+        return files[0]
+

From 08a8eb3ca0340df4ed6773ae9ed03b5ebf7b7e6f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Koray=20K=C4=B1rl=C4=B1?=
Date: Tue, 30 Jun 2020 20:36:56 -0400
Subject: [PATCH 25/54] add calculated filter set add on

---
 src/encoded/types/case.py | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/src/encoded/types/case.py b/src/encoded/types/case.py
index c902d4881b..da349ee0e6 100644
--- a/src/encoded/types/case.py
+++ b/src/encoded/types/case.py
@@ -186,3 +186,22 @@ def vcf_file(self, request, sample_processing=None):
             return {}
         return files[0]

+    @calculated_property(schema={
+        "title": "Filter Set Flag add-on",
+        "description": "tag to be added to the filter set flag for limiting search to variants/sample variants from this case",
+        "type": "string"
+    })
+    def filter_set_flag_addon(self, request, sample_processing=None, individual=None):
+        """use vcf file and sample accessions to limit variant/variantsample to this case"""
+        if not individual or not sample_processing:
+            return ''
+        sample = self.sample(request, individual, sample_processing)
+        if not sample:
+            return ''
+        vcf = self.vcf_file(request, sample_processing)
+        if not vcf:
+            return ''
+        sample_acc = sample.split('/')[2]
+        vcf_acc = vcf.split('/')[2]
+        add_on = "&sample={}&file={}".format(sample_acc, vcf_acc)
+        return add_on
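To make the string filter_set_flag_addon assembles concrete, here is a minimal sketch, assuming @id values of the form /samples/<accession>/ and /files-processed/<accession>/ (both accessions below are invented):

    # split('/')[2] extracts the accession from an @id path,
    # mirroring the calculated property above.
    sample_at_id = "/samples/GAPSAEXAMPLE/"
    vcf_at_id = "/files-processed/GAPFIEXAMPLE/"
    add_on = "&sample={}&file={}".format(sample_at_id.split('/')[2], vcf_at_id.split('/')[2])
    # add_on == "&sample=GAPSAEXAMPLE&file=GAPFIEXAMPLE"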
From a12cf707558803876ff4e116e467d08932faf42a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Koray=20K=C4=B1rl=C4=B1?=
Date: Tue, 30 Jun 2020 20:50:22 -0400
Subject: [PATCH 26/54] update variant sample sample and file fields

---
 src/encoded/schemas/variant_sample.json | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/src/encoded/schemas/variant_sample.json b/src/encoded/schemas/variant_sample.json
index 4514d45228..753469d4a2 100644
--- a/src/encoded/schemas/variant_sample.json
+++ b/src/encoded/schemas/variant_sample.json
@@ -371,8 +371,13 @@
         },
         "sample": {
             "title": "Sample",
-            "type": "string",
-            "linkTo": "Sample"
+            "description": "String Accession of the sample",
+            "type": "string"
+        },
+        "file": {
+            "title": "File",
+            "description": "String Accession of the vcf file used in digestion",
+            "type": "string"
         },
         "variant": {
             "title": "Variant",

From 8115ba84ccfaa1c3019c5c2a9a85b0fc97fce7f1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Koray=20K=C4=B1rl=C4=B1?=
Date: Tue, 30 Jun 2020 22:33:22 -0400
Subject: [PATCH 27/54] fix indexing

---
 src/encoded/types/case.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/src/encoded/types/case.py b/src/encoded/types/case.py
index da349ee0e6..f707b86823 100644
--- a/src/encoded/types/case.py
+++ b/src/encoded/types/case.py
@@ -170,21 +170,22 @@ def secondary_families(self, request, individual=None, family=None):
     @calculated_property(schema={
         "title": "VCF File",
         "description": "VCF file that will be used in variant digestion",
-        "type": "string",
-        "LinkTo": "FileProcessed"
+        "type": "string"
     })
     def vcf_file(self, request, sample_processing=None):
         """Map the vcf file to be digested
        Currently we have a single file on processed_files field of sample processing"""
+        vcf_file = ""
         if not sample_processing:
-            return {}
+            return vcf_file
         sp_data = get_item_or_none(request, sample_processing, 'sample-processings')
         if not sp_data:
-            return {}
+            return vcf_file
         files = sp_data.get('processed_files', [])
         if not files:
-            return {}
+            return vcf_file
-        return files[0]
+        vcf_file = files[0]
+        return vcf_file

From d1566f2aa35a0c38ef81611750e9b9dabf374d1a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Koray=20K=C4=B1rl=C4=B1?=
Date: Tue, 30 Jun 2020 22:36:26 -0400
Subject: [PATCH 28/54] bring linkTo back

---
 src/encoded/types/case.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/encoded/types/case.py b/src/encoded/types/case.py
index f707b86823..bd8ef5885d 100644
--- a/src/encoded/types/case.py
+++ b/src/encoded/types/case.py
@@ -170,10 +170,11 @@ def secondary_families(self, request, individual=None, family=None):
     @calculated_property(schema={
         "title": "VCF File",
         "description": "VCF file that will be used in variant digestion",
-        "type": "string"
+        "type": "string",
+        "linkTo": "File"
     })
     def vcf_file(self, request, sample_processing=None):
         """Map the vcf file to be digested
        Currently we have a single file on processed_files field of sample processing"""
-        vcf_file = ""
+        vcf_file = {}
         if not sample_processing:

From 98bae18282a421c32158b73b1cb3fdda57db93db Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Koray=20K=C4=B1rl=C4=B1?=
Date: Wed, 1 Jul 2020 10:35:21 -0400
Subject: [PATCH 29/54] simplify loadxl lists

---
 src/encoded/loadxl.py | 13 +------------
 1 file changed, 1 insertion(+), 12 deletions(-)

diff --git a/src/encoded/loadxl.py b/src/encoded/loadxl.py
index 5ee92d6979..93de7fb805 100644
--- a/src/encoded/loadxl.py
+++ b/src/encoded/loadxl.py
@@ -30,21 +30,10 @@ def includeme(config):

 # order of items references with linkTo in a field in 'required' in schemas
 ORDER = [
-    'user', 'project', 'institution',
+    'user',
     'file_format',
-    'phenotype',
-    'disorder',
-    # 'variant_consequence',
-    # 'variant',  # links to ^ variant_consequence
-    # 'variant_sample',  # links to ^ variant
-    'cohort',
-    'family',
-    'individual',
-    'case',
-    'report',
-    'sample',
     'workflow',
 ]

From 51cde4e067305b08353ed6f071530918f0cfb6e4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Koray=20K=C4=B1rl=C4=B1?=
Date: Wed, 1 Jul 2020 10:35:33 -0400
Subject: [PATCH 30/54] simplify family roles

---
 src/encoded/types/family.py | 57 ++++++++++---------------------------
 1 file changed, 15 insertions(+), 42 deletions(-)

diff --git a/src/encoded/types/family.py b/src/encoded/types/family.py
index 2fdab512f8..acddcb4b89 100644
--- a/src/encoded/types/family.py
+++ b/src/encoded/types/family.py
@@ -295,48 +295,21 @@ def relationships_vocabulary(links):
             Converter[gg_parent + '-s'] = 'granduncle'
             Converter[gg_parent + '-d'] = 'grandaunt'
             Converter[gg_parent + '-c'] = 'grandauncle'
-            # add Cousin
-            all_auncle = [i for i in Converter if Converter[i] in ['uncle', 'aunt', 'auncle']]
-            for auncle in all_auncle:
-                Converter[auncle + '-s'] = 'cousin'
-                Converter[auncle + '-d'] = 'cousin'
-                Converter[auncle + '-c'] = 'cousin'
-            # add Cousin once removed (descendant)
-            all_cousins = [i for i in Converter if Converter[i] in ['cousin']]
-            for cousin in all_cousins:
-                Converter[cousin + '-s'] = 'cousin once removed (descendant)'
-                Converter[cousin + '-d'] = 'cousin once removed (descendant)'
-                Converter[cousin + '-c'] = 'cousin once removed (descendant)'
-            # add Cousin twice removed (descendant)
-            all_cousins_o_r = [i for i in Converter if Converter[i] in ['cousin once removed (descendant)']]
-            for cousin in
all_cousins_o_r: - Converter[cousin + '-s'] = 'cousin twice removed (descendant)' - Converter[cousin + '-d'] = 'cousin twice removed (descendant)' - Converter[cousin + '-c'] = 'cousin twice removed (descendant)' - # add First cousin once removed (ascendant) - all_g_auncle = [i for i in Converter if Converter[i] in ['granduncle', 'grandaunt', 'grandauncle']] - for g_auncle in all_g_auncle: - Converter[g_auncle + '-s'] = 'cousin once removed (ascendant)' - Converter[g_auncle + '-d'] = 'cousin once removed (ascendant)' - Converter[g_auncle + '-c'] = 'cousin once removed (ascendant)' - # add Second Cousin - all_cora = [i for i in Converter if Converter[i] in ['cousin once removed (ascendant)']] - for cora in all_cora: - Converter[cora + '-s'] = 'second cousin' - Converter[cora + '-d'] = 'second cousin' - Converter[cora + '-c'] = 'second cousin' - # add Second Cousin once removed - all_s_cousins = [i for i in Converter if Converter[i] in ['second cousin']] - for s_cousin in all_s_cousins: - Converter[s_cousin + '-s'] = 'second cousin once removed (descendant)' - Converter[s_cousin + '-d'] = 'second cousin once removed (descendant)' - Converter[s_cousin + '-c'] = 'second cousin once removed (descendant)' - # add Second Cousin twice removed - all_s_cousins_o_r = [i for i in Converter if Converter[i] in ['second cousin once removed (descendant)']] - for s_cousin_o_r in all_s_cousins_o_r: - Converter[s_cousin_o_r + '-s'] = 'second cousin twice removed (descendant)' - Converter[s_cousin_o_r + '-d'] = 'second cousin twice removed (descendant)' - Converter[s_cousin_o_r + '-c'] = 'second cousin twice removed (descendant)' + # given a relation, map the new relation for that relations children when new role is gender independent + children_roles = [ + {'roles': ['uncle', 'aunt', 'auncle'], 'children': 'cousin'}, + {'roles': ['cousin'], 'children': 'cousin once removed (descendant)'}, + {'roles': ['cousin once removed (descendant)'], 'children': 'cousin twice removed (descendant)'}, + {'roles': ['granduncle', 'grandaunt', 'grandauncle'], 'children': 'cousin once removed (ascendant)'}, + {'roles': ['cousin once removed (ascendant)'], 'children': 'second cousin'}, + {'roles': ['second cousin'], 'children': 'second cousin once removed (descendant)'}, + {'roles': ['second cousin once removed (descendant)'], 'children': 'second cousin twice removed (descendant)'}, + ] + for an_extension in children_roles: + all_combinations = [i for i in Converter if Converter[i] in an_extension['roles']] + for a_combination in all_combinations: + for a_child_tag in ['-s', '-d', '-c']: + Converter[a_combination + a_child_tag] = an_extension['children'] # calculate direction change (if more then 2, not blood relative) def count_direction_change(relation_tag): From 8c5cbaa3dc772e4f9747270b0d60c5352c7f653c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Koray=20K=C4=B1rl=C4=B1?= Date: Wed, 1 Jul 2020 14:13:03 -0400 Subject: [PATCH 31/54] tests for relationship static functions --- src/encoded/tests/test_types_family.py | 94 ++++++++++++++++++++++++++ src/encoded/types/family.py | 18 +++++ 2 files changed, 112 insertions(+) diff --git a/src/encoded/tests/test_types_family.py b/src/encoded/tests/test_types_family.py index 78d368a207..94e7108198 100644 --- a/src/encoded/tests/test_types_family.py +++ b/src/encoded/tests/test_types_family.py @@ -77,6 +77,100 @@ def test_relationships_assosiation(testapp, fam): assert a_relation.get('association', "") == expected_values[a_relation['individual']] +@pytest.fixture +def ptolemaic_pedigree(): + 
# from https://en.wikipedia.org/wiki/Ptolemaic_dynasty#Ptolemaic_family_tree + ptolemaic_pedigree = """Ptolemaic_dynasty 4DNFIBERENNI 2 -9 + Ptolemaic_dynasty 4DNFIPTOLEMI 1 -9 + Ptolemaic_dynasty 4DNFIARSINII 4DNFIPTOLEMI 4DNFIBERENNI 2 -9 + Ptolemaic_dynasty 4DNFIPTOLEII 4DNFIPTOLEMI 4DNFIBERENNI 1 -9 + Ptolemaic_dynasty 4DNFIARSINOI 2 -9 + Ptolemaic_dynasty 4DNFIPHILIPP 1 -9 + Ptolemaic_dynasty 4DNFIPTOLIII 4DNFIPTOLEII 4DNFIARSINOI 1 -9 + Ptolemaic_dynasty 4DNFIMAGASCY 4DNFIPHILIPP 4DNFIBERENNI 2 -9 + Ptolemaic_dynasty 4DNFIBERENII 4DNFIMAGASCY 2 -9 + Ptolemaic_dynasty 4DNFIPTOLEIV 4DNFIPTOLIII 4DNFIBERENII 1 -9 + Ptolemaic_dynasty 4DNFIARSNIII 4DNFIPTOLIII 4DNFIBERENII 2 -9 + Ptolemaic_dynasty 4DNFIPTOLEMV 4DNFIPTOLEIV 4DNFIARSNIII 1 -9 + Ptolemaic_dynasty 4DNFICLEOPOI 2 -9 + Ptolemaic_dynasty 4DNFIPTOLEVI 4DNFIPTOLEMV 4DNFICLEOPOI 1 -9 + Ptolemaic_dynasty 4DNFICLEOPII 4DNFIPTOLEMV 4DNFICLEOPOI 2 -9 + Ptolemaic_dynasty 4DNFIPTOVIII 4DNFIPTOLEMV 4DNFICLEOPOI 1 -9 + Ptolemaic_dynasty 4DNFIEIRENEE 2 -9 + Ptolemaic_dynasty 4DNFIPTOLVII 4DNFIPTOLEVI 4DNFICLEOPII 1 -9 + Ptolemaic_dynasty 4DNFICLEOIII 4DNFIPTOLEVI 4DNFICLEOPII 2 -9 + Ptolemaic_dynasty 4DNFIPLOTEMM 4DNFIPTOVIII 4DNFICLEOPII 1 -9 + Ptolemaic_dynasty 4DNFIPLOTEMA 4DNFIPTOVIII 4DNFIEIRENEE 1 -9 + Ptolemaic_dynasty 4DNFICLEOPIV 4DNFIPTOVIII 4DNFICLEOIII 2 -9 + Ptolemaic_dynasty 4DNFIPLOTEIX 4DNFIPTOVIII 4DNFICLEOIII 1 -9 + Ptolemaic_dynasty 4DNFICLOEPAI 4DNFIPTOVIII 4DNFICLEOIII 2 -9 + Ptolemaic_dynasty 4DNFIPTOEMYX 4DNFIPTOVIII 4DNFICLEOIII 1 -9 + Ptolemaic_dynasty 4DNFIBEREIII 4DNFIPLOTEIX 4DNFICLOEPAI 2 -9 + Ptolemaic_dynasty 4DNFIPTOLEXI 4DNFIPTOEMYX 4DNFICLOEPAI 1 -9 + Ptolemaic_dynasty 4DNFIPTOLXII 4DNFIPLOTEIX 4DNFICLEOPIV 1 -9 + Ptolemaic_dynasty 4DNFICLEOPAV 4DNFIPTOEMYX 4DNFIBEREIII 2 -9 + Ptolemaic_dynasty 4DNFICLEOPVI 4DNFIPTOLXII 4DNFICLEOPAV 2 -9 + Ptolemaic_dynasty 4DNFIBERENIV 4DNFIPTOLXII 4DNFICLEOPAV 2 -9 + Ptolemaic_dynasty 4DNFIPTOXIII 4DNFIPTOLXII 4DNFICLEOPAV 1 -9 + Ptolemaic_dynasty 4DNFICLEOPAT 4DNFIPTOLXII 4DNFICLEOPAV 2 2 + Ptolemaic_dynasty 4DNFICLEOXIV 4DNFIPTOLXII 4DNFICLEOPAV 1 -9 + Ptolemaic_dynasty 4DNFIARSINIV 4DNFIPTOLXII 4DNFICLEOPAV 2 -9 + Ptolemaic_dynasty 4DNFIJCAESAR 1 -9 + Ptolemaic_dynasty 4DNFIPTOLEXV 4DNFIJCAESAR 4DNFICLEOPAT 1 -9 + Ptolemaic_dynasty 4DNFIMARKANT 1 -9 + Ptolemaic_dynasty 4DNFIAHELIOS 4DNFIMARKANT 4DNFICLEOPAT 1 -9 + Ptolemaic_dynasty 4DNFIPTOLXVI 4DNFIMARKANT 4DNFICLEOPAT 1 -9 + Ptolemaic_dynasty 4DNFICLEOSII 4DNFIMARKANT 4DNFICLEOPAT 2 -9""" + return ptolemaic_pedigree + + +def test_extract_vectors(ptolemaic_pedigree): + primary_vectors = Family.extract_vectors(ptolemaic_pedigree) + assert ['4DNFICLEOPAV', '4DNFICLEOPAT'] in primary_vectors['mothers'] + assert ['4DNFIPTOLXII', '4DNFICLEOPAT'] in primary_vectors['fathers'] + assert ['4DNFICLEOSII', '4DNFICLEOPAT'] in primary_vectors['daughters'] + assert ['4DNFIAHELIOS', '4DNFICLEOPAT'] in primary_vectors['sons'] + assert primary_vectors['children'] == [] + + +def test_construct_links(ptolemaic_pedigree): + primary_vectors = Family.extract_vectors(ptolemaic_pedigree) + # create links from Cleopatra's perspective + all_links = Family.construct_links(primary_vectors, '4DNFICLEOPAT') + # look at links to cleopatra III - wow + con_to_4DNFICLEOIII = all_links['4DNFICLEOIII'] + assert 'p-f-m-m' in con_to_4DNFICLEOIII + assert 'p-f-f-m' in con_to_4DNFICLEOIII + assert 'p-m-f-m' in con_to_4DNFICLEOIII + + # try looking at same person (4DNFICLEOIII) from 4DNFIPTOLIII perspective + all_links = 
Family.construct_links(primary_vectors, '4DNFIPTOLIII')
+    # look at links to cleopatra III - wow
+    con_to_4DNFICLEOIII = all_links['4DNFICLEOIII']
+    print(con_to_4DNFICLEOIII)
+    assert 'p-d-s-s-d' in con_to_4DNFICLEOIII
+    assert 'p-d-s-d-d' in con_to_4DNFICLEOIII
+
+
+def test_relationships_vocabulary(ptolemaic_pedigree):
+    primary_vectors = Family.extract_vectors(ptolemaic_pedigree)
+    # create links from Cleopatra's perspective
+    all_links = Family.construct_links(primary_vectors, '4DNFICLEOPAT')
+    relations = Family.relationships_vocabulary(all_links)
+    # assert all members have a calculated relationship
+    for rel in relations:
+        assert rel[1]
+    # look at links to cleopatra III
+    rel_to_4DNFICLEOIII = [i for i in relations if i[0] == '4DNFICLEOIII'][0][1]
+    assert rel_to_4DNFICLEOIII == 'great-grandmother'
+    # try looking at same person (4DNFICLEOIII) from 4DNFIPTOLIII perspective
+    all_links = Family.construct_links(primary_vectors, '4DNFIPTOLIII')
+    relations = Family.relationships_vocabulary(all_links)
+    # look at links to cleopatra III
+    rel_to_4DNFICLEOIII = [i for i in relations if i[0] == '4DNFICLEOIII'][0][1]
+    assert rel_to_4DNFICLEOIII == 'great-great-granddaughter'
+
+
 ##########################
 # PROCESS PEDIGREE TESTS #
 ##########################
diff --git a/src/encoded/types/family.py b/src/encoded/types/family.py
index acddcb4b89..d7ec28eb09 100644
--- a/src/encoded/types/family.py
+++ b/src/encoded/types/family.py
@@ -262,6 +262,18 @@ def relationships_vocabulary(links):
             Converter[child + '-s'] = 'grandson'
             Converter[child + '-d'] = 'granddaughter'
             Converter[child + '-c'] = 'grandchild'
+            # add great-grandchildren
+            all_g_children = [i for i in Converter if Converter[i] in ['grandson', 'granddaughter', 'grandchild']]
+            for g_child in all_g_children:
+                Converter[g_child + '-s'] = 'great-grandson'
+                Converter[g_child + '-d'] = 'great-granddaughter'
+                Converter[g_child + '-c'] = 'great-grandchild'
+            # add great-great-grandchildren
+            all_gg_children = [i for i in Converter if Converter[i] in ['great-grandson', 'great-granddaughter', 'great-grandchild']]
+            for gg_child in all_gg_children:
+                Converter[gg_child + '-s'] = 'great-great-grandson'
+                Converter[gg_child + '-d'] = 'great-great-granddaughter'
+                Converter[gg_child + '-c'] = 'great-great-grandchild'
             # add niece nephew nibling (we can also add sister brother in law here but will skip non blood relatives)
             all_siblings = [i for i in Converter if Converter[i] in ['brother', 'sister', 'sibling']]
             for sib in all_siblings:
@@ -413,6 +425,12 @@ def count_direction_change(relation_tag):
                         'grandchild',
                         'grandmother',
                         'grandfather',
+                        'great-grandson',
+                        'great-granddaughter',
+                        'great-grandchild',
+                        'great-great-grandson',
+                        'great-great-granddaughter',
+                        'great-great-grandchild',
                         'great-grandmother',
                         'great-grandfather',
                         'great-great-grandmother',
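Each of these blocks (and the refactors in the next few patches) applies the same move: grow the tag-to-role Converter one generation at a time, deriving longer tags from tags whose role is already known. A minimal standalone sketch of that idea (not the patch's code), using the same tag letters ('p' proband, '-m'/'-f' up to a parent, '-d'/'-s'/'-c' down to a child):

    # Seed roles, then one "go up" pass; relationships_vocabulary chains
    # several such passes to cover grandparents, aunts/uncles, cousins, etc.
    converter = {'p': 'proband', 'p-m': 'mother', 'p-f': 'father'}
    for tag in [t for t in converter if converter[t] in ('mother', 'father')]:
        converter[tag + '-m'] = 'grandmother'
        converter[tag + '-f'] = 'grandfather'
    assert converter['p-m-f'] == 'grandfather'  # proband -> mother -> her father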
From 450c05405e0d65de72b6930aab5594f6cae646b2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Koray=20K=C4=B1rl=C4=B1?=
Date: Thu, 2 Jul 2020 10:24:47 -0400
Subject: [PATCH 32/54] simplify going up in roles

---
 src/encoded/types/family.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/src/encoded/types/family.py b/src/encoded/types/family.py
index d7ec28eb09..3427139a62 100644
--- a/src/encoded/types/family.py
+++ b/src/encoded/types/family.py
@@ -256,6 +256,11 @@ def relationships_vocabulary(links):
                 "p-d-m": "wife", "p-s-m": "wife", "p-c-m": "wife",
                 "p-d-f": "husband", "p-s-f": "husband", "p-c-f": "husband",
             }
+            parent_roles = [
+                {"roles": ['mother', 'father'], 'parents': ['grandmother', 'grandfather']},
+                {"roles": ['grandmother', 'grandfather'], "parents": ['great-grandmother', 'great-grandfather']},
+                {"roles": ['great-grandmother', 'great-grandfather'], "parents": ['great-great-grandmother', 'great-great-grandfather']}
+            ]
             # add grandchildren
             all_children = [i for i in Converter if Converter[i] in ['daughter', 'son', 'child']]
             for child in all_children:

From 316d5c65358f98c032e385171ae104d20f16084c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Koray=20K=C4=B1rl=C4=B1?=
Date: Thu, 2 Jul 2020 10:26:31 -0400
Subject: [PATCH 33/54] comments

---
 src/encoded/types/family.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/encoded/types/family.py b/src/encoded/types/family.py
index 3427139a62..05dcdb619c 100644
--- a/src/encoded/types/family.py
+++ b/src/encoded/types/family.py
@@ -247,6 +247,7 @@ def relationships_vocabulary(links):
             Nomenclature guided by
             https://www.devonfhs.org.uk/pdfs/tools/eichhorn-rlationship-chart.pdf"""
             # return a nested list of [acc, calculated_relation, association]
+            # start convert with seed roles
             Converter = {
                 "p": "proband",
                 "p-f": "father", "p-m": "mother", "p-d": "daughter", "p-s": "son", "p-c": "child",
@@ -256,6 +257,9 @@ def relationships_vocabulary(links):
                 "p-d-m": "wife", "p-s-m": "wife", "p-c-m": "wife",
                 "p-d-f": "husband", "p-s-f": "husband", "p-c-f": "husband",
             }
+            # list of dictionaries for assigning roles to parents of a given set of roles
+            # roles : the input roles to be extended
+            # parents : new roles that are in sequence female, male, non-gender
             parent_roles = [
                 {"roles": ['mother', 'father'], 'parents': ['grandmother', 'grandfather']},

From f0480ea14cd2a5c4f92ca18e286d8acfdc57c7e2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Koray=20K=C4=B1rl=C4=B1?=
Date: Thu, 2 Jul 2020 10:34:28 -0400
Subject: [PATCH 34/54] add gendered children connections

---
 src/encoded/types/family.py | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/src/encoded/types/family.py b/src/encoded/types/family.py
index 05dcdb619c..eacb984334 100644
--- a/src/encoded/types/family.py
+++ b/src/encoded/types/family.py
@@ -257,7 +257,7 @@ def relationships_vocabulary(links):
                 "p-d-m": "wife", "p-s-m": "wife", "p-c-m": "wife",
                 "p-d-f": "husband", "p-s-f": "husband", "p-c-f": "husband",
             }
-            # list of dictionaries for assigning roles to parents of a given set of roles
+            # list of dictionaries for assigning roles to members of a given set of roles
             # roles : the input roles to be extended
             # parents : new roles that are in sequence female, male, non-gender
             parent_roles = [
                 {"roles": ['mother', 'father'], 'parents': ['grandmother', 'grandfather']},
                 {"roles": ['grandmother', 'grandfather'], "parents": ['great-grandmother', 'great-grandfather']},
                 {"roles": ['great-grandmother', 'great-grandfather'], "parents": ['great-great-grandmother', 'great-great-grandfather']}
             ]
+            # roles : the input roles to be extended
+            # children : new roles that are in sequence female, male, non-gender
+            children_roles_gendered = [
+                {"roles": ['daughter', 'son', 'child'], "children": ['granddaughter', 'grandson', 'grandchild']},
+                {"roles": ['granddaughter', 'grandson', 'grandchild'], "children": ['great-granddaughter', 'great-grandson', 'great-grandchild']},
+                {"roles": ['great-granddaughter', 'great-grandson', 'great-grandchild'], "children": ['great-great-granddaughter', 'great-great-grandson', 'great-great-grandchild']},
+                {"roles": ['sister', 'brother', 'sibling'], "children": ['niece', 'nephew',
'nibling']}, + {"roles": ['niece', 'nephew', 'nibling'], "children": ['grandniece', 'grandnephew', 'grandnibling']}, + {"roles": ['grandmother', 'grandfather'], "children": ['aunt', 'uncle', 'auncle']}, + {"roles": ['great-grandmother', 'great-grandfather'], "children": ['grandaunt', 'granduncle', 'grandauncle']} + ] # add grandchildren all_children = [i for i in Converter if Converter[i] in ['daughter', 'son', 'child']] for child in all_children: From 05a1f8a58afb2f125d14b44bdc2abf2837b6c151 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Koray=20K=C4=B1rl=C4=B1?= Date: Thu, 2 Jul 2020 10:37:08 -0400 Subject: [PATCH 35/54] add all parents --- src/encoded/types/family.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/encoded/types/family.py b/src/encoded/types/family.py index eacb984334..8a4a1b1ccb 100644 --- a/src/encoded/types/family.py +++ b/src/encoded/types/family.py @@ -265,6 +265,10 @@ def relationships_vocabulary(links): {"roles": ['grandmother', 'grandfather'], "parents": ['great-grandmother', 'great-grandfather']}, {"roles": ['great-grandmother', 'great-grandfather'], "parents": ['great-great-grandmother', 'great-great-grandfather']} ] + for an_extension in parent_roles: + all_combinations = [i for i in Converter if Converter[i] in an_extension['roles']] + for a_combination in all_combinations: + Converter.update({a_combination + '-m': an_extension['parents'][0], a_combination + '-f': an_extension['parents'][1]}) # roles : the input roles to be extended # children : new roles that are in sequence female, male, non-gender children_roles_gendered = [ From 566ab4693f0e4dc5add1123c8a68c8f7fba2e4d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Koray=20K=C4=B1rl=C4=B1?= Date: Thu, 2 Jul 2020 10:40:26 -0400 Subject: [PATCH 36/54] simplify dict operation --- src/encoded/types/family.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/encoded/types/family.py b/src/encoded/types/family.py index 8a4a1b1ccb..4a232f1604 100644 --- a/src/encoded/types/family.py +++ b/src/encoded/types/family.py @@ -268,7 +268,8 @@ def relationships_vocabulary(links): for an_extension in parent_roles: all_combinations = [i for i in Converter if Converter[i] in an_extension['roles']] for a_combination in all_combinations: - Converter.update({a_combination + '-m': an_extension['parents'][0], a_combination + '-f': an_extension['parents'][1]}) + for ind, parent_tag in enumerate(['-m', '-f']): + Converter[a_combination + parent_tag] = an_extension['parents'][ind] # roles : the input roles to be extended # children : new roles that are in sequence female, male, non-gender children_roles_gendered = [ From 5a37728c23454b197ed79f4b44d9da2105d29021 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Koray=20K=C4=B1rl=C4=B1?= Date: Thu, 2 Jul 2020 10:42:19 -0400 Subject: [PATCH 37/54] add gendered children roles --- src/encoded/types/family.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/encoded/types/family.py b/src/encoded/types/family.py index 4a232f1604..b1854c07c7 100644 --- a/src/encoded/types/family.py +++ b/src/encoded/types/family.py @@ -281,6 +281,11 @@ def relationships_vocabulary(links): {"roles": ['grandmother', 'grandfather'], "children": ['aunt', 'uncle', 'auncle']}, {"roles": ['great-grandmother', 'great-grandfather'], "children": ['grandaunt', 'granduncle', 'grandauncle']} ] + for an_extension in children_roles_gendered: + all_combinations = [i for i in Converter if Converter[i] in an_extension['roles']] + for a_combination in all_combinations: + for ind, parent_tag in 
enumerate(['-d', '-s', '-c']): + Converter[a_combination + parent_tag] = an_extension['children'][ind] # add grandchildren all_children = [i for i in Converter if Converter[i] in ['daughter', 'son', 'child']] for child in all_children: From 439f3091d1e96f71f99fef00a732edfc38019727 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Koray=20K=C4=B1rl=C4=B1?= Date: Thu, 2 Jul 2020 10:43:43 -0400 Subject: [PATCH 38/54] cleanup old rules --- src/encoded/types/family.py | 51 ------------------------------------- 1 file changed, 51 deletions(-) diff --git a/src/encoded/types/family.py b/src/encoded/types/family.py index b1854c07c7..0e2ab0c560 100644 --- a/src/encoded/types/family.py +++ b/src/encoded/types/family.py @@ -286,57 +286,6 @@ def relationships_vocabulary(links): for a_combination in all_combinations: for ind, parent_tag in enumerate(['-d', '-s', '-c']): Converter[a_combination + parent_tag] = an_extension['children'][ind] - # add grandchildren - all_children = [i for i in Converter if Converter[i] in ['daughter', 'son', 'child']] - for child in all_children: - Converter[child + '-s'] = 'grandson' - Converter[child + '-d'] = 'granddaughter' - Converter[child + '-c'] = 'grandchild' - # add great-grandchildren - all_g_children = [i for i in Converter if Converter[i] in ['grandson', 'granddaughter', 'grandchild']] - for g_child in all_g_children: - Converter[g_child + '-s'] = 'great-grandson' - Converter[g_child + '-d'] = 'great-granddaughter' - Converter[g_child + '-c'] = 'great-grandchild' - # add great-grandchildren - all_gg_children = [i for i in Converter if Converter[i] in ['great-grandson', 'great-granddaughter', 'great-grandchild']] - for gg_child in all_gg_children: - Converter[gg_child + '-s'] = 'great-great-grandson' - Converter[gg_child + '-d'] = 'great-great-granddaughter' - Converter[gg_child + '-c'] = 'great-great-grandchild' - # add niece nephew nibling (we can also add sister brother in law here but will skip non blood relatives) - all_siblings = [i for i in Converter if Converter[i] in ['brother', 'sister', 'sibling']] - for sib in all_siblings: - Converter[sib + '-s'] = 'nephew' - Converter[sib + '-d'] = 'niece' - Converter[sib + '-c'] = 'nibling' - # add grand niece nephew nibling - all_niblings = [i for i in Converter if Converter[i] in ['nephew', 'niece', 'nibling']] - for nib in all_niblings: - Converter[nib + '-s'] = 'grandnephew' - Converter[nib + '-d'] = 'grandniece' - Converter[nib + '-c'] = 'grandnibling' - # add Grandparents - all_parents = [i for i in Converter if Converter[i] in ['mother', 'father']] - for parent in all_parents: - Converter[parent + '-m'] = 'grandmother' - Converter[parent + '-f'] = 'grandfather' - # add Great-grandparents Uncle Aunt Auncle - all_g_parents = [i for i in Converter if Converter[i] in ['grandmother', 'grandfather']] - for g_parent in all_g_parents: - Converter[g_parent + '-m'] = 'great-grandmother' - Converter[g_parent + '-f'] = 'great-grandfather' - Converter[g_parent + '-s'] = 'uncle' - Converter[g_parent + '-d'] = 'aunt' - Converter[g_parent + '-c'] = 'auncle' - # add Great-great-grandparents granduncle grandaunt grandauncle - all_gg_parents = [i for i in Converter if Converter[i] in ['great-grandmother', 'great-grandfather']] - for gg_parent in all_gg_parents: - Converter[gg_parent + '-m'] = 'great-great-grandmother' - Converter[gg_parent + '-f'] = 'great-great-grandfather' - Converter[gg_parent + '-s'] = 'granduncle' - Converter[gg_parent + '-d'] = 'grandaunt' - Converter[gg_parent + '-c'] = 'grandauncle' # given a relation, map the 
new relation for that relations children when new role is gender independent
             children_roles = [
                 {'roles': ['uncle', 'aunt', 'auncle'], 'children': 'cousin'},
                 {'roles': ['cousin'], 'children': 'cousin once removed (descendant)'},
                 {'roles': ['cousin once removed (descendant)'], 'children': 'cousin twice removed (descendant)'},
                 {'roles': ['granduncle', 'grandaunt', 'grandauncle'], 'children': 'cousin once removed (ascendant)'},
                 {'roles': ['cousin once removed (ascendant)'], 'children': 'second cousin'},
                 {'roles': ['second cousin'], 'children': 'second cousin once removed (descendant)'},
                 {'roles': ['second cousin once removed (descendant)'], 'children': 'second cousin twice removed (descendant)'},
             ]

From 5bf851348976c3f10c353a5fc86a5c01fcb10f1c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Koray=20K=C4=B1rl=C4=B1?=
Date: Thu, 2 Jul 2020 10:47:00 -0400
Subject: [PATCH 39/54] final comments

---
 src/encoded/types/family.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/src/encoded/types/family.py b/src/encoded/types/family.py
index 0e2ab0c560..34a637eaec 100644
--- a/src/encoded/types/family.py
+++ b/src/encoded/types/family.py
@@ -244,6 +244,12 @@ def construct_links(primary_vectors, seed):
     @staticmethod
     def relationships_vocabulary(links):
         """Convert links to relationships.
+        Start with a seed dictionary of basic roles (Converter)
+        Extend going up (parent_roles)
+        Extend going down - gendered (children_roles_gendered)
+                          - and non-gendered (children_roles)
+        All roles should be used in sequence
+        (i.e. if x is created in children_roles, it cannot be used in parent_roles)
         Nomenclature guided by
         https://www.devonfhs.org.uk/pdfs/tools/eichhorn-rlationship-chart.pdf"""
         # return a nested list of [acc, calculated_relation, association]

From 3f033da14819b2698056376aa06a8ed4547610b5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Koray=20K=C4=B1rl=C4=B1?=
Date: Mon, 6 Jul 2020 11:39:20 -0400
Subject: [PATCH 40/54] add bam_sample_id

---
 src/encoded/schemas/sample.json         | 7 +++++++
 src/encoded/schemas/variant_sample.json | 5 -----
 src/encoded/types/case.py               | 6 ++++--
 3 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/src/encoded/schemas/sample.json b/src/encoded/schemas/sample.json
index 81367add26..4bd1023bf0 100644
--- a/src/encoded/schemas/sample.json
+++ b/src/encoded/schemas/sample.json
@@ -424,6 +424,13 @@
                 "title": "Completed process",
                 "type": "string"
             }
+        },
+        "bam_sample_id": {
+            "title": "Bam Sample ID",
+            "description": "ID used for read groups and vcf call info",
+            "type": "string",
+            "lookup": 21,
+            "uniqueKey": true
         }
     }
 }
diff --git a/src/encoded/schemas/variant_sample.json b/src/encoded/schemas/variant_sample.json
index 753469d4a2..048fc56da5 100644
--- a/src/encoded/schemas/variant_sample.json
+++ b/src/encoded/schemas/variant_sample.json
@@ -369,11 +369,6 @@
         "schema_version": {
             "default": "1"
         },
-        "sample": {
-            "title": "Sample",
-            "description": "String Accession of the sample",
-            "type": "string"
-        },
         "file": {
             "title": "File",
             "description": "String Accession of the vcf file used in digestion",
diff --git a/src/encoded/types/case.py b/src/encoded/types/case.py
index bd8ef5885d..2a63693f17 100644
--- a/src/encoded/types/case.py
+++ b/src/encoded/types/case.py
@@ -203,7 +203,9 @@ def filter_set_flag_addon(self, request, sample_processing=None, individual=None
         vcf = self.vcf_file(request, sample_processing)
         if not vcf:
             return ''
-        sample_acc = sample.split('/')[2]
+        sample_read_group = sample.get('bam_sample_id', '')
+        if not sample_read_group:
+            return ''
         vcf_acc = vcf.split('/')[2]
-        add_on = "&sample={}&file={}".format(sample_acc, vcf_acc)
+        add_on = "&CALL_INFO={}&file={}".format(sample_read_group, vcf_acc)
         return add_on
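The effect of this patch is that the add-on now keys the search on the sample's bam_sample_id, the name written into the VCF's call info (and the bam read groups), rather than on the sample accession. A rough sketch of the resulting string, using the test value test001_sample from the inserts below and an invented file @id:

    # bam_sample_id matches the vcf CALL_INFO; the file part is still an accession.
    sample_read_group = 'test001_sample'
    vcf_acc = '/files-processed/GAPFIEXAMPLE/'.split('/')[2]
    add_on = '&CALL_INFO={}&file={}'.format(sample_read_group, vcf_acc)
    # add_on == '&CALL_INFO=test001_sample&file=GAPFIEXAMPLE'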
b/src/encoded/tests/data/inserts/sample.json index 02c238284c..b745f00b4f 100644 --- a/src/encoded/tests/data/inserts/sample.json +++ b/src/encoded/tests/data/inserts/sample.json @@ -3,19 +3,22 @@ "project": "12a92962-8265-4fc0-b2f8-cf14f05db58b", "institution": "hms-dbmi", "status": "released", - "uuid": "4fdb481a-fbdb-4c0f-a68d-aac87f847bec" + "uuid": "4fdb481a-fbdb-4c0f-a68d-aac87f847bec", + "bam_sample_id": "test001_sample" }, { "project": "12a92962-8265-4fc0-b2f8-cf14f05db58b", "institution": "hms-dbmi", "status": "released", - "uuid": "283f6047-61cd-4ddb-8881-ec1c87082569" + "uuid": "283f6047-61cd-4ddb-8881-ec1c87082569", + "bam_sample_id": "test002_sample" }, { "project": "12a92962-8265-4fc0-b2f8-cf14f05db58b", "institution": "hms-dbmi", "status": "released", - "uuid": "f4bbc709-40fe-48a4-afde-8e886b571903" + "uuid": "f4bbc709-40fe-48a4-afde-8e886b571903", + "bam_sample_id": "test003_sample" }, { "project": "12a92962-8265-4fc0-b2f8-cf14f05db58b", @@ -29,6 +32,7 @@ "indication": "cervical cancer", "specimen_accession": "12345", "specimen_accession_date": "2020-03-01", + "bam_sample_id": "test003_sample", "requisition_acceptance": { "accepted_rejected": "Rejected", "rejection_reason": "DOB missing", From aa69eb3e66bcdde124e748e325f9e8d5190c5306 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Koray=20K=C4=B1rl=C4=B1?= Date: Mon, 6 Jul 2020 12:13:38 -0400 Subject: [PATCH 42/54] get sample and check --- src/encoded/types/case.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/encoded/types/case.py b/src/encoded/types/case.py index 2a63693f17..d3256b8a1e 100644 --- a/src/encoded/types/case.py +++ b/src/encoded/types/case.py @@ -203,7 +203,8 @@ def filter_set_flag_addon(self, request, sample_processing=None, individual=None vcf = self.vcf_file(request, sample_processing) if not vcf: return '' - sample_read_group = sample.get('bam_sample_id', '') + sp_data = get_item_or_none(request, sample_processing, 'sample-processings') + sample_read_group = sp_data.get('bam_sample_id', '') if not sample_read_group: return '' vcf_acc = vcf.split('/')[2] From 0cd7ff424fc2b08695a0732b1c29b845c257e9f5 Mon Sep 17 00:00:00 2001 From: Will Ronchetti Date: Mon, 6 Jul 2020 14:32:32 -0400 Subject: [PATCH 43/54] C4-212 Refactor type=cohort out of tests (#140) * C4-212 scrub cohort from filter_set tests * C4-212 add cohort specific tests back for now * C4-212 refactor remaining search tests * C4-212 finish off some tricky tests * fix last typo in docstring * version bump * 'correct' version bump * C4-212 review changes --- pyproject.toml | 2 +- src/encoded/tests/test_purge_item_type.py | 3 - src/encoded/tests/test_search.py | 293 +++++++++------------ src/encoded/tests/test_types_filter_set.py | 21 +- 4 files changed, 135 insertions(+), 184 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 527b013626..247b453fef 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,7 @@ [tool.poetry] # Note: Various modules refer to this system as "encoded", not "cgap-portal". 
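
Patches 40 and 42 together move the filter-set add-on from the sample accession to the bam_sample_id used for read groups and VCF call info; a standalone sketch of the resulting assembly, with made-up identifiers, behaves as follows:

    # Simplified, self-contained version of the add-on built in case.py;
    # the bam_sample_id and @id values below are hypothetical.
    def filter_set_flag_addon(bam_sample_id, vcf_at_id):
        """Return the query fragment tying a case to its VCF call info."""
        if not bam_sample_id:
            return ''
        vcf_acc = vcf_at_id.split('/')[2]  # '/files-processed/<accession>/'
        return '&CALL_INFO={}&file={}'.format(bam_sample_id, vcf_acc)

    assert filter_set_flag_addon(
        'test001_sample', '/files-processed/GAPFI123ABC/'
    ) == '&CALL_INFO=test001_sample&file=GAPFI123ABC'
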
name = "encoded" -version = "2.1.6" +version = "2.1.7" description = "Clinical Genomics Analysis Platform" authors = ["4DN-DCIC Team "] license = "MIT" diff --git a/src/encoded/tests/test_purge_item_type.py b/src/encoded/tests/test_purge_item_type.py index bdbb980357..9f6b16343c 100644 --- a/src/encoded/tests/test_purge_item_type.py +++ b/src/encoded/tests/test_purge_item_type.py @@ -67,6 +67,3 @@ def test_purge_item_type_with_links_fails(testapp, workbook): testapp.post_json('/index', {'record': True}) # must index everything so individual links show up time.sleep(5) # wait for indexing to catch up assert not purge_item_type_from_storage(testapp, ['individual']) - assert purge_item_type_from_storage(testapp, ['cohort']) is True # this one will work since it is not linkedTo - testapp.post_json('/index', {'record': True}) - testapp.get('/search/?type=Cohort', status=404) diff --git a/src/encoded/tests/test_search.py b/src/encoded/tests/test_search.py index 52984114c9..d1b6c2b4d8 100644 --- a/src/encoded/tests/test_search.py +++ b/src/encoded/tests/test_search.py @@ -89,14 +89,13 @@ def test_collections_redirect_to_search(workbook, testapp): def test_search_with_embedding(workbook, testapp): - """ Searches for a cohort and checks some embedded items are properly resolved """ - res = testapp.get('/search/?type=Cohort&limit=all').json - res_json = [dis for dis in res['@graph'] if dis['uuid'] == 'cc7d83a2-6886-4ca0-9402-7c49734cf3c4'] - assert len(res_json) == 1 - test_json = res_json[0] - assert test_json['display_title'] == 'People with Blue Thumbs' - assert test_json['project']['display_title'] == 'Test Project' - assert test_json['families'][0]['original_pedigree']['uuid'] == 'dcf15d5e-40aa-43bc-b81c-32c70c9afc50' + """ Searches for a family and checks members.*, an embedded field, is properly resolved """ + res = testapp.get('/search/?type=Family&limit=all').json + embed = res['@graph'][0]['members'] + assert embed[0]['father']['display_title'] == 'GAPID3PW26SK' # all are same so order does not matter + assert embed[0]['mother']['display_title'] == 'GAPIDISC7R73' + assert embed[1]['father']['display_title'] == 'GAPID3PW26SK' + assert embed[1]['mother']['display_title'] == 'GAPIDISC7R73' def test_search_with_simple_query(workbook, testapp): @@ -187,18 +186,6 @@ def test_search_facets_and_columns_order(workbook, testapp, registry): assert res['columns'][key]['title'] == val['title'] -@pytest.mark.skip # XXX: Not clear how to best port -def test_search_embedded_file_by_accession(workbook, testapp): - res = testapp.get('/search/?type=Cohort&families.original_pedigree.uuid=dcf15d5e-40aa-43bc-b81c-32c70c9afc50').json - assert len(res['@graph']) > 0 - item_uuids = [item['uuid'] for item in res['@graph'] if 'uuid' in item] - for item_uuid in item_uuids: - item_res = testapp.get('/experiments-hi-c/%s/' % item_uuid, status=301) - exp = item_res.follow().json - file_uuids = [f['uuid'] for f in exp['files']] - assert '46e82a90-49e5-4c33-afab-9ec90d65faa0' in file_uuids - - @pytest.fixture def dd_dts(testapp, workbook): # returns a dictionary of strings of various date and datetimes @@ -278,8 +265,10 @@ def test_search_date_range_dontfind_without(dd_dts, testapp, workbook): def test_search_query_string_AND_NOT_cancel_out(workbook, testapp): - # if you use + and - with same field you should get no result - search = '/search/?q=cell+-cell&type=Cohort' + """ + Tests if you use + and - with same field you should get no result + """ + search = '/search/?q=cell+-cell&type=Family' assert testapp.get(search, 
status=404) @@ -602,13 +591,11 @@ def test_search_with_principals_allowed_fails(workbook, anontestapp): with pytest.raises(AppError): anontestapp.get('/search/?type=Item&principals_allowed.view=group.PERMISSION_YOU_DONT_HAVE') with pytest.raises(AppError): - anontestapp.get('/search/?type=Cohort' - '&families.proband.display_title=GAPID8J9B9CR' + anontestapp.get('/search/?type=Family' + '&proband.display_title=GAPID8J9B9CR' '&principals_allowed.view=group.PERMISSION_YOU_DONT_HAVE') with pytest.raises(AppError): - anontestapp.get('/search/?type=Cohort' - '&families.proband.display_title=GAPID5HBSLG6' - '&families.clinic_notes=testing' + anontestapp.get('/search/?type=Gene' '&principals_allowed.view=group.PERMISSION_YOU_DONT_HAVE') @@ -619,35 +606,6 @@ class TestNestedSearch(object): def assert_length_is_expected(result, expected): assert len(result['@graph']) == expected - @staticmethod - def is_blue_thumbs(result): - """ Checks that this result is the 'People with Blue Thumbs' cohort """ - return 'Blue' in result['title'] - - @staticmethod - def is_red_feet(result): - """ Checks that this result is the 'People with Red Feet' cohort """ - return 'Red' in result['title'] - - @staticmethod - def is_swollen_ears(result): - """ Checks that this result is the 'People with Swollen Ears' cohort """ - return 'Swollen' in result['title'] - - @staticmethod - def result_contains_two(result, f1, f2): - compound = True - for res in result: - compound = compound and (f1(res) or f2(res)) - return compound - - @staticmethod - def result_contains_all(result, f1, f2, f3): - compound = True - for res in result: - compound = compound and (f1(res) or f2(res) or f3(res)) - return compound - @staticmethod def verify_facet(facets, name, count): """ Checks that a given facet name has the correct number of terms """ @@ -657,163 +615,160 @@ def verify_facet(facets, name, count): return def test_search_on_single_nested_field(self, workbook, testapp): - """ Should match only once since one has a family with a proband with display_title GAPID8J9B9CR """ - res = testapp.get('/search/?type=Cohort' - '&families.proband.display_title=GAPID8J9B9CR').json + """ One match for variant with hg19.hg19_pos=12185955 """ + res = testapp.get('/search/?type=Variant' + '&hg19.hg19_pos=12185955').json self.assert_length_is_expected(res, 1) - assert self.is_blue_thumbs(res['@graph'][0]) + assert res['@graph'][0]['uuid'] == 'f6aef055-4c88-4a3e-a306-d37a71535d8b' def test_or_search_on_same_nested_field(self, workbook, testapp): - """ Should match all 3 since this is interpreted as an OR search on this field """ - res = testapp.get('/search/?type=Cohort' - '&families.proband.display_title=GAPID8J9B9CR' - '&families.proband.display_title=GAPID5HBSLG6').json - self.assert_length_is_expected(res, 3) - assert self.result_contains_all(res['@graph'], self.is_blue_thumbs, self.is_red_feet, self.is_swollen_ears) + """ Should match 2 since OR on this field """ + res = testapp.get('/search/?type=Variant' + '&hg19.hg19_hgvsg=NC_000001.11:g.12185956del' + '&hg19.hg19_hgvsg=NC_000001.11:g.11901816A>T').follow().json + self.assert_length_is_expected(res, 2) + for variant in res['@graph']: + assert variant['uuid'] in ['f6aef055-4c88-4a3e-a306-d37a71535d8b', '852bb349-203e-437d-974a-e8d6cb56810a'] def test_and_search_on_nested_field_that_does_not_match(self, workbook, testapp): - """ This has clinic notes that do not match with any proband object, so will give no results """ - testapp.get('/search/?type=Cohort' - 
'&families.proband.display_title=GAPID8J9B9CR' - '&families.clinic_notes=gnitset', status=404) + """ This has a chrom value that does not match the position, so will give no results """ + testapp.get('/search/?type=Variant' + '&hg19.hg19_pos=12185955' + '&hg19.hg19_chrom=chr3', status=404) def test_and_search_on_nested_field_that_matches_one(self, workbook, testapp): - """ This has the correct 'clinic_notes', so should match """ - res = testapp.get('/search/?type=Cohort' - '&families.proband.display_title=GAPID5HBSLG6' - '&families.clinic_notes=testing').json + """ This has the correct 'hg19_chrom', so should match one """ + res = testapp.get('/search/?type=Variant' + '&hg19.hg19_pos=12185955' + '&hg19.hg19_chrom=chr1').json self.assert_length_is_expected(res, 1) - assert self.is_blue_thumbs(res['@graph'][0]) - - def test_or_search_on_nested_clinic_notes_that_matches_two(self, workbook, testapp): - """ Do an OR search on clinic_notes, matching two cohorts """ - res = testapp.get('/search/?type=Cohort' - '&families.proband.display_title=GAPID5HBSLG6' - '&families.clinic_notes=xyz' - '&families.clinic_notes=testing').json - self.assert_length_is_expected(res, 2) - assert self.result_contains_two(res['@graph'], self.is_blue_thumbs, self.is_red_feet) + assert res['@graph'][0]['uuid'] == 'f6aef055-4c88-4a3e-a306-d37a71535d8b' + + def test_or_search_on_nested_hg_19_multiple_match(self, workbook, testapp): + """ Do an OR search on hg19.hg19_chrom, matching three variants """ + res = testapp.get('/search/?type=Variant' + '&hg19.hg19_chrom=chr1').json + self.assert_length_is_expected(res, 3) + for variant in res['@graph']: + assert variant['uuid'] in [ + 'f6aef055-4c88-4a3e-a306-d37a71535d8b', + '852bb349-203e-437d-974a-e8d6cb56810a', + '842b1b54-32fb-4ff3-bfd1-c5b51bc35d7f' + ] def test_negative_search_on_clinic_notes(self, workbook, testapp): - """ Do an OR search with clinic_notes with a negative, should eliminate red_feet and match swollen_ears """ - res = testapp.get('/search/?type=Cohort' - '&families.proband.display_title=GAPID5HBSLG6' - '&families.clinic_notes!=xyz').follow().json + """ Do an OR search with hg19_post with a negative, should eliminate a variant """ + res = testapp.get('/search/?type=Variant' + '&hg19.hg19_chrom=chr1' + '&hg19.hg19_pos!=12185955').follow().json self.assert_length_is_expected(res, 2) - assert self.result_contains_two(res['@graph'], self.is_blue_thumbs, self.is_swollen_ears) + for variant in res['@graph']: + assert variant['uuid'] in [ + '852bb349-203e-437d-974a-e8d6cb56810a', + '842b1b54-32fb-4ff3-bfd1-c5b51bc35d7f' + ] def test_and_search_that_matches_one(self, workbook, testapp): - """ Check two properties that occur in the same sub-embedded object in 1 cohort """ - res = testapp.get('/search/?type=Cohort' - '&families.members.mother.display_title=GAPID6ZUDPO2' - '&families.members.father.display_title=GAPIDRU2NWFO').json + """ Check three properties that occur in the same sub-embedded object in 1 variant """ + res = testapp.get('/search/?type=Variant' + '&hg19.hg19_chrom=chr1' + '&hg19.hg19_pos=12185955' + '&hg19.hg19_hgvsg=NC_000001.11:g.12185956del').follow().json self.assert_length_is_expected(res, 1) - assert self.is_blue_thumbs(res['@graph'][0]) + assert res['@graph'][0]['uuid'] == 'f6aef055-4c88-4a3e-a306-d37a71535d8b' + testapp.get('/search/?type=Variant' # should give no results + '&hg19.hg19_chrom=chr2' # change should be sufficient for no results + '&hg19.hg19_pos=12185955' + '&hg19.hg19_hgvsg=NC_000001.11:g.12185956del', status=404) + 
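
For context on why the mixed combinations in these tests return no results: the hg19.* fields are indexed as nested objects, so every constraint in a group must hold within the same sub-document rather than anywhere in the array. Hand-written for illustration (the exact field paths are an assumption, not taken from the query builder), the generated clause is shaped roughly like:

    # Rough shape of a nested clause: both matches must succeed inside
    # ONE hg19 sub-document for the variant to be returned.
    nested_query = {
        'nested': {
            'path': 'embedded.hg19',
            'query': {
                'bool': {
                    'must': [
                        {'match': {'embedded.hg19.hg19_pos': 12185955}},
                        {'match': {'embedded.hg19.hg19_chrom': 'chr1'}},
                    ]
                }
            }
        }
    }
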
@pytest.mark.skip # re-enable once workbook inserts are built out more def test_and_search_that_matches_multiple(self, workbook, testapp): - """ Check two properties that occur in the same sub-embedded object in 3 cohorts """ - res = testapp.get('/search/?type=Cohort' + """ Check two properties that occur in the same sub-embedded object in 3 variants """ + res = testapp.get('/search/?type=Variant' '&families.members.mother.display_title=GAPIDISC7R73' '&families.members.father.display_title=GAPID3PW26SK').json self.assert_length_is_expected(res, 3) - assert self.result_contains_all(res['@graph'], self.is_blue_thumbs, self.is_red_feet, self.is_swollen_ears) - - def test_and_search_with_disqualifier(self, workbook, testapp): - """ Check three properties - two of which occur in the same sub-embedded object in - 2 cohorts with an additional property that removes both """ - testapp.get('/search/?type=Cohort' - '&families.members.mother.display_title=GAPIDISC7R73' - '&families.members.father.display_title=GAPID3PW26SK' - '&families.proband.display_title=GAPID8J9B9CR', status=404) def test_and_search_on_three_fields(self, workbook, testapp): - """ Search for 3 properties that all occur in the cohorts """ - res = testapp.get('/search/?type=Cohort' - '&families.members.mother.display_title=GAPIDISC7R73' - '&families.members.father.display_title=GAPID3PW26SK' - '&families.proband.display_title=GAPID5HBSLG6').json + """ OR search that will match all variants with these fields""" + res = testapp.get('/search/?type=Variant' + '&hg19.hg19_chrom=chr1' + '&hg19.hg19_pos=12185955' + '&hg19.hg19_pos=11901816' + '&hg19.hg19_pos=11780388' + '&hg19.hg19_hgvsg=NC_000001.11:g.12185956del' + '&hg19.hg19_hgvsg=NC_000001.11:g.11901816A>T' + '&hg19.hg19_hgvsg=NC_000001.11:g.11780388G>A').follow().json self.assert_length_is_expected(res, 3) - assert self.result_contains_all(res['@graph'], self.is_blue_thumbs, self.is_red_feet, self.is_swollen_ears) - - def test_and_search_on_three_fields_that_matches_one(self, workbook, testapp): - """ Change the parents such that only one cohort matches now """ - res = testapp.get('/search/?type=Cohort' - '&families.members.mother.display_title=GAPID6ZUDPO2' - '&families.members.father.display_title=GAPIDRU2NWFO' - '&families.proband.display_title=GAPID8J9B9CR').json - self.assert_length_is_expected(res, 1) - assert self.is_blue_thumbs(res['@graph'][0]) + for variant in res['@graph']: + assert variant['uuid'] in [ + 'f6aef055-4c88-4a3e-a306-d37a71535d8b', + '852bb349-203e-437d-974a-e8d6cb56810a', + '842b1b54-32fb-4ff3-bfd1-c5b51bc35d7f' + ] def test_search_with_non_existant_combinations(self, workbook, testapp): """ Test that swapping around fields that would match across different sub-embedded objects does not actually do so (ie: returns no results). 
""" - testapp.get('/search/?type=Cohort' # Swap the parents - '&families.members.mother.display_title=GAPID3PW26SK' - '&families.members.father.display_title=GAPIDISC7R73', status=404) - testapp.get('/search/?type=Cohort' # Swap just the father - '&families.members.mother.display_title=GAPIDISC7R73' - '&families.members.father.display_title=GAPIDRU2NWFO', status=404) - testapp.get('/search/?type=Cohort' # Swap just the mother - '&families.members.mother.display_title=GAPID6ZUDPO2' - '&families.members.father.display_title=GAPIDISC7R73', status=404) + testapp.get('/search/?type=Variant' + '&hg19.hg19_pos=12185955' + '&hg19.hg19_hgvsg=NC_000001.11:g.11901816A>T', status=404) + testapp.get('/search/?type=Variant' + '&hg19.hg19_pos=11901816' + '&hg19.hg19_hgvsg=NC_000001.11:g.11780388G>A', status=404) + testapp.get('/search/?type=Variant' + '&hg19.hg19_pos=11780388' + '&hg19.hg19_hgvsg=NC_000001.11:g.12185956del', status=404) def test_nested_search_with_no_value(self, workbook, testapp): - """ Tests searching on 'No value' alone on a nested field """ - res = testapp.get('/search/?type=Cohort' - '&families.clinic_notes=No+value').json - self.assert_length_is_expected(res, 1) - assert self.is_swollen_ears(res['@graph'][0]) + """ Tests searching on 'No value' alone on a nested field """ + res = testapp.get('/search/?type=Variant' + '&hg19.hg19_chrom!=No+value').follow().json + self.assert_length_is_expected(res, 3) def test_nested_search_with_no_value_combined(self, workbook, testapp): """ Tests searching on 'No value' combined with another nested field, in this case should give no results (no matter the ordering) """ - testapp.get('/search/?type=Cohort' - '&families.clinic_notes=No+value' - '&families.proband.display_title=GAPID8J9B9CR', status=404) - testapp.get('/search/?type=Cohort' - '&families.proband.display_title=GAPID8J9B9CR' - '&families.clinic_notes=No+value', status=404) - testapp.get('/search/?type=Cohort' - '&families.clinic_notes=No+value' - '&families.proband.display_title=GAPIDISC7R74', status=404) - testapp.get('/search/?type=Cohort' - '&families.proband.display_title=GAPIDISC7R74' - '&families.clinic_notes=No+value', status=404) + testapp.get('/search/?type=Variant' + '&hg19.hg19_pos=No+value' + '&hg19.hg19_hgvsg=NC_000001.11:g.12185956del', status=404) + testapp.get('/search/?type=Variant' + '&hg19.hg19_pos=No+value' + '&hg19.hg19_hgvsg=NC_000001.11:g.11780388G>A', status=404) + testapp.get('/search/?type=Variant' + '&hg19.hg19_pos=No+value' + '&hg19.hg19_hgvsg=NC_000001.11:g.12185956del', status=404) + testapp.get('/search/?type=Variant' + '&hg19.hg19_pos=11780388' + '&hg19.hg19_hgvsg=No+value', status=404) def test_search_nested_with_non_nested_fields(self, workbook, testapp): """ Tests that combining a nested search with a non-nested one works in any order """ - res = testapp.get('/search/?type=Cohort' - '&families.clinic_notes=No+value' - '&title=People+with+Swollen+Ears').json - self.assert_length_is_expected(res, 1) - assert self.is_swollen_ears(res['@graph'][0]) - res = testapp.get('/search/?type=Cohort' - '&title=People+with+Swollen+Ears' - '&families.clinic_notes=No+value').json + res = testapp.get('/search/?type=Variant' + '&hg19.hg19_pos!=11720331' + '&POS=88832').follow().json self.assert_length_is_expected(res, 1) - assert self.is_swollen_ears(res['@graph'][0]) + assert res['@graph'][0]['uuid'] == 'cedff838-99af-4936-a0ae-4dfc63ba8bf4' def test_search_nested_no_value_with_multiple_other_fields(self, workbook, testapp): """ Tests that combining a 'No value' search 
with another nested search and a different non-nested field works correctly """ - res = testapp.get('/search/?type=Cohort' - '&title=People+with+Swollen+Ears' - '&families.clinic_notes=No+value' - '&families.proband=GAPID5HBSLG6').follow().json + res = testapp.get('/search/?type=Variant' + '&POS=88832' + '&REF=A').json self.assert_length_is_expected(res, 1) - assert self.is_swollen_ears(res['@graph'][0]) - testapp.get('/search/?type=Cohort' - '&title=People+with+Swollen+Ears' - '&families.clinic_notes=No+value' - '&families.proband=GAPIDISC7R74', status=404) # proband should disqualify + assert res['@graph'][0]['uuid'] == 'cedff838-99af-4936-a0ae-4dfc63ba8bf4' + testapp.get('/search/?type=Variant' + '&POS=88832' + '&hg19.hg19_pos=No+value' + '&REF=G', status=404) # REF should disqualify def test_search_nested_facets_are_correct(self, workbook, testapp): """ Tests that nested facets are properly rendered """ - facets = testapp.get('/search/?type=Cohort').json['facets'] - self.verify_facet(facets, 'families.proband.display_title', 3) - facets = testapp.get('/search/?type=Cohort' - '&families.proband.display_title=GAPID8J9B9CR').json['facets'] - self.verify_facet(facets, 'families.proband.display_title', 2) + facets = testapp.get('/search/?type=Variant').json['facets'] + self.verify_facet(facets, 'hg19.hg19_chrom', 1) + self.verify_facet(facets, 'hg19.hg19_pos', 3) + self.verify_facet(facets, 'hg19.hg19_hgvsg', 3) def test_search_nested_exists_query(self, testapp): """ Tests doing a !=No+value search on a nested sub-field. """ diff --git a/src/encoded/tests/test_types_filter_set.py b/src/encoded/tests/test_types_filter_set.py index e3e2831aad..ab15259473 100644 --- a/src/encoded/tests/test_types_filter_set.py +++ b/src/encoded/tests/test_types_filter_set.py @@ -6,7 +6,6 @@ pytestmark = [pytest.mark.working, pytest.mark.search] COMPOUND_SEARCH_URL = '/compound_search' FILTER_SET_URL = '/filter_set' -COHORT_URL = '/cohort' VARIANT_URL = '/variant' @@ -27,14 +26,14 @@ def simple_filter_set(): """ A filter set with only the flag that designates the type """ return { 'title': 'Test filter set', - 'search_type': 'Cohort', + 'search_type': 'Variant', 'filter_blocks': [ { - 'query': 'families.proband=GAPID5HBSLG6', + 'query': 'REF=G&ALT=A', 'flag_applied': True } ], - 'flags': '?type=Cohort', + 'flags': '?type=Variant', 'project': 'hms-dbmi', 'institution': 'hms-dbmi' } @@ -45,18 +44,18 @@ def typical_filter_set(): """ A filter set with two filter blocks and a flag """ return { 'title': 'Test filter set', - 'search_type': 'Cohort', + 'search_type': 'Variant', 'filter_blocks': [ { - 'query': 'families.proband=GAPID8J9B9CR', + 'query': 'ALT=T&hg19.hg19_chrom=chr1', 'flag_applied': True }, { - 'query': 'families.clinic_notes=xyz', + 'query': 'REF=G&ALT=A', 'flag_applied': True - } + }, ], - 'flags': '?type=Cohort', + 'flags': '?type=Variant', 'project': 'hms-dbmi', 'institution': 'hms-dbmi' } @@ -161,7 +160,7 @@ def test_filter_set_simple(workbook, testapp, simple_filter_set): # do similar search with @id compound_search_res = testapp.post_json(COMPOUND_SEARCH_URL, {'@id': uuid}).json['@graph'] - assert len(compound_search_res) == 3 + assert len(compound_search_res) == 1 def test_filter_set_complete(workbook, testapp, typical_filter_set): @@ -171,7 +170,7 @@ def test_filter_set_complete(workbook, testapp, typical_filter_set): # execute the more complicated filter_set by @id compound_search_res = testapp.post_json(COMPOUND_SEARCH_URL, {'@id': uuid}).json['@graph'] - assert len(compound_search_res) == 2 + 
assert len(compound_search_res) == 3 # typical_filter_set matches 3/4 variants def test_filter_set_complex(workbook, testapp, complex_filter_set): From 26bde1b8cb3fc78422142efd278b749707b54bf5 Mon Sep 17 00:00:00 2001 From: SooLee Date: Mon, 6 Jul 2020 17:05:45 -0400 Subject: [PATCH 44/54] added a schema for quality_metric_vcfqc --- src/encoded/commands/create_mapping_on_deploy.py | 1 + src/encoded/schemas/quality_metric_cmphet.json | 4 ++-- .../tests/data/inserts/quality_metric_cmphet.json | 2 +- src/encoded/tests/datafixtures.py | 1 + src/encoded/types/quality_metric.py | 14 ++++++++++++++ 5 files changed, 19 insertions(+), 3 deletions(-) diff --git a/src/encoded/commands/create_mapping_on_deploy.py b/src/encoded/commands/create_mapping_on_deploy.py index 60c4b8c1c3..c3cd69f0f3 100644 --- a/src/encoded/commands/create_mapping_on_deploy.py +++ b/src/encoded/commands/create_mapping_on_deploy.py @@ -46,6 +46,7 @@ 'QualityMetricCmphet', 'QualityMetricWorkflowrun', 'QualityMetricVcfcheck', + 'QualityMetricVcfqc', 'TrackingItem', 'Software', 'Sample', diff --git a/src/encoded/schemas/quality_metric_cmphet.json b/src/encoded/schemas/quality_metric_cmphet.json index 7aae64632c..258ba3cfe2 100644 --- a/src/encoded/schemas/quality_metric_cmphet.json +++ b/src/encoded/schemas/quality_metric_cmphet.json @@ -1,7 +1,7 @@ { - "title" : "Quality Metric for Compound Het caller Granite ComHet", + "title" : "Quality Metric for VCF files", "description": "Schema for reporting the specific calculation of an quality metrics", - "id": "/profiles/quality_metric_cmphet.json", + "id": "/profiles/quality_metric_vcfqc.json", "$schema": "http://json-schema.org/draft-04/schema#", "type": "object", "required": [ "project", "institution"], diff --git a/src/encoded/tests/data/inserts/quality_metric_cmphet.json b/src/encoded/tests/data/inserts/quality_metric_cmphet.json index e6b03bf721..494af3b789 100644 --- a/src/encoded/tests/data/inserts/quality_metric_cmphet.json +++ b/src/encoded/tests/data/inserts/quality_metric_cmphet.json @@ -1,6 +1,6 @@ [ { - "uuid": "dcaa6bf3-82f3-4b69-ae05-d5f00fec5466", + "uuid": "595e3ec5-e4f8-4930-9c70-6e65e9a72d93", "project": "12a92962-8265-4fc0-b2f8-cf14f05db58b", "institution": "hms-dbmi", "submitted_by": "duplexa@gmail.com" diff --git a/src/encoded/tests/datafixtures.py b/src/encoded/tests/datafixtures.py index fc6d6b3fea..7219d3bc15 100644 --- a/src/encoded/tests/datafixtures.py +++ b/src/encoded/tests/datafixtures.py @@ -10,6 +10,7 @@ 'case', 'report', 'page', 'quality_metric_fastqc', 'evidence_dis_pheno', 'quality_metric_bamcheck', 'quality_metric_qclist', 'quality_metric_wgs_bamqc', 'quality_metric_cmphet', 'quality_metric_vcfcheck', 'quality_metric_workflowrun', + 'quality_metric_vcfqc', 'software', 'static_section', 'tracking_item', 'workflow_mapping', 'workflow_run_awsem', 'workflow_run', 'annotation_field', 'variant_sample', 'variant', 'gene_annotation_field', 'gene', diff --git a/src/encoded/types/quality_metric.py b/src/encoded/types/quality_metric.py index 66d6ad9f4f..632e9c55d5 100644 --- a/src/encoded/types/quality_metric.py +++ b/src/encoded/types/quality_metric.py @@ -156,3 +156,17 @@ class QualityMetricCmphet(QualityMetric): item_type = 'quality_metric_cmphet' schema = load_schema('encoded:schemas/quality_metric_cmphet.json') embedded_list = QualityMetric.embedded_list + + +@collection( + name='quality-metrics-vcfqc', + properties={ + 'title': 'QC Quality Metrics for VCF files', + 'description': 'Listing of QC Quality Metrics for VCF files' + }) +class 
QualityMetricVcfqc(QualityMetric): + """Subclass of quality matrics for VCF files""" + + item_type = 'quality_metric_vcfqc' + schema = load_schema('encoded:schemas/quality_metric_vcfqc.json') + embedded_list = QualityMetric.embedded_list From d61cbe7f8b9bf66dc0f18834ebac57fde2b5e923 Mon Sep 17 00:00:00 2001 From: SooLee Date: Mon, 6 Jul 2020 17:51:23 -0400 Subject: [PATCH 45/54] fixed a mixup --- .../schemas/quality_metric_cmphet.json | 4 +- src/encoded/schemas/quality_metric_vcfqc.json | 41 +++++++++++++++++++ .../data/inserts/quality_metric_cmphet.json | 2 +- .../data/inserts/quality_metric_vcfqc.json | 8 ++++ 4 files changed, 52 insertions(+), 3 deletions(-) create mode 100644 src/encoded/schemas/quality_metric_vcfqc.json create mode 100644 src/encoded/tests/data/inserts/quality_metric_vcfqc.json diff --git a/src/encoded/schemas/quality_metric_cmphet.json b/src/encoded/schemas/quality_metric_cmphet.json index 258ba3cfe2..7aae64632c 100644 --- a/src/encoded/schemas/quality_metric_cmphet.json +++ b/src/encoded/schemas/quality_metric_cmphet.json @@ -1,7 +1,7 @@ { - "title" : "Quality Metric for VCF files", + "title" : "Quality Metric for Compound Het caller Granite ComHet", "description": "Schema for reporting the specific calculation of an quality metrics", - "id": "/profiles/quality_metric_vcfqc.json", + "id": "/profiles/quality_metric_cmphet.json", "$schema": "http://json-schema.org/draft-04/schema#", "type": "object", "required": [ "project", "institution"], diff --git a/src/encoded/schemas/quality_metric_vcfqc.json b/src/encoded/schemas/quality_metric_vcfqc.json new file mode 100644 index 0000000000..258ba3cfe2 --- /dev/null +++ b/src/encoded/schemas/quality_metric_vcfqc.json @@ -0,0 +1,41 @@ +{ + "title" : "Quality Metric for VCF files", + "description": "Schema for reporting the specific calculation of an quality metrics", + "id": "/profiles/quality_metric_vcfqc.json", + "$schema": "http://json-schema.org/draft-04/schema#", + "type": "object", + "required": [ "project", "institution"], + "additionalProperties": true, + "identifyingProperties": ["uuid", "aliases"], + "mixinProperties": [ + { "$ref": "mixins.json#/schema_version" }, + { "$ref": "quality_metric.json#/properties" }, + { "$ref": "mixins.json#/uuid" }, + { "$ref": "mixins.json#/aliases" }, + { "$ref": "mixins.json#/attribution" }, + { "$ref": "mixins.json#/submitted" }, + { "$ref": "mixins.json#/modified" }, + { "$ref": "mixins.json#/status" }, + { "$ref": "mixins.json#/static_embeds" }, + { "$ref": "quality_metric.json#/properties" } + ], + "mixinFacets" : [ + { "$ref": "mixins.json#/facets_common" } + ], + "properties": { + "schema_version": { + "default": "1" + } + }, + "facets": { + "overall_quality_status": { + "title" : "Overall Quality" + } + }, + "columns": { + "overall_quality_status": { + "title" : "Overall Quality", + "default_hidden": true + } + } +} diff --git a/src/encoded/tests/data/inserts/quality_metric_cmphet.json b/src/encoded/tests/data/inserts/quality_metric_cmphet.json index 494af3b789..e6b03bf721 100644 --- a/src/encoded/tests/data/inserts/quality_metric_cmphet.json +++ b/src/encoded/tests/data/inserts/quality_metric_cmphet.json @@ -1,6 +1,6 @@ [ { - "uuid": "595e3ec5-e4f8-4930-9c70-6e65e9a72d93", + "uuid": "dcaa6bf3-82f3-4b69-ae05-d5f00fec5466", "project": "12a92962-8265-4fc0-b2f8-cf14f05db58b", "institution": "hms-dbmi", "submitted_by": "duplexa@gmail.com" diff --git a/src/encoded/tests/data/inserts/quality_metric_vcfqc.json b/src/encoded/tests/data/inserts/quality_metric_vcfqc.json new file mode 
100644 index 0000000000..494af3b789 --- /dev/null +++ b/src/encoded/tests/data/inserts/quality_metric_vcfqc.json @@ -0,0 +1,8 @@ +[ + { + "uuid": "595e3ec5-e4f8-4930-9c70-6e65e9a72d93", + "project": "12a92962-8265-4fc0-b2f8-cf14f05db58b", + "institution": "hms-dbmi", + "submitted_by": "duplexa@gmail.com" + } +] From 840358a8242666b9fda1669c26d547cd66cf0670 Mon Sep 17 00:00:00 2001 From: Kent Pitman Date: Tue, 7 Jul 2020 23:10:57 -0400 Subject: [PATCH 46/54] Fix a bug. --- pyproject.toml | 2 +- src/encoded/tests/data/inserts/sample.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 247b453fef..fc559f87a1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,7 @@ [tool.poetry] # Note: Various modules refer to this system as "encoded", not "cgap-portal". name = "encoded" -version = "2.1.7" +version = "2.1.8" description = "Clinical Genomics Analysis Platform" authors = ["4DN-DCIC Team "] license = "MIT" diff --git a/src/encoded/tests/data/inserts/sample.json b/src/encoded/tests/data/inserts/sample.json index b745f00b4f..34c1def17e 100644 --- a/src/encoded/tests/data/inserts/sample.json +++ b/src/encoded/tests/data/inserts/sample.json @@ -32,7 +32,7 @@ "indication": "cervical cancer", "specimen_accession": "12345", "specimen_accession_date": "2020-03-01", - "bam_sample_id": "test003_sample", + "bam_sample_id": "test004_sample", "requisition_acceptance": { "accepted_rejected": "Rejected", "rejection_reason": "DOB missing", From cf9777c1be6ef331831bfaf8161452bfefcfcc66 Mon Sep 17 00:00:00 2001 From: Will Ronchetti Date: Wed, 8 Jul 2020 09:16:57 -0400 Subject: [PATCH 47/54] C4-213 Filter Set API Change (#141) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * new flag schema and insert * C4-212 scrub cohort from filter_set tests * update insert * C4-212 add cohort specific tests back for now * C4-212 refactor remaining search tests * C4-212 finish off some tricky tests * fix last typo in docstring * version bump * 'correct' version bump * C4-213 continue refactoring API top-down * C4-213 fix schema, first set of tests * C4-213 complete filter_set API rework * C4-213 small changes, comments * C4-213 review changes * actually merge? * fix missing titles Co-authored-by: Koray Kırlı --- pyproject.toml | 2 +- src/encoded/schemas/filter_set.json | 40 +- src/encoded/search/compound_search.py | 183 +++++--- .../tests/data/inserts/filter_set.json | 18 +- src/encoded/tests/test_types_filter_set.py | 410 +++++++++++------- 5 files changed, 416 insertions(+), 237 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 247b453fef..fc559f87a1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,7 @@ [tool.poetry] # Note: Various modules refer to this system as "encoded", not "cgap-portal". 
name = "encoded" -version = "2.1.7" +version = "2.1.8" description = "Clinical Genomics Analysis Platform" authors = ["4DN-DCIC Team "] license = "MIT" diff --git a/src/encoded/schemas/filter_set.json b/src/encoded/schemas/filter_set.json index 5b1ea0d306..3b442ed0be 100644 --- a/src/encoded/schemas/filter_set.json +++ b/src/encoded/schemas/filter_set.json @@ -42,22 +42,50 @@ "title": "Filter Block", "type": "object", "properties": { + "name": { + "title": "Name", + "type": "string", + "description": "Name of the filter block" + }, "query": { "title": "Single query", + "description": "URL Query string", "type": "string" }, - "flag_applied": { - "title": "Flag applied", - "description": "Whether or not this filter_block is toggled", - "type": "boolean" + "flags_applied": { + "title": "Flags applied", + "description": "Flag names that will be applied to this filter block", + "type": "array", + "uniqueItems": true, + "items": { + "title": "Flag", + "type": "string" + } } } } }, "flags": { "title": "Flags", - "description": "Query filter to apply globally on this filter set.", - "type": "string" + "description": "Flags that will be applied to filter blocks with name mapping.", + "type": "array", + "uniqueItems": true, + "items": { + "title": "Flag", + "type": "object", + "properties": { + "name": { + "title": "Name", + "type": "string", + "description": "Name of the flag" + }, + "query": { + "title": "Single query", + "description": "URL Query string", + "type": "string" + } + } + } } } } diff --git a/src/encoded/search/compound_search.py b/src/encoded/search/compound_search.py index a690a83384..5360a20184 100644 --- a/src/encoded/search/compound_search.py +++ b/src/encoded/search/compound_search.py @@ -26,7 +26,8 @@ class CompoundSearchBuilder: TYPE = 'search_type' ID = '@id' QUERY = 'query' - FLAG_APPLIED = 'flag_applied' + NAME = 'name' + FLAGS_APPLIED = 'flags_applied' BUILD_QUERY_URL = '/build_query/' @staticmethod @@ -61,7 +62,7 @@ def build_subreq_from_single_query(cls, request, query, route='/search/', from_= return subreq @staticmethod - def combine_flags_and_block(flags, block): + def combine_query_strings(qstring1, qstring2): """ Builds a single URL query from the given flags and blocks. :param flags: flags, usually ? prefixed @@ -74,7 +75,7 @@ def query_str_to_dict(x): def merge_query_strings(x, y): return urllib.parse.urlencode(dict(query_str_to_dict(x), **query_str_to_dict(y))) - return merge_query_strings(flags, block) + return merge_query_strings(qstring1, qstring2) @staticmethod def format_filter_set_results(request, es_results, return_generator=False): @@ -121,86 +122,124 @@ def invoke_search(context, request, subreq, return_generator=False): @staticmethod def _add_type_to_flag_if_needed(flags, type_flag): - """ Modifies 'flags' in place by adding type query if it is not present """ + """ Modifies 'flags' in place by adding type query if it is not present + + :param flags: query substring + :param type_flag: query substring containing type requirement + :return: query string that combines the two, if type requirement isn't already there + """ if type_flag not in flags or type_flag.lower() not in flags: flags += '&' + type_flag return flags @classmethod - def execute_filter_set(cls, context, request, filter_set, from_=0, to=10, return_generator=False, intersect=False): + def execute_filter_set(cls, context, request, filter_set, from_=0, to=10, + global_flags=None, return_generator=False, intersect=False): """ Executes the given filter_set. 
This function contains the core functionality of the class. A filter_set with respect to this function is just a dictionary containing the following things: - 1. 'type' is the item type we are executing on. Required. - 2. 'filter_blocks' contains the filter blocks we would like to apply - 3. 'flags' contains the query to be combined with all filter_blocks + 1. 'search_type' is the item type we are executing on. Required. + 2. 'filter_blocks' contains the filter blocks we would like to apply with named flags we + would like to apply on this block as well + 3. 'flags' is a dictionary containing named flags to be applied to individual filter_blocks + by name. NOTE: if neither 'flags' nor 'filter_blocks' is specified then a generic type=Item - search will be executed. + search will be executed. If just 'flags' is specified with no filter_blocks, the + flags will be ignored (since there are no filter_blocks to apply it to). """ filter_blocks = filter_set.get(FILTER_BLOCKS, []) flags = filter_set.get(FLAGS, None) t = filter_set.get(cls.TYPE, 'Item') # if type not set, attempt to search on item type_flag = 'type=%s' % t - # if we have no filter blocks, pass flags alone to search + # if we have no filter blocks, there is no context to enable flags, so + # pass type_flag + global_flags if not filter_blocks and flags: - flags = cls._add_type_to_flag_if_needed(flags, type_flag) - subreq = cls.build_subreq_from_single_query(request, flags, from_=from_, to=to) + if global_flags: + query = cls.combine_query_strings(global_flags, type_flag) + else: + query = type_flag + subreq = cls.build_subreq_from_single_query(request, query, from_=from_, to=to) return cls.invoke_search(context, request, subreq, return_generator=return_generator) - # if we have only a single filter block with no flags, pass single filter_block to search + # if we specified global_flags, combine that query with the single filter_block, + # otherwise pass the filter_block query directly elif not flags and len(filter_blocks) == 1: block = filter_blocks[0] - block_query = block['query'] - if type_flag not in block: - block_query = cls._add_type_to_flag_if_needed(block_query, type_flag) - if block[cls.FLAG_APPLIED]: - subreq = cls.build_subreq_from_single_query(request, block_query, from_=from_, to=to) + block_query = block[cls.QUERY] + if global_flags: + query = cls.combine_query_strings(global_flags, block_query) else: - subreq = cls.build_subreq_from_single_query(request, type_flag, from_=from_, to=to) + query = block_query + query = cls._add_type_to_flag_if_needed(query, type_flag) + subreq = cls.build_subreq_from_single_query(request, query, from_=from_, to=to) return cls.invoke_search(context, request, subreq, return_generator=return_generator) - # if given flags and single filter block, combine and pass + # Extract query string and list of applied flags, add global_flags to block_query first + # then add flags as applied and type_flag if needed. 
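
The flag lookup used in this branch (and again in the multi-block loop below) can be read in isolation; a sketch, substituting plain concatenation for combine_query_strings:

    # Standalone sketch of named-flag resolution: each name listed in a
    # block's flags_applied is looked up in the flag list and folded in.
    def apply_named_flags(block_query, flags_applied, flags):
        query = block_query
        for applied_flag in flags_applied:
            for flag in flags:
                if flag['name'] == applied_flag:
                    query += '&' + flag['query'].lstrip('?&')
                    break  # first match wins, as in the patch
        return query

    print(apply_named_flags('REF=G&ALT=A', ['chrom'],
                            [{'name': 'chrom', 'query': 'CHROM=1'}]))
    # -> 'REF=G&ALT=A&CHROM=1'
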
elif flags and len(filter_blocks) == 1: - block = filter_blocks[0] - if type_flag not in flags and type_flag not in block: - flags = cls._add_type_to_flag_if_needed(flags, type_flag) - if block[cls.FLAG_APPLIED]: - combined_query = cls.combine_flags_and_block(flags, block['query']) - subreq = cls.build_subreq_from_single_query(request, combined_query, from_=from_, to=to) + block_query = filter_blocks[0][cls.QUERY] + flags_applied = filter_blocks[0][cls.FLAGS_APPLIED] + if global_flags: + query = cls.combine_query_strings(global_flags, block_query) else: - subreq = cls.build_subreq_from_single_query(request, flags, from_=from_, to=to) + query = block_query + for applied_flag in flags_applied: + for flag in flags: + if flag[cls.NAME] == applied_flag: + query = cls.combine_query_strings(query, flag[cls.QUERY]) + break + query = cls._add_type_to_flag_if_needed(query, type_flag) + subreq = cls.build_subreq_from_single_query(request, query, from_=from_, to=to) return cls.invoke_search(context, request, subreq, return_generator=return_generator) # Build the compound_query + # Iterate through filter_blocks, adding global_flags if specified and adding flags if specified else: sub_queries = [] for block in filter_blocks: - if block[cls.FLAG_APPLIED]: # only build sub_query if this block is applied - if flags: - combined_query = cls.combine_flags_and_block(flags, block[cls.QUERY]) - subreq = cls.build_subreq_from_single_query(request, combined_query, route=cls.BUILD_QUERY_URL, - from_=from_, to=to) - else: - subreq = cls.build_subreq_from_single_query(request, block[cls.QUERY], - route=cls.BUILD_QUERY_URL, from_=from_, to=to) - sub_query = request.invoke_subrequest(subreq).json[cls.QUERY] - sub_queries.append(sub_query) - - if len(sub_queries) == 0: # if all blocks are disabled, just execute the flags - if not flags: - flags = type_flag - else: - flags = cls._add_type_to_flag_if_needed(flags, type_flag) - subreq = cls.build_subreq_from_single_query(request, flags, from_=from_, to=to) - return cls.invoke_search(context, request, subreq, return_generator=return_generator) - - else: # build, execute compound query and return a response with @graph containing results - compound_query = LuceneBuilder.compound_search(sub_queries, intersect=intersect) - subreq = cls.build_subreq_from_single_query(request, ('?type=' + t)) - search = SearchBuilder.from_search(context, subreq, compound_query, from_=from_, size=to) - es_results = execute_search(subreq, search.search) - return cls.format_filter_set_results(request, es_results, return_generator) + block_query = block[cls.QUERY] + flags_applied = block[cls.FLAGS_APPLIED] + query = block_query + if global_flags: + query = cls.combine_query_strings(global_flags, block_query) + for applied_flag in flags_applied: + for flag in flags: + if flag[cls.NAME] == applied_flag: + query = cls.combine_query_strings(query, flag[cls.QUERY]) + break + query = cls._add_type_to_flag_if_needed(query, type_flag) + subreq = cls.build_subreq_from_single_query(request, query, route=cls.BUILD_QUERY_URL, + from_=from_, to=to) + sub_query = request.invoke_subrequest(subreq).json[cls.QUERY] + sub_queries.append(sub_query) + + compound_query = LuceneBuilder.compound_search(sub_queries, intersect=intersect) + compound_subreq = cls.build_subreq_from_single_query(request, ('?type=' + t)) + search = SearchBuilder.from_search(context, compound_subreq, compound_query, from_=from_, size=to) + es_results = execute_search(compound_subreq, search.search) + return cls.format_filter_set_results(request, 
es_results, return_generator) + + @classmethod + def validate_flag(cls, flag): + """ Validates a given flag has the correct structure/types """ + if cls.NAME not in flag or cls.QUERY not in flag: # existence + raise HTTPBadRequest('Passed a bad flag with missing structure: %s' % flag) + elif not isinstance(flag[cls.NAME], str): # type + raise HTTPBadRequest('Passed a bad flag with incorrect parameter for field %s: %s' % (cls.NAME, flag)) + elif not isinstance(flag[cls.QUERY], str): # type + raise HTTPBadRequest('Passed a bad flag with incorrect parameter for field %s: %s' % (cls.QUERY, flag)) + + @classmethod + def validate_filter_block(cls, filter_block): + """ Validates a given filter_block has correct structure/types """ + if cls.QUERY not in filter_block or cls.FLAGS_APPLIED not in filter_block: + raise HTTPBadRequest('Passed a bad filter_block with missing structure: %s' % filter_block) + elif not isinstance(filter_block[cls.QUERY], str): + raise HTTPBadRequest('Passed a bad filter_block with wrong type for field %s: %s' % (cls.QUERY, filter_block)) + elif not isinstance(filter_block[cls.FLAGS_APPLIED], list): + raise HTTPBadRequest('Passed a bad filter_block with wrong type for field %s: %s' % + (cls.FLAGS_APPLIED, filter_block)) @classmethod def extract_filter_set_from_search_body(cls, request, body): @@ -210,7 +249,6 @@ def extract_filter_set_from_search_body(cls, request, body): :param body: body of POST request (in JSON) :return: a filter_set, to be executed """ - # TODO: Test (and should HTTPBadRequest be thrown here?) - Will 6-23-2020 if cls.ID in body: # prioritize @id return get_item_or_none(request, body[cls.ID]) else: @@ -220,12 +258,16 @@ def extract_filter_set_from_search_body(cls, request, body): else: raise HTTPBadRequest('Tried to execute a filter_set without specifying a type!') if FLAGS in body: - if not isinstance(body[FLAGS], str): - raise HTTPBadRequest('Passed a bad value for flags: %s -- Expected a string.' % body[FLAGS]) + if not isinstance(body[FLAGS], list): + raise HTTPBadRequest('Passed a bad value for flags: %s -- Expected a list.' % body[FLAGS]) + for flag in body[FLAGS]: + cls.validate_flag(flag) filter_set[FLAGS] = body[FLAGS] if FILTER_BLOCKS in body: if not isinstance(body[FILTER_BLOCKS], list): raise HTTPBadRequest('Passed a bad value for flags: %s -- Expected a list.' % body[FILTER_BLOCKS]) + for filter_block in body[FILTER_BLOCKS]: + cls.validate_filter_block(filter_block) filter_set[FILTER_BLOCKS] = body[FILTER_BLOCKS] return filter_set @@ -253,16 +295,35 @@ def compound_search(context, request): POST Body Syntax: { + # flags to be applied globally to the search + "global_flags": , + # uuid of a filter_set item to execute "uuid": , # NOTE: if you provide this, the following filter_set related fields are IGNORED "search_type": , # item type this filter_set is searching on - "flags": , # flags to be applied globally to the search - "filter_blocks": [ # list of objects with below structure + "flags": [ + { + "name": "flag_name_one", + "query": + }, + { + "name": "flag_name_two", + "query": + } + ... + ] + + # list of queries to be compounded with below structure + "filter_blocks": [ { - "query": , - "flag_applied": true/false + "query": , (to be combined with global_flags, if specified) + "flags_applied": [ + "flag_name_one", + "flag_name_two" + ] } + ... 
] # other options @@ -279,7 +340,9 @@ def compound_search(context, request): from_ = body.get('from', 0) limit = body.get('limit', 25) return_generator = body.get('return_generator', False) + global_flags = body.get('global_flags', None) if from_ < 0 or limit < 0: raise HTTPBadRequest('Passed bad from, to request body params: %s, %s' % (from_, limit)) return CompoundSearchBuilder.execute_filter_set(context, request, filter_set, from_=from_, to=limit, - return_generator=return_generator, intersect=intersect) + global_flags=global_flags, return_generator=return_generator, + intersect=intersect) diff --git a/src/encoded/tests/data/inserts/filter_set.json b/src/encoded/tests/data/inserts/filter_set.json index 54adb3df53..7b9eeed8eb 100644 --- a/src/encoded/tests/data/inserts/filter_set.json +++ b/src/encoded/tests/data/inserts/filter_set.json @@ -10,14 +10,26 @@ "title": "Test filter set", "filter_blocks": [ { + "name": "Basic Settings", "query": "&variant.CHROM=1&variant.genes.genes_most_severe_consequence.impact=MODERATE", - "flag_applied": true + "flags_applied": ["Flag1", "Flag2"] }, { + "name": "Advanced Settings", "query": "&variant.conservation_phylop100.from=0.2&variant.conservation_phylop100.to=1&variant.cytoband_cytoband=1p36.33", - "flag_applied": false + "flags_applied": ["Flag2"] } ], - "flags": "&variant.transcript.vep_clin_sig=benign%26uncertain_significance" + "flags": [ + + { + "name": "Flag1", + "query": "&variant.transcript.vep_clin_sig=benign%26uncertain_significance" + }, + { + "name": "Flag2", + "query": "&variant.CHROM=1" + } + ] } ] diff --git a/src/encoded/tests/test_types_filter_set.py b/src/encoded/tests/test_types_filter_set.py index ab15259473..d1f3920197 100644 --- a/src/encoded/tests/test_types_filter_set.py +++ b/src/encoded/tests/test_types_filter_set.py @@ -15,91 +15,17 @@ def barebones_filter_set(): return { 'title': 'Test filter set', 'search_type': 'Variant', - 'flags': '?type=Variant', - 'project': 'hms-dbmi', - 'institution': 'hms-dbmi' - } - - -@pytest.fixture -def simple_filter_set(): - """ A filter set with only the flag that designates the type """ - return { - 'title': 'Test filter set', - 'search_type': 'Variant', - 'filter_blocks': [ + 'flags': [ { - 'query': 'REF=G&ALT=A', - 'flag_applied': True + 'name': 'variant', + 'query': '?type=Variant' } ], - 'flags': '?type=Variant', 'project': 'hms-dbmi', 'institution': 'hms-dbmi' } -@pytest.fixture -def typical_filter_set(): - """ A filter set with two filter blocks and a flag """ - return { - 'title': 'Test filter set', - 'search_type': 'Variant', - 'filter_blocks': [ - { - 'query': 'ALT=T&hg19.hg19_chrom=chr1', - 'flag_applied': True - }, - { - 'query': 'REF=G&ALT=A', - 'flag_applied': True - }, - ], - 'flags': '?type=Variant', - 'project': 'hms-dbmi', - 'institution': 'hms-dbmi' - } - - -@pytest.fixture -def complex_filter_set(): - """ A filter set with 3 filter_blocks and a flag """ - return { - 'title': 'Test filter set', - 'search_type': 'Variant', - 'filter_blocks': [ - { - 'query': 'ALT=T&hg19.hg19_chrom=chr1', - 'flag_applied': True - }, - { - 'query': 'REF=G&ALT=A', - 'flag_applied': True - }, - { - 'query': 'POS.from=0&POS.to=12125898', - 'flag_applied': True - } - ], - 'flags': '?type=Variant&CHROM=1', - 'project': 'hms-dbmi', - 'institution': 'hms-dbmi', - 'uuid': '5145195f-c203-41be-9642-7ba6fb4bfb16' - } - - -def toggle_filter_blocks(filter_set, on=True): - """ Helper method for testing that will 'toggle' filter blocks to True if on=True else - it will disable them with False. 
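
The documented body syntax maps directly onto what the tests below post; an end-to-end example (queries and flag names are illustrative, not drawn from the inserts):

    # Example /compound_search request body following the syntax above.
    body = {
        'search_type': 'Variant',
        'global_flags': '?type=Variant',
        'flags': [
            {'name': 'chrom', 'query': 'CHROM=1'},
        ],
        'filter_blocks': [
            {'query': 'REF=G&ALT=A', 'flags_applied': ['chrom']},
            {'query': 'POS.from=0&POS.to=10000000', 'flags_applied': []},
        ],
        'intersect': False,  # OR the filter_blocks; True would AND them
        'from': 0,
        'limit': 25,
    }
    # results = testapp.post_json('/compound_search', body).json['@graph']
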
- - :param filter_set: set containing filter_blocks we'd like to toggle - :param on: whether or not to toggle on, default True - """ - filter_blocks = filter_set.get('filter_blocks', []) - for block in filter_blocks: - block['flag_applied'] = True if on else False - - def test_filter_set_barebones(workbook, testapp, barebones_filter_set): """ Tests posting a filter set and executing it through the /compound_search route """ res = testapp.post_json(FILTER_SET_URL, barebones_filter_set, status=201).json @@ -112,18 +38,51 @@ def test_filter_set_barebones(workbook, testapp, barebones_filter_set): # execute given flags only compound_search_res = testapp.post_json(COMPOUND_SEARCH_URL, { - 'flags': '?type=project', + 'flags': [ # should have no effect, since no filter_blocks toggle it + { + 'name': 'project', + 'query': '?type=Project' + } + ], 'search_type': 'Project' # NOTE: will work since we are not actually validating this }).json['@graph'] assert len(compound_search_res) == 1 # do it again, this time with a type that will return 404 testapp.post_json(COMPOUND_SEARCH_URL, { - 'flags': '?type=gene', + 'flags': [ # should have no effect, since no filter_blocks toggle it + { + 'name': 'gene', + 'query': '?type=Gene' + } + ], 'search_type': 'Gene' }, status=404) +@pytest.fixture +def simple_filter_set(): + """ A filter set with only the flag that designates the type """ + return { + 'title': 'Test filter set', + 'search_type': 'Variant', + 'filter_blocks': [ + { + 'query': 'REF=G&ALT=A', + 'flags_applied': ['variant'] + } + ], + 'flags': [ + { + 'name': 'variant', + 'query': '?type=Variant' + } + ], + 'project': 'hms-dbmi', + 'institution': 'hms-dbmi' + } + + def test_filter_set_simple(workbook, testapp, simple_filter_set): """ Test posting a non-trivial (but simple) filter set """ res = testapp.post_json(FILTER_SET_URL, simple_filter_set, status=201).json @@ -134,7 +93,7 @@ def test_filter_set_simple(workbook, testapp, simple_filter_set): compound_search_res = testapp.post_json(COMPOUND_SEARCH_URL, { 'filter_blocks': [{ 'query': 'type=variant&CHROM=1', - 'flag_applied': True + 'flags_applied': [] }], 'search_type': 'Variant' }).json['@graph'] @@ -142,7 +101,12 @@ def test_filter_set_simple(workbook, testapp, simple_filter_set): # execute given flags only compound_search_res = testapp.post_json('/compound_search', { - 'flags': '?type=project', + 'flags': [ # should have no effect, since no filter_blocks toggle it + { + 'name': 'project', + 'query': '?type=Project' + } + ], 'search_type': 'Project' }).json['@graph'] assert len(compound_search_res) == 1 @@ -151,9 +115,14 @@ def test_filter_set_simple(workbook, testapp, simple_filter_set): compound_search_res = testapp.post_json(COMPOUND_SEARCH_URL, { 'filter_blocks': [{ 'query': 'CHROM=1', - 'flag_applied': True + 'flags_applied': ['variant'] }], - 'flags': 'type=variant', + 'flags': [ + { + 'name': 'variant', + 'query': '?type=Variant' + } + ], 'search_type': 'Variant' }).json['@graph'] assert len(compound_search_res) == 4 @@ -163,14 +132,73 @@ def test_filter_set_simple(workbook, testapp, simple_filter_set): assert len(compound_search_res) == 1 -def test_filter_set_complete(workbook, testapp, typical_filter_set): +@pytest.fixture +def typical_filter_set(): + """ A filter set with two filter blocks and a flag """ + return { + 'title': 'Test filter set', + 'search_type': 'Variant', + 'filter_blocks': [ + { + 'query': 'ALT=T&hg19.hg19_chrom=chr1', + 'flags_applied': ['variant'] + }, + { + 'query': 'REF=G&ALT=A', + 'flags_applied': ['variant'] + }, 
+ ], + 'flags': [ + { + 'name': 'variant', + 'query': '?type=Variant' + } + ], + 'project': 'hms-dbmi', + 'institution': 'hms-dbmi' + } + + +def test_filter_set_typical(workbook, testapp, typical_filter_set): """ Executes a filter set with multiple filter blocks """ res = testapp.post_json(FILTER_SET_URL, typical_filter_set, status=201).json uuid = res['@graph'][0]['@id'] # execute the more complicated filter_set by @id compound_search_res = testapp.post_json(COMPOUND_SEARCH_URL, {'@id': uuid}).json['@graph'] - assert len(compound_search_res) == 3 # typical_filter_set matches 3/4 variants + assert len(compound_search_res) == 3 + + +@pytest.fixture +def complex_filter_set(): + """ A filter set with 3 filter_blocks and a flag """ + return { + 'title': 'Test filter set', + 'search_type': 'Variant', + 'filter_blocks': [ + { + 'query': 'ALT=T&hg19.hg19_chrom=chr1', + 'flags_applied': ['variant_chrom'] + }, + { + 'query': 'REF=G&ALT=A', + 'flags_applied': ['variant_chrom'] + }, + { + 'query': 'POS.from=0&POS.to=12125898', + 'flags_applied': ['variant_chrom'] + } + ], + 'flags': [ + { + 'name': 'variant_chrom', + 'query': '?type=Variant&CHROM=1' + } + ], + 'project': 'hms-dbmi', + 'institution': 'hms-dbmi', + 'uuid': '5145195f-c203-41be-9642-7ba6fb4bfb16' + } def test_filter_set_complex(workbook, testapp, complex_filter_set): @@ -184,36 +212,19 @@ def test_filter_set_complex(workbook, testapp, complex_filter_set): compound_search_res = testapp.post_json(COMPOUND_SEARCH_URL, {'@id': uuid}).json['@graph'] assert len(compound_search_res) == 4 # all variants will match - # toggle off all the blocks + # Modify POS filter_set = { - 'title': 'Test filter set', 'search_type': t, 'filter_blocks': filter_blocks, 'flags': flags } - for block in filter_blocks: - block['flag_applied'] = False - compound_search_res = testapp.post_json(COMPOUND_SEARCH_URL, filter_set).json['@graph'] - assert len(compound_search_res) == 4 # should match in all cases - - # Modify POS for block in filter_blocks: query = block['query'] if 'POS' in query: - block['flag_applied'] = True - block['query'] = 'POS.from=0&POS.to=100000' # exclude 3/4 variants + block['query'] = 'POS.from=0&POS.to=80000' # excludes 1/4 variants break compound_search_res = testapp.post_json(COMPOUND_SEARCH_URL, filter_set).json['@graph'] - assert len(compound_search_res) == 1 # should only match the one case - - # Now, toggle the REF=G&ALT=A block, which will re-introduce 1/4 variants, total 2/4 - for block in filter_blocks: - query = block['query'] - if 'REF' in query: - block['flag_applied'] = True - break - compound_search_res = testapp.post_json(COMPOUND_SEARCH_URL, filter_set).json['@graph'] - assert len(compound_search_res) == 2 + assert len(compound_search_res) == 3 def test_filter_set_intersection(workbook, testapp, complex_filter_set): @@ -224,7 +235,6 @@ def test_filter_set_intersection(workbook, testapp, complex_filter_set): filter_blocks = complex_filter_set['filter_blocks'] flags = complex_filter_set['flags'] filter_set = { - 'title': 'Test filter set', 'search_type': t, 'filter_blocks': filter_blocks, 'flags': flags, @@ -232,21 +242,69 @@ def test_filter_set_intersection(workbook, testapp, complex_filter_set): } testapp.post_json(COMPOUND_SEARCH_URL, filter_set, status=404) # AND will eliminate all here - # toggle off the REF/ALT requirement, now 2 will match - for block in filter_blocks: - if 'REF' in block['query']: - block['flag_applied'] = False - break + +@pytest.fixture +def filter_set_with_many_flags(): + """ A filter set with 2 filter 
blocks and 4 flags applied differently across blocks """
+    return {
+        'title': 'Test filter set',
+        'search_type': 'Variant',
+        'filter_blocks': [
+            {
+                'query': 'REF=A&ALT=T',
+                'flags_applied': ['position_lower_bound']
+            },
+            {
+                'query': 'REF=A&ALT=G',
+                'flags_applied': ['position_upper_bound']
+            }
+        ],
+        'flags': [
+            {
+                'name': 'variant_chrom',
+                'query': '?type=Variant&CHROM=1'
+            },
+            {
+                'name': 'position_upper_bound',
+                'query': 'POS.to=100000'
+            },
+            {
+                'name': 'position_lower_bound',
+                'query': 'POS.from=100000'
+            },
+            {
+                'name': 'hg19_chrom_is_two',
+                'query': 'hg19.hg19_chrom=chr2'
+            }
+        ],
+        'project': 'hms-dbmi',
+        'institution': 'hms-dbmi',
+        'uuid': 'de3babdb-68da-4c75-a42d-6428c81392d1'
+    }
+
+
+def test_filter_set_selectively_apply_flags(workbook, testapp, filter_set_with_many_flags):
+    """ Executes a complex filter set with multiple flags added selectively across fields """
+    filter_set = filter_set_with_many_flags
     compound_search_res = testapp.post_json(COMPOUND_SEARCH_URL, filter_set).json['@graph']
     assert len(compound_search_res) == 2
 
-    # toggle off hg19 so all match
-    for block in filter_blocks:
-        if 'hg19' in block['query']:
-            block['flag_applied'] = False
-            break
+    # add chr=2 flag, giving no results
+    for filter_block in filter_set['filter_blocks']:
+        filter_block['flags_applied'].append('hg19_chrom_is_two')
+    testapp.post_json(COMPOUND_SEARCH_URL, filter_set, status=404)
+
+    # disable all flags, still only giving 2 results
+    for filter_block in filter_set['filter_blocks']:
+        filter_block['flags_applied'] = []
     compound_search_res = testapp.post_json(COMPOUND_SEARCH_URL, filter_set).json['@graph']
-    assert len(compound_search_res) == 4
+    assert len(compound_search_res) == 2
+
+    # enable multiple flags, which should disqualify 1/2 remaining variants
+    for filter_block in filter_set['filter_blocks']:
+        filter_block['flags_applied'] = ['variant_chrom', 'position_lower_bound']
+    compound_search_res = testapp.post_json(COMPOUND_SEARCH_URL, filter_set).json['@graph']
+    assert len(compound_search_res) == 1
 
 
 # the following tests are distinct in that they aim to test specific behavior associated with
@@ -269,13 +327,12 @@ def execute_and_verify_generator_search(testapp, filter_set, expected):
 
 @pytest.fixture
 def filter_set_with_only_flags():
     return {
-        'title': 'Test filter set',
         'search_type': 'Variant',
-        'flags': 'CHROM=1'
+        'global_flags': 'CHROM=1'
     }
 
 
-def test_compound_search_only_flags(workbook, testapp, filter_set_with_only_flags):
+def test_compound_search_only_global_flags(workbook, testapp, filter_set_with_only_flags):
     """ Tests compound search with a filter set that has only flags
         /search redirect is functioning if we get correct facets on the response, which are
         checked explicitly for correctness in this test. 
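The tests above pin down the flag semantics the fixtures rely on: each filter block executes its own query string plus the query of every flag named in its flags_applied list, while a set-level global_flags query (used by the fixtures below) applies to every block. A minimal sketch of that resolution step, reusing only field names from the fixtures in this file -- an illustration of the semantics the tests assume, not the portal's actual search implementation:

    from urllib.parse import parse_qsl, urlencode

    def effective_query(block, flags, global_flags=None):
        """Resolve one filter block into the query it would execute (sketch only)."""
        pairs = []
        if global_flags:
            pairs += parse_qsl(global_flags.lstrip('?'))    # applied to every block
        pairs += parse_qsl(block['query'].lstrip('?'))      # the block's own query
        flag_queries = {flag['name']: flag['query'] for flag in flags}
        for name in block.get('flags_applied', []):         # only named flags apply
            pairs += parse_qsl(flag_queries[name].lstrip('?'))
        return urlencode(pairs)

    # e.g. the first block of filter_set_with_many_flags resolves to
    # 'REF=A&ALT=T&POS.from=100000' -- its own query plus position_lower_bound.
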
@@ -286,28 +343,16 @@ def test_compound_search_only_flags(workbook, testapp, filter_set_with_only_flag # do generator search execute_and_verify_generator_search(testapp, filter_set_with_only_flags, 4) - # verify facet values all sum to 4, since we should only be aggregating on the search results - assert 'facets' in resp - facets = resp['facets'] - for facet in facets: - count = 0 - if 'terms' in facet: - for term in facet['terms']: - count += term['doc_count'] - else: - count += facet['total'] - assert count == 4 - @pytest.fixture def filter_set_with_single_filter_block(): return { - 'title': 'Test filter set', 'search_type': 'Variant', 'filter_blocks': [{ - 'query': '?type=Variant&POS.from=0&POS.to=10000000', - 'flag_applied': True - }] + 'query': 'POS.from=0&POS.to=10000000', + 'flags_applied': [] + }], + 'global_flags': '?type=Variant' } @@ -326,13 +371,18 @@ def test_compound_search_single_filter_block(workbook, testapp, filter_set_with_ @pytest.fixture def filter_set_with_single_filter_block_and_flags(): return { - 'title': 'Test filter set', 'search_type': 'Variant', 'filter_blocks': [{ - 'query': '?type=Variant&POS.from=0&POS.to=10000000', - 'flag_applied': True + 'query': 'POS.from=0&POS.to=10000000', + 'flags_applied': ['chrom'] }], - 'flags': 'CHROM=1' + 'global_flags': '?type=Variant', + 'flags': [ + { + 'name': 'chrom', + 'query': 'CHROM=1' + } + ] } @@ -348,50 +398,37 @@ def test_compound_search_filter_and_flags(workbook, testapp, filter_set_with_sin execute_and_verify_generator_search(testapp, filter_set_with_single_filter_block_and_flags, 1) filter_set_with_single_filter_block_and_flags['return_generator'] = False # undo side-effect - # disable block, so flag only - toggle_filter_blocks(filter_set_with_single_filter_block_and_flags, on=False) - resp = testapp.post_json(COMPOUND_SEARCH_URL, filter_set_with_single_filter_block_and_flags).json - assert len(resp['@graph']) == 4 - assert 'facets' in resp - @pytest.fixture def filter_set_with_multiple_disabled_flags(): return { - 'title': 'Test filter set', 'search_type': 'Variant', 'filter_blocks': [{ 'query': '?type=Variant&POS.from=0&POS.to=10000000', - 'flag_applied': False + 'flags_applied': [] }, { 'query': '?type=Variant&REF=A', - 'flag_applied': False + 'flags_applied': [] }], - 'flags': 'CHROM=1' + 'global_flags': '?type=Variant', + 'flags': [ + { + 'name': 'chrom', + 'query': 'CHROM=1' + } + ] } -def test_compound_search_disabled_filter_blocks(workbook, testapp, filter_set_with_multiple_disabled_flags): - """ Tests a compound search with all filter_blocks disabled (so will only execute flags). """ +def test_compound_search_disabled_flags(workbook, testapp, filter_set_with_multiple_disabled_flags): + """ Tests a compound search with all flags disabled (raw filter_blocks + global_flags). 
""" resp = testapp.post_json(COMPOUND_SEARCH_URL, filter_set_with_multiple_disabled_flags).json - assert len(resp['@graph']) == 4 - - # Test same facet behavior as previously, since we are only executing flags - assert 'facets' in resp - facets = resp['facets'] - for facet in facets: - count = 0 - if 'terms' in facet: - for term in facet['terms']: - count += term['doc_count'] - else: - count += facet['total'] - assert count == 4 + assert len(resp['@graph']) == 2 # do generator search - execute_and_verify_generator_search(testapp, filter_set_with_multiple_disabled_flags, 4) - filter_set_with_multiple_disabled_flags['limit'] = 2 + execute_and_verify_generator_search(testapp, filter_set_with_multiple_disabled_flags, 2) + filter_set_with_multiple_disabled_flags['limit'] = 1 execute_and_verify_generator_search(testapp, filter_set_with_multiple_disabled_flags, 2) @@ -426,3 +463,42 @@ def test_failure(from_, limit): paginated_request['from'] = 0 paginated_request['limit'] = 10 execute_and_verify_generator_search(testapp, paginated_request, 10) + + +def test_compound_search_rejects_malformed_filter_sets(testapp): + """ Tests passing a bunch of malformed filter_sets raises an error. """ + filter_set_without_filter_block_sub_fields = { + 'search_type': 'Variant', + 'filter_blocks': [ + { + 'query': 'CHROM=1' # no flags_applied field + } + ] + } + with pytest.raises(AppError): + testapp.post_json(COMPOUND_SEARCH_URL, filter_set_without_filter_block_sub_fields) + filter_set_without_filter_block_sub_fields['filter_blocks'][0]['flags_applied'] = [] + del filter_set_without_filter_block_sub_fields['filter_blocks'][0]['query'] # no query + with pytest.raises(AppError): + testapp.post_json(COMPOUND_SEARCH_URL, filter_set_without_filter_block_sub_fields) + filter_set_without_filter_block_sub_fields['filter_blocks'][0]['query'] = ['hello'] # bad type + with pytest.raises(AppError): + testapp.post_json(COMPOUND_SEARCH_URL, filter_set_without_filter_block_sub_fields) + + filter_set_without_flag_sub_fields = { + 'search_type': 'Variant', + 'flags': [ + { + 'name': 'something' # no query + } + ] + } + with pytest.raises(AppError): + testapp.post_json(COMPOUND_SEARCH_URL, filter_set_without_flag_sub_fields) + filter_set_without_flag_sub_fields['flags'][0]['query'] = 'type=Variant' + del filter_set_without_flag_sub_fields['flags'][0]['name'] # no name + with pytest.raises(AppError): + testapp.post_json(COMPOUND_SEARCH_URL, filter_set_without_flag_sub_fields) + filter_set_without_flag_sub_fields['flags'][0]['name'] = 5 # bad type + with pytest.raises(AppError): + testapp.post_json(COMPOUND_SEARCH_URL, filter_set_without_flag_sub_fields) From e6c836982c8bcb0119c8b7b309a7e6ff5dc130dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Koray=20K=C4=B1rl=C4=B1?= Date: Wed, 8 Jul 2020 18:50:23 -0400 Subject: [PATCH 48/54] add calc prop on sample_processing --- src/encoded/types/__init__.py | 28 -------- src/encoded/types/family.py | 1 + src/encoded/types/sample.py | 130 +++++++++++++++++++++++++++++++++- 3 files changed, 130 insertions(+), 29 deletions(-) diff --git a/src/encoded/types/__init__.py b/src/encoded/types/__init__.py index f25d3496b2..da29c53f10 100644 --- a/src/encoded/types/__init__.py +++ b/src/encoded/types/__init__.py @@ -29,34 +29,6 @@ def includeme(config): config.scan() -@collection( - name='sample-processings', - properties={ - 'title': 'SampleProcessings', - 'description': 'Listing of Sample Processings', - }) -class SampleProcessing(Item): - item_type = 'sample_processing' - schema = 
load_schema('encoded:schemas/sample_processing.json')
+    embedded_list = []
+    rev = {'case': ('Case', 'sample_processing')}
+
+    @calculated_property(schema={
+        "title": "Cases",
+        "description": "The case(s) this sample processing is for",
+        "type": "array",
+        "items": {
+            "title": "Case",
+            "type": "string",
+            "linkTo": "Case"
+        }
+    })
+    def cases(self, request):
+        rs = self.rev_link_atids(request, "case")
+        if rs:
+            return rs
+
+    @calculated_property(schema={
+        "title": "Samples Pedigree",
+        "description": "Relationships to proband for samples.",
+        "type": "array",
+        "items": {
+            "title": "Sample Pedigree",
+            "type": "object",
+            "properties": {
+                "individual": {
+                    "title": "Individual",
+                    "type": "string"
+                },
+                "sample_accession": {
+                    "title": "Sample Accession",
+                    "type": "string"
+                },
+                "sample_name": {
+                    "title": "Sample Name",
+                    "type": "string"
+                },
+                "parents": {
+                    "title": "Parents",
+                    "type": "array",
+                    "items": {
+                        "title": "Parent",
+                        "type": "string"
+                    }
+                },
+                "association": {
+                    "title": "Association",
+                    "type": "string",
+                    "enum": [
+                        "paternal",
+                        "maternal"
+                    ]
+                },
+                "sex": {
+                    "title": "Sex",
+                    "type": "string",
+                    "enum": [
+                        "F",
+                        "M",
+                        "U"
+                    ]
+                },
+                "relationship": {
+                    "title": "Relationship",
+                    "type": "string"
+                }
+            }
+        }
+    })
+    def samples_pedigree(self, request, family=None, samples=None):
+        """Filter Family Pedigree for samples to be used in QCs"""
+        samples_pedigree = []
+        if not family or samples:
+            return samples_pedigree
+        fam_data = get_item_or_none(request, family, 'families', frame='embedded')
+        if not fam_data:
+            return samples_pedigree
+        members = fam_data.get('members', [])
+        relations = fam_data.get('relationships', [])
+        if not members:
+            return samples_pedigree
+        for a_sample in samples:
+            temp = {
+                "individual": "",
+                "sample_accession": "",
+                "sample_name": "",
+                "parents": [],
+                "relationship": "",
+                "sex": "",
+                # "association": ""  optional, add if exists
+            }
+            mem_infos = [i for i in members if a_sample in [x['@id'] 
for x in i.get('samples', [])]] + if not mem_infos: + continue + mem_info = mem_infos[0] + sample_info = [i for i in mem_info['samples'] if i['@id'] == a_sample][0] + # fetch the calculated relation info + relation_infos = [i for i in relations if i['individual'] == mem_info['accession']] + # fill in temp dict + temp['individual'] = mem_info['accession'] + temp['sex'] = mem_info.get('sex', 'U') + parents = [] + for a_parent in ['mother', 'father']: + if mem_info.get(a_parent): + parents.append(mem_info[a_parent]['display_title']) + temp['parents'] = parents + temp['sample_accession'] = sample_info['display_title'] + temp['sample_name'] = sample_info.get('bam_sample_id', '') + if relation_infos: + relation_info = relation_infos[0] + temp['relationship'] = relation_info.get('association', '') + if relation_info.get('association', ''): + temp['association'] = relation_info.get('relationship', '') + samples_pedigree.append(temp) + return samples_pedigree From 816678d65285889be394b6decc6e056ae93a7928 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Koray=20K=C4=B1rl=C4=B1?= Date: Wed, 8 Jul 2020 18:58:10 -0400 Subject: [PATCH 49/54] add sample for data fixture family --- src/encoded/tests/datafixtures.py | 101 ++++++++++++++++++ src/encoded/tests/test_types_sample.py | 10 +- .../tests/test_types_sample_processing.py | 6 ++ 3 files changed, 112 insertions(+), 5 deletions(-) create mode 100644 src/encoded/tests/test_types_sample_processing.py diff --git a/src/encoded/tests/datafixtures.py b/src/encoded/tests/datafixtures.py index 7219d3bc15..096b90a8a0 100644 --- a/src/encoded/tests/datafixtures.py +++ b/src/encoded/tests/datafixtures.py @@ -142,10 +142,103 @@ def access_key(testapp, submitter): return result +# ADD SAMPLES FOR FAMILY MEMBERS +@pytest.fixture +def female_individual_sample(testapp, project, institution): + item = { + "accession": "GAPINGRANDMA", + "bam_sample_id": "ext_id_001", + "status": "released" + } + return testapp.post_json('/sample', item).json['@graph'][0] + + +@pytest.fixture +def grandpa_sample(testapp, project, institution): + item = { + "accession": "GAPSAGRANDPA", + "bam_sample_id": "ext_id_002", + "status": "released" + } + return testapp.post_json('/sample', item).json['@graph'][0] + + +@pytest.fixture +def mother_sample(testapp, project, institution): + item = { + "accession": "GAPSAMOTHER1", + "bam_sample_id": "ext_id_003", + "status": "released" + } + return testapp.post_json('/sample', item).json['@graph'][0] + + +@pytest.fixture +def father_sample(testapp, project, institution): + item = { + "accession": "GAPSAFATHER1", + "bam_sample_id": "ext_id_003", + "status": "released" + } + return testapp.post_json('/sample', item).json['@graph'][0] + + +@pytest.fixture +def uncle_sample(testapp, project, institution): + item = { + "accession": "GAPSAUNCLE01", + "bam_sample_id": "ext_id_003", + "status": "released" + } + return testapp.post_json('/sample', item).json['@graph'][0] + + +@pytest.fixture +def child_sample(testapp, project, institution): + item = { + "accession": "GAPSAPROBAND", + "bam_sample_id": "ext_id_003", + "status": "released" + } + return testapp.post_json('/sample', item).json['@graph'][0] + + +@pytest.fixture +def cousin_sample(testapp, project, institution): + item = { + "accession": "GAPSACOUSIN1", + "bam_sample_id": "ext_id_003", + "status": "released" + } + return testapp.post_json('/sample', item).json['@graph'][0] + + +@pytest.fixture +def sister_sample(testapp, project, institution): + item = { + "accession": "GAPSAHALFSIS", + "bam_sample_id": 
"ext_id_003", + "status": "released" + } + return testapp.post_json('/sample', item).json['@graph'][0] + + +@pytest.fixture +def brother_sample(testapp, project, institution): + item = { + "accession": "GAPSABROTHER", + "bam_sample_id": "ext_id_003", + "status": "released" + } + return testapp.post_json('/sample', item).json['@graph'][0] + + +# ADD FAMILY MEMBERS @pytest.fixture def female_individual(testapp, project, institution): item = { "accession": "GAPINGRANDMA", + "samples": ["GAPSAGRANDMA"], "age": 53, "age_units": "year", 'project': project['@id'], @@ -161,6 +254,7 @@ def female_individual(testapp, project, institution): def grandpa(testapp, project, institution): item = { "accession": "GAPIDGRANDPA", + "samples": ["GAPSAGRANDPA"], "age": 53, "age_units": "year", 'project': project['@id'], @@ -175,6 +269,7 @@ def grandpa(testapp, project, institution): def mother(testapp, project, institution, grandpa, female_individual): item = { "accession": "GAPIDMOTHER1", + "samples": ["GAPSAMOTHER1"], "age": 33, "age_units": "year", 'project': project['@id'], @@ -190,6 +285,7 @@ def mother(testapp, project, institution, grandpa, female_individual): def father(testapp, project, institution): item = { "accession": "GAPIDFATHER1", + "samples": ["GAPSAFATHER1"], "age": 33, "age_units": "year", 'project': project['@id'], @@ -203,6 +299,7 @@ def father(testapp, project, institution): def uncle(testapp, project, institution, grandpa): item = { "accession": "GAPIDUNCLE01", + "samples": ["GAPSAUNCLE01"], "age": 35, "age_units": "year", 'project': project['@id'], @@ -217,6 +314,7 @@ def uncle(testapp, project, institution, grandpa): def child(testapp, project, institution, mother, father): item = { "accession": "GAPIDPROBAND", + "samples": ["GAPSAPROBAND"], "age": 7, "age_units": "year", 'project': project['@id'], @@ -232,6 +330,7 @@ def child(testapp, project, institution, mother, father): def cousin(testapp, project, institution, uncle): item = { "accession": "GAPIDCOUSIN1", + "samples": ["GAPSACOUSIN1"], "age": 11, "age_units": "year", 'project': project['@id'], @@ -246,6 +345,7 @@ def cousin(testapp, project, institution, uncle): def sister(testapp, project, institution, mother): item = { "accession": "GAPIDHALFSIS", + "samples": ["GAPSAHALFSIS"], "age": 11, "age_units": "year", 'project': project['@id'], @@ -260,6 +360,7 @@ def sister(testapp, project, institution, mother): def brother(testapp, project, institution, mother, father): item = { "accession": "GAPIDBROTHER", + "samples": ["GAPSABROTHER"], "age": 13, "age_units": "year", 'project': project['@id'], diff --git a/src/encoded/tests/test_types_sample.py b/src/encoded/tests/test_types_sample.py index b2da37addb..cca34c8c56 100644 --- a/src/encoded/tests/test_types_sample.py +++ b/src/encoded/tests/test_types_sample.py @@ -1,6 +1,7 @@ import pytest pytestmark = [pytest.mark.setone, pytest.mark.working, pytest.mark.schema] + @pytest.fixture def MIndividual(testapp, project, institution, sample_one): ind = { @@ -11,7 +12,6 @@ def MIndividual(testapp, project, institution, sample_one): return testapp.post_json('/individual', ind, status=201).json['@graph'][0] - @pytest.fixture def WIndividual(testapp, project, institution): ind = { @@ -91,17 +91,17 @@ def test_sample_requisition_completed_accepted(testapp, sample_one): res = testapp.post_json('/sample', sample_one, status=201).json['@graph'][0] assert not res.get('requisition_completed') res2 = testapp.patch_json(res['@id'], {'specimen_accession_date': '2020-01-01'}, status=200).json['@graph'][0] 
- assert res2.get('requisition_completed') == False + assert res2.get('requisition_completed') is False res3 = testapp.patch_json(res['@id'], {'requisition_acceptance': {'accepted_rejected': 'Accepted'}}, status=200).json['@graph'][0] - assert res3.get('requisition_completed') == True + assert res3.get('requisition_completed') is True def test_sample_requisition_completed_rejected(testapp, sample_one): sample_one['requisition_acceptance'] = {'accepted_rejected': 'Rejected'} res = testapp.post_json('/sample', sample_one, status=201).json['@graph'][0] - assert res.get('requisition_completed') == False + assert res.get('requisition_completed') is False patch_info = res.get('requisition_acceptance') patch_info['date_completed'] = '2020-03-01' res2 = testapp.patch_json(res['@id'], {'requisition_acceptance': patch_info}, status=200).json['@graph'][0] - assert res2.get('requisition_completed') == True + assert res2.get('requisition_completed') is True diff --git a/src/encoded/tests/test_types_sample_processing.py b/src/encoded/tests/test_types_sample_processing.py new file mode 100644 index 0000000000..fd41ac3b71 --- /dev/null +++ b/src/encoded/tests/test_types_sample_processing.py @@ -0,0 +1,6 @@ +import pytest +from unittest import mock +from datetime import datetime +from xml.etree.ElementTree import fromstring +from encoded.types.family import * +pytestmark = [pytest.mark.work, pytest.mark.schema] From e3ce28f5cc8d66fbfa75ca97da7c286f38bc5b11 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Koray=20K=C4=B1rl=C4=B1?= Date: Wed, 8 Jul 2020 19:50:26 -0400 Subject: [PATCH 50/54] add test --- src/encoded/tests/datafixtures.py | 70 ++++++++++++------- src/encoded/tests/test_types_family.py | 4 +- .../tests/test_types_sample_processing.py | 28 ++++++++ src/encoded/types/sample.py | 13 +++- 4 files changed, 85 insertions(+), 30 deletions(-) diff --git a/src/encoded/tests/datafixtures.py b/src/encoded/tests/datafixtures.py index 096b90a8a0..60c00172b4 100644 --- a/src/encoded/tests/datafixtures.py +++ b/src/encoded/tests/datafixtures.py @@ -146,7 +146,9 @@ def access_key(testapp, submitter): @pytest.fixture def female_individual_sample(testapp, project, institution): item = { - "accession": "GAPINGRANDMA", + "accession": "GAPSAGRANDMA", + 'project': project['@id'], + 'institution': institution['@id'], "bam_sample_id": "ext_id_001", "status": "released" } @@ -157,6 +159,8 @@ def female_individual_sample(testapp, project, institution): def grandpa_sample(testapp, project, institution): item = { "accession": "GAPSAGRANDPA", + 'project': project['@id'], + 'institution': institution['@id'], "bam_sample_id": "ext_id_002", "status": "released" } @@ -167,6 +171,8 @@ def grandpa_sample(testapp, project, institution): def mother_sample(testapp, project, institution): item = { "accession": "GAPSAMOTHER1", + 'project': project['@id'], + 'institution': institution['@id'], "bam_sample_id": "ext_id_003", "status": "released" } @@ -177,7 +183,9 @@ def mother_sample(testapp, project, institution): def father_sample(testapp, project, institution): item = { "accession": "GAPSAFATHER1", - "bam_sample_id": "ext_id_003", + 'project': project['@id'], + 'institution': institution['@id'], + "bam_sample_id": "ext_id_004", "status": "released" } return testapp.post_json('/sample', item).json['@graph'][0] @@ -187,7 +195,9 @@ def father_sample(testapp, project, institution): def uncle_sample(testapp, project, institution): item = { "accession": "GAPSAUNCLE01", - "bam_sample_id": "ext_id_003", + 'project': project['@id'], + 'institution': 
institution['@id'], + "bam_sample_id": "ext_id_005", "status": "released" } return testapp.post_json('/sample', item).json['@graph'][0] @@ -197,7 +207,9 @@ def uncle_sample(testapp, project, institution): def child_sample(testapp, project, institution): item = { "accession": "GAPSAPROBAND", - "bam_sample_id": "ext_id_003", + 'project': project['@id'], + 'institution': institution['@id'], + "bam_sample_id": "ext_id_006", "status": "released" } return testapp.post_json('/sample', item).json['@graph'][0] @@ -207,7 +219,9 @@ def child_sample(testapp, project, institution): def cousin_sample(testapp, project, institution): item = { "accession": "GAPSACOUSIN1", - "bam_sample_id": "ext_id_003", + 'project': project['@id'], + 'institution': institution['@id'], + "bam_sample_id": "ext_id_007", "status": "released" } return testapp.post_json('/sample', item).json['@graph'][0] @@ -217,7 +231,9 @@ def cousin_sample(testapp, project, institution): def sister_sample(testapp, project, institution): item = { "accession": "GAPSAHALFSIS", - "bam_sample_id": "ext_id_003", + 'project': project['@id'], + 'institution': institution['@id'], + "bam_sample_id": "ext_id_008", "status": "released" } return testapp.post_json('/sample', item).json['@graph'][0] @@ -227,7 +243,9 @@ def sister_sample(testapp, project, institution): def brother_sample(testapp, project, institution): item = { "accession": "GAPSABROTHER", - "bam_sample_id": "ext_id_003", + 'project': project['@id'], + 'institution': institution['@id'], + "bam_sample_id": "ext_id_009", "status": "released" } return testapp.post_json('/sample', item).json['@graph'][0] @@ -235,10 +253,10 @@ def brother_sample(testapp, project, institution): # ADD FAMILY MEMBERS @pytest.fixture -def female_individual(testapp, project, institution): +def female_individual(testapp, project, institution, female_individual_sample): item = { - "accession": "GAPINGRANDMA", - "samples": ["GAPSAGRANDMA"], + "accession": "GAPIDGRANDMA", + "samples": [female_individual_sample['@id']], "age": 53, "age_units": "year", 'project': project['@id'], @@ -251,10 +269,10 @@ def female_individual(testapp, project, institution): @pytest.fixture -def grandpa(testapp, project, institution): +def grandpa(testapp, project, institution, grandpa_sample): item = { "accession": "GAPIDGRANDPA", - "samples": ["GAPSAGRANDPA"], + "samples": [grandpa_sample['@id']], "age": 53, "age_units": "year", 'project': project['@id'], @@ -266,10 +284,10 @@ def grandpa(testapp, project, institution): @pytest.fixture -def mother(testapp, project, institution, grandpa, female_individual): +def mother(testapp, project, institution, grandpa, female_individual, mother_sample): item = { "accession": "GAPIDMOTHER1", - "samples": ["GAPSAMOTHER1"], + "samples": [mother_sample['@id']], "age": 33, "age_units": "year", 'project': project['@id'], @@ -282,10 +300,10 @@ def mother(testapp, project, institution, grandpa, female_individual): @pytest.fixture -def father(testapp, project, institution): +def father(testapp, project, institution, father_sample): item = { "accession": "GAPIDFATHER1", - "samples": ["GAPSAFATHER1"], + "samples": [father_sample['@id']], "age": 33, "age_units": "year", 'project': project['@id'], @@ -296,10 +314,10 @@ def father(testapp, project, institution): @pytest.fixture -def uncle(testapp, project, institution, grandpa): +def uncle(testapp, project, institution, grandpa, uncle_sample): item = { "accession": "GAPIDUNCLE01", - "samples": ["GAPSAUNCLE01"], + "samples": [uncle_sample['@id']], "age": 35, 
"age_units": "year", 'project': project['@id'], @@ -311,10 +329,10 @@ def uncle(testapp, project, institution, grandpa): @pytest.fixture -def child(testapp, project, institution, mother, father): +def child(testapp, project, institution, mother, father, child_sample): item = { "accession": "GAPIDPROBAND", - "samples": ["GAPSAPROBAND"], + "samples": [child_sample['@id']], "age": 7, "age_units": "year", 'project': project['@id'], @@ -327,10 +345,10 @@ def child(testapp, project, institution, mother, father): @pytest.fixture -def cousin(testapp, project, institution, uncle): +def cousin(testapp, project, institution, uncle, cousin_sample): item = { "accession": "GAPIDCOUSIN1", - "samples": ["GAPSACOUSIN1"], + "samples": [cousin_sample['@id']], "age": 11, "age_units": "year", 'project': project['@id'], @@ -342,10 +360,10 @@ def cousin(testapp, project, institution, uncle): @pytest.fixture -def sister(testapp, project, institution, mother): +def sister(testapp, project, institution, mother, sister_sample): item = { "accession": "GAPIDHALFSIS", - "samples": ["GAPSAHALFSIS"], + "samples": [sister_sample['@id']], "age": 11, "age_units": "year", 'project': project['@id'], @@ -357,10 +375,10 @@ def sister(testapp, project, institution, mother): @pytest.fixture -def brother(testapp, project, institution, mother, father): +def brother(testapp, project, institution, mother, father, brother_sample): item = { "accession": "GAPIDBROTHER", - "samples": ["GAPSABROTHER"], + "samples": [brother_sample['@id']], "age": 13, "age_units": "year", 'project': project['@id'], diff --git a/src/encoded/tests/test_types_family.py b/src/encoded/tests/test_types_family.py index 94e7108198..af3ef7a457 100644 --- a/src/encoded/tests/test_types_family.py +++ b/src/encoded/tests/test_types_family.py @@ -49,7 +49,7 @@ def test_relationships_roles(testapp, fam): "GAPIDMOTHER1": "mother", "GAPIDBROTHER": "brother", "GAPIDGRANDPA": "grandfather", - "GAPINGRANDMA": "grandmother", + "GAPIDGRANDMA": "grandmother", "GAPIDHALFSIS": "half-sister", "GAPIDUNCLE01": "uncle", "GAPIDCOUSIN1": "cousin" @@ -68,7 +68,7 @@ def test_relationships_assosiation(testapp, fam): "GAPIDMOTHER1": "", "GAPIDBROTHER": "", "GAPIDGRANDPA": "maternal", - "GAPINGRANDMA": "maternal", + "GAPIDGRANDMA": "maternal", "GAPIDHALFSIS": "", "GAPIDUNCLE01": "maternal", "GAPIDCOUSIN1": "maternal" diff --git a/src/encoded/tests/test_types_sample_processing.py b/src/encoded/tests/test_types_sample_processing.py index fd41ac3b71..d5dab38821 100644 --- a/src/encoded/tests/test_types_sample_processing.py +++ b/src/encoded/tests/test_types_sample_processing.py @@ -4,3 +4,31 @@ from xml.etree.ElementTree import fromstring from encoded.types.family import * pytestmark = [pytest.mark.work, pytest.mark.schema] + + +@pytest.fixture +def sample_proc_fam(testapp, project, institution, fam): + data = { + 'project': project['@id'], + 'institution': institution['@id'], + 'samples': [ + "GAPSAPROBAND", + "GAPSAFATHER1", + "GAPSAMOTHER1", + "GAPSABROTHER", + "GAPSAGRANDPA", + "GAPSAGRANDMA", + "GAPSAHALFSIS", + "GAPSAUNCLE01", + "GAPSACOUSIN1" + ], + 'families': [fam['@id']] + } + return testapp.post_json('/sample_processing', data).json['@graph'][0] + + +def test_sample_processing_pedigree(testapp, sample_proc_fam): + """This is an end to end test for calculating relationships + Test for roles""" + print(sample_proc_fam['samples_pedigree']) + assert False diff --git a/src/encoded/types/sample.py b/src/encoded/types/sample.py index a23c5621eb..c3be71fcc1 100644 --- 
a/src/encoded/types/sample.py
+++ b/src/encoded/types/sample.py
@@ -137,18 +137,27 @@ def cases(self, request):
             }
         }
     })
-    def samples_pedigree(self, request, family=None, samples=None):
+    def samples_pedigree(self, request, families=None, samples=None):
         """Filter Family Pedigree for samples to be used in QCs"""
+        # If there are multiple families this will be problematic, return empty
+        # We will need to know the context
         samples_pedigree = []
-        if not family or samples:
+        if not families or samples:
             return samples_pedigree
+        # this part will need work (ie disregard relations and just return parents)
+        if len(families) != 1:
+            return samples_pedigree
+        family = families[0]
         fam_data = get_item_or_none(request, family, 'families', frame='embedded')
+        print(fam_data)
         if not fam_data:
             return samples_pedigree
+        print('B')
         members = fam_data.get('members', [])
         relations = fam_data.get('relationships', [])
         if not members:
             return samples_pedigree
+        print('C')
         for a_sample in samples:
             temp = {
                 "individual": "",

From 6e26622fb7f6d0691fe4edb9deaca978d549322f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Koray=20K=C4=B1rl=C4=B1?=
Date: Wed, 8 Jul 2020 20:13:14 -0400
Subject: [PATCH 51/54] add tests

---
 src/encoded/tests/test_types_sample.py        | 54 +++++++++++++++++++
 .../tests/test_types_sample_processing.py     | 34 ------------
 src/encoded/types/sample.py                   |  9 ++--
 3 files changed, 57 insertions(+), 40 deletions(-)
 delete mode 100644 src/encoded/tests/test_types_sample_processing.py

diff --git a/src/encoded/tests/test_types_sample.py b/src/encoded/tests/test_types_sample.py
index cca34c8c56..f118d408ff 100644
--- a/src/encoded/tests/test_types_sample.py
+++ b/src/encoded/tests/test_types_sample.py
@@ -105,3 +105,57 @@ def test_sample_requisition_completed_rejected(testapp, sample_one):
     patch_info['date_completed'] = '2020-03-01'
     res2 = testapp.patch_json(res['@id'], {'requisition_acceptance': patch_info}, status=200).json['@graph'][0]
     assert res2.get('requisition_completed') is True
+
+
+# Sample Processing Tests
+@pytest.fixture
+def sample_proc_fam(testapp, project, institution, fam):
+    data = {
+        'project': project['@id'],
+        'institution': institution['@id'],
+        'samples': [
+            "GAPSAPROBAND",
+            "GAPSAFATHER1",
+            "GAPSAMOTHER1",
+            "GAPSABROTHER",
+            "GAPSAGRANDPA",
+            "GAPSAGRANDMA",
+            "GAPSAHALFSIS",
+            "GAPSAUNCLE01",
+            "GAPSACOUSIN1"
+        ],
+        'families': [fam['@id']]
+    }
+    res = testapp.post_json('/sample_processing', data).json['@graph'][0]
+    return res
+
+
+def test_sample_processing_pedigree(testapp, sample_proc_fam):
+    """This is an end to end test for calculating relationships
+    Test for roles"""
+    expected_values = {
+        'GAPIDPROBAND': {'sample_accession': 'GAPSAPROBAND', 'sample_name': 'ext_id_006',
+                         'parents': ['GAPIDMOTHER1', 'GAPIDFATHER1'], 'relationship': 'proband', 'sex': 'M'},
+        'GAPIDFATHER1': {'sample_accession': 'GAPSAFATHER1', 'sample_name': 'ext_id_004',
+                         'parents': [], 'relationship': 'father', 'sex': 'M'},
+        'GAPIDMOTHER1': {'sample_accession': 'GAPSAMOTHER1', 'sample_name': 'ext_id_003',
+                         'parents': ['GAPIDGRANDMA', 'GAPIDGRANDPA'], 'relationship': 'mother', 'sex': 'F'},
+        'GAPIDBROTHER': {'sample_accession': 'GAPSABROTHER', 'sample_name': 'ext_id_009',
+                         'parents': ['GAPIDMOTHER1', 'GAPIDFATHER1'], 'relationship': 'brother', 'sex': 'M'},
+        'GAPIDGRANDPA': {'sample_accession': 'GAPSAGRANDPA', 'sample_name': 'ext_id_002',
+                         'parents': [], 'relationship': 'grandfather', 'sex': 'M', 'association': 'maternal'},
+        'GAPIDGRANDMA': {'sample_accession': 'GAPSAGRANDMA', 'sample_name': 'ext_id_001',
+                         'parents': [], 'relationship': 'grandmother', 'sex': 'F', 'association': 'maternal'},
+        'GAPIDHALFSIS': {'sample_accession': 'GAPSAHALFSIS', 'sample_name': 'ext_id_008',
+                         'parents': ['GAPIDMOTHER1'], 'relationship': 'half-sister', 'sex': 'F'},
+        'GAPIDUNCLE01': {'sample_accession': 'GAPSAUNCLE01', 'sample_name': 'ext_id_005',
+                         'parents': ['GAPIDGRANDPA'], 'relationship': 'uncle', 'sex': 'M', 'association': 'maternal'},
+        'GAPIDCOUSIN1': {'sample_accession': 'GAPSACOUSIN1', 'sample_name': 'ext_id_007',
+                         'parents': ['GAPIDUNCLE01'], 'relationship': 'cousin', 'sex': 'F', 'association': 'maternal'}
+    }
+    calculated_values = sample_proc_fam['samples_pedigree']
+
+    for a_sample in calculated_values:
+        expected_value = expected_values[a_sample['individual']]
+        for a_key in expected_value:
+            assert a_sample[a_key] == expected_value[a_key]
diff --git a/src/encoded/tests/test_types_sample_processing.py b/src/encoded/tests/test_types_sample_processing.py
deleted file mode 100644
index d5dab38821..0000000000
--- a/src/encoded/tests/test_types_sample_processing.py
+++ /dev/null
@@ -1,34 +0,0 @@
-import pytest
-from unittest import mock
-from datetime import datetime
-from xml.etree.ElementTree import fromstring
-from encoded.types.family import *
-pytestmark = [pytest.mark.work, pytest.mark.schema]
-
-
-@pytest.fixture
-def sample_proc_fam(testapp, project, institution, fam):
-    data = {
-        'project': project['@id'],
-        'institution': institution['@id'],
-        'samples': [
-            "GAPSAPROBAND",
-            "GAPSAFATHER1",
-            "GAPSAMOTHER1",
-            "GAPSABROTHER",
-            "GAPSAGRANDPA",
-            "GAPSAGRANDMA",
-            "GAPSAHALFSIS",
-            "GAPSAUNCLE01",
-            "GAPSACOUSIN1"
-        ],
-        'families': [fam['@id']]
-    }
-    return testapp.post_json('/sample_processing', data).json['@graph'][0]
-
-
-def test_sample_processing_pedigree(testapp, sample_proc_fam):
-    """This is an end to end test for calculating relationships
-    Test for roles"""
-    print(sample_proc_fam['samples_pedigree'])
-    assert False
diff --git a/src/encoded/types/sample.py b/src/encoded/types/sample.py
index c3be71fcc1..e8d20f3490 100644
--- a/src/encoded/types/sample.py
+++ b/src/encoded/types/sample.py
@@ -142,22 +142,19 @@ def samples_pedigree(self, request, families=None, samples=None):
         """Filter Family Pedigree for samples to be used in QCs"""
         # If there are multiple families this will be problematic, return empty
         # We will need to know the context
         samples_pedigree = []
-        if not families or samples:
+        if not families or not samples:
             return samples_pedigree
         # this part will need work (ie disregard relations and just return parents)
         if len(families) != 1:
             return samples_pedigree
         family = families[0]
         fam_data = get_item_or_none(request, family, 'families', frame='embedded')
-        print(fam_data)
         if not fam_data:
             return samples_pedigree
-        print('B')
         members = fam_data.get('members', [])
         relations = fam_data.get('relationships', [])
         if not members:
             return samples_pedigree
-        print('C')
         for a_sample in samples:
             temp = {
                 "individual": "",
@@ -187,8 +184,8 @@ def samples_pedigree(self, request, families=None, samples=None):
             temp['sample_name'] = sample_info.get('bam_sample_id', '')
             if relation_infos:
                 relation_info = relation_infos[0]
-                temp['relationship'] = relation_info.get('association', '')
+                temp['relationship'] = relation_info.get('relationship', '')
                 if relation_info.get('association', ''):
-                    temp['association'] = relation_info.get('relationship', '')
+                    temp['association'] = relation_info.get('association', '')
             samples_pedigree.append(temp)
         return samples_pedigree

From 65cc26a6226c065f83a06d7d148583f98b167e51 Mon Sep 17 00:00:00 2001
From: Kent Pitman 
Date: Thu, 9 Jul 2020 06:30:49 -0400 Subject: [PATCH 52/54] Move to version 5 of coverage. My theory is that since version 4 is no longer supported by coveralls, it's the reason coveralls is failing on Travis. --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index fc559f87a1..5f81bf8ea0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,7 @@ [tool.poetry] # Note: Various modules refer to this system as "encoded", not "cgap-portal". name = "encoded" -version = "2.1.8" +version = "2.1.9" description = "Clinical Genomics Analysis Platform" authors = ["4DN-DCIC Team "] license = "MIT" @@ -115,7 +115,7 @@ xlwt = "1.2.0" "zope.sqlalchemy" = "^1.2" [tool.poetry.dev-dependencies] -coverage = "4.0.3" +coverage = ">=5.1" # flake8 = "3.7.8" flaky = "3.6.1" # flask only for moto[server] From 301652dd0e84f348aa541a8d8ce57b0274d8f29f Mon Sep 17 00:00:00 2001 From: Kent Pitman Date: Thu, 9 Jul 2020 06:39:08 -0400 Subject: [PATCH 53/54] Bring poetry.lock up to date with change to coverage in pyproject.toml. --- poetry.lock | 113 +++++++++++++++++++++++++++------------------------- 1 file changed, 59 insertions(+), 54 deletions(-) diff --git a/poetry.lock b/poetry.lock index 3daaea1a3b..2209b907bb 100644 --- a/poetry.lock +++ b/poetry.lock @@ -178,8 +178,11 @@ category = "dev" description = "Code coverage measurement for Python" name = "coverage" optional = false -python-versions = "*" -version = "4.0.3" +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, <4" +version = "5.2" + +[package.extras] +toml = ["toml"] [[package]] category = "dev" @@ -217,7 +220,7 @@ description = "Storage support for 4DN Data Portals." name = "dcicsnovault" optional = false python-versions = ">=3.6,<3.7" -version = "3.1.1" +version = "3.1.4" [package.dependencies] MarkupSafe = ">=0.23,<1" @@ -297,7 +300,7 @@ description = "A Python library for the Docker Engine API." 
name = "docker" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" -version = "4.2.1" +version = "4.2.2" [package.dependencies] requests = ">=2.14.2,<2.18.0 || >2.18.0" @@ -481,12 +484,12 @@ python-versions = "*" version = "2.7" [[package]] -category = "main" +category = "dev" description = "Read metadata from Python packages" name = "importlib-metadata" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,>=2.7" -version = "1.6.1" +version = "1.7.0" [package.dependencies] zipp = ">=0.5" @@ -502,13 +505,9 @@ marker = "python_version < \"3.7\"" name = "importlib-resources" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,>=2.7" -version = "2.0.1" +version = "3.0.0" [package.dependencies] -[package.dependencies.importlib-metadata] -python = "<3.8" -version = "*" - [package.dependencies.zipp] python = "<3.8" version = ">=0.4" @@ -660,7 +659,7 @@ description = "A network address manipulation library for Python" name = "netaddr" optional = false python-versions = "*" -version = "0.7.20" +version = "0.8.0" [package.dependencies] [package.dependencies.importlib-resources] @@ -1329,7 +1328,7 @@ description = "Fast, Extensible Progress Meter" name = "tqdm" optional = false python-versions = ">=2.6, !=3.0.*, !=3.1.*" -version = "4.46.1" +version = "4.47.0" [package.extras] dev = ["py-make (>=0.1.0)", "twine", "argopt", "pydoc-markdown"] @@ -1515,7 +1514,6 @@ version = "0.12.0" [[package]] category = "main" description = "Backport of pathlib-compatible object wrapper for zip files" -marker = "python_version < \"3.7\"" name = "zipp" optional = false python-versions = ">=3.6" @@ -1574,7 +1572,7 @@ transaction = ">=1.6.0" test = ["zope.testing"] [metadata] -content-hash = "12bdfa8e74ac35c52a896cb663b689aaf78822d03df367f5091aee5debffed2e" +content-hash = "c352edda07be648af79f61f67dd99db3d16e1bdcd53e559f3e59a5904ca7cc7a" python-versions = ">=3.6,<3.7" [metadata.files] @@ -1665,33 +1663,40 @@ colorama = [ {file = "colorama-0.3.3.tar.gz", hash = "sha256:eb21f2ba718fbf357afdfdf6f641ab393901c7ca8d9f37edd0bee4806ffa269c"}, ] coverage = [ - {file = "coverage-4.0.3-cp26-none-macosx_10_10_x86_64.whl", hash = "sha256:6c2fd127cd4e2decb0ab41fe3ac2948b87ad2ea0470e24b4be5f7e7fdfef8df3"}, - {file = "coverage-4.0.3-cp26-none-win32.whl", hash = "sha256:0a90afa6f5ea08889da9066dca3ce2ef85d47587e3f66ca06a4fa8d3a0053acc"}, - {file = "coverage-4.0.3-cp26-none-win_amd64.whl", hash = "sha256:93c50475f189cd226e9688b9897a0cd3c4c5d9c90b1733fa8f6445cfc0182c51"}, - {file = "coverage-4.0.3-cp27-none-macosx_10_10_x86_64.whl", hash = "sha256:94c1e66610807a7917d967ed6415b9d5fde7487ab2a07bb5e054567865ef6ef0"}, - {file = "coverage-4.0.3-cp27-none-win32.whl", hash = "sha256:76a73a48a308fb87a4417d630b0345d36166f489ef17ea5aa8e4596fb50a2296"}, - {file = "coverage-4.0.3-cp27-none-win_amd64.whl", hash = "sha256:beb96d32ce8cfa47ec6433d95a33e4afaa97c19ac1b4a47ea40a424fedfee7c2"}, - {file = "coverage-4.0.3-cp33-cp33m-macosx_10_10_x86_64.whl", hash = "sha256:50727512afe77e044c7d7f2fd4cd0fe62b06527f965b335a810d956748e0514d"}, - {file = "coverage-4.0.3-cp33-none-win32.whl", hash = "sha256:c00bac0f6b35b82ace069a6a0d88e8fd4cd18d964fc5e47329cd02b212397fbe"}, - {file = "coverage-4.0.3-cp33-none-win_amd64.whl", hash = "sha256:e813cba9ff0e3d37ad31dc127fac85d23f9a26d0461ef8042ac4539b2045e781"}, - {file = "coverage-4.0.3-cp34-cp34m-macosx_10_10_x86_64.whl", hash = "sha256:964f86394cb4d0fd2bb40ffcddca321acf4323b48d1aa5a93db8b743c8a00f79"}, - {file = 
"coverage-4.0.3-cp34-none-win32.whl", hash = "sha256:d079e36baceea9707fd50b268305654151011274494a33c608c075808920eda8"}, - {file = "coverage-4.0.3-cp34-none-win_amd64.whl", hash = "sha256:8e60e720cad3ee6b0a32f475ae4040552c5623870a9ca0d3d4263faa89a8d96b"}, - {file = "coverage-4.0.3-cp35-cp35m-macosx_10_10_x86_64.whl", hash = "sha256:99043494b28d6460035dd9410269cdb437ee460edc7f96f07ab45c57ba95e651"}, - {file = "coverage-4.0.3-cp35-none-win32.whl", hash = "sha256:6ed521ed3800d8f8911642b9b3c3891780a929db5e572c88c4713c1032530f82"}, - {file = "coverage-4.0.3-cp35-none-win_amd64.whl", hash = "sha256:af2f59ce312523c384a7826821cae0b95f320fee1751387abba4f00eed737166"}, - {file = "coverage-4.0.3.tar.gz", hash = "sha256:85b1275b6d7a61ccc8024a4e9a4c9e896394776edce1a5d075ec116f91925462"}, - {file = "coverage-4.0.3.win-amd64-py2.6.exe", hash = "sha256:7eaa0a33423476ed63317ee0a53cc07c0e36b5a390e3e95b95152e7eb6b3a6f6"}, - {file = "coverage-4.0.3.win-amd64-py2.7.exe", hash = "sha256:addf63b5e39d573c459c3930b25176146395c1dc1afce4710067bb5e6dc4ea58"}, - {file = "coverage-4.0.3.win-amd64-py3.3.exe", hash = "sha256:0ba6c4345e3c197f6a3ba924d155c402ad28c080ac0d79529493eb17582fbc41"}, - {file = "coverage-4.0.3.win-amd64-py3.4.exe", hash = "sha256:ee2338539157cfc35fb1d6757dd799126804df39393c4a6c5fe88b402c8c0ab4"}, - {file = "coverage-4.0.3.win-amd64-py3.5.exe", hash = "sha256:2be3748f45d2eb0259c3c93abccc15c10725ef715bf0817a4c0a1a1dad2abc6a"}, - {file = "coverage-4.0.3.win32-py2.6.exe", hash = "sha256:af6ed80340e5e1b89fa794f730ce7597651fbda3312e500002688b679c184ef9"}, - {file = "coverage-4.0.3.win32-py2.7.exe", hash = "sha256:d3188345f1c7161d701fd2ea9150f9bb6e2df890f3ddd6c0aea1f525e21d1544"}, - {file = "coverage-4.0.3.win32-py3.3.exe", hash = "sha256:e65c78bde155a734f0d624647c4d6e0f47fb4875355a0b95c37d537788737f4f"}, - {file = "coverage-4.0.3.win32-py3.4.exe", hash = "sha256:845d0f8a1765074b3256f07ddbce2969e5a5316dfd0eb3289137010d7677326a"}, - {file = "coverage-4.0.3.win32-py3.5.exe", hash = "sha256:e96c13a40df389ce8cbb5ec108e5fb834989d1bedff5d8846e5aa3d270a5f3b6"}, - {file = "coverage-4.0.3.zip", hash = "sha256:00d464797a236f654337181af72b4baea3d35d056ca480e45e9163bb5df496b8"}, + {file = "coverage-5.2-cp27-cp27m-macosx_10_13_intel.whl", hash = "sha256:d9ad0a988ae20face62520785ec3595a5e64f35a21762a57d115dae0b8fb894a"}, + {file = "coverage-5.2-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:4bb385a747e6ae8a65290b3df60d6c8a692a5599dc66c9fa3520e667886f2e10"}, + {file = "coverage-5.2-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:9702e2cb1c6dec01fb8e1a64c015817c0800a6eca287552c47a5ee0ebddccf62"}, + {file = "coverage-5.2-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:42fa45a29f1059eda4d3c7b509589cc0343cd6bbf083d6118216830cd1a51613"}, + {file = "coverage-5.2-cp27-cp27m-win32.whl", hash = "sha256:41d88736c42f4a22c494c32cc48a05828236e37c991bd9760f8923415e3169e4"}, + {file = "coverage-5.2-cp27-cp27m-win_amd64.whl", hash = "sha256:bbb387811f7a18bdc61a2ea3d102be0c7e239b0db9c83be7bfa50f095db5b92a"}, + {file = "coverage-5.2-cp27-cp27mu-manylinux1_i686.whl", hash = "sha256:3740b796015b889e46c260ff18b84683fa2e30f0f75a171fb10d2bf9fb91fc70"}, + {file = "coverage-5.2-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:ebf2431b2d457ae5217f3a1179533c456f3272ded16f8ed0b32961a6d90e38ee"}, + {file = "coverage-5.2-cp35-cp35m-macosx_10_13_x86_64.whl", hash = "sha256:d54d7ea74cc00482a2410d63bf10aa34ebe1c49ac50779652106c867f9986d6b"}, + {file = "coverage-5.2-cp35-cp35m-manylinux1_i686.whl", hash = 
"sha256:87bdc8135b8ee739840eee19b184804e5d57f518578ffc797f5afa2c3c297913"}, + {file = "coverage-5.2-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:ed9a21502e9223f563e071759f769c3d6a2e1ba5328c31e86830368e8d78bc9c"}, + {file = "coverage-5.2-cp35-cp35m-win32.whl", hash = "sha256:509294f3e76d3f26b35083973fbc952e01e1727656d979b11182f273f08aa80b"}, + {file = "coverage-5.2-cp35-cp35m-win_amd64.whl", hash = "sha256:ca63dae130a2e788f2b249200f01d7fa240f24da0596501d387a50e57aa7075e"}, + {file = "coverage-5.2-cp36-cp36m-macosx_10_13_x86_64.whl", hash = "sha256:5c74c5b6045969b07c9fb36b665c9cac84d6c174a809fc1b21bdc06c7836d9a0"}, + {file = "coverage-5.2-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:c32aa13cc3fe86b0f744dfe35a7f879ee33ac0a560684fef0f3e1580352b818f"}, + {file = "coverage-5.2-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:1e58fca3d9ec1a423f1b7f2aa34af4f733cbfa9020c8fe39ca451b6071237405"}, + {file = "coverage-5.2-cp36-cp36m-win32.whl", hash = "sha256:3b2c34690f613525672697910894b60d15800ac7e779fbd0fccf532486c1ba40"}, + {file = "coverage-5.2-cp36-cp36m-win_amd64.whl", hash = "sha256:a4d511012beb967a39580ba7d2549edf1e6865a33e5fe51e4dce550522b3ac0e"}, + {file = "coverage-5.2-cp37-cp37m-macosx_10_13_x86_64.whl", hash = "sha256:32ecee61a43be509b91a526819717d5e5650e009a8d5eda8631a59c721d5f3b6"}, + {file = "coverage-5.2-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:6f91b4492c5cde83bfe462f5b2b997cdf96a138f7c58b1140f05de5751623cf1"}, + {file = "coverage-5.2-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:bfcc811883699ed49afc58b1ed9f80428a18eb9166422bce3c31a53dba00fd1d"}, + {file = "coverage-5.2-cp37-cp37m-win32.whl", hash = "sha256:60a3d36297b65c7f78329b80120f72947140f45b5c7a017ea730f9112b40f2ec"}, + {file = "coverage-5.2-cp37-cp37m-win_amd64.whl", hash = "sha256:12eaccd86d9a373aea59869bc9cfa0ab6ba8b1477752110cb4c10d165474f703"}, + {file = "coverage-5.2-cp38-cp38-macosx_10_13_x86_64.whl", hash = "sha256:d82db1b9a92cb5c67661ca6616bdca6ff931deceebb98eecbd328812dab52032"}, + {file = "coverage-5.2-cp38-cp38-manylinux1_i686.whl", hash = "sha256:214eb2110217f2636a9329bc766507ab71a3a06a8ea30cdeebb47c24dce5972d"}, + {file = "coverage-5.2-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:8a3decd12e7934d0254939e2bf434bf04a5890c5bf91a982685021786a08087e"}, + {file = "coverage-5.2-cp38-cp38-win32.whl", hash = "sha256:1dcebae667b73fd4aa69237e6afb39abc2f27520f2358590c1b13dd90e32abe7"}, + {file = "coverage-5.2-cp38-cp38-win_amd64.whl", hash = "sha256:f50632ef2d749f541ca8e6c07c9928a37f87505ce3a9f20c8446ad310f1aa87b"}, + {file = "coverage-5.2-cp39-cp39-macosx_10_13_x86_64.whl", hash = "sha256:7403675df5e27745571aba1c957c7da2dacb537c21e14007ec3a417bf31f7f3d"}, + {file = "coverage-5.2-cp39-cp39-manylinux1_i686.whl", hash = "sha256:0fc4e0d91350d6f43ef6a61f64a48e917637e1dcfcba4b4b7d543c628ef82c2d"}, + {file = "coverage-5.2-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:25fe74b5b2f1b4abb11e103bb7984daca8f8292683957d0738cd692f6a7cc64c"}, + {file = "coverage-5.2-cp39-cp39-win32.whl", hash = "sha256:d67599521dff98ec8c34cd9652cbcfe16ed076a2209625fca9dc7419b6370e5c"}, + {file = "coverage-5.2-cp39-cp39-win_amd64.whl", hash = "sha256:10f2a618a6e75adf64329f828a6a5b40244c1c50f5ef4ce4109e904e69c71bd2"}, + {file = "coverage-5.2.tar.gz", hash = "sha256:1874bdc943654ba46d28f179c1846f5710eda3aeb265ff029e0ac2b52daae404"}, ] cryptography = [ {file = "cryptography-2.9.2-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:daf54a4b07d67ad437ff239c8a4080cfd1cc7213df57d33c97de7b4738048d5e"}, @@ -1718,16 +1723,16 @@ dcicpyvcf = [ 
{file = "dcicpyvcf-1.0.0.tar.gz", hash = "sha256:c5bf8d585002ab3b95d13a47803376b456b931865e4189c38a18cca47b108449"}, ] dcicsnovault = [ - {file = "dcicsnovault-3.1.1-py3-none-any.whl", hash = "sha256:0baf3b4d917969a943c6be69ed29de90d0aa4afc2efd8fc71783551ec13e6e3b"}, - {file = "dcicsnovault-3.1.1.tar.gz", hash = "sha256:6f41cea9c1cc5f86a1324da3923f6aa461626070419d514cb856b1e08e80dc30"}, + {file = "dcicsnovault-3.1.4-py3-none-any.whl", hash = "sha256:3ad78b95255f4a409fb7e29d1933fee113b03c2a6abf65e62b821a0e6ac1666e"}, + {file = "dcicsnovault-3.1.4.tar.gz", hash = "sha256:5efc2ea37d0fc78411817925d63c383e322680f264629c2060533a50721a9bd4"}, ] dcicutils = [ {file = "dcicutils-0.31.1-py3-none-any.whl", hash = "sha256:9f2728d6ce4fe9b7a0cabdf5a46a20e34039147192854622881c9a46c3707fd1"}, {file = "dcicutils-0.31.1.tar.gz", hash = "sha256:d64569962ef43d3091f9d3cb0e81312ee1fc9e529e23d699fd2874429b043686"}, ] docker = [ - {file = "docker-4.2.1-py2.py3-none-any.whl", hash = "sha256:672f51aead26d90d1cfce84a87e6f71fca401bbc2a6287be18603583620a28ba"}, - {file = "docker-4.2.1.tar.gz", hash = "sha256:380a20d38fbfaa872e96ee4d0d23ad9beb0f9ed57ff1c30653cbeb0c9c0964f2"}, + {file = "docker-4.2.2-py2.py3-none-any.whl", hash = "sha256:03a46400c4080cb6f7aa997f881ddd84fef855499ece219d75fbdb53289c17ab"}, + {file = "docker-4.2.2.tar.gz", hash = "sha256:26eebadce7e298f55b76a88c4f8802476c5eaddbdbe38dbc6cce8781c47c9b54"}, ] docutils = [ {file = "docutils-0.12-py3-none-any.whl", hash = "sha256:dcebd4928112631626f4c4d0df59787c748404e66dda952110030ea883d3b8cd"}, @@ -1788,12 +1793,12 @@ idna = [ {file = "idna-2.7.tar.gz", hash = "sha256:684a38a6f903c1d71d6d5fac066b58d7768af4de2b832e426ec79c30daa94a16"}, ] importlib-metadata = [ - {file = "importlib_metadata-1.6.1-py2.py3-none-any.whl", hash = "sha256:15ec6c0fd909e893e3a08b3a7c76ecb149122fb14b7efe1199ddd4c7c57ea958"}, - {file = "importlib_metadata-1.6.1.tar.gz", hash = "sha256:0505dd08068cfec00f53a74a0ad927676d7757da81b7436a6eefe4c7cf75c545"}, + {file = "importlib_metadata-1.7.0-py2.py3-none-any.whl", hash = "sha256:dc15b2969b4ce36305c51eebe62d418ac7791e9a157911d58bfb1f9ccd8e2070"}, + {file = "importlib_metadata-1.7.0.tar.gz", hash = "sha256:90bb658cdbbf6d1735b6341ce708fc7024a3e14e99ffdc5783edea9f9b077f83"}, ] importlib-resources = [ - {file = "importlib_resources-2.0.1-py2.py3-none-any.whl", hash = "sha256:83985739b3a6679702f9ab33f0ad016ad564664d0568a31ac14d7c64789453e6"}, - {file = "importlib_resources-2.0.1.tar.gz", hash = "sha256:f5edfcece1cc9435d0979c19e08739521f4cf1aa1adaf6e571f732df6f568962"}, + {file = "importlib_resources-3.0.0-py2.py3-none-any.whl", hash = "sha256:d028f66b66c0d5732dae86ba4276999855e162a749c92620a38c1d779ed138a7"}, + {file = "importlib_resources-3.0.0.tar.gz", hash = "sha256:19f745a6eca188b490b1428c8d1d4a0d2368759f32370ea8fb89cad2ab1106c3"}, ] isodate = [ {file = "isodate-0.5.4.tar.gz", hash = "sha256:42105c41d037246dc1987e36d96f3752ffd5c0c24834dd12e4fdbe1e79544e31"}, @@ -1843,8 +1848,8 @@ moto = [ {file = "moto-1.3.7.tar.gz", hash = "sha256:129de2e04cb250d9f8b2c722ec152ed1b5426ef179b4ebb03e9ec36e6eb3fcc5"}, ] netaddr = [ - {file = "netaddr-0.7.20-py2.py3-none-any.whl", hash = "sha256:7a9c8f58d048b820df1882439bb04fb2de13c03ec8af3112a1099822b0a2a4b8"}, - {file = "netaddr-0.7.20.tar.gz", hash = "sha256:d09252e5aec3913815d77eb8e8ea8fa6eb33521253e52f977f6abaa964776f3e"}, + {file = "netaddr-0.8.0-py2.py3-none-any.whl", hash = "sha256:9666d0232c32d2656e5e5f8d735f58fd6c7457ce52fc21c98d45f2af78f990ac"}, + {file = "netaddr-0.8.0.tar.gz", hash = 
"sha256:d6cc57c7a07b1d9d2e917aa8b36ae8ce61c35ba3fcd1b83ca31c5a0ee2b5a243"}, ] passlib = [ {file = "passlib-1.6.5-py2.py3-none-any.whl", hash = "sha256:ad631a58dc8abeb0f48016c13f4b3b0f3a7b1045a8cb3c61dd15e2d95b45c472"}, @@ -2247,8 +2252,8 @@ toml = [ {file = "toml-0.10.1.tar.gz", hash = "sha256:926b612be1e5ce0634a2ca03470f95169cf16f939018233a670519cb4ac58b0f"}, ] tqdm = [ - {file = "tqdm-4.46.1-py2.py3-none-any.whl", hash = "sha256:07c06493f1403c1380b630ae3dcbe5ae62abcf369a93bbc052502279f189ab8c"}, - {file = "tqdm-4.46.1.tar.gz", hash = "sha256:cd140979c2bebd2311dfb14781d8f19bd5a9debb92dcab9f6ef899c987fcf71f"}, + {file = "tqdm-4.47.0-py2.py3-none-any.whl", hash = "sha256:7810e627bcf9d983a99d9ff8a0c09674400fd2927eddabeadf153c14a2ec8656"}, + {file = "tqdm-4.47.0.tar.gz", hash = "sha256:63ef7a6d3eb39f80d6b36e4867566b3d8e5f1fe3d6cb50c5e9ede2b3198ba7b7"}, ] transaction = [ {file = "transaction-2.4.0-py2.py3-none-any.whl", hash = "sha256:b96a5e9aaa73f905759bc9ccf0021bf4864c01ac36666e0d28395e871f6d584a"}, From 702465fd80a05cb64e165b43f7fec7096021c781 Mon Sep 17 00:00:00 2001 From: Kent Pitman Date: Fri, 10 Jul 2020 01:57:23 -0400 Subject: [PATCH 54/54] Move 'pip' depedencies from .travis.yml to pyproject.toml. --- .travis.yml | 2 -- poetry.lock | 58 ++++++++++++++++++++++++++++++++++++++++++++++---- pyproject.toml | 6 ++++-- 3 files changed, 58 insertions(+), 8 deletions(-) diff --git a/.travis.yml b/.travis.yml index 99a5f65774..fe534e18ee 100644 --- a/.travis.yml +++ b/.travis.yml @@ -50,8 +50,6 @@ before_install: install: - pip install --upgrade pip - pip install poetry -- pip install coveralls -- pip install codacy-coverage - poetry install - make npm-setup before_script: diff --git a/poetry.lock b/poetry.lock index 2209b907bb..deaae037c6 100644 --- a/poetry.lock +++ b/poetry.lock @@ -165,6 +165,21 @@ optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" version = "7.1.2" +[[package]] +category = "dev" +description = "Codacy coverage reporter for Python" +name = "codacy-coverage" +optional = false +python-versions = "*" +version = "1.3.11" + +[package.dependencies] +requests = ">=2.9.1" + +[package.extras] +dev = ["check-manifest"] +test = ["coverage", "nosetests"] + [[package]] category = "main" description = "Cross-platform colored terminal text." @@ -184,6 +199,22 @@ version = "5.2" [package.extras] toml = ["toml"] +[[package]] +category = "dev" +description = "Show coverage stats online via coveralls.io" +name = "coveralls" +optional = false +python-versions = ">= 3.5" +version = "2.1.1" + +[package.dependencies] +coverage = ">=4.1,<6.0" +docopt = ">=0.6.1" +requests = ">=1.0.0" + +[package.extras] +yaml = ["PyYAML (>=3.10)"] + [[package]] category = "dev" description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers." 
@@ -279,7 +310,7 @@ description = "Utility package for interacting with the 4DN Data Portal and othe name = "dcicutils" optional = false python-versions = ">=3.4,<3.8" -version = "0.31.1" +version = "0.32.2" [package.dependencies] aws-requests-auth = ">=0.4.2,<1" @@ -315,6 +346,14 @@ version = "223" ssh = ["paramiko (>=2.4.2)"] tls = ["pyOpenSSL (>=17.5.0)", "cryptography (>=1.3.4)", "idna (>=2.0.0)"] +[[package]] +category = "dev" +description = "Pythonic argument parser, that will make you smile" +name = "docopt" +optional = false +python-versions = "*" +version = "0.6.2" + [[package]] category = "main" description = "Docutils -- Python Documentation Utilities" @@ -1572,7 +1611,7 @@ transaction = ">=1.6.0" test = ["zope.testing"] [metadata] -content-hash = "c352edda07be648af79f61f67dd99db3d16e1bdcd53e559f3e59a5904ca7cc7a" +content-hash = "c98233211908adc53b5166ef693ff6c53db02b8b736c1e6610e93b3bc717b04b" python-versions = ">=3.6,<3.7" [metadata.files] @@ -1659,6 +1698,10 @@ click = [ {file = "click-7.1.2-py2.py3-none-any.whl", hash = "sha256:dacca89f4bfadd5de3d7489b7c8a566eee0d3676333fbb50030263894c38c0dc"}, {file = "click-7.1.2.tar.gz", hash = "sha256:d2b5255c7c6349bc1bd1e59e08cd12acbbd63ce649f2588755783aa94dfb6b1a"}, ] +codacy-coverage = [ + {file = "codacy-coverage-1.3.11.tar.gz", hash = "sha256:b94651934745c638a980ad8d67494077e60f71e19e29aad1c275b66e0a070cbc"}, + {file = "codacy_coverage-1.3.11-py2.py3-none-any.whl", hash = "sha256:d8a1ce56b0dd156d6b1de14fa6217d32ec86097902f08a17ff2f95ba27264474"}, +] colorama = [ {file = "colorama-0.3.3.tar.gz", hash = "sha256:eb21f2ba718fbf357afdfdf6f641ab393901c7ca8d9f37edd0bee4806ffa269c"}, ] @@ -1698,6 +1741,10 @@ coverage = [ {file = "coverage-5.2-cp39-cp39-win_amd64.whl", hash = "sha256:10f2a618a6e75adf64329f828a6a5b40244c1c50f5ef4ce4109e904e69c71bd2"}, {file = "coverage-5.2.tar.gz", hash = "sha256:1874bdc943654ba46d28f179c1846f5710eda3aeb265ff029e0ac2b52daae404"}, ] +coveralls = [ + {file = "coveralls-2.1.1-py2.py3-none-any.whl", hash = "sha256:3726d35c0f93a28631a003880e2aa6cc93c401d62bc6919c5cb497217ba30c55"}, + {file = "coveralls-2.1.1.tar.gz", hash = "sha256:afe359cd5b350e1b3895372bda32af8f0260638c7c4a31a5c0f15aa6a96f40d9"}, +] cryptography = [ {file = "cryptography-2.9.2-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:daf54a4b07d67ad437ff239c8a4080cfd1cc7213df57d33c97de7b4738048d5e"}, {file = "cryptography-2.9.2-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:3b3eba865ea2754738616f87292b7f29448aec342a7c720956f8083d252bf28b"}, @@ -1727,13 +1774,16 @@ dcicsnovault = [ {file = "dcicsnovault-3.1.4.tar.gz", hash = "sha256:5efc2ea37d0fc78411817925d63c383e322680f264629c2060533a50721a9bd4"}, ] dcicutils = [ - {file = "dcicutils-0.31.1-py3-none-any.whl", hash = "sha256:9f2728d6ce4fe9b7a0cabdf5a46a20e34039147192854622881c9a46c3707fd1"}, - {file = "dcicutils-0.31.1.tar.gz", hash = "sha256:d64569962ef43d3091f9d3cb0e81312ee1fc9e529e23d699fd2874429b043686"}, + {file = "dcicutils-0.32.2-py3-none-any.whl", hash = "sha256:7403d422a12160162a9691aff2af04f6f37869fc40252f2f61cd92a10076a4b2"}, + {file = "dcicutils-0.32.2.tar.gz", hash = "sha256:888feae7870294fe12979fbe567b653874273d149bcc31ecef5817300b01c0f1"}, ] docker = [ {file = "docker-4.2.2-py2.py3-none-any.whl", hash = "sha256:03a46400c4080cb6f7aa997f881ddd84fef855499ece219d75fbdb53289c17ab"}, {file = "docker-4.2.2.tar.gz", hash = "sha256:26eebadce7e298f55b76a88c4f8802476c5eaddbdbe38dbc6cce8781c47c9b54"}, ] +docopt = [ + {file = "docopt-0.6.2.tar.gz", hash = 
"sha256:49b3a825280bd66b3aa83585ef59c4a8c82f2c8a522dbe754a8bc8d08c85c491"}, +] docutils = [ {file = "docutils-0.12-py3-none-any.whl", hash = "sha256:dcebd4928112631626f4c4d0df59787c748404e66dda952110030ea883d3b8cd"}, {file = "docutils-0.12.tar.gz", hash = "sha256:c7db717810ab6965f66c8cf0398a98c9d8df982da39b4cd7f162911eb89596fa"}, diff --git a/pyproject.toml b/pyproject.toml index 5f81bf8ea0..f792af5b14 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,7 @@ [tool.poetry] # Note: Various modules refer to this system as "encoded", not "cgap-portal". name = "encoded" -version = "2.1.9" +version = "2.1.10" description = "Clinical Genomics Analysis Platform" authors = ["4DN-DCIC Team "] license = "MIT" @@ -115,7 +115,9 @@ xlwt = "1.2.0" "zope.sqlalchemy" = "^1.2" [tool.poetry.dev-dependencies] -coverage = ">=5.1" +coverage = ">=5.2" +codacy-coverage = ">=1.3.11" +coveralls = ">=2.1.1" # flake8 = "3.7.8" flaky = "3.6.1" # flask only for moto[server]