Skip to content

Commit 8b934fe

Browse files
authored
Merge pull request #163 from BIH-CEI/121-datafield-constructor-change-value_set
121 datafield constructor change value set
2 parents 5263a6b + 74e7997 commit 8b934fe

File tree

6 files changed

+66
-40
lines changed

6 files changed

+66
-40
lines changed

notebooks/erdri_cds_definition_in_code.ipynb

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -522,37 +522,37 @@
522522
" fields=(\n",
523523
" # 1. Pseudonym\n",
524524
" # 1.1. Pseudonym\n",
525-
" DataField(section=\"1. Pseudonym\", ordinal=\"1.1\", name=\"Pseudonym\", value_set=vs_1_1, required=True),\n",
525+
" DataField(section=\"1. Pseudonym\", ordinal=\"1.1\", name=\"Pseudonym\", viable_values=vs_1_1, required=True),\n",
526526
"\n",
527527
" # 2. Personal information\n",
528528
" # 2.1. Date of Birth\n",
529-
" DataField(section=\"2. Personal information\", ordinal=\"2.1\", name=\"Date of Birth\", value_set=vs_2_1, required=True),\n",
529+
" DataField(section=\"2. Personal information\", ordinal=\"2.1\", name=\"Date of Birth\", viable_values=vs_2_1, required=True),\n",
530530
" # 2.2. Sex\n",
531-
" DataField(section=\"2. Personal information\", ordinal=\"2.2\", name=\"Sex\", value_set=vs_2_2, required=True),\n",
531+
" DataField(section=\"2. Personal information\", ordinal=\"2.2\", name=\"Sex\", viable_values=vs_2_2, required=True),\n",
532532
" \n",
533533
" # 3. Patient Status\n",
534534
" # 3.1. Patient's status\n",
535-
" DataField(section=\"3. Patient Status\", ordinal=\"3.1\", name=\"Patient's status\", value_set=vs_3_1, required=True),\n",
535+
" DataField(section=\"3. Patient Status\", ordinal=\"3.1\", name=\"Patient's status\", viable_values=vs_3_1, required=True),\n",
536536
" # 3.2. Date of death\n",
537-
" DataField(section=\"3. Patient Status\", ordinal=\"3.2\", name=\"Date of death\", value_set=vs_3_2, required=False),\n",
537+
" DataField(section=\"3. Patient Status\", ordinal=\"3.2\", name=\"Date of death\", viable_values=vs_3_2, required=False),\n",
538538
" \n",
539539
" # 4. Care Pathway\n",
540540
" # 4.1. First contact with specialised centre\n",
541-
" DataField(section=\"4. Care Pathway\", ordinal=\"4.1\", name=\"First contact with specialised centre\", value_set=vs_4_1),\n",
541+
" DataField(section=\"4. Care Pathway\", ordinal=\"4.1\", name=\"First contact with specialised centre\", viable_values=vs_4_1),\n",
542542
" \n",
543543
" # 5. Disease history\n",
544544
" # 5.1. Age at onset\n",
545-
" DataField(section=\"5. Disease history\", ordinal=\"5.1\", name=\"Age at onset\", value_set=vs_5_1),\n",
545+
" DataField(section=\"5. Disease history\", ordinal=\"5.1\", name=\"Age at onset\", viable_values=vs_5_1),\n",
546546
" # 5.2. Age at diagnosis\n",
547-
" DataField(section=\"5. Disease history\", ordinal=\"5.2\", name=\"Age at diagnosis\", value_set=vs_5_2),\n",
547+
" DataField(section=\"5. Disease history\", ordinal=\"5.2\", name=\"Age at diagnosis\", viable_values=vs_5_2),\n",
548548
" \n",
549549
" # 6. Diagnosis\n",
550550
" # 6.1. Diagnosis of the rare disease\n",
551-
" DataField(section=\"6. Diagnosis\", ordinal=\"6.1\", name=\"Diagnosis of the rare disease\", value_set=vs_6_1),\n",
551+
" DataField(section=\"6. Diagnosis\", ordinal=\"6.1\", name=\"Diagnosis of the rare disease\", viable_values=vs_6_1),\n",
552552
" # 6.2. Genetic diagnosis\n",
553-
" DataField(section=\"6. Diagnosis\", ordinal=\"6.2\", name=\"Genetic diagnosis\", value_set=vs_6_2),\n",
553+
" DataField(section=\"6. Diagnosis\", ordinal=\"6.2\", name=\"Genetic diagnosis\", viable_values=vs_6_2),\n",
554554
" # 6.3. Undiagnosed case\n",
555-
" DataField(section=\"6. Diagnosis\", ordinal=\"6.3\", name=\"Undiagnosed case\", value_set=vs_6_3),\n",
555+
" DataField(section=\"6. Diagnosis\", ordinal=\"6.3\", name=\"Undiagnosed case\", viable_values=vs_6_3),\n",
556556
" )\n",
557557
")"
558558
],

notebooks/erdri_cds_from_file.ipynb

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -249,7 +249,7 @@
249249
" # left side: fields of DataField class, right side: names of columns in data model definition file\n",
250250
" DataField.name.__name__: 'data_field_name',\n",
251251
" DataField.section.__name__: 'data_model_section',\n",
252-
" DataField.value_set.__name__: 'data_types',\n",
252+
" DataField.viable_values.__name__: 'data_types',\n",
253253
" DataField.required.__name__: 'required',\n",
254254
" DataField.specification.__name__: 'comment',\n",
255255
" DataField.ordinal.__name__: '' # if left empty such as here, the program will try to parse the ordinal from the file or leave it empty otherwise\n",
@@ -334,7 +334,8 @@
334334
"metadata": {
335335
"collapsed": false
336336
},
337-
"id": "ced8854ebd3d48fb"
337+
"id": "ced8854ebd3d48fb",
338+
"execution_count": null
338339
}
339340
],
340341
"metadata": {

src/phenopacket_mapper/data_standards/data_model.py

Lines changed: 29 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -44,46 +44,54 @@ class DataField:
4444
If the value set consists of a single type, that type can be passed directly as the `specification` parameter.
4545
4646
e.g.:
47-
>>> DataField(name="Field 1", value_set=int)
48-
DataField(name='Field 1', value_set=ValueSet(elements=[<class 'int'>], name='', description=''), id='field_1', description='', section='', required=True, specification='', ordinal='')
47+
>>> DataField(name="Field 1", specification=int)
48+
DataField(name='Field 1', specification=ValueSet(elements=[<class 'int'>], name='', description=''), id='field_1', description='', section='', required=True, ordinal='')
4949
5050
:ivar name: Name of the field
51-
:ivar value_set: Value set of the field, if the value set is only one type, can also pass that type directly
51+
:ivar specification: Value set of the field; a single type or a list of types may also be passed directly and is wrapped in a `ValueSet`
5252
:ivar id: The identifier of the field, adhering to the naming rules stated above
5353
:ivar description: Description of the field
5454
:ivar section: Section of the field (Only applicable if the data model is divided into sections)
5555
:ivar required: Required flag of the field
56-
:ivar specification: Text specification of the field (a description of the value set and field)
5756
:ivar ordinal: Ordinal of the field (E.g. 1.1, 1.2, 2.1, etc.)
5857
"""
5958
name: str = field()
60-
value_set: Union[ValueSet, type] = field()
59+
specification: Union[ValueSet, type, List[type]] = field()
6160
id: str = field(default=None)
6261
description: str = field(default='')
6362
section: str = field(default='')
6463
required: bool = field(default=True)
65-
specification: str = field(default='')
6664
ordinal: str = field(default='')
6765

6866
def __post_init__(self):
6967
if not self.id:
7068
from phenopacket_mapper.utils import str_to_valid_id
7169
object.__setattr__(self, 'id', str_to_valid_id(self.name))
7270

73-
if isinstance(self.value_set, type):
74-
object.__setattr__(self, 'value_set', ValueSet(elements=[self.value_set]))
71+
if isinstance(self.specification, type):
72+
object.__setattr__(self, 'specification', ValueSet(elements=[self.specification]))
73+
if isinstance(self.specification, list):
74+
if all(isinstance(e, type) for e in self.specification):
75+
object.__setattr__(self, 'specification', ValueSet(elements=self.specification))
7576

7677
def __str__(self):
7778
ret = "DataField(\n"
7879
ret += f"\t\tid: {self.id},\n"
7980
ret += f"\t\tsection: {self.section},\n"
8081
ret += f"\t\tordinal, name: ({self.ordinal}, {self.name}),\n"
81-
ret += f"\t\tvalue_set: {self.value_set}, required: {self.required},\n"
82+
ret += f"\t\tvalue_set: {self.specification}, required: {self.required},\n"
8283
ret += f"\t\tspecification: {self.specification}\n"
8384
ret += "\t)"
8485
return ret
8586

8687

88+
def __eq__(self, other):
89+
if not isinstance(other, DataField):
90+
return False
91+
return (self.id == other.id and self.specification == other.specification
92+
and self.required == other.required)
93+
94+
8795
@dataclass(slots=True)
8896
class DataFieldValue:
8997
"""This class defines the value of a `DataField` in a `DataModelInstance`
@@ -109,13 +117,13 @@ def validate(self) -> bool:
109117
if self.field.required and self.value is None: # no value
110118
warnings.warn(f"Field {self.field.name} is required but has no value")
111119
return False
112-
elif self.value is not None and self.field.value_set:
113-
if Any in self.field.value_set: # value set allows any
120+
elif self.value is not None and self.field.specification:
121+
if Any in self.field.specification: # value set allows any
114122
return True
115-
elif self.value in self.field.value_set: # raw value (likely a primitive) is in the value set
123+
elif self.value in self.field.specification: # raw value (likely a primitive) is in the value set
116124
return True
117125
else: # check if the value matches one of the types in the value set
118-
for e in self.field.value_set:
126+
for e in self.field.specification:
119127
if isinstance(e, type):
120128
cur_type = e
121129
if cur_type is type(self.value):
@@ -142,9 +150,9 @@ class DataModel:
142150
be accessed using the `id` as an attribute of the `DataModel` object. E.g.: `data_model.date_of_birth`. This is
143151
useful in the data reading and mapping processes.
144152
145-
>>> data_model = DataModel("Test data model", (DataField(name="Field 1", value_set=ValueSet()),))
153+
>>> data_model = DataModel("Test data model", (DataField(name="Field 1", specification=ValueSet()),))
146154
>>> data_model.field_1
147-
DataField(name='Field 1', value_set=ValueSet(elements=[], name='', description=''), id='field_1', description='', section='', required=True, specification='', ordinal='')
155+
DataField(name='Field 1', specification=ValueSet(elements=[], name='', description=''), id='field_1', description='', section='', required=True, ordinal='')
148156
149157
:ivar data_model_name: Name of the data model
150158
:ivar fields: List of `DataField` objects
@@ -245,7 +253,7 @@ def from_file(
245253
DataField.name.__name__: 'data_field_name',
246254
DataField.section.__name__: 'data_model_section',
247255
DataField.description.__name__: 'description',
248-
DataField.value_set.__name__: 'value_set',
256+
DataField.specification.__name__: 'value_set',
249257
DataField.required.__name__: 'required',
250258
DataField.specification.__name__: 'specification',
251259
DataField.ordinal.__name__: 'ordinal'
@@ -474,3 +482,8 @@ def head(self, n: int = 5):
474482
return self.data_frame.head(n)
475483
else:
476484
warnings.warn("No data frame object available for this dataset")
485+
486+
487+
if __name__ == "__main__":
488+
df = DataField(name="Field 1", specification=int)
489+
print(df.specification == ValueSet([int]))

src/phenopacket_mapper/pipeline/input.py

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,8 @@ def read_data_model(
2424
DataField.name.__name__: 'data_field_name',
2525
DataField.section.__name__: 'data_model_section',
2626
DataField.description.__name__: 'description',
27-
DataField.value_set.__name__: 'value_set',
27+
DataField.specification.__name__: 'value_set',
2828
DataField.required.__name__: 'required',
29-
DataField.specification.__name__: 'specification',
3029
DataField.ordinal.__name__: 'ordinal'
3130
}),
3231
parse_value_sets: bool = False,
@@ -98,23 +97,21 @@ def remove_line_breaks_if_not_none(value):
9897
for i in range(len(df)):
9998
data_field_name = loc_default(df, row_index=i, column_name=column_names.get(DataField.name.__name__, ''))
10099
section = loc_default(df, row_index=i, column_name=column_names.get(DataField.section.__name__, ''))
101-
value_set = loc_default(df, row_index=i, column_name=column_names.get(DataField.value_set.__name__, ''))
100+
value_set = loc_default(df, row_index=i, column_name=column_names.get(DataField.specification.__name__, ''))
102101
description = loc_default(df, row_index=i, column_name=column_names.get(DataField.description.__name__, ''))
103102
required = bool(loc_default(df, row_index=i, column_name=column_names.get(DataField.required.__name__, '')))
104-
specification = loc_default(df, row_index=i, column_name=column_names.get(DataField.specification.__name__, ''))
105103
ordinal = loc_default(df, row_index=i, column_name=column_names.get(DataField.ordinal.__name__, ''))
106104

107105
if remove_line_breaks:
108106
data_field_name = remove_line_breaks_if_not_none(data_field_name)
109107
section = remove_line_breaks_if_not_none(section)
110108
description = remove_line_breaks_if_not_none(description)
111-
specification = remove_line_breaks_if_not_none(specification)
112109

113110
if parse_ordinals:
114111
ordinal, data_field_name = parse_ordinal(data_field_name)
115112

116113
if parse_value_sets:
117-
if not column_names.get(DataField.value_set.__name__, ''):
114+
if not column_names.get(DataField.specification.__name__, ''):
118115
raise ValueError("Value set column name must be provided to parse value sets.")
119116

120117
value_set = parsing.parse_value_set(
@@ -127,10 +124,9 @@ def remove_line_breaks_if_not_none(value):
127124
DataField(
128125
name=data_field_name,
129126
section=section,
130-
value_set=value_set,
127+
specification=value_set,
131128
description=description,
132129
required=required,
133-
specification=specification,
134130
ordinal=ordinal
135131
),
136132
)
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
import pytest
2+
3+
from phenopacket_mapper.data_standards import ValueSet
4+
from phenopacket_mapper.data_standards import DataField
5+
6+
@pytest.fixture
7+
def name():
8+
return "name"
9+
10+
@pytest.mark.parametrize("viable_values, expected", [
11+
(str, ValueSet([str])),
12+
([str, int], ValueSet([str, int])),
13+
(ValueSet([str, int]), ValueSet([str, int])),
14+
])
15+
def test_data_field_constructor(name, viable_values, expected):
16+
assert DataField(name=name, specification=viable_values).specification.elements == expected.elements

tests/data_standards/data_models/test_data_model.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,9 @@
77
@pytest.fixture
88
def data_model():
99
return DataModel(resources=[], data_model_name='test_data_model', fields=(
10-
DataField(name='Field 0', value_set=ValueSet()),
11-
DataField(name='Date of Birth', value_set=ValueSet()),
12-
DataField(name='%^&#12pseudonym!2', value_set=ValueSet()),
10+
DataField(name='Field 0', specification=ValueSet()),
11+
DataField(name='Date of Birth', specification=ValueSet()),
12+
DataField(name='%^&#12pseudonym!2', specification=ValueSet()),
1313
))
1414

1515

0 commit comments

Comments
 (0)