Skip to content

Commit

Permalink
renamed value set and viable values to specification
Browse files Browse the repository at this point in the history
  • Loading branch information
frehburg committed Oct 8, 2024
1 parent b5e2bdc commit 6c48933
Show file tree
Hide file tree
Showing 4 changed files with 35 additions and 29 deletions.
44 changes: 27 additions & 17 deletions src/phenopacket_mapper/data_standards/data_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,49 +44,54 @@ class DataField:
If the `specification` consists of a single type, that type can be passed directly as the `specification` parameter.
e.g.:
>>> DataField(name="Field 1", viable_values=int)
>>> DataField(name="Field 1", specification=int)
DataField(name='Field 1', value_set=ValueSet(elements=[<class 'int'>], name='', description=''), id='field_1', description='', section='', required=True, specification='', ordinal='')
:ivar name: Name of the field
:ivar viable_values: Value set of the field, if the value set is only one type, can also pass that type directly
:ivar specification: Value set of the field, if the value set is only one type, can also pass that type directly
:ivar id: The identifier of the field, adhering to the naming rules stated above
:ivar description: Description of the field
:ivar section: Section of the field (Only applicable if the data model is divided into sections)
:ivar required: Required flag of the field
:ivar specification: Text specification of the field (a description of the value set and field)
:ivar ordinal: Ordinal of the field (E.g. 1.1, 1.2, 2.1, etc.)
"""
name: str = field()
viable_values: Union[ValueSet, type, List[type]] = field()
specification: Union[ValueSet, type, List[type]] = field()
id: str = field(default=None)
description: str = field(default='')
section: str = field(default='')
required: bool = field(default=True)
specification: str = field(default='')
ordinal: str = field(default='')

def __post_init__(self):
if not self.id:
from phenopacket_mapper.utils import str_to_valid_id
object.__setattr__(self, 'id', str_to_valid_id(self.name))

if isinstance(self.viable_values, type):
object.__setattr__(self, 'viable_values', ValueSet(elements=[self.viable_values]))
if isinstance(self.viable_values, list):
if all(isinstance(e, type) for e in self.viable_values):
object.__setattr__(self, 'viable_values', ValueSet(elements=self.viable_values))
if isinstance(self.specification, type):
object.__setattr__(self, 'specification', ValueSet(elements=[self.specification]))
if isinstance(self.specification, list):
if all(isinstance(e, type) for e in self.specification):
object.__setattr__(self, 'specification', ValueSet(elements=self.specification))

def __str__(self):
ret = "DataField(\n"
ret += f"\t\tid: {self.id},\n"
ret += f"\t\tsection: {self.section},\n"
ret += f"\t\tordinal, name: ({self.ordinal}, {self.name}),\n"
ret += f"\t\tvalue_set: {self.viable_values}, required: {self.required},\n"
ret += f"\t\tvalue_set: {self.specification}, required: {self.required},\n"
ret += f"\t\tspecification: {self.specification}\n"
ret += "\t)"
return ret


def __eq__(self, other):
    """Compare this field with another `DataField`.

    Two fields are considered equal when their `id`, `specification` and
    `required` attributes all match. The other attributes (name, description,
    section, ordinal) are not part of the comparison.

    :param other: Object to compare against
    :return: `True` or `False` for another `DataField`; `NotImplemented` for
        any other type, so Python can try the reflected comparison before
        falling back to `False`
    """
    # NOTE(review): the class decorator is not visible from here. If a
    # `__hash__` is generated from all fields, confirm that it stays
    # consistent with this narrower definition of equality.
    if not isinstance(other, DataField):
        return NotImplemented
    return (
        self.id == other.id
        and self.specification == other.specification
        and self.required == other.required
    )


@dataclass(slots=True)
class DataFieldValue:
"""This class defines the value of a `DataField` in a `DataModelInstance`
Expand All @@ -112,13 +117,13 @@ def validate(self) -> bool:
if self.field.required and self.value is None: # no value
warnings.warn(f"Field {self.field.name} is required but has no value")
return False
elif self.value is not None and self.field.viable_values:
if Any in self.field.viable_values: # value set allows any
elif self.value is not None and self.field.specification:
if Any in self.field.specification: # value set allows any
return True
elif self.value in self.field.viable_values: # raw value (likely a primitive) is in the value set
elif self.value in self.field.specification: # raw value (likely a primitive) is in the value set
return True
else: # check if the value matches one of the types in the value set
for e in self.field.viable_values:
for e in self.field.specification:
if isinstance(e, type):
cur_type = e
if cur_type is type(self.value):
Expand All @@ -145,7 +150,7 @@ class DataModel:
be accessed using the `id` as an attribute of the `DataModel` object. E.g.: `data_model.date_of_birth`. This is
useful in the data reading and mapping processes.
>>> data_model = DataModel("Test data model", (DataField(name="Field 1", viable_values=ValueSet()),))
>>> data_model = DataModel("Test data model", (DataField(name="Field 1", specification=ValueSet()),))
>>> data_model.field_1
DataField(name='Field 1', value_set=ValueSet(elements=[], name='', description=''), id='field_1', description='', section='', required=True, specification='', ordinal='')
Expand Down Expand Up @@ -248,7 +253,7 @@ def from_file(
DataField.name.__name__: 'data_field_name',
DataField.section.__name__: 'data_model_section',
DataField.description.__name__: 'description',
DataField.viable_values.__name__: 'value_set',
DataField.specification.__name__: 'value_set',
DataField.required.__name__: 'required',
DataField.specification.__name__: 'specification',
DataField.ordinal.__name__: 'ordinal'
Expand Down Expand Up @@ -477,3 +482,8 @@ def head(self, n: int = 5):
return self.data_frame.head(n)
else:
warnings.warn("No data frame object available for this dataset")


if __name__ == "__main__":
    # Ad-hoc check when the module is executed as a script: build a field from
    # a bare type and print whether its specification compares equal to a
    # ValueSet wrapping that same type.
    wrapped_specification = DataField(name="Field 1", specification=int).specification
    print(wrapped_specification == ValueSet([int]))
12 changes: 4 additions & 8 deletions src/phenopacket_mapper/pipeline/input.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,8 @@ def read_data_model(
DataField.name.__name__: 'data_field_name',
DataField.section.__name__: 'data_model_section',
DataField.description.__name__: 'description',
DataField.viable_values.__name__: 'value_set',
DataField.specification.__name__: 'value_set',
DataField.required.__name__: 'required',
DataField.specification.__name__: 'specification',
DataField.ordinal.__name__: 'ordinal'
}),
parse_value_sets: bool = False,
Expand Down Expand Up @@ -98,23 +97,21 @@ def remove_line_breaks_if_not_none(value):
for i in range(len(df)):
data_field_name = loc_default(df, row_index=i, column_name=column_names.get(DataField.name.__name__, ''))
section = loc_default(df, row_index=i, column_name=column_names.get(DataField.section.__name__, ''))
value_set = loc_default(df, row_index=i, column_name=column_names.get(DataField.viable_values.__name__, ''))
value_set = loc_default(df, row_index=i, column_name=column_names.get(DataField.specification.__name__, ''))
description = loc_default(df, row_index=i, column_name=column_names.get(DataField.description.__name__, ''))
required = bool(loc_default(df, row_index=i, column_name=column_names.get(DataField.required.__name__, '')))
specification = loc_default(df, row_index=i, column_name=column_names.get(DataField.specification.__name__, ''))
ordinal = loc_default(df, row_index=i, column_name=column_names.get(DataField.ordinal.__name__, ''))

if remove_line_breaks:
data_field_name = remove_line_breaks_if_not_none(data_field_name)
section = remove_line_breaks_if_not_none(section)
description = remove_line_breaks_if_not_none(description)
specification = remove_line_breaks_if_not_none(specification)

if parse_ordinals:
ordinal, data_field_name = parse_ordinal(data_field_name)

if parse_value_sets:
if not column_names.get(DataField.viable_values.__name__, ''):
if not column_names.get(DataField.specification.__name__, ''):
raise ValueError("Value set column name must be provided to parse value sets.")

value_set = parsing.parse_value_set(
Expand All @@ -127,10 +124,9 @@ def remove_line_breaks_if_not_none(value):
DataField(
name=data_field_name,
section=section,
viable_values=value_set,
specification=value_set,
description=description,
required=required,
specification=specification,
ordinal=ordinal
),
)
Expand Down
2 changes: 1 addition & 1 deletion tests/data_standards/data_models/test_data_field.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,4 @@ def name():
(ValueSet([str, int]), ValueSet([str, int])),
])
def test_data_field_constructor(name, viable_values, expected):
assert DataField(name=name, viable_values=viable_values).viable_values.elements == expected.elements
assert DataField(name=name, specification=viable_values).specification.elements == expected.elements
6 changes: 3 additions & 3 deletions tests/data_standards/data_models/test_data_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@
@pytest.fixture
def data_model():
return DataModel(resources=[], data_model_name='test_data_model', fields=(
DataField(name='Field 0', viable_values=ValueSet()),
DataField(name='Date of Birth', viable_values=ValueSet()),
DataField(name='%^&#12pseudonym!2', viable_values=ValueSet()),
DataField(name='Field 0', specification=ValueSet()),
DataField(name='Date of Birth', specification=ValueSet()),
DataField(name='%^&#12pseudonym!2', specification=ValueSet()),
))


Expand Down

0 comments on commit 6c48933

Please sign in to comment.