Skip to content

Commit 0071e5f

Browse files
authored
Merge pull request #41 from FHIR-Aggregator/fix/upgrade
upgraded fhir.resources version closes #28
2 parents 472c9d8 + 3db0471 commit 0071e5f

13 files changed

+1661
-520
lines changed

README.md

Lines changed: 22 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -26,57 +26,46 @@ To run the transformer, ensure that [CDA](https://cda.readthedocs.io/en/latest/)
2626
Usage: cda2fhir transform [OPTIONS]
2727
2828
Options:
29-
-s, --save Save FHIR ndjson to CDA2FHIR/data/META folder.
30-
[default: True]
29+
-s, --save Save FHIR ndjson to CDA2FHIR/data/META folder.
30+
[default: True]
3131
-v, --verbose
32-
-ns, --n_samples TEXT Number of samples to randomly select - max 100.
33-
-nd, --n_diagnosis TEXT Number of diagnosis to randomly select - max 100.
34-
-nf, --n_files TEXT Number of files to randomly select - max 100.
35-
-f, --transform_files Transform CDA files to FHIR DocumentReference and Group.
36-
-p, --path TEXT Path to save the FHIR NDJSON files. default is
37-
CDA2FHIR/data/META.
38-
--help Show this message and exit.
32+
-ns, --n_samples TEXT Number of samples to randomly select - max 100.
33+
-nd, --n_diagnosis TEXT Number of diagnosis to randomly select - max 100.
34+
-nf, --n_files TEXT Number of files to randomly select - max 100.
35+
-f, --transform_files Transform CDA files to FHIR DocumentReference and
36+
Group.
37+
-t, --transform_treatment Transform CDA treatment to all sub-hierarchy of
38+
FHIR MedicationAdministration ->
39+
SubstanceDefinitionRepresentation.
40+
-c, --transform_condition Transform CDA disease to Condition
41+
-m, --transform_mutation Transform CDA mutation to Observation
42+
-p, --path TEXT Path to save the FHIR NDJSON files. default is
43+
CDA2FHIR/data/META.
44+
--help Show this message and exit.
3945
```
4046

4147
- example
4248
```
4349
cda2fhir transform
4450
```
4551

46-
NOTE: If you are interested in validating your FHIR data with GEN3, you will need to go through the [user-guide, setup, and documentation of GEN3 tracker](https://aced-idp.github.io/requirements/) before running the `cda2fhir` commands.
47-
4852
### FHIR data validation
4953

50-
#### disable gen3-client
51-
```
52-
mv ~/.gen3/gen3_client_config.ini ~/.gen3/gen3_client_config.ini-xxx
53-
mv ~/.gen3/gen3-client ~/.gen3/gen3-client-xxx
54-
```
55-
5654
#### Run validate
5755
```
58-
time cda2fhir validate
59-
{'summary': {'Specimen': 721837, 'Observation': 731005, 'ResearchStudy': 423, 'BodyStructure': 163, 'Condition': 95262, 'ResearchSubject': 160649, 'Patient': 138738}}
60-
61-
real 5m
62-
user 5m
63-
sys 0m5.1s
64-
56+
cda2fhir validate --path data/META
57+
{'summary': {'Specimen': 742505, 'Medication': 214, 'Observation': 832864, 'ResearchStudy': 429, 'SubstanceDefinition': 214, 'BodyStructure': 135, 'Condition': 114804, 'ResearchSubject': 184888, 'MedicationAdministration': 38267, 'Patient': 159047, 'Substance': 214}}
6558
```
6659

67-
#### Restore gen3-client
68-
69-
```
70-
mv ~/.gen3/gen3-client-xxx ~/.gen3/gen3-client
71-
mv ~/.gen3/gen3_client_config.ini-xxx ~/.gen3/gen3_client_config.ini
72-
73-
```
74-
75-
7660
This command validates your FHIR entities and the references between them. It also generates a summary count of the entities in each ndjson file.
7761

7862
NOTE: Because of the size of the current data, this process may take _**5 minutes**_ or more, depending on your platform and compute power.
7963

64+
#### Check that every record contains a field (e.g. `extension`)
65+
```bash
66+
awk '!/extension/ {exit 1}' data/META/ResearchSubject.ndjson && echo "Every line contains 'extension'" || echo "Not every line contains 'extension'"
67+
```
68+
8069

8170
### Testing
8271
Current integration testing runs on all data and may take approximately _**2 hours**_.

cda2fhir/__init__.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
from cda2fhir.patcher import apply_patches
2+
3+
apply_patches()
4+
5+

cda2fhir/cda2fhir.py

Lines changed: 457 additions & 218 deletions
Large diffs are not rendered by default.

cda2fhir/cdamodels.py

Lines changed: 46 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -41,16 +41,17 @@ class CDASubject(Base):
4141
vital_status: Mapped[Optional[str]] = mapped_column(String)
4242
days_to_death: Mapped[Optional[int]] = mapped_column(Integer)
4343
cause_of_death: Mapped[Optional[str]] = mapped_column(String)
44-
integer_id_alias: Mapped[Optional[int]] = mapped_column(Integer)
44+
integer_id_alias: Mapped[Optional[int]] = mapped_column(Integer, unique=True)
4545
researchsubject_subjects: Mapped[List["CDASubjectResearchSubject"]] = relationship(
4646
back_populates="subject"
4747
)
48-
subject_alias_relation: Mapped[List["CDASubjectAlias"]] = relationship(
49-
back_populates="subject_alias_relations"
50-
)
5148
subject_project_relation: Mapped[List["CDASubjectProject"]] = relationship(
5249
back_populates="subject"
5350
)
51+
subject_identifier: Mapped[List["CDASubjectIdentifier"]] = relationship(
52+
"CDASubjectIdentifier",
53+
back_populates="subject"
54+
)
5455
subject_file_relation: Mapped[List["CDAFileSubject"]] = relationship(
5556
back_populates="subject"
5657
)
@@ -59,14 +60,21 @@ class CDASubject(Base):
5960
back_populates="subject"
6061
)
6162

62-
6363
@property
6464
def alias_id(self):
65-
"""Fetch CDA subject's alias id from subject_alias_relations table - (one to one)"""
66-
if self.subject_alias_relation and len(self.subject_alias_relation) > 0:
67-
return self.subject_alias_relation.__getitem__(0).subject_alias
68-
else:
69-
return None
65+
return self.integer_id_alias
66+
67+
68+
69+
class CDASubjectIdentifier(Base):
70+
__tablename__ = 'subject_identifier'
71+
id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
72+
subject_alias: Mapped[int] = mapped_column(Integer, ForeignKey('subject.integer_id_alias'), nullable=False)
73+
system: Mapped[str] = mapped_column(String, nullable=False)
74+
field_name: Mapped[str] = mapped_column(String, nullable=False)
75+
value: Mapped[str] = mapped_column(String, nullable=False)
76+
subject: Mapped["CDASubject"] = relationship("CDASubject", back_populates="subject_identifier")
77+
7078

7179

7280
class CDAResearchSubject(Base):
@@ -76,7 +84,7 @@ class CDAResearchSubject(Base):
7684
member_of_research_project: Mapped[Optional[str]] = mapped_column(String)
7785
primary_diagnosis_condition: Mapped[Optional[str]] = mapped_column(String)
7886
primary_diagnosis_site: Mapped[Optional[str]] = mapped_column(String)
79-
integer_id_alias: Mapped[Optional[int]] = mapped_column(Integer)
87+
integer_id_alias: Mapped[Optional[int]] = mapped_column(Integer, unique=True)
8088
subject_researchsubjects: Mapped[List["CDASubjectResearchSubject"]] = relationship(
8189
back_populates="researchsubject"
8290
)
@@ -94,8 +102,8 @@ class CDAResearchSubject(Base):
94102
class CDASubjectResearchSubject(Base):
95103
__tablename__ = 'subject_researchsubject'
96104
query: QueryPropertyDescriptor = Session.query_property()
97-
subject_id: Mapped[str] = mapped_column(ForeignKey("subject.id"), primary_key=True)
98-
researchsubject_id: Mapped[str] = mapped_column(ForeignKey("researchsubject.id"), primary_key=True)
105+
subject_alias: Mapped[int] = mapped_column(ForeignKey("subject.integer_id_alias"), primary_key=True)
106+
researchsubject_alias: Mapped[int] = mapped_column(ForeignKey("researchsubject.integer_id_alias"), primary_key=True)
99107
subject: Mapped["CDASubject"] = relationship(
100108
back_populates="researchsubject_subjects"
101109
)
@@ -104,37 +112,35 @@ class CDASubjectResearchSubject(Base):
104112
)
105113

106114

107-
class CDASubjectAlias(Base):
108-
__tablename__ = 'subject_alias_table'
109-
query: QueryPropertyDescriptor = Session.query_property()
110-
subject_id: Mapped[str] = mapped_column(ForeignKey("subject.id"), primary_key=True)
111-
subject_alias: Mapped[int] = mapped_column(Integer, primary_key=True)
112-
subject_alias_relations: Mapped["CDASubject"] = relationship(
113-
back_populates="subject_alias_relation"
114-
)
115-
116-
117115
class CDASubjectProject(Base):
118116
__tablename__ = 'subject_project'
119117
query: QueryPropertyDescriptor = Session.query_property()
120-
subject_id: Mapped[str] = mapped_column(ForeignKey("subject.id"), primary_key=True)
118+
subject_alias: Mapped[int] = mapped_column(ForeignKey("subject.integer_id_alias"), primary_key=True)
121119
associated_project: Mapped[str] = mapped_column(String, primary_key=True)
122120
subject: Mapped["CDASubject"] = relationship(
123121
back_populates="subject_project_relation"
124122
)
125123

126124

125+
127126
class CDADiagnosis(Base):
128127
__tablename__ = 'diagnosis'
129128
query: QueryPropertyDescriptor = Session.query_property()
130129
id: Mapped[str] = mapped_column(String, primary_key=True)
131130
primary_diagnosis: Mapped[Optional[str]] = mapped_column(String)
132131
age_at_diagnosis: Mapped[Optional[int]] = mapped_column(Integer)
133132
morphology: Mapped[Optional[str]] = mapped_column(String)
134-
stage: Mapped[Optional[str]] = mapped_column(String)
133+
pathologic_stage: Mapped[Optional[str]] = mapped_column(String)
134+
pathologic_stage_m: Mapped[Optional[str]] = mapped_column(String)
135+
pathologic_stage_n: Mapped[Optional[str]] = mapped_column(String)
136+
pathologic_stage_t: Mapped[Optional[str]] = mapped_column(String)
137+
clinical_stage: Mapped[Optional[str]] = mapped_column(String)
138+
clinical_stage_m: Mapped[Optional[str]] = mapped_column(String)
139+
clinical_stage_n: Mapped[Optional[str]] = mapped_column(String)
140+
clinical_stage_t: Mapped[Optional[str]] = mapped_column(String)
135141
grade: Mapped[Optional[str]] = mapped_column(String)
136142
method_of_diagnosis: Mapped[Optional[str]] = mapped_column(String)
137-
integer_id_alias: Mapped[Optional[int]] = mapped_column(Integer)
143+
integer_id_alias: Mapped[Optional[int]] = mapped_column(Integer, unique=True)
138144
researchsubject_diagnoses: Mapped[List["CDAResearchSubjectDiagnosis"]] = relationship(
139145
back_populates="diagnosis"
140146
)
@@ -143,8 +149,8 @@ class CDADiagnosis(Base):
143149
class CDAResearchSubjectDiagnosis(Base):
144150
__tablename__ = 'researchsubject_diagnosis'
145151
query: QueryPropertyDescriptor = Session.query_property()
146-
researchsubject_id: Mapped[str] = mapped_column(ForeignKey('researchsubject.id'), primary_key=True)
147-
diagnosis_id: Mapped[str] = mapped_column(ForeignKey('diagnosis.id'), primary_key=True)
152+
researchsubject_alias: Mapped[str] = mapped_column(ForeignKey('researchsubject.integer_id_alias'), primary_key=True)
153+
diagnosis_alias: Mapped[str] = mapped_column(ForeignKey('diagnosis.integer_id_alias'), primary_key=True)
148154
diagnosis: Mapped["CDADiagnosis"] = relationship(
149155
back_populates="researchsubject_diagnoses"
150156
)
@@ -166,7 +172,7 @@ class CDATreatment(Base):
166172
treatment_effect: Mapped[Optional[str]] = mapped_column(String)
167173
treatment_end_reason: Mapped[Optional[str]] = mapped_column(String)
168174
number_of_cycles: Mapped[Optional[int]] = mapped_column(Integer)
169-
integer_id_alias: Mapped[Optional[int]] = mapped_column(Integer)
175+
integer_id_alias: Mapped[Optional[int]] = mapped_column(Integer, unique=True)
170176
researchsubject_treatments: Mapped[List["CDAResearchSubjectTreatment"]] = relationship(
171177
back_populates="treatment"
172178
)
@@ -175,8 +181,8 @@ class CDATreatment(Base):
175181
class CDAResearchSubjectTreatment(Base):
176182
__tablename__ = 'researchsubject_treatment'
177183
query: QueryPropertyDescriptor = Session.query_property()
178-
researchsubject_id: Mapped[str] = mapped_column(ForeignKey('researchsubject.id'), primary_key=True)
179-
treatment_id: Mapped[str] = mapped_column(ForeignKey('treatment.id'), primary_key=True)
184+
researchsubject_alias: Mapped[str] = mapped_column(ForeignKey('researchsubject.integer_id_alias'), primary_key=True)
185+
treatment_alias: Mapped[str] = mapped_column(ForeignKey('treatment.integer_id_alias'), primary_key=True)
180186
researchsubject: Mapped["CDAResearchSubject"] = relationship(
181187
back_populates="treatment_researchsubjects"
182188
)
@@ -208,8 +214,8 @@ class CDASpecimen(Base):
208214
class CDAResearchSubjectSpecimen(Base):
209215
__tablename__ = 'researchsubject_specimen'
210216
query: QueryPropertyDescriptor = Session.query_property()
211-
researchsubject_id: Mapped[str] = mapped_column(ForeignKey('researchsubject.id'), primary_key=True)
212-
specimen_id: Mapped[str] = mapped_column(ForeignKey('specimen.id'), primary_key=True)
217+
researchsubject_alias: Mapped[str] = mapped_column(ForeignKey('researchsubject.integer_id_alias'), primary_key=True)
218+
specimen_alias: Mapped[str] = mapped_column(ForeignKey('specimen.integer_id_alias'), primary_key=True)
213219
specimen: Mapped["CDASpecimen"] = relationship(
214220
back_populates="researchsubject_specimens"
215221
)
@@ -237,14 +243,6 @@ class GDCProgramdbGap(Base):
237243
# query: QueryPropertyDescriptor = Session.query_property()
238244
# # TODO: make one table via all xlsx sheets
239245

240-
class CDASubjectIdentifier(Base):
241-
__tablename__ = 'cda_subject_identifier' # CDA provenance info relation table.
242-
query: QueryPropertyDescriptor = Session.query_property()
243-
subject_alias: Mapped[Optional[int]] = mapped_column(Integer, primary_key=True)
244-
value: Mapped[Optional[str]] = mapped_column(String, primary_key=True)
245-
system: Mapped[Optional[str]] = mapped_column(String, primary_key=True)
246-
field_name: Mapped[Optional[str]] = mapped_column(String, primary_key=True)
247-
248246

249247
class CDAProjectRelation(Base):
250248
__tablename__ = 'project_program_relation'
@@ -279,7 +277,7 @@ class CDAFile(Base):
279277
imaging_modality: Mapped[Optional[str]] = mapped_column(String)
280278
dbgap_accession_number: Mapped[Optional[str]] = mapped_column(String)
281279
imaging_series: Mapped[Optional[str]] = mapped_column(String)
282-
integer_id_alias: Mapped[Optional[int]] = mapped_column(Integer)
280+
integer_id_alias: Mapped[Optional[int]] = mapped_column(Integer, unique=True)
283281
specimen_file_relation: Mapped[List["CDAFileSpecimen"]] = relationship(
284282
back_populates="file"
285283
)
@@ -288,11 +286,12 @@ class CDAFile(Base):
288286
)
289287

290288

289+
291290
class CDAFileSubject(Base):
292291
__tablename__ = 'file_subject'
293292
query: QueryPropertyDescriptor = Session.query_property()
294-
file_id: Mapped[str] = mapped_column(ForeignKey("cda_file.id"), primary_key=True)
295-
subject_id: Mapped[str] = mapped_column(ForeignKey("subject.id"), primary_key=True)
293+
file_alias: Mapped[str] = mapped_column(ForeignKey("cda_file.integer_id_alias"), primary_key=True)
294+
subject_alias: Mapped[str] = mapped_column(ForeignKey("subject.integer_id_alias"), primary_key=True)
296295
subject: Mapped["CDASubject"] = relationship(
297296
back_populates="subject_file_relation"
298297
)
@@ -301,11 +300,12 @@ class CDAFileSubject(Base):
301300
)
302301

303302

303+
304304
class CDAFileSpecimen(Base):
305305
__tablename__ = 'file_specimen'
306306
query: QueryPropertyDescriptor = Session.query_property()
307-
file_id: Mapped[str] = mapped_column(ForeignKey("cda_file.id"), primary_key=True)
308-
specimen_id: Mapped[str] = mapped_column(ForeignKey("specimen.id"), primary_key=True)
307+
file_alias: Mapped[str] = mapped_column(ForeignKey("cda_file.integer_id_alias"), primary_key=True)
308+
specimen_alias: Mapped[str] = mapped_column(ForeignKey("specimen.integer_id_alias"), primary_key=True)
309309
specimen: Mapped["CDASpecimen"] = relationship(
310310
back_populates="file_specimen_relation"
311311
)

0 commit comments

Comments
 (0)