Skip to content

Commit

Permalink
core: add core__diagnosticreport table
Browse files Browse the repository at this point in the history
  • Loading branch information
mikix committed Dec 3, 2024
1 parent c4d93d1 commit 45c899a
Show file tree
Hide file tree
Showing 21 changed files with 10,231 additions and 5 deletions.
10 changes: 10 additions & 0 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,16 @@ Things to keep in mind:
the `id` field, add a `condition_ref` field defined like:
`concat('Condition/', id) AS condition_ref`

## DateTime fields

In general:
- Expand a Date/DateTime/Instant field into four versions:
day/week/month/year, with appropriate suffixes (look for examples).
- If the field is a Date field, leave the `_day` suffix off the day version,
since the end result is not actually a modified value.
- Add start & end versions of Period fields,
since EHRs are truly wild out there and may fill only start or only end.

## Rebuilding the reference SQL

We keep some reference SQL in git,
Expand Down
15 changes: 15 additions & 0 deletions cumulus_library/.sqlfluff
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,21 @@ schema =
'reference': True, 'display': False, 'type': True
}
},
'diagnosticreport': {
'effectivePeriod': {
'start': True, 'end': True,
},
'encounter': {
'reference': True,
},
'id': True,
'result': {
'reference': True,
},
'subject': {
'reference': True,
},
},
'documentreference': {
'id': True,
'type': True,
Expand Down
26 changes: 26 additions & 0 deletions cumulus_library/builders/counts.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,32 @@ def count_condition(
filter_resource=True,
)

def count_diagnosticreport(
self,
table_name: str,
source_table: str,
table_cols: list,
where_clauses: list | None = None,
min_subject: int | None = None,
) -> str:
"""wrapper method for constructing diagnosticreport counts tables
:param table_name: The name of the table to create. Must start with study prefix
:param source_table: The table to create counts data from
:param table_cols: The columns from the source table to add to the count table
:param where_clauses: An array of where clauses to use for filtering the data
:param min_subject: An integer setting the minimum bin size for inclusion
(default: 10)
"""
return self.get_count_query(
table_name,
source_table,
table_cols,
where_clauses=where_clauses,
min_subject=min_subject,
fhir_resource="diagnosticreport",
)

def count_documentreference(
self,
table_name: str,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ class CountableFhirResource(Enum):

ALLERGYINTOLERANCE = "allergyintolerance"
CONDITION = "condition"
DIAGNOSTICREPORT = "diagnosticreport"
DOCUMENTREFERENCE = "documentreference"
ENCOUNTER = "encounter"
NONE = None
Expand Down
37 changes: 37 additions & 0 deletions cumulus_library/studies/core/builder_diagnosticreport.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
import cumulus_library
from cumulus_library.studies.core.core_templates import core_templates
from cumulus_library.template_sql import sql_utils

# Fields of the raw `diagnosticreport` table that the core template relies on.
# This mapping is handed to sql_utils.validate_schema() by the builder below.
# Each key is a top-level column; the list names the nested sub-fields that are
# looked up inside it (presumably an empty list means "no sub-fields" — confirm
# against sql_utils.validate_schema).
expected_table_cols = {
    "diagnosticreport": {
        # Plain top-level columns.
        "id": [],
        "status": [],
        # Reference-typed columns share the sub-field list in sql_utils.REFERENCE.
        "subject": sql_utils.REFERENCE,
        "encounter": sql_utils.REFERENCE,
        # Clinical and administrative timestamps.
        "effectiveDateTime": [],
        "effectivePeriod": ["start", "end"],
        "issued": [],
        # `result` is flattened by the SQL template before its reference is read.
        "result": sql_utils.REFERENCE,
    }
}


class CoreDiagnosticReportBuilder(cumulus_library.BaseTableBuilder):
    """Table builder that queues the queries for the core DiagnosticReport tables."""

    display_text = "Creating DiagnosticReport tables..."

    def prepare_queries(self, *args, config: cumulus_library.StudyConfig, **kwargs):
        """Append the DiagnosticReport queries to ``self.queries``.

        The queries produced for the two codeable-concept configs
        (``category`` and ``code``) are added first, followed by the rendered
        ``diagnosticreport`` core template.

        :param config: study configuration; only ``config.db`` is used here
        """
        # `category` is declared as a list in the hierarchy, `code` as a dict.
        category_concept = sql_utils.CodeableConceptConfig(
            source_table="diagnosticreport",
            column_hierarchy=[("category", list)],
            target_table="core__diagnosticreport_dn_category",
        )
        code_concept = sql_utils.CodeableConceptConfig(
            source_table="diagnosticreport",
            column_hierarchy=[("code", dict)],
            target_table="core__diagnosticreport_dn_code",
        )
        self.queries += sql_utils.denormalize_complex_objects(
            config.db, [category_concept, code_concept]
        )

        # The schema check runs after the denormalization step, and its result
        # is what the template receives as its schema argument.
        schema = sql_utils.validate_schema(config.db, expected_table_cols)
        core_query = core_templates.get_core_template("diagnosticreport", schema)
        self.queries.append(core_query)
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ in this table.
(
BOOL_OR(ec.table_name = 'allergyintolerance')
AND BOOL_OR(ec.table_name = 'condition')
AND BOOL_OR(ec.table_name = 'diagnosticreport')
AND BOOL_OR(ec.table_name = 'documentreference')
AND BOOL_OR(ec.table_name = 'medicationrequest')
AND BOOL_OR(ec.table_name = 'observation')
Expand Down
102 changes: 102 additions & 0 deletions cumulus_library/studies/core/core_templates/diagnosticreport.sql.jinja
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
{% import 'core_utils.jinja' as utils %}
{% import 'unnest_utils.jinja' as unnest_utils %}

-- This table includes all fields of interest to the US Core DiagnosticReport profiles,
-- EXCEPT FOR:
-- * the `presentedForm` field, which is an attachment array that is stripped out by the ETL.
-- * the `performer` field, simply because it is unlikely to be interesting to consumers
-- and is an array field, which would require a lot of row duplication.
--
-- US Core profiles for reference:
-- * https://hl7.org/fhir/us/core/STU4/StructureDefinition-us-core-diagnosticreport-lab.html
-- * https://hl7.org/fhir/us/core/STU4/StructureDefinition-us-core-diagnosticreport-note.html

-- Builds core__diagnosticreport from the raw diagnosticreport table plus the
-- two denormalized coding tables (core__diagnosticreport_dn_code and
-- core__diagnosticreport_dn_category).
-- The first CTE, temp_diagnosticreport, selects one set of columns per source
-- row: id, status, the subject/encounter references (aliased subject_ref and
-- encounter_ref), and day/week/month/year variants of effectiveDateTime,
-- effectivePeriod.start, effectivePeriod.end and issued. The column
-- expressions are generated by the core_utils macros, which also receive
-- `schema` (presumably so missing fields can be handled — confirm in
-- core_utils.jinja).
CREATE TABLE core__diagnosticreport AS
WITH temp_diagnosticreport AS (
SELECT
{{- utils.basic_cols('diagnosticreport', 'd', ['id']) }},
{{-
utils.nullable_cols(
'diagnosticreport',
'd',
[
'status',
('subject', 'reference', 'subject_ref'),
('encounter', 'reference', 'encounter_ref'),
],
schema
)
}},
{{-
utils.truncate_date_cols(
'diagnosticreport',
'd',
[
('effectiveDateTime', 'day'),
('effectiveDateTime', 'week'),
('effectiveDateTime', 'month'),
('effectiveDateTime', 'year'),
('effectivePeriod', 'start', 'effectivePeriod_start_day', 'day'),
('effectivePeriod', 'start', 'effectivePeriod_start_week', 'week'),
('effectivePeriod', 'start', 'effectivePeriod_start_month', 'month'),
('effectivePeriod', 'start', 'effectivePeriod_start_year', 'year'),
('effectivePeriod', 'end', 'effectivePeriod_end_day', 'day'),
('effectivePeriod', 'end', 'effectivePeriod_end_week', 'week'),
('effectivePeriod', 'end', 'effectivePeriod_end_month', 'month'),
('effectivePeriod', 'end', 'effectivePeriod_end_year', 'year'),
('issued', 'day'),
('issued', 'week'),
('issued', 'month'),
('issued', 'year'),
],
schema
)
}}
FROM diagnosticreport AS d
),

-- Expands the `result` field of diagnosticreport via unnest_utils.flatten.
-- The final SELECT reads the `id` and `reference` columns of this CTE.
temp_result AS (
{{ unnest_utils.flatten('diagnosticreport', 'reference', parent_field='result') }}
)

SELECT
td.id,
td.status,

dn_category.code AS category_code,
dn_category.system AS category_system,
dn_category.display AS category_display,

dn_code.code AS code_code,
dn_code.system AS code_system,
dn_code.display AS code_display,

td.effectiveDateTime_day,
td.effectiveDateTime_week,
td.effectiveDateTime_month,
td.effectiveDateTime_year,

td.effectivePeriod_start_day,
td.effectivePeriod_start_week,
td.effectivePeriod_start_month,
td.effectivePeriod_start_year,

td.effectivePeriod_end_day,
td.effectivePeriod_end_week,
td.effectivePeriod_end_month,
td.effectivePeriod_end_year,

td.issued_day,
td.issued_week,
td.issued_month,
td.issued_year,

concat('DiagnosticReport/', td.id) AS diagnosticreport_ref,
td.subject_ref,
td.encounter_ref,
tr.reference AS result_ref

-- All three joins are LEFT JOINs keyed on the report id, so a report with no
-- code, category or result rows is still emitted (with NULLs in those columns).
-- NOTE(review): each joined table can presumably hold several rows per report
-- (multiple codings, categories or results), in which case a report appears
-- once per combination — confirm before treating `id` as unique in this table.
FROM temp_diagnosticreport AS td
LEFT JOIN core__diagnosticreport_dn_code AS dn_code ON td.id = dn_code.id
LEFT JOIN core__diagnosticreport_dn_category AS dn_category ON td.id = dn_category.id
LEFT JOIN temp_result AS tr ON td.id = tr.id;
16 changes: 16 additions & 0 deletions cumulus_library/studies/core/count_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,21 @@ def count_core_condition(self, duration: str = "month"):
]
return self.count_condition(table_name, from_table, cols)

def count_core_diagnosticreport(self, duration: str = "month"):
    """Build the counts query for the core DiagnosticReport table.

    Counts are binned by category display, code display, and the issued
    date truncated to ``duration``.

    :param duration: suffix of the ``issued_*`` column to count on; it is also
        passed to ``get_table_name`` for the counts table name
    :returns: the result of ``count_diagnosticreport`` for those columns
    """
    # `issued` is an administrative timestamp, not a clinical one, so it is not
    # the ideal date to bin on. The clinical dates, however, are scattered over
    # effectiveDateTime, effectivePeriod.start and effectivePeriod.end, and
    # collating those is more effort than these rough core counts deserve.
    counted_columns = [
        ["category_display", "varchar", None],
        ["code_display", "varchar", None],
        [f"issued_{duration}", "date", None],
    ]
    return self.count_diagnosticreport(
        self.get_table_name("count_diagnosticreport", duration=duration),
        self.get_table_name("diagnosticreport"),
        counted_columns,
    )

def count_core_documentreference(self, duration: str = "month"):
table_name = self.get_table_name("count_documentreference", duration=duration)
from_table = self.get_table_name("documentreference")
Expand Down Expand Up @@ -119,6 +134,7 @@ def prepare_queries(self, *args, **kwargs):
self.queries = [
self.count_core_allergyintolerance(duration="month"),
self.count_core_condition(duration="month"),
self.count_core_diagnosticreport(duration="month"),
self.count_core_documentreference(duration="month"),
self.count_core_encounter(duration="month"),
self.count_core_encounter_all_types(),
Expand Down
2 changes: 2 additions & 0 deletions cumulus_library/studies/core/manifest.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ file_names = [
"builder_prereq_tables.py",
"builder_allergyintolerance.py",
"builder_condition.py",
"builder_diagnosticreport.py",
"builder_patient.py",
"builder_encounter.py",
"builder_documentreference.py",
Expand All @@ -19,6 +20,7 @@ file_names = [
count_list = [
"core__count_allergyintolerance_month",
"core__count_condition_month",
"core__count_diagnosticreport_month",
"core__count_documentreference_month",
"core__count_encounter_month",
"core__count_encounter_all_types",
Expand Down
Loading

0 comments on commit 45c899a

Please sign in to comment.