Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

core: add core__diagnosticreport table #322

Merged
merged 3 commits into from
Dec 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,16 @@ Things to keep in mind:
the `id` field, add a `condition_ref` field defined like:
`concat('Condition/', id) AS condition_ref`

## DateTime fields

In general:
- Expand a Date/DateTime/Instant field into four versions:
day/week/month/year, with appropriate suffixes (look for examples).
- If the field is a Date field, leave the `_day` suffix off the day version,
since the end result is not actually a modified value.
- Add start & end versions of Period fields,
since EHRs are truly wild out there and may fill only start or only end.

## Rebuilding the reference SQL

We keep some reference SQL in git,
Expand Down
15 changes: 15 additions & 0 deletions cumulus_library/.sqlfluff
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,21 @@ schema =
'reference': True, 'display': False, 'type': True
}
},
'diagnosticreport': {
'effectivePeriod': {
'start': True, 'end': True,
},
'encounter': {
'reference': True,
},
'id': True,
'result': {
'reference': True,
},
'subject': {
'reference': True,
},
},
'documentreference': {
'id': True,
'type': True,
Expand Down
26 changes: 26 additions & 0 deletions cumulus_library/builders/counts.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,32 @@ def count_condition(
filter_resource=True,
)

def count_diagnosticreport(
self,
table_name: str,
source_table: str,
table_cols: list,
where_clauses: list | None = None,
min_subject: int | None = None,
) -> str:
"""wrapper method for constructing diagnosticreport counts tables

:param table_name: The name of the table to create. Must start with study prefix
:param source_table: The table to create counts data from
:param table_cols: The columns from the source table to add to the count table
:param where_clauses: An array of where clauses to use for filtering the data
:param min_subject: An integer setting the minimum bin size for inclusion
(default: 10)
"""
return self.get_count_query(
table_name,
source_table,
table_cols,
where_clauses=where_clauses,
min_subject=min_subject,
fhir_resource="diagnosticreport",
)

def count_documentreference(
self,
table_name: str,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ class CountableFhirResource(Enum):

ALLERGYINTOLERANCE = "allergyintolerance"
CONDITION = "condition"
DIAGNOSTICREPORT = "diagnosticreport"
DOCUMENTREFERENCE = "documentreference"
ENCOUNTER = "encounter"
NONE = None
Expand Down
42 changes: 42 additions & 0 deletions cumulus_library/studies/core/builder_diagnosticreport.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import cumulus_library
from cumulus_library.studies.core.core_templates import core_templates
from cumulus_library.template_sql import sql_utils

# Columns of the raw `diagnosticreport` table that the core template relies on.
# This mapping is handed to sql_utils.validate_schema() in prepare_queries, and
# the validated result is passed on to the `diagnosticreport` core template.
# Each key is a column name; the value lists the nested fields expected under
# it (presumably an empty list means "no nested fields to check" -- confirm
# against sql_utils.validate_schema).
expected_table_cols = {
"diagnosticreport": {
"id": [],
"status": [],
"subject": sql_utils.REFERENCE,
"encounter": sql_utils.REFERENCE,
"effectiveDateTime": [],
"effectivePeriod": ["start", "end"],
"issued": [],
"result": sql_utils.REFERENCE,
}
}


class CoreDiagnosticReportBuilder(cumulus_library.BaseTableBuilder):
    """Table builder that queues the SQL for the core DiagnosticReport tables."""

    display_text = "Creating DiagnosticReport tables..."

    def prepare_queries(self, *args, config: cumulus_library.StudyConfig, **kwargs):
        """Queue the denormalization queries, then the core table query.

        :param config: study configuration; its ``db`` attribute is passed to
            the denormalization and schema-validation helpers
        """
        # (source column, container type, denormalized target table)
        concept_columns = (
            ("category", list, "core__diagnosticreport_dn_category"),
            ("code", dict, "core__diagnosticreport_dn_code"),
            ("conclusionCode", list, "core__diagnosticreport_dn_conclusioncode"),
        )
        code_sources = [
            sql_utils.CodeableConceptConfig(
                source_table="diagnosticreport",
                column_hierarchy=[(column, container)],
                target_table=target,
            )
            for column, container, target in concept_columns
        ]
        self.queries += sql_utils.denormalize_complex_objects(config.db, code_sources)

        # The core template is rendered against the schema actually present in the DB.
        validated_schema = sql_utils.validate_schema(config.db, expected_table_cols)
        core_query = core_templates.get_core_template("diagnosticreport", validated_schema)
        self.queries.append(core_query)
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ in this table.
(
BOOL_OR(ec.table_name = 'allergyintolerance')
AND BOOL_OR(ec.table_name = 'condition')
AND BOOL_OR(ec.table_name = 'diagnosticreport')
AND BOOL_OR(ec.table_name = 'documentreference')
AND BOOL_OR(ec.table_name = 'medicationrequest')
AND BOOL_OR(ec.table_name = 'observation')
Expand Down
111 changes: 111 additions & 0 deletions cumulus_library/studies/core/core_templates/diagnosticreport.sql.jinja
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
{% import 'core_utils.jinja' as utils %}
{% import 'unnest_utils.jinja' as unnest_utils %}

-- This table includes all fields of interest to the US Core DiagnosticReport profiles.
-- EXCEPT FOR:
-- * the `presentedForm` field, which is an attachment array that is stripped out by the ETL.
-- * the `reporter` field, simply due to it not likely being interesting to consumers
-- and being an array field, which would require a lot of row duplication.
--
-- AND ADDING:
-- * the `conclusionCode` field, because it has clinical relevance
--
-- US Core profiles for reference:
-- * https://hl7.org/fhir/us/core/STU4/StructureDefinition-us-core-diagnosticreport-lab.html
-- * https://hl7.org/fhir/us/core/STU4/StructureDefinition-us-core-diagnosticreport-note.html

-- Builds core__diagnosticreport from the raw diagnosticreport table, joined
-- with the denormalized category / code / conclusionCode tables and the
-- flattened `result` references.
CREATE TABLE core__diagnosticreport AS
WITH temp_diagnosticreport AS (
SELECT
{{- utils.basic_cols('diagnosticreport', 'd', ['id']) }},
{{-
utils.nullable_cols(
'diagnosticreport',
'd',
[
'status',
('subject', 'reference', 'subject_ref'),
('encounter', 'reference', 'encounter_ref'),
],
schema
)
}},
{{-
utils.truncate_date_cols(
'diagnosticreport',
'd',
[
('effectiveDateTime', 'day'),
('effectiveDateTime', 'week'),
('effectiveDateTime', 'month'),
('effectiveDateTime', 'year'),
('effectivePeriod', 'start', 'effectivePeriod_start_day', 'day'),
('effectivePeriod', 'start', 'effectivePeriod_start_week', 'week'),
('effectivePeriod', 'start', 'effectivePeriod_start_month', 'month'),
('effectivePeriod', 'start', 'effectivePeriod_start_year', 'year'),
('effectivePeriod', 'end', 'effectivePeriod_end_day', 'day'),
('effectivePeriod', 'end', 'effectivePeriod_end_week', 'week'),
('effectivePeriod', 'end', 'effectivePeriod_end_month', 'month'),
('effectivePeriod', 'end', 'effectivePeriod_end_year', 'year'),
('issued', 'day'),
('issued', 'week'),
('issued', 'month'),
('issued', 'year'),
],
schema
)
}}
FROM diagnosticreport AS d
),

-- Flattened view of the `result` array's `reference` field; it is joined back
-- on `id` below (presumably one row per report/result pair -- confirm against
-- unnest_utils.flatten).
temp_result AS (
{{ unnest_utils.flatten('diagnosticreport', 'reference', parent_field='result') }}
)

SELECT
td.id,
td.status,

dn_category.code AS category_code,
dn_category.system AS category_system,
dn_category.display AS category_display,

dn_code.code AS code_code,
dn_code.system AS code_system,
dn_code.display AS code_display,

td.effectiveDateTime_day,
td.effectiveDateTime_week,
td.effectiveDateTime_month,
td.effectiveDateTime_year,

td.effectivePeriod_start_day,
td.effectivePeriod_start_week,
td.effectivePeriod_start_month,
td.effectivePeriod_start_year,

td.effectivePeriod_end_day,
td.effectivePeriod_end_week,
td.effectivePeriod_end_month,
td.effectivePeriod_end_year,

td.issued_day,
td.issued_week,
td.issued_month,
td.issued_year,

dn_conclusion.code AS conclusionCode_code,
dn_conclusion.system AS conclusionCode_system,
dn_conclusion.display AS conclusionCode_display,

concat('DiagnosticReport/', td.id) AS diagnosticreport_ref,
td.subject_ref,
td.encounter_ref,
tr.reference AS result_ref

-- LEFT JOINs keep reports that have no matching code/category/conclusion/result row.
-- NOTE(review): category, conclusionCode and result are array-valued, so a report
-- presumably appears once per combination of their values -- confirm before
-- treating the row count as a report count.
FROM temp_diagnosticreport AS td
LEFT JOIN core__diagnosticreport_dn_code AS dn_code ON td.id = dn_code.id
LEFT JOIN core__diagnosticreport_dn_category AS dn_category ON td.id = dn_category.id
LEFT JOIN core__diagnosticreport_dn_conclusioncode AS dn_conclusion
ON td.id = dn_conclusion.id
LEFT JOIN temp_result AS tr ON td.id = tr.id;
16 changes: 16 additions & 0 deletions cumulus_library/studies/core/count_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,21 @@ def count_core_condition(self, duration: str = "month"):
]
return self.count_condition(table_name, from_table, cols)

def count_core_diagnosticreport(self, duration: str = "month"):
    """Build the core DiagnosticReport counts query, binned by issued date.

    :param duration: granularity suffix selecting which ``issued_*`` column
        to count by; also passed to ``get_table_name`` for the counts table
    :returns: the result of ``count_diagnosticreport`` for the assembled columns
    """
    counts_table = self.get_table_name("count_diagnosticreport", duration=duration)
    report_table = self.get_table_name("diagnosticreport")

    cols = [[name, "varchar", None] for name in ("category_display", "code_display")]
    # `issued` is an administrative timestamp rather than a clinical one, so it
    # is not the ideal date to count by. The clinical dates, however, are split
    # across effectiveDateTime, effectivePeriod.start and effectivePeriod.end.
    # Instead of collating those three, settle for `issued`: the core counts
    # are only meant as a rough picture of the data, not a polished product.
    cols.append([f"issued_{duration}", "date", None])

    return self.count_diagnosticreport(counts_table, report_table, cols)

def count_core_documentreference(self, duration: str = "month"):
table_name = self.get_table_name("count_documentreference", duration=duration)
from_table = self.get_table_name("documentreference")
Expand Down Expand Up @@ -119,6 +134,7 @@ def prepare_queries(self, *args, **kwargs):
self.queries = [
self.count_core_allergyintolerance(duration="month"),
self.count_core_condition(duration="month"),
self.count_core_diagnosticreport(duration="month"),
self.count_core_documentreference(duration="month"),
self.count_core_encounter(duration="month"),
self.count_core_encounter_all_types(),
Expand Down
2 changes: 2 additions & 0 deletions cumulus_library/studies/core/manifest.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ file_names = [
"builder_prereq_tables.py",
"builder_allergyintolerance.py",
"builder_condition.py",
"builder_diagnosticreport.py",
"builder_patient.py",
"builder_encounter.py",
"builder_documentreference.py",
Expand All @@ -19,6 +20,7 @@ file_names = [
count_list = [
"core__count_allergyintolerance_month",
"core__count_condition_month",
"core__count_diagnosticreport_month",
"core__count_documentreference_month",
"core__count_encounter_month",
"core__count_encounter_all_types",
Expand Down
Loading
Loading