core: add core__diagnosticreport table

smart-on-fhir · Dec 3, 2024 · 1fb444c · 1fb444c
1 parent 0e3c551
commit 1fb444c
Show file tree

Hide file tree

Showing 21 changed files with 10,231 additions and 5 deletions.
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
@@ -40,6 +40,16 @@ Things to keep in mind:
   the `id` field, add a `condition_ref` field defined like:
   `concat('Condition/', id) AS condition_ref`
 
+## DateTime fields
+
+In general:
+- Expand a Date/DateTime/Instant field into four versions:
+  day/week/month/year, with appropriate suffixes (look for examples).
+- If the field is a Date field, leave the `_day` suffix off the day version,
+  since the end result is not actually a modified value.
+- Add start & end versions of Period fields,
+  since EHRs are truly wild out there and may fill only start or only end.
+
 ## Rebuilding the reference SQL
 
 We keep some reference SQL in git,

diff --git a/cumulus_library/.sqlfluff b/cumulus_library/.sqlfluff
@@ -139,6 +139,21 @@ schema =
                 'reference': True, 'display': False, 'type': True
             }
         }, 
+        'diagnosticreport': {
+            'effectivePeriod': {
+              'start': True, 'end': True,
+            },
+            'encounter': {
+                'reference': True,
+            },
+            'id': True,
+            'result': {
+                'reference': True,
+            },
+            'subject': {
+                'reference': True,
+            },
+        },
         'documentreference': {
             'id': True, 
             'type': True, 

diff --git a/cumulus_library/builders/counts.py b/cumulus_library/builders/counts.py
@@ -142,6 +142,32 @@ def count_condition(
             filter_resource=True,
         )
 
+    def count_diagnosticreport(
+        self,
+        table_name: str,
+        source_table: str,
+        table_cols: list,
+        where_clauses: list | None = None,
+        min_subject: int | None = None,
+    ) -> str:
+        """wrapper method for constructing diagnosticreport counts tables
+
+        Forwards every argument unchanged to get_count_query, pinning
+        fhir_resource to "diagnosticreport".
+
+        :param table_name: The name of the table to create. Must start with study prefix
+        :param source_table: The table to create counts data from
+        :param table_cols: The columns from the source table to add to the count table
+        :param where_clauses: An array of where clauses to use for filtering the data
+        :param min_subject: An integer setting the minimum bin size for inclusion
+            (default: 10). NOTE(review): the signature default is None and None is
+            forwarded as-is, so the 10 is presumably applied inside
+            get_count_query - confirm.
+        :returns: whatever get_count_query returns (annotated as str)
+        """
+        return self.get_count_query(
+            table_name,
+            source_table,
+            table_cols,
+            where_clauses=where_clauses,
+            min_subject=min_subject,
+            fhir_resource="diagnosticreport",
+        )
+
     def count_documentreference(
         self,
         table_name: str,

diff --git a/cumulus_library/builders/statistics_templates/counts_templates.py b/cumulus_library/builders/statistics_templates/counts_templates.py
@@ -17,6 +17,7 @@ class CountableFhirResource(Enum):
 
     ALLERGYINTOLERANCE = "allergyintolerance"
     CONDITION = "condition"
+    DIAGNOSTICREPORT = "diagnosticreport"
     DOCUMENTREFERENCE = "documentreference"
     ENCOUNTER = "encounter"
     NONE = None

diff --git a/cumulus_library/studies/core/builder_diagnosticreport.py b/cumulus_library/studies/core/builder_diagnosticreport.py
@@ -0,0 +1,37 @@
+import cumulus_library
+from cumulus_library.studies.core.core_templates import core_templates
+from cumulus_library.template_sql import sql_utils
+
+# Columns of the raw `diagnosticreport` table, keyed by table name, each mapped
+# to the nested fields listed for it (an empty list lists none).
+# Passed to sql_utils.validate_schema() in prepare_queries() below; the result
+# is what the core template receives as its schema.
+expected_table_cols = {
+    "diagnosticreport": {
+        "id": [],
+        "status": [],
+        "subject": sql_utils.REFERENCE,
+        "encounter": sql_utils.REFERENCE,
+        "effectiveDateTime": [],
+        "effectivePeriod": ["start", "end"],
+        "issued": [],
+        "result": sql_utils.REFERENCE,
+    }
+}
+
+
+class CoreDiagnosticReportBuilder(cumulus_library.BaseTableBuilder):
+    """Table builder that queues the queries for the core DiagnosticReport tables."""
+
+    display_text = "Creating DiagnosticReport tables..."
+
+    def prepare_queries(self, *args, config: cumulus_library.StudyConfig, **kwargs):
+        """Append the DiagnosticReport build queries to self.queries.
+
+        :param config: study configuration; only its `db` attribute is read here
+        """
+        # `category` is declared as a list and `code` as a single dict; each is
+        # written to its own core__diagnosticreport_dn_* target table.
+        code_sources = [
+            sql_utils.CodeableConceptConfig(
+                source_table="diagnosticreport",
+                column_hierarchy=[("category", list)],
+                target_table="core__diagnosticreport_dn_category",
+            ),
+            sql_utils.CodeableConceptConfig(
+                source_table="diagnosticreport",
+                column_hierarchy=[("code", dict)],
+                target_table="core__diagnosticreport_dn_code",
+            ),
+        ]
+        # Order matters: the denormalization queries are queued before the core
+        # template query, which is appended last.
+        self.queries += sql_utils.denormalize_complex_objects(config.db, code_sources)
+        validated_schema = sql_utils.validate_schema(config.db, expected_table_cols)
+        self.queries.append(core_templates.get_core_template("diagnosticreport", validated_schema))
diff --git a/cumulus_library/studies/core/core_templates/completion_utils.jinja b/cumulus_library/studies/core/core_templates/completion_utils.jinja
@@ -41,6 +41,7 @@ in this table.
             (
                 BOOL_OR(ec.table_name = 'allergyintolerance')
                 AND BOOL_OR(ec.table_name = 'condition')
+                AND BOOL_OR(ec.table_name = 'diagnosticreport')
                 AND BOOL_OR(ec.table_name = 'documentreference')
                 AND BOOL_OR(ec.table_name = 'medicationrequest')
                 AND BOOL_OR(ec.table_name = 'observation')

diff --git a/cumulus_library/studies/core/core_templates/diagnosticreport.sql.jinja b/cumulus_library/studies/core/core_templates/diagnosticreport.sql.jinja
@@ -0,0 +1,102 @@
+{% import 'core_utils.jinja' as utils %}
+{% import 'unnest_utils.jinja' as unnest_utils %}
+
+-- This table includes all fields of interest to the US Core DiagnosticReport profiles.
+-- EXCEPT FOR:
+-- * the `presentedForm` field, which is an attachment array that is stripped out by the ETL.
+-- * the `performer` field, simply due to it not likely being interesting to consumers
+--   and being an array field, which would require a lot of row duplication.
+--
+-- US Core profiles for reference:
+-- * https://hl7.org/fhir/us/core/STU4/StructureDefinition-us-core-diagnosticreport-lab.html
+-- * https://hl7.org/fhir/us/core/STU4/StructureDefinition-us-core-diagnosticreport-note.html
+
+CREATE TABLE core__diagnosticreport AS
+-- First CTE: id, status, subject/encounter references, and the date columns
+-- truncated to day/week/month/year, all read from the raw diagnosticreport table.
+WITH temp_diagnosticreport AS (
+    SELECT
+        {{- utils.basic_cols('diagnosticreport', 'd', ['id']) }},
+        {{-
+            utils.nullable_cols(
+                'diagnosticreport',
+                'd',
+                [
+                    'status',
+                    ('subject', 'reference', 'subject_ref'),
+                    ('encounter', 'reference', 'encounter_ref'),
+                ],
+                schema
+            )
+        }},
+        {{-
+            utils.truncate_date_cols(
+                'diagnosticreport',
+                'd',
+                [
+                    ('effectiveDateTime', 'day'),
+                    ('effectiveDateTime', 'week'),
+                    ('effectiveDateTime', 'month'),
+                    ('effectiveDateTime', 'year'),
+                    ('effectivePeriod', 'start', 'effectivePeriod_start_day', 'day'),
+                    ('effectivePeriod', 'start', 'effectivePeriod_start_week', 'week'),
+                    ('effectivePeriod', 'start', 'effectivePeriod_start_month', 'month'),
+                    ('effectivePeriod', 'start', 'effectivePeriod_start_year', 'year'),
+                    ('effectivePeriod', 'end', 'effectivePeriod_end_day', 'day'),
+                    ('effectivePeriod', 'end', 'effectivePeriod_end_week', 'week'),
+                    ('effectivePeriod', 'end', 'effectivePeriod_end_month', 'month'),
+                    ('effectivePeriod', 'end', 'effectivePeriod_end_year', 'year'),
+                    ('issued', 'day'),
+                    ('issued', 'week'),
+                    ('issued', 'month'),
+                    ('issued', 'year'),
+                ],
+                schema
+            )
+        }}
+    FROM diagnosticreport AS d
+),
+
+-- Second CTE: the `reference` values of the `result` field, produced by
+-- unnest_utils.flatten. Presumably one row per (id, reference) pair; confirm
+-- against the macro.
+temp_result AS (
+    {{ unnest_utils.flatten('diagnosticreport', 'reference', parent_field='result') }}
+)
+
+-- Every join below is a LEFT JOIN on id, so a report with no matching code,
+-- category, or result row is still emitted (with NULLs), and a report matching
+-- several rows in a joined table is emitted once per match.
+SELECT
+    td.id,
+    td.status,
+
+    dn_category.code AS category_code,
+    dn_category.system AS category_system,
+    dn_category.display AS category_display,
+
+    dn_code.code AS code_code,
+    dn_code.system AS code_system,
+    dn_code.display AS code_display,
+
+    td.effectiveDateTime_day,
+    td.effectiveDateTime_week,
+    td.effectiveDateTime_month,
+    td.effectiveDateTime_year,
+
+    td.effectivePeriod_start_day,
+    td.effectivePeriod_start_week,
+    td.effectivePeriod_start_month,
+    td.effectivePeriod_start_year,
+
+    td.effectivePeriod_end_day,
+    td.effectivePeriod_end_week,
+    td.effectivePeriod_end_month,
+    td.effectivePeriod_end_year,
+
+    td.issued_day,
+    td.issued_week,
+    td.issued_month,
+    td.issued_year,
+
+    concat('DiagnosticReport/', td.id) AS diagnosticreport_ref,
+    td.subject_ref,
+    td.encounter_ref,
+    tr.reference AS result_ref
+
+FROM temp_diagnosticreport AS td
+LEFT JOIN core__diagnosticreport_dn_code AS dn_code ON td.id = dn_code.id
+LEFT JOIN core__diagnosticreport_dn_category AS dn_category ON td.id = dn_category.id
+LEFT JOIN temp_result AS tr ON td.id = tr.id;
diff --git a/cumulus_library/studies/core/count_core.py b/cumulus_library/studies/core/count_core.py
@@ -27,6 +27,21 @@ def count_core_condition(self, duration: str = "month"):
         ]
         return self.count_condition(table_name, from_table, cols)
 
+    def count_core_diagnosticreport(self, duration: str = "month"):
+        """Build the DiagnosticReport count query by category, code, and issued date.
+
+        :param duration: selects the `issued_<duration>` column to count by, and is
+            passed to get_table_name when naming the output table
+        :returns: the result of count_diagnosticreport for these columns
+        """
+        table_name = self.get_table_name("count_diagnosticreport", duration=duration)
+        from_table = self.get_table_name("diagnosticreport")
+        cols = [
+            ["category_display", "varchar", None],
+            ["code_display", "varchar", None],
+            # Issued is not the _preferred_ time to pull, since it is an administrative time,
+            # not a clinical one. But the clinical dates are annoyingly spread across three
+            # fields: effectiveDateTime, effectivePeriod.start, and effectivePeriod.end.
+            # So rather than do some fancy collation, just use issued. These core counts are
+            # just a rough idea of the data, not a polished final product.
+            [f"issued_{duration}", "date", None],
+        ]
+        return self.count_diagnosticreport(table_name, from_table, cols)
+
     def count_core_documentreference(self, duration: str = "month"):
         table_name = self.get_table_name("count_documentreference", duration=duration)
         from_table = self.get_table_name("documentreference")
@@ -119,6 +134,7 @@ def prepare_queries(self, *args, **kwargs):
         self.queries = [
             self.count_core_allergyintolerance(duration="month"),
             self.count_core_condition(duration="month"),
+            self.count_core_diagnosticreport(duration="month"),
             self.count_core_documentreference(duration="month"),
             self.count_core_encounter(duration="month"),
             self.count_core_encounter_all_types(),

diff --git a/cumulus_library/studies/core/manifest.toml b/cumulus_library/studies/core/manifest.toml
@@ -5,6 +5,7 @@ file_names = [
     "builder_prereq_tables.py",
     "builder_allergyintolerance.py",
     "builder_condition.py",
+    "builder_diagnosticreport.py",
     "builder_patient.py",
     "builder_encounter.py",
     "builder_documentreference.py",
@@ -19,6 +20,7 @@ file_names = [
 count_list = [
     "core__count_allergyintolerance_month",
     "core__count_condition_month",
+    "core__count_diagnosticreport_month",
     "core__count_documentreference_month",
     "core__count_encounter_month",
     "core__count_encounter_all_types",