diff --git a/cumulus_library_data_metrics/__init__.py b/cumulus_library_data_metrics/__init__.py index b86562c..e5c88e9 100644 --- a/cumulus_library_data_metrics/__init__.py +++ b/cumulus_library_data_metrics/__init__.py @@ -1,3 +1,3 @@ """Data Metrics study for Cumulus Library""" -__version__ = "6.0.0" +__version__ = "6.1.0" diff --git a/cumulus_library_data_metrics/attachment_utils.jinja b/cumulus_library_data_metrics/attachment_utils.jinja new file mode 100644 index 0000000..5789e53 --- /dev/null +++ b/cumulus_library_data_metrics/attachment_utils.jinja @@ -0,0 +1,187 @@ +{% import 'utils.jinja' as utils %} + + +{% macro _extract_attachment_info(src, unnest_field, access_field, attachment_schema, extra_field_names, extra_fields) -%} +{% set has_data_ext = attachment_schema["_data"]["extension"]["url"] and attachment_schema["_data"]["extension"]["valueCode"] %} +{% set has_url_ext = attachment_schema["_url"]["extension"]["url"] and attachment_schema["_url"]["extension"]["valueCode"] %} +( + WITH + tmp_rows AS ( + SELECT + id, + ROW_NUMBER() OVER (PARTITION BY id) AS row, + + {% for extra_field in extra_fields %} + {{ extra_field }}, + {% endfor %} + + {{ utils.nullable( + attachment_schema["contentType"], + "u." + access_field + ".contentType", + "content_type", + ) }}, + {{ utils.nullable( + attachment_schema["language"], + "u." + access_field + ".language", + "language", + ) }}, + {{ utils.nullable( + attachment_schema["data"], + "u." + access_field + ".data", + "data", + ) }}, + {{ utils.nullable( + has_data_ext, + "u." + access_field + "._data", + "_data", + ) }}, + {{ utils.nullable( + attachment_schema["url"], + "u." + access_field + ".url", + "url", + ) }}, + {{ utils.nullable( + has_url_ext, + "u." 
+ access_field + "._url", + "_url", + ) }} + + FROM {{ src }}, + UNNEST({{ unnest_field }}) AS u ({{ unnest_field }}) + ), + + data_absent_reasons AS ( + {% if has_data_ext %} + SELECT + id, + row, + BOOL_OR( + u.extension.url = 'http://hl7.org/fhir/StructureDefinition/data-absent-reason' + AND u.extension.valueCode = 'masked' + ) AS was_masked + FROM tmp_rows, + UNNEST(_data.extension) AS u (extension) + GROUP BY id, row + + {% else %} + SELECT id, row, FALSE AS was_masked FROM tmp_rows WHERE 1=0 -- return an empty table + {% endif %} + ), + + url_absent_reasons AS ( + {% if has_url_ext %} + SELECT + id, + row, + BOOL_OR( + u.extension.url = 'http://hl7.org/fhir/StructureDefinition/data-absent-reason' + AND u.extension.valueCode = 'masked' + ) AS was_masked + FROM tmp_rows, + UNNEST(_url.extension) AS u (extension) + GROUP BY id, row + + {% else %} + SELECT id, row, FALSE AS was_masked FROM tmp_rows WHERE 1=0 -- return an empty table + {% endif %} + ) + + SELECT + id, + row, + language, + + CASE WHEN content_type IS NULL THEN NULL + ELSE lower(split_part(content_type, ';', 1)) -- chop off any encoding arg + END as content_type, + + {% for name in extra_field_names %} + {{ name }}, + {% endfor %} + + ( + {{ utils.is_string_valid('data') }} + OR (data_absent_reasons.was_masked IS NOT NULL AND data_absent_reasons.was_masked) + ) AS has_data, + + ( + {{ utils.is_string_valid('url') }} + OR (url_absent_reasons.was_masked IS NOT NULL AND url_absent_reasons.was_masked) + ) AS has_url + + FROM tmp_rows + LEFT JOIN url_absent_reasons USING(id, row) + LEFT JOIN data_absent_reasons USING(id, row) +) +{% endmacro %} + + +{% macro _extract_diagnostic_report_attachments(schema) -%} +( + WITH + att_info AS {{ _extract_attachment_info( + "diagnosticreport", "presentedForm", "presentedForm", schema["presentedForm"], [], [] + ) }} + + SELECT + id, + att.row, + dr.status, + CAST(NULL AS VARCHAR) AS doc_status, + att.content_type, + att.language, + CAST(NULL AS VARCHAR) AS 
format_code, + CAST(NULL AS VARCHAR) AS format_system, + att.has_data, + att.has_url + + FROM att_info AS att + JOIN diagnosticreport AS dr USING(id) +) +{% endmacro %} + + +{% macro _extract_document_reference_attachments(schema) -%} +( + WITH + att_info AS {{ _extract_attachment_info( + "documentreference", "content", "content.attachment", schema["content"]["attachment"], + ["format_system", "format_code"], + [ + utils.nullable( + schema["content"]["format"]["system"], + "u.content.format.system", + "format_system", + ), + utils.nullable( + schema["content"]["format"]["code"], + "u.content.format.code", + "format_code", + ), + ] + ) }} + + SELECT + id, + att.row, + dr.status, + dr.docStatus AS doc_status, + att.content_type, + att.language, + att.format_code, + att.format_system, + att.has_data, + att.has_url + + FROM att_info AS att + JOIN documentreference AS dr USING(id) +) +{% endmacro %} + +{% macro extract_attachments(src, schema) -%} +{% if src == "DiagnosticReport" %} + {{ _extract_diagnostic_report_attachments(schema) }} +{% elif src == "DocumentReference" %} + {{ _extract_document_reference_attachments(schema) }} +{% endif %} +{% endmacro %} diff --git a/cumulus_library_data_metrics/c_attachment_count/README.md b/cumulus_library_data_metrics/c_attachment_count/README.md new file mode 100644 index 0000000..319e35a --- /dev/null +++ b/cumulus_library_data_metrics/c_attachment_count/README.md @@ -0,0 +1,11 @@ +# c_attachment_count + +**Count of attachment metadata** + +### Fields + +- status +- content_type +- language +- format +- content_location diff --git a/cumulus_library_data_metrics/c_attachment_count/c_attachment_count.jinja b/cumulus_library_data_metrics/c_attachment_count/c_attachment_count.jinja new file mode 100644 index 0000000..93dc583 --- /dev/null +++ b/cumulus_library_data_metrics/c_attachment_count/c_attachment_count.jinja @@ -0,0 +1,202 @@ +{% import 'utils.jinja' as utils %} +{% import 'attachment_utils.jinja' as attachment_utils %} + +CREATE 
TABLE {{ study_prefix }}__count_c_attachment_count_{{ src|lower }} AS ( +WITH + +attachments AS {{ attachment_utils.extract_attachments(src, schema) }}, + +formats AS ( + SELECT + id, + row, + + -- https://www.hl7.org/fhir/R4/valueset-formatcodes.html + CASE + WHEN format_code = 'urn:ihe:pcc:xphr:2007' + THEN 'Personal Health Records. Also known as HL7 CCD and HITSP C32' + WHEN format_code = 'urn:ihe:pcc:aps:2007' + THEN 'IHE Antepartum Summary' + WHEN format_code = 'urn:ihe:pcc:xds-ms:2007' + THEN 'XDS Medical Summaries' + WHEN format_code = 'urn:ihe:pcc:edr:2007' + THEN 'Emergency Department Referral (EDR)' + WHEN format_code = 'urn:ihe:pcc:edes:2007' + THEN 'Emergency Department Encounter Summary (EDES)' + WHEN format_code = 'urn:ihe:pcc:apr:handp:2008' + THEN 'Antepartum Record (APR) - History and Physical' + WHEN format_code = 'urn:ihe:pcc:apr:lab:2008' + THEN 'Antepartum Record (APR) - Laboratory' + WHEN format_code = 'urn:ihe:pcc:apr:edu:2008' + THEN 'Antepartum Record (APR) - Education' + WHEN format_code = 'urn:ihe:pcc:crc:2008' + THEN 'Cancer Registry Content (CRC)' + WHEN format_code = 'urn:ihe:pcc:cm:2008' + THEN 'Care Management (CM)' + WHEN format_code = 'urn:ihe:pcc:ic:2008' + THEN 'Immunization Content (IC)' + WHEN format_code = 'urn:ihe:pcc:tn:2007' + THEN 'PCC TN' + WHEN format_code = 'urn:ihe:pcc:nn:2007' + THEN 'PCC NN' + WHEN format_code = 'urn:ihe:pcc:ctn:2007' + THEN 'PCC CTN' + WHEN format_code = 'urn:ihe:pcc:edpn:2007' + THEN 'PCC EDPN' + WHEN format_code = 'urn:ihe:pcc:hp:2008' + THEN 'PCC HP' + WHEN format_code = 'urn:ihe:pcc:ldhp:2009' + THEN 'PCC LDHP' + WHEN format_code = 'urn:ihe:pcc:lds:2009' + THEN 'PCC LDS' + WHEN format_code = 'urn:ihe:pcc:mds:2009' + THEN 'PCC MDS' + WHEN format_code = 'urn:ihe:pcc:nds:2010' + THEN 'PCC NDS' + WHEN format_code = 'urn:ihe:pcc:ppvs:2010' + THEN 'PCC PPVS' + WHEN format_code = 'urn:ihe:pcc:trs:2011' + THEN 'PCC TRS' + WHEN format_code = 'urn:ihe:pcc:ets:2011' + THEN 'PCC ETS' + WHEN format_code = 
'urn:ihe:pcc:its:2011' + THEN 'PCC ITS' + WHEN format_code = 'urn:ihe:pcc:ript:2017' + THEN 'Routine Interfacility Patient Transport (RIPT)' + WHEN format_code = 'urn:ihe:iti:bppc:2007' + THEN 'Basic Patient Privacy Consents' + WHEN format_code = 'urn:ihe:iti:bppc-sd:2007' + THEN 'Basic Patient Privacy Consents with Scanned Document' + WHEN format_code = 'urn:ihe:iti:xds-sd:pdf:2008' + THEN 'PDF embedded in CDA per XDS-SD profile' + WHEN format_code = 'urn:ihe:iti:xds-sd:text:2008' + THEN 'Text embedded in CDA per XDS-SD profile' + WHEN format_code = 'urn:ihe:iti:xdw:2011:workflowDoc' + THEN 'XDW Workflow Document' + WHEN format_code = 'urn:ihe:iti:dsg:detached:2014' + THEN 'DSG Detached Document' + WHEN format_code = 'urn:ihe:iti:dsg:enveloping:2014' + THEN 'DSG Enveloping Document' + WHEN format_code = 'urn:ihe:iti:appc:2016:consent' + THEN 'Advanced Patient Privacy Consents' + WHEN format_code = 'urn:ihe:iti:xds:2017:mimeTypeSufficient' + THEN 'mimeType Sufficient' + WHEN format_code = 'urn:ihe:lab:xd-lab:2008' + THEN 'CDA Laboratory Report' + WHEN format_code = 'urn:ihe:rad:TEXT' + THEN 'Radiology XDS-I Text' + WHEN format_code = 'urn:ihe:rad:PDF' + THEN 'Radiology XDS-I PDF' + WHEN format_code = 'urn:ihe:rad:CDA:ImagingReportStructuredHeadings:2013' + THEN 'Radiology XDS-I Structured CDA' + WHEN format_code = 'urn:ihe:card:imaging:2011' + THEN 'Cardiac Imaging Report' + WHEN format_code = 'urn:ihe:card:CRC:2012' + THEN 'Cardiology CRC' + WHEN format_code = 'urn:ihe:card:EPRC-IE:2014' + THEN 'Cardiology EPRC-IE' + WHEN format_code = 'urn:ihe:dent:TEXT' + THEN 'Dental Text' + WHEN format_code = 'urn:ihe:dent:PDF' + THEN 'Dental PDF' + WHEN format_code = 'urn:ihe:dent:CDA:ImagingReportStructuredHeadings:2013' + THEN 'Dental CDA' + WHEN format_code = 'urn:ihe:pat:apsr:all:2010' + THEN 'Anatomic Pathology Structured Report All' + WHEN format_code = 'urn:ihe:pat:apsr:cancer:all:2010' + THEN 'Anatomic Pathology Structured Report Cancer All' + WHEN format_code = 
'urn:ihe:pat:apsr:cancer:breast:2010' + THEN 'Anatomic Pathology Structured Report Cancer Breast' + WHEN format_code = 'urn:ihe:pat:apsr:cancer:colon:2010' + THEN 'Anatomic Pathology Structured Report Cancer Colon' + WHEN format_code = 'urn:ihe:pat:apsr:cancer:prostate:2010' + THEN 'Anatomic Pathology Structured Report Cancer Prostate' + WHEN format_code = 'urn:ihe:pat:apsr:cancer:thyroid:2010' + THEN 'Anatomic Pathology Structured Report Cancer Thyroid' + WHEN format_code = 'urn:ihe:pat:apsr:cancer:lung:2010' + THEN 'Anatomic Pathology Structured Report Cancer Lung' + WHEN format_code = 'urn:ihe:pat:apsr:cancer:skin:2010' + THEN 'Anatomic Pathology Structured Report Cancer Skin' + WHEN format_code = 'urn:ihe:pat:apsr:cancer:kidney:2010' + THEN 'Anatomic Pathology Structured Report Cancer Kidney' + WHEN format_code = 'urn:ihe:pat:apsr:cancer:cervix:2010' + THEN 'Anatomic Pathology Structured Report Cancer Cervix' + WHEN format_code = 'urn:ihe:pat:apsr:cancer:endometrium:2010' + THEN 'Anatomic Pathology Structured Report Cancer Endometrium' + WHEN format_code = 'urn:ihe:pat:apsr:cancer:ovary:2010' + THEN 'Anatomic Pathology Structured Report Cancer Ovary' + WHEN format_code = 'urn:ihe:pat:apsr:cancer:esophagus:2010' + THEN 'Anatomic Pathology Structured Report Cancer Esophagus' + WHEN format_code = 'urn:ihe:pat:apsr:cancer:stomach:2010' + THEN 'Anatomic Pathology Structured Report Cancer Stomach' + WHEN format_code = 'urn:ihe:pat:apsr:cancer:liver:2010' + THEN 'Anatomic Pathology Structured Report Cancer Liver' + WHEN format_code = 'urn:ihe:pat:apsr:cancer:pancreas:2010' + THEN 'Anatomic Pathology Structured Report Cancer Pancreas' + WHEN format_code = 'urn:ihe:pat:apsr:cancer:testis:2010' + THEN 'Anatomic Pathology Structured Report Cancer Testis' + WHEN format_code = 'urn:ihe:pat:apsr:cancer:urinary_bladder:2010' + THEN 'Anatomic Pathology Structured Report Cancer Urinary Bladder' + WHEN format_code = 'urn:ihe:pat:apsr:cancer:lip_oral_cavity:2010' + THEN 'Anatomic 
Pathology Structured Report Cancer Lip Oral Cavity' + WHEN format_code = 'urn:ihe:pat:apsr:cancer:pharynx:2010' + THEN 'Anatomic Pathology Structured Report Cancer Pharynx' + WHEN format_code = 'urn:ihe:pat:apsr:cancer:salivary_gland:2010' + THEN 'Anatomic Pathology Structured Report Cancer Salivary Gland' + WHEN format_code = 'urn:ihe:pat:apsr:cancer:larynx:2010' + THEN 'Anatomic Pathology Structured Report Cancer Larynx' + WHEN format_code = 'urn:ihe:pharm:pre:2010' + THEN 'Pharmacy Pre' + WHEN format_code = 'urn:ihe:pharm:padv:2010' + THEN 'Pharmacy PADV' + WHEN format_code = 'urn:ihe:pharm:dis:2010' + THEN 'Pharmacy DIS' + WHEN format_code = 'urn:ihe:pharm:pml:2013' + THEN 'Pharmacy PML' + WHEN format_code = 'urn:hl7-org:sdwg:ccda-structuredBody:1.1' + THEN 'For documents following C-CDA 1.1 constraints using a structured body.' + WHEN format_code = 'urn:hl7-org:sdwg:ccda-nonXMLBody:1.1' + THEN 'For documents following C-CDA 1.1 constraints using a non structured body.' + WHEN format_code = 'urn:hl7-org:sdwg:ccda-structuredBody:2.1' + THEN 'For documents following C-CDA 2.1 constraints using a structured body.' + WHEN format_code = 'urn:hl7-org:sdwg:ccda-nonXMLBody:2.1' + THEN 'For documents following C-CDA 2.1 constraints using a non structured body.' + ELSE NULL -- this system binding only holds the above values, ignore unexpected codes + END AS display + + FROM attachments + -- This system binding is marked as extensible, but for the sake of reliably providing + -- consistent & readable display names, we'll only support the preferred system for now. 
+ WHERE format_system = 'http://ihe.net/fhir/ValueSet/IHE.FormatCode.codesystem' +), + +simplified AS ( + SELECT + id, + CAST(row AS VARCHAR) AS row, + {{ utils.coalesce_missing('status') }} AS status, + {{ utils.coalesce_missing('content_type') }} AS content_type, + {{ utils.coalesce_missing('language') }} AS language, + {{ utils.coalesce_missing('formats.display') }} AS format, + CASE + WHEN has_data and has_url + THEN 'Embedded and external' + WHEN has_data + THEN 'Embedded' + WHEN has_url + THEN 'External' + ELSE 'Not available' + END AS content_location + + FROM attachments + LEFT JOIN formats USING(id, row) +) + +{% call utils.make_counts('simplified', output_mode, unique_ids=['id', 'row']) %} + status, + content_type, + language, + format, + content_location +{% endcall %} +); \ No newline at end of file diff --git a/cumulus_library_data_metrics/c_attachment_count/c_attachment_count.py b/cumulus_library_data_metrics/c_attachment_count/c_attachment_count.py new file mode 100644 index 0000000..00f828d --- /dev/null +++ b/cumulus_library_data_metrics/c_attachment_count/c_attachment_count.py @@ -0,0 +1,26 @@ +"""Module for generating c_attachment_count tables""" + +from typing import ClassVar + +import cumulus_library + +from cumulus_library_data_metrics import resource_info +from cumulus_library_data_metrics.base import MetricMixin + + +# Survey of attachments themselves +# e.g. How many attachments have extra info like language and format? 
+class AttachmentsCountBuilder(MetricMixin, cumulus_library.BaseTableBuilder): + name = "c_attachment_count" + uses_fields: ClassVar[dict] = { + "DiagnosticReport": { + **resource_info.DIAGNOSTIC_REPORT_ATTACHMENT_SCHEMA, + }, + "DocumentReference": { + **resource_info.DOCREF_ATTACHMENT_SCHEMA, + }, + } + + def add_metric_queries(self) -> None: + self.queries.append(self.render_sql(self.name, src="DiagnosticReport")) + self.queries.append(self.render_sql(self.name, src="DocumentReference")) diff --git a/cumulus_library_data_metrics/c_content_type_use/README.md b/cumulus_library_data_metrics/c_content_type_use/README.md new file mode 100644 index 0000000..2b35035 --- /dev/null +++ b/cumulus_library_data_metrics/c_content_type_use/README.md @@ -0,0 +1,24 @@ +# c_content_type_use + +**Count of resources by attachment content types** + +### Fields + +- status +- doc_status +- year +- type +- content_types + +### Notes on `doc_status` + +This field is always `cumulus__none` on resources that don't support it, +like `DiagnosticReport`. + +### Notes on `type` + +This field pulls from different FHIR fields depending on the resource. +It holds `DiagnosticReport.code` and `DocumentReference.type`. + +These type values are not yet converted to display values. +They are just presented as the raw `code` value. 
diff --git a/cumulus_library_data_metrics/c_content_type_use/c_content_type_use.jinja b/cumulus_library_data_metrics/c_content_type_use/c_content_type_use.jinja new file mode 100644 index 0000000..46a0bba --- /dev/null +++ b/cumulus_library_data_metrics/c_content_type_use/c_content_type_use.jinja @@ -0,0 +1,77 @@ +{% import 'utils.jinja' as utils %} +{% import 'attachment_utils.jinja' as attachment_utils %} + +CREATE TABLE {{ study_prefix }}__count_c_content_type_use_{{ src|lower }} AS ( +WITH + +attachments AS {{ attachment_utils.extract_attachments(src, schema) }}, + +simplified_attachments AS ( + SELECT + id, + row, + + -- Null out the content type if neither URL nor data is present. + -- Per the spec (https://www.hl7.org/fhir/R4/datatypes.html#Attachment, att-1 constraint), + -- that means a positive assertion that no content is available for the given content_type + -- and language. So let's not report it in the available content types. + CASE + WHEN has_data OR has_url + THEN content_type + ELSE NULL + END AS content_type + + FROM attachments +), + +content_types AS ( + SELECT + id, + + {% call utils.array_to_string() %} + ARRAY_AGG(content_type) + {% endcall %} AS content_types + + FROM simplified_attachments + GROUP BY id +), + +{% if src == "DiagnosticReport" %} +{# LOINC is an extensible binding for both DiagnosticReport US Core profiles #} +types AS {{ utils.extract_codes(src, 'code', [ + 'http://loinc.org', +]) }}, +{% elif src == "DocumentReference" %} +{# These two systems are required bindings for US Core #} +types AS {{ utils.extract_codes(src, 'type', [ + 'http://loinc.org', + 'http://terminology.hl7.org/CodeSystem/v3-NullFlavor', +]) }}, +{% endif %} + +simplified AS ( + SELECT + id, + + {{ utils.coalesce_missing('status') }} AS status, +{% if src == "DocumentReference" %} + {{ utils.coalesce_missing('docStatus') }} AS doc_status, +{% else %} + 'cumulus__none' AS doc_status, +{% endif %} + {{ utils.get_date_string(dates, 'year') }} AS "year", + {{ 
utils.array_to_string('types.codes') }} AS type, + {{ utils.coalesce_missing('content_types.content_types') }} as content_types + FROM {{ src }} AS src + LEFT JOIN content_types USING (id) + LEFT JOIN types USING (id) +) + +{% call utils.make_counts('simplified', output_mode) %} + status, + doc_status, + "year", + type, + content_types +{% endcall %} +); \ No newline at end of file diff --git a/cumulus_library_data_metrics/c_content_type_use/c_content_type_use.py b/cumulus_library_data_metrics/c_content_type_use/c_content_type_use.py new file mode 100644 index 0000000..e38b7f1 --- /dev/null +++ b/cumulus_library_data_metrics/c_content_type_use/c_content_type_use.py @@ -0,0 +1,25 @@ +"""Module for generating c_content_type_use tables""" + +from typing import ClassVar + +import cumulus_library + +from cumulus_library_data_metrics import resource_info +from cumulus_library_data_metrics.base import MetricMixin + + +# Survey of content types available for each resource +class ContentTypeUseBuilder(MetricMixin, cumulus_library.BaseTableBuilder): + name = "c_content_type_use" + uses_fields: ClassVar[dict] = { + "DiagnosticReport": { + **resource_info.DIAGNOSTIC_REPORT_ATTACHMENT_SCHEMA, + }, + "DocumentReference": { + **resource_info.DOCREF_ATTACHMENT_SCHEMA, + }, + } + + def add_metric_queries(self) -> None: + self.queries.append(self.render_sql(self.name, src="DiagnosticReport")) + self.queries.append(self.render_sql(self.name, src="DocumentReference")) diff --git a/cumulus_library_data_metrics/manifest.toml b/cumulus_library_data_metrics/manifest.toml index 6f87787..2d9a9f2 100644 --- a/cumulus_library_data_metrics/manifest.toml +++ b/cumulus_library_data_metrics/manifest.toml @@ -4,6 +4,8 @@ dynamic_study_prefix = "gen_prefix.py" [file_config] file_names = [ "meta/meta.py", + "c_attachment_count/c_attachment_count.py", + "c_content_type_use/c_content_type_use.py", "c_pt_count/c_pt_count.py", "c_pt_deceased_count/c_pt_deceased_count.py", 
"c_resource_count/c_resource_count.py", @@ -19,6 +21,10 @@ file_names = [ [export_config] count_list = [ + "count_c_attachment_count_diagnosticreport", + "count_c_attachment_count_documentreference", + "count_c_content_type_use_diagnosticreport", + "count_c_content_type_use_documentreference", "count_c_pt_count", "count_c_pt_deceased_count", "count_c_resource_count_allergyintolerance_month", diff --git a/cumulus_library_data_metrics/resource_info.py b/cumulus_library_data_metrics/resource_info.py index f6afd82..9302bba 100644 --- a/cumulus_library_data_metrics/resource_info.py +++ b/cumulus_library_data_metrics/resource_info.py @@ -111,3 +111,45 @@ "Patient", "Procedure", } + +# Use these in your metrics' "uses_fields" section when you want to inspect attachments. +# Usually with docref_utils.extract_attachments(). +# Use like: +# uses_fields: ClassVar[dict] = { +# "DiagnosticReport": { +# **DIAGNOSTIC_REPORT_ATTACHMENT_SCHEMA, +# }, +# "DocumentReference": { +# **DOCREF_ATTACHMENT_SCHEMA, +# }, +# } +ATTACHMENT_SCHEMA = { + "contentType": {}, + "language": {}, + "data": {}, + "_data": { + "extension": { + "url": {}, + "valueCode": {}, + }, + }, + "url": {}, + "_url": { + "extension": { + "url": {}, + "valueCode": {}, + }, + }, +} +DIAGNOSTIC_REPORT_ATTACHMENT_SCHEMA = { + "presentedForm": ATTACHMENT_SCHEMA, +} +DOCREF_ATTACHMENT_SCHEMA = { + "content": { + "attachment": ATTACHMENT_SCHEMA, + "format": { + "system": {}, + "code": {}, + }, + }, +} diff --git a/cumulus_library_data_metrics/utils.jinja b/cumulus_library_data_metrics/utils.jinja index adb1951..558eb01 100644 --- a/cumulus_library_data_metrics/utils.jinja +++ b/cumulus_library_data_metrics/utils.jinja @@ -22,6 +22,16 @@ {%- endif %} {%- endmacro %} + +{%- macro nullable(schema_val, field, alias) -%} + {% if schema_val %} + {{ field }} AS {{ alias }} + {% else %} + CAST(NULL AS VARCHAR) AS {{ alias }} + {% endif %} +{%- endmacro -%} + + -- Extracts the codes from a codeableConcept and returns a select 
with (id, codes). {% macro extract_codes(src, src_field, system=null, is_array=false) -%} ( @@ -206,10 +216,15 @@ -- Abstracts whether we're doing a CUBE call or a simple GROUP BY. -- Designed to be used as a call block. -{% macro make_counts(src_table, mode) -%} +{% macro make_counts(src_table, mode, unique_ids=null) -%} {% set cols = caller() %} +{% set unique_ids = unique_ids or ['id'] %} SELECT - COUNT(DISTINCT id) AS cnt, +{% if unique_ids|length > 1 %} + COUNT(DISTINCT CONCAT({{ ", ':', ".join(unique_ids) }})) AS cnt, +{% else %} + COUNT(DISTINCT {{ unique_ids[0] }}) AS cnt, +{% endif %} {{ cols }} FROM {{ src_table }} {% if mode == "cube" %} diff --git a/tests/data/c_attachment_count/general/diagreports.ndjson b/tests/data/c_attachment_count/general/diagreports.ndjson new file mode 100644 index 0000000..5b987ba --- /dev/null +++ b/tests/data/c_attachment_count/general/diagreports.ndjson @@ -0,0 +1,6 @@ +{"resourceType": "DiagnosticReport", "id": "everything", "status": "everything", "presentedForm": [{"contentType": "text/html", "language": "en-US", "data": "xxx", "url": "xxx"}]} +{"resourceType": "DiagnosticReport", "id": "multiple-attachments", "presentedForm": [{"contentType": "text/plain", "language": "ja"}, {"contentType": "text/plain", "language": "zh-HK"}]} +{"resourceType": "DiagnosticReport", "id": "redacted-data", "status": "redacted-data", "presentedForm": [{"_data": {"extension": [{"url": "http://hl7.org/fhir/StructureDefinition/data-absent-reason", "valueCode": "masked"}]}}]} +{"resourceType": "DiagnosticReport", "id": "redacted-url", "status": "redacted-url", "presentedForm": [{"_url": {"extension": [{"url": "blarg", "valueCode": "blarg"}, {"url": "http://hl7.org/fhir/StructureDefinition/data-absent-reason", "valueCode": "masked"}]}}]} +{"resourceType": "DiagnosticReport", "id": "missing-data", "status": "missing-data", "presentedForm": [{"_data": {"extension": [{"url": "http://hl7.org/fhir/StructureDefinition/data-absent-reason", "valueCode": 
"unknown"}]}}]} +{"resourceType": "DiagnosticReport", "id": "nothing"} diff --git a/tests/data/c_attachment_count/general/docs.ndjson b/tests/data/c_attachment_count/general/docs.ndjson new file mode 100644 index 0000000..1cad91e --- /dev/null +++ b/tests/data/c_attachment_count/general/docs.ndjson @@ -0,0 +1,7 @@ +{"resourceType": "DocumentReference", "id": "everything", "status": "everything", "content": [{"attachment": {"contentType": "text/html", "language": "en-US", "data": "xxx", "url": "xxx"}, "format": {"code": "urn:ihe:pcc:aps:2007", "system": "http://ihe.net/fhir/ValueSet/IHE.FormatCode.codesystem"}}]} +{"resourceType": "DocumentReference", "id": "unknown-format", "status": "unknown-format", "content": [{"format": {"code": "blarg", "system": "http://ihe.net/fhir/ValueSet/IHE.FormatCode.codesystem"}}]} +{"resourceType": "DocumentReference", "id": "multiple-attachments", "content": [{"attachment": {"contentType": "text/plain", "language": "ja"}}, {"attachment": {"contentType": "text/plain", "language": "zh-HK"}}]} +{"resourceType": "DocumentReference", "id": "redacted-data", "status": "redacted-data", "content": [{"attachment": {"_data": {"extension": [{"url": "http://hl7.org/fhir/StructureDefinition/data-absent-reason", "valueCode": "masked"}]}}}]} +{"resourceType": "DocumentReference", "id": "redacted-url", "status": "redacted-url", "content": [{"attachment": {"_url": {"extension": [{"url": "blarg", "valueCode": "blarg"}, {"url": "http://hl7.org/fhir/StructureDefinition/data-absent-reason", "valueCode": "masked"}]}}}]} +{"resourceType": "DocumentReference", "id": "missing-data", "status": "missing-data", "content": [{"attachment": {"_data": {"extension": [{"url": "http://hl7.org/fhir/StructureDefinition/data-absent-reason", "valueCode": "unknown"}]}}}]} +{"resourceType": "DocumentReference", "id": "nothing"} diff --git a/tests/data/c_attachment_count/general/expected_diagnosticreport.csv 
b/tests/data/c_attachment_count/general/expected_diagnosticreport.csv new file mode 100644 index 0000000..244f9c0 --- /dev/null +++ b/tests/data/c_attachment_count/general/expected_diagnosticreport.csv @@ -0,0 +1,7 @@ +cnt,status,content_type,language,format,content_location +1,redacted-url,cumulus__none,cumulus__none,cumulus__none,External +1,redacted-data,cumulus__none,cumulus__none,cumulus__none,Embedded +1,missing-data,cumulus__none,cumulus__none,cumulus__none,Not available +1,everything,text/html,en-US,cumulus__none,Embedded and external +1,cumulus__none,text/plain,zh-HK,cumulus__none,Not available +1,cumulus__none,text/plain,ja,cumulus__none,Not available diff --git a/tests/data/c_attachment_count/general/expected_documentreference.csv b/tests/data/c_attachment_count/general/expected_documentreference.csv new file mode 100644 index 0000000..25eaa22 --- /dev/null +++ b/tests/data/c_attachment_count/general/expected_documentreference.csv @@ -0,0 +1,8 @@ +cnt,status,content_type,language,format,content_location +1,unknown-format,cumulus__none,cumulus__none,cumulus__none,Not available +1,redacted-url,cumulus__none,cumulus__none,cumulus__none,External +1,redacted-data,cumulus__none,cumulus__none,cumulus__none,Embedded +1,missing-data,cumulus__none,cumulus__none,cumulus__none,Not available +1,everything,text/html,en-US,IHE Antepartum Summary,Embedded and external +1,cumulus__none,text/plain,zh-HK,cumulus__none,Not available +1,cumulus__none,text/plain,ja,cumulus__none,Not available diff --git a/tests/data/c_attachment_count/low-schema/diagreports.ndjson b/tests/data/c_attachment_count/low-schema/diagreports.ndjson new file mode 100644 index 0000000..1e41cc1 --- /dev/null +++ b/tests/data/c_attachment_count/low-schema/diagreports.ndjson @@ -0,0 +1 @@ +{"resourceType": "DiagnosticReport", "id": "nothing"} diff --git a/tests/data/c_attachment_count/low-schema/docs.ndjson b/tests/data/c_attachment_count/low-schema/docs.ndjson new file mode 100644 index 0000000..b9aef4e 
--- /dev/null +++ b/tests/data/c_attachment_count/low-schema/docs.ndjson @@ -0,0 +1 @@ +{"resourceType": "DocumentReference", "id": "nothing"} diff --git a/tests/data/c_attachment_count/low-schema/expected_diagnosticreport.csv b/tests/data/c_attachment_count/low-schema/expected_diagnosticreport.csv new file mode 100644 index 0000000..10ca9f7 --- /dev/null +++ b/tests/data/c_attachment_count/low-schema/expected_diagnosticreport.csv @@ -0,0 +1 @@ +cnt,status,content_type,language,format,content_location diff --git a/tests/data/c_attachment_count/low-schema/expected_documentreference.csv b/tests/data/c_attachment_count/low-schema/expected_documentreference.csv new file mode 100644 index 0000000..10ca9f7 --- /dev/null +++ b/tests/data/c_attachment_count/low-schema/expected_documentreference.csv @@ -0,0 +1 @@ +cnt,status,content_type,language,format,content_location diff --git a/tests/data/c_content_type_use/general/diagreports.ndjson b/tests/data/c_content_type_use/general/diagreports.ndjson new file mode 100644 index 0000000..113ab6b --- /dev/null +++ b/tests/data/c_content_type_use/general/diagreports.ndjson @@ -0,0 +1,6 @@ +{"resourceType": "DiagnosticReport", "id": "basic", "status": "basic", "issued": "2019-10-11", "code": {"coding": [{"system": "http://loinc.org", "code": "10418-2"}]}, "presentedForm": [{"contentType": "text/html; charset=utf8", "data": "xxx"}]} +{"resourceType": "DiagnosticReport", "id": "multiple-content-types", "status": "multiple-content-types", "presentedForm": [{"contentType": "text/plain", "url": "xxx"}, {"contentType": "text/html", "data": "xxx"}]} +{"resourceType": "DiagnosticReport", "id": "no-content", "status": "no-content", "presentedForm": [{"contentType": "text/with-data", "data": "xxx"}, {"contentType": "text/no-data"}]} +{"resourceType": "DiagnosticReport", "id": "multiple-types", "status": "multiple-types", "code": {"coding": [{"system": "http://loinc.org", "code": "10458-8"}, {"system": "http://loinc.org", "code": "10877-9"}]}} 
+{"resourceType": "DiagnosticReport", "id": "unknown-type", "status": "unknown-type", "code": {"coding": [{"system": "heyo", "code": "bye"}]}} +{"resourceType": "DiagnosticReport", "id": "nothing"} diff --git a/tests/data/c_content_type_use/general/docs.ndjson b/tests/data/c_content_type_use/general/docs.ndjson new file mode 100644 index 0000000..74d980c --- /dev/null +++ b/tests/data/c_content_type_use/general/docs.ndjson @@ -0,0 +1,6 @@ +{"resourceType": "DocumentReference", "id": "basic", "status": "basic", "docStatus": "amended", "date": "2012-01-02", "type": {"coding": [{"system": "http://loinc.org", "code": "24538-1"}]}, "content": [{"attachment": {"contentType": "text/html; charset=utf8", "data": "xxx"}}]} +{"resourceType": "DocumentReference", "id": "multiple-content-types", "status": "multiple-content-types", "content": [{"attachment": {"contentType": "text/plain", "data": "xxx"}}, {"attachment": {"contentType": "text/HTML", "url": "xxx"}}]} +{"resourceType": "DocumentReference", "id": "no-content", "status": "no-content", "content": [{"attachment": {"contentType": "text/with-data", "data": "xxx"}}, {"attachment": {"contentType": "text/no-data"}}]} +{"resourceType": "DocumentReference", "id": "multiple-types", "status": "multiple-types", "type": {"coding": [{"code": "11488-4", "system": "http://loinc.org"}, {"code": "UNK", "system": "http://terminology.hl7.org/CodeSystem/v3-NullFlavor"}]}} +{"resourceType": "DocumentReference", "id": "unknown-type", "status": "unknown-type", "type": {"coding": [{"code": "18825-0", "system": "http://loinc.org"}, {"code": "foo", "system": "blarg"}]}} +{"resourceType": "DocumentReference", "id": "nothing"} diff --git a/tests/data/c_content_type_use/general/expected_diagnosticreport.csv b/tests/data/c_content_type_use/general/expected_diagnosticreport.csv new file mode 100644 index 0000000..a421ee1 --- /dev/null +++ b/tests/data/c_content_type_use/general/expected_diagnosticreport.csv @@ -0,0 +1,7 @@ 
+cnt,status,doc_status,year,type,content_types +1,unknown-type,cumulus__none,cumulus__none,cumulus__none,cumulus__none +1,no-content,cumulus__none,cumulus__none,cumulus__none,text/with-data +1,multiple-types,cumulus__none,cumulus__none,10458-8; 10877-9,cumulus__none +1,multiple-content-types,cumulus__none,cumulus__none,cumulus__none,text/html; text/plain +1,cumulus__none,cumulus__none,cumulus__none,cumulus__none,cumulus__none +1,basic,cumulus__none,2019,10418-2,text/html diff --git a/tests/data/c_content_type_use/general/expected_documentreference.csv b/tests/data/c_content_type_use/general/expected_documentreference.csv new file mode 100644 index 0000000..37618fc --- /dev/null +++ b/tests/data/c_content_type_use/general/expected_documentreference.csv @@ -0,0 +1,7 @@ +cnt,status,doc_status,year,type,content_types +1,unknown-type,cumulus__none,cumulus__none,18825-0,cumulus__none +1,no-content,cumulus__none,cumulus__none,cumulus__none,text/with-data +1,multiple-types,cumulus__none,cumulus__none,11488-4; UNK,cumulus__none +1,multiple-content-types,cumulus__none,cumulus__none,cumulus__none,text/html; text/plain +1,cumulus__none,cumulus__none,cumulus__none,cumulus__none,cumulus__none +1,basic,amended,2012,24538-1,text/html diff --git a/tests/test_metrics.py b/tests/test_metrics.py index 989da8f..ddf1835 100644 --- a/tests/test_metrics.py +++ b/tests/test_metrics.py @@ -20,6 +20,13 @@ class MetricsTestCase(unittest.TestCase): def test_meta(self): self.run_study("meta") + def test_c_attachment_count(self): + self.run_study("c_attachment_count", prefix="count_") + self.run_study("c_attachment_count", prefix="count_", test="low-schema") + + def test_c_content_type_use(self): + self.run_study("c_content_type_use", prefix="count_") + def test_c_pt_count(self): self.run_study("c_pt_count", prefix="count_")