-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Discovery study * Schema validation, code def cleanup
- Loading branch information
1 parent
73f3b1d
commit cd3b776
Showing
11 changed files
with
483 additions
and
33 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
# Fields holding Codings/CodeableConcepts that the discovery study scans for
# the codes actually in use.
#
# Every entry names a table and a column. Two optional boolean flags describe
# the shape of that column:
#   - is_array: the column is an array of CodeableConcepts
#   - is_bare_coding: the column is a Coding that is not wrapped in a concept
# With neither flag set, the column is treated as a 0..1 or 1..1
# CodeableConcept.
# TODO: if another state is needed, move to an Enum


def _code_def(table_name: str, column_name: str, **flags: bool) -> dict:
    """Build one code_list entry, storing only the flags explicitly passed."""
    return {"table_name": table_name, "column_name": column_name, **flags}


code_list = [
    # Condition
    _code_def("condition", "category", is_array=True),
    _code_def("condition", "code"),
    # DocumentReference
    _code_def("documentreference", "type"),
    _code_def("documentreference", "category", is_array=True),
    # Encounter
    _code_def("encounter", "class", is_bare_coding=True),
    _code_def("encounter", "type", is_array=True),
    _code_def("encounter", "servicetype"),
    _code_def("encounter", "priority"),
    _code_def("encounter", "reasoncode", is_array=True),
    # Medication
    _code_def("medication", "code"),
    # Observation
    _code_def("observation", "category", is_array=True),
    _code_def("observation", "code"),
    # Patient
    _code_def("patient", "maritalstatus"),
]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,82 @@ | ||
""" Module for generating encounter codeableConcept table""" | ||
|
||
from cumulus_library.base_table_builder import BaseTableBuilder | ||
from cumulus_library.helper import get_progress_bar, query_console_output | ||
from cumulus_library.template_sql.templates import get_code_system_pairs | ||
from cumulus_library.template_sql.utils import ( | ||
is_codeable_concept_array_populated, | ||
is_codeable_concept_populated, | ||
is_code_populated, | ||
) | ||
|
||
from cumulus_library.studies.discovery.code_definitions import code_list | ||
|
||
|
||
class CodeDetectionBuilder(BaseTableBuilder):
    """Builds the query creating discovery__code_sources from code_list."""

    display_text = "Selecting unique code systems..."

    def _check_codes_in_fields(
        self, code_sources: list[dict], schema, cursor
    ) -> list[dict]:
        """Checks if Coding/CodeableConcept fields are present and populated

        :param code_sources: dicts with table_name and column_name keys, plus
            optional is_array/is_bare_coding flags (a missing flag is treated
            as False)
        :param schema: the schema/db name, matching the cursor
        :param cursor: A database cursor object
        :returns: the same list, each dict updated in place with a has_data key
        """
        with get_progress_bar() as progress:
            task = progress.add_task(
                "Discovering available coding systems...",
                total=len(code_sources),
            )
            for code_source in code_sources:
                # NOTE(review): the SQL template tests is_bare_coding before
                # is_array, the reverse of the order used here. An entry with
                # both flags set would be checked as an array but rendered as
                # a bare Coding - confirm no entry sets both.
                if code_source.get("is_array", False):
                    is_populated = is_codeable_concept_array_populated
                elif code_source.get("is_bare_coding", False):
                    is_populated = is_code_populated
                else:
                    is_populated = is_codeable_concept_populated
                code_source["has_data"] = is_populated(
                    schema,
                    code_source["table_name"],
                    code_source["column_name"],
                    cursor,
                    allow_partial=False,
                )
                progress.advance(task)
        return code_sources

    def prepare_queries(self, cursor: object, schema: str):
        """Constructs the query listing the codings found in code_list fields

        :param cursor: A database cursor object
        :param schema: the schema/db name, matching the cursor
        :raises KeyError: if a code_list entry lacks table_name or column_name
        """
        required_keys = ("table_name", "column_name")
        code_sources = []
        for code_definition in code_list:
            if any(key not in code_definition for key in required_keys):
                raise KeyError(
                    "Expected table_name and column_name keys in "
                    f"{str(code_definition)}"
                )
            # Defaults first, so values from the definition take precedence;
            # this also copies the entry, leaving code_list itself untouched.
            code_sources.append(
                {
                    "is_bare_coding": False,
                    "is_array": False,
                    "has_data": False,
                    **code_definition,
                }
            )

        code_sources = self._check_codes_in_fields(code_sources, schema, cursor)
        query = get_code_system_pairs("discovery__code_sources", code_sources)
        self.queries.append(query)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,127 @@ | ||
-- noqa: disable=all
/*
This is a reference output of the SQL generated by code_detection.py
to create the discovery__code_sources table, against the synthea dataset.
It is provided as a form of documentation only and will not be invoked directly.
*/

CREATE TABLE discovery__code_sources AS
SELECT DISTINCT
    'condition' AS table_name,
    'category' AS column_name,
    t2.row2.code,
    t2.row2.display,
    t2.row2.system
FROM condition,
    UNNEST(category) AS t1 (row1),
    UNNEST(t1.row1.coding) AS t2 (row2)
UNION
SELECT DISTINCT
    'condition' AS table_name,
    'code' AS column_name,
    t.row.code,
    t.row.display,
    t.row.system
FROM condition,
    UNNEST(code.coding) AS t (row)
UNION
SELECT DISTINCT
    'documentreference' AS table_name,
    'type' AS column_name,
    t.row.code,
    t.row.display,
    t.row.system
FROM documentreference,
    UNNEST(type.coding) AS t (row)
UNION
SELECT DISTINCT
    'documentreference' AS table_name,
    'category' AS column_name,
    t2.row2.code,
    t2.row2.display,
    t2.row2.system
FROM documentreference,
    UNNEST(category) AS t1 (row1),
    UNNEST(t1.row1.coding) AS t2 (row2)
UNION
SELECT *
FROM (
    VALUES (
        ('encounter','class', '', '', '')
    )
)
    AS t ( table_name, column_name, code, display, system ) -- noqa: L025
UNION
SELECT DISTINCT
    'encounter' AS table_name,
    'type' AS column_name,
    t2.row2.code,
    t2.row2.display,
    t2.row2.system
FROM encounter,
    UNNEST(type) AS t1 (row1),
    UNNEST(t1.row1.coding) AS t2 (row2)
UNION
SELECT *
FROM (
    VALUES (
        ('encounter','servicetype', '', '', '')
    )
)
    AS t ( table_name, column_name, code, display, system ) -- noqa: L025
UNION
SELECT *
FROM (
    VALUES (
        ('encounter','priority', '', '', '')
    )
)
    AS t ( table_name, column_name, code, display, system ) -- noqa: L025
UNION
SELECT DISTINCT
    'encounter' AS table_name,
    'reasoncode' AS column_name,
    t2.row2.code,
    t2.row2.display,
    t2.row2.system
FROM encounter,
    UNNEST(reasoncode) AS t1 (row1),
    UNNEST(t1.row1.coding) AS t2 (row2)
UNION
SELECT DISTINCT
    'medication' AS table_name,
    'code' AS column_name,
    t.row.code,
    t.row.display,
    t.row.system
FROM medication,
    UNNEST(code.coding) AS t (row)
UNION
SELECT DISTINCT
    'observation' AS table_name,
    'category' AS column_name,
    t2.row2.code,
    t2.row2.display,
    t2.row2.system
FROM observation,
    UNNEST(category) AS t1 (row1),
    UNNEST(t1.row1.coding) AS t2 (row2)
UNION
SELECT DISTINCT
    'observation' AS table_name,
    'code' AS column_name,
    t.row.code,
    t.row.display,
    t.row.system
FROM observation,
    UNNEST(code.coding) AS t (row)
UNION
SELECT DISTINCT
    'patient' AS table_name,
    'maritalstatus' AS column_name,
    t.row.code,
    t.row.display,
    t.row.system
FROM patient,
    UNNEST(maritalstatus.coding) AS t (row)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
# Manifest for the discovery study.
study_prefix = "discovery"

# Python table builder files belonging to this study.
[table_builder_config]
file_names = ["code_detection.py"]

# Tables listed for export.
[export_config]
export_list = ["discovery__code_sources"]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
{#- Creates output_table_name as the UNION of one SELECT per entry in
    code_system_tables. Each entry supplies table_name, column_name, has_data,
    is_bare_coding and is_array. -#}
CREATE TABLE {{ output_table_name }} AS
{%- for field in code_system_tables %}
{#- No data: emit a placeholder row with empty code/display/system. -#}
{%- if not field.has_data %}
SELECT *
FROM (
    VALUES (
        ('{{ field.table_name }}','{{ field.column_name }}', '', '', '')
    )
)
    AS t ( table_name, column_name, code, display, system ) -- noqa: L025
{#- Bare Coding: the column itself carries code/display/system. -#}
{%- elif field.is_bare_coding %}
SELECT DISTINCT
    '{{ field.table_name }}' AS table_name,
    '{{ field.column_name }}' AS column_name,
    {{ field.column_name }}.code,
    {{ field.column_name }}.display,
    {{ field.column_name }}.system
FROM {{ field.table_name }}
{#- Array of CodeableConcepts: unnest the array, then each concept's codings. -#}
{%- elif field.is_array %}
SELECT DISTINCT
    '{{ field.table_name }}' AS table_name,
    '{{ field.column_name }}' AS column_name,
    t2.row2.code,
    t2.row2.display,
    t2.row2.system
FROM {{ field.table_name }},
    UNNEST({{ field.column_name }}) AS t1 (row1),
    UNNEST(t1.row1.coding) AS t2 (row2)
{#- Single CodeableConcept: unnest its codings directly. -#}
{%- else %}
SELECT DISTINCT
    '{{ field.table_name }}' AS table_name,
    '{{ field.column_name }}' AS column_name,
    t.row.code,
    t.row.display,
    t.row.system
FROM {{ field.table_name }},
    UNNEST({{ field.column_name }}.coding) AS t (row)
{%- endif -%}
{%- if not loop.last %}
UNION
{%- endif -%}
{% endfor %}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.