Skip to content

Commit

Permalink
Updated countgen interface (#116)
Browse files Browse the repository at this point in the history
* Updated countgen interface

* tweaked docstring

* put default min_count in wrapper functions
  • Loading branch information
dogversioning authored Sep 5, 2023
1 parent 6317f33 commit b52ffb2
Show file tree
Hide file tree
Showing 10 changed files with 334 additions and 157 deletions.
3 changes: 3 additions & 0 deletions .sqlfluffignore
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,6 @@ show_views.sql.jinja
# This is a temporary ignore due to time pressure - not sure of root cause,
# but the table in question builds
codeable_concept_denormalize.sql.jinja

# This is a common destination for debugging sql generation
output.sql
2 changes: 1 addition & 1 deletion cumulus_library/.sqlfluff
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,12 @@ col_type_list = ["a string","b string"]
cc_columns = [{"name": "baz", "is_array": True}, {"name": "foobar", "is_array": False}]
cc_column = 'code'
column_name = 'bar'
cnt_encounter = False
conditions = ["1 > 0", "1 < 2"]
dataset = [["foo","foo"],["bar","bar"]]
ext_systems = ["omb", "text"]
field = 'column_name'
fhir_extension = fhir_extension
fhir_resource = patient
id = 'id'
medication_datasources = {"by_contained_ref" : True, "by_external_ref" : True}
prefix = Test
Expand Down
2 changes: 1 addition & 1 deletion cumulus_library/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
"""Package metadata"""
__version__ = "1.3.0"
__version__ = "1.3.1"
165 changes: 153 additions & 12 deletions cumulus_library/schema/counts.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,11 @@ def __init__(self, study_prefix: str = None):
super().__init__()

def get_table_name(self, table_name: str, duration=None) -> str:
"""Convenience method for constructing table name"""
"""Convenience method for constructing table name
:param table_name: table name to add after the study prefix
:param duration: a time period reflecting the table binning strategy
"""
if duration:
return f"{self.study_prefix}__{table_name}_{duration}"
else:
Expand All @@ -41,7 +45,12 @@ def get_table_name(self, table_name: str, duration=None) -> str:
def get_where_clauses(
self, clause: Union[list, str, None] = None, min_subject: int = 10
) -> str:
"""convenience method for constructing where clauses"""
"""Convenience method for constructing arbitrary where clauses.
:param clause: either a string or a list of sql where statements
:param min_subject: if clause is none, the bin size for a cnt_subject filter
(deprecated, use count_[fhir_resource](min_subject) instead)
"""
if clause is None:
return [f"cnt_subject >= {min_subject}"]
elif isinstance(clause, str):
Expand All @@ -54,45 +63,177 @@ def get_where_clauses(
def get_count_query(
self, table_name: str, source_table: str, table_cols: list, **kwargs
) -> str:
"""Wrapper method for generating a counts table from a template"""
"""Generates a counts table using a template
:param table_name: The name of the table to create. Must start with study prefix
:param source_table: The table to create counts data from
:param table_cols: The columns from the source table to add to the count table
:keyword where_clauses: An array of where clauses to use for filtering the data
:keyword min_subject: An integer setting the minimum bin size for inclusion
(default: 10)
:keyword fhir_resource: The type of FHIR resource to count (see
template_sql/templates.CountableFhirResource)
"""
if not table_name or not source_table or not table_cols:
raise CountsBuilderError(
"count_query missing required arguments. " f"output table: {table_name}"
)
for key in kwargs:
if key not in ["min_subject", "where_clauses", "cnt_encounter"]:
if key not in ["min_subject", "where_clauses", "fhir_resource"]:
raise CountsBuilderError(f"count_query received unexpected key: {key}")
return templates.get_count_query(table_name, source_table, table_cols, **kwargs)

def count_patient(
# ----------------------------------------------------------------------
# The following functions all wrap get_count_query as convenience methods.
# We're deliberately not being overly clever here, so that the docstrings persist
# as the primary interface that study authors see when using these functions.

def count_condition(
self,
table_name: str,
source_table: str,
table_cols: list,
where_clauses=None,
where_clauses: Union[list, None] = None,
min_subject: int = 10,
) -> str:
"""wrapper method for constructing patient counts tables"""
"""wrapper method for constructing condition counts tables
:param table_name: The name of the table to create. Must start with study prefix
:param source_table: The table to create counts data from
:param table_cols: The columns from the source table to add to the count table
:param where_clauses: An array of where clauses to use for filtering the data
:param min_subject: An integer setting the minimum bin size for inclusion
(default: 10)
"""
return self.get_count_query(
table_name, source_table, table_cols, where_clauses=where_clauses
table_name,
source_table,
table_cols,
where_clauses=where_clauses,
min_subject=min_subject,
fhir_resource="condition",
)

def count_document(
    self,
    table_name: str,
    source_table: str,
    table_cols: list,
    where_clauses: Union[list, None] = None,
    min_subject: int = 10,
) -> str:
    """Wrapper method for constructing document counts tables.

    :param table_name: The name of the table to create. Must start with study prefix
    :param source_table: The table to create counts data from
    :param table_cols: The columns from the source table to add to the count table
    :param where_clauses: An array of where clauses to use for filtering the data
    :param min_subject: An integer setting the minimum bin size for inclusion
        (default: 10)
    :return: whatever get_count_query returns for the "document" FHIR resource
    """
    # Everything is delegated to get_count_query; only the resource type is fixed.
    return self.get_count_query(
        table_name,
        source_table,
        table_cols,
        where_clauses=where_clauses,
        min_subject=min_subject,
        fhir_resource="document",
    )

def count_encounter(
self, table_name: str, source_table: str, table_cols: list, where_clauses=None
self,
table_name: str,
source_table: str,
table_cols: list,
where_clauses: Union[list, None] = None,
min_subject: int = 10,
) -> str:
"""wrapper method for constructing encounter counts tables
:param table_name: The name of the table to create. Must start with study prefix
:param source_table: The table to create counts data from
:param table_cols: The columns from the source table to add to the count table
:param where_clauses: An array of where clauses to use for filtering the data
:param min_subject: An integer setting the minimum bin size for inclusion
(default: 10)
"""
return self.get_count_query(
table_name,
source_table,
table_cols,
where_clauses=where_clauses,
min_subject=min_subject,
fhir_resource="encounter",
)

def count_observation(
self,
table_name: str,
source_table: str,
table_cols: list,
where_clauses: Union[list, None] = None,
min_subject: int = 10,
) -> str:
"""wrapper method for constructing encounter counts tables"""
"""wrapper method for constructing observation counts tables
:param table_name: The name of the table to create. Must start with study prefix
:param source_table: The table to create counts data from
:param table_cols: The columns from the source table to add to the count table
:param where_clauses: An array of where clauses to use for filtering the data
:param min_subject: An integer setting the minimum bin size for inclusion
(default: 10)
"""
return self.get_count_query(
table_name,
source_table,
table_cols,
where_clauses=where_clauses,
cnt_encounter=True,
min_subject=min_subject,
fhir_resource="observation",
)

def count_patient(
    self,
    table_name: str,
    source_table: str,
    table_cols: list,
    where_clauses: Union[list, None] = None,
    min_subject: int = 10,
) -> str:
    """Wrapper method for constructing patient counts tables.

    :param table_name: The name of the table to create. Must start with study prefix
    :param source_table: The table to create counts data from
    :param table_cols: The columns from the source table to add to the count table
    :param where_clauses: An array of where clauses to use for filtering the data
    :param min_subject: An integer setting the minimum bin size for inclusion
        (default: 10)
    :return: whatever get_count_query returns for the "patient" FHIR resource
    """
    # Everything is delegated to get_count_query; only the resource type is fixed.
    return self.get_count_query(
        table_name,
        source_table,
        table_cols,
        where_clauses=where_clauses,
        min_subject=min_subject,
        fhir_resource="patient",
    )

# End of wrapper section
# ----------------------------------------------------------------------

def write_counts(self, filepath: str):
"""Convenience method for writing counts queries to disk"""
"""Convenience method for writing counts queries to disk
:param filepath: path to file to write queries out to.
"""
self.prepare_queries(cursor=None, schema=None)
self.comment_queries()
self.write_queries(filename=filepath)

def prepare_queries(self, cursor: object = None, schema: str = None):
    """Stub implementing abstract base class

    This should be overridden in any count generator. See core study
    count_core.py for an example.

    :param cursor: unused by this stub
    :param schema: unused by this stub
    """
    pass
23 changes: 10 additions & 13 deletions cumulus_library/studies/core/count_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,13 @@
class CoreCountsBuilder(CountsBuilder):
display_text = "Creating core counts..."

def __init__(self):
super().__init__()

def count_core_patient(self):
    """Build the counts query for the core patient table.

    Resolves the "count_patient" output table and the "patient" source table
    through get_table_name, then delegates to count_patient with the age,
    gender, race_display and ethnicity_display columns.

    :return: whatever count_patient returns for those tables and columns
    """
    table_name = self.get_table_name("count_patient")
    from_table = self.get_table_name("patient")
    cols = ["age", "gender", "race_display", "ethnicity_display"]
    return self.count_patient(table_name, from_table, cols)

def count_core_encounter(self, duration=None):
def count_core_encounter(self, duration: str = None):
table_name = self.get_table_name("count_encounter", duration=duration)
from_table = self.get_table_name("encounter")

Expand All @@ -30,27 +27,27 @@ def count_core_encounter(self, duration=None):

return self.count_encounter(table_name, from_table, cols)

def _count_core_encounter_type(self, table_name, cols, duration):
def _count_core_encounter_type(
self, table_name: str, cols: list, duration: str = None
):
"""
Encounter Type information is for every visit, and therefore this
SQL should be precise in which fields to select (This is a BIG query).
:param table_name: name of the view from "core__encounter_type"
:param cols: from "core__encounter_type"
:param duration: None, 'month', or 'year'
:return: SQL commands
:return: A SQL statement as a string
"""
table_name = self.get_table_name(table_name, duration)
from_table = self.get_table_name("encounter_type")

if duration:
cols.append(f"start_{duration}")

where = self.get_where_clauses(min_subject=10)

return self.count_encounter(table_name, from_table, cols, where_clauses=where)
return self.count_encounter(table_name, from_table, cols)

def count_core_encounter_type(self, duration=None):
def count_core_encounter_type(self, duration: str = None):
cols = [
"enc_class_display",
"enc_type_display",
Expand All @@ -59,19 +56,19 @@ def count_core_encounter_type(self, duration=None):
]
return self._count_core_encounter_type("count_encounter_type", cols, duration)

def count_core_encounter_enc_type(self, duration="month"):
def count_core_encounter_enc_type(self, duration: str = "month"):
cols = ["enc_class_display", "enc_type_display"]
return self._count_core_encounter_type(
"count_encounter_enc_type", cols, duration
)

def count_core_encounter_service(self, duration="month"):
def count_core_encounter_service(self, duration: str = "month"):
cols = ["enc_class_display", "enc_service_display"]
return self._count_core_encounter_type(
"count_encounter_service", cols, duration
)

def count_core_encounter_priority(self, duration="month"):
def count_core_encounter_priority(self, duration: str = "month"):
cols = ["enc_class_display", "enc_priority_display"]
return self._count_core_encounter_type(
"count_encounter_priority", cols, duration
Expand Down
Loading

0 comments on commit b52ffb2

Please sign in to comment.