-
-
Notifications
You must be signed in to change notification settings - Fork 21
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
📊 cancer: NHS stats on diagnosis routes and survival rates (#3290)
- Loading branch information
1 parent
e601a70
commit 331e28a
Showing
19 changed files
with
719 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
50 changes: 50 additions & 0 deletions
50
etl/steps/data/garden/cancer/2024-09-13/diagnosis_routes_by_route.meta.yml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
# NOTE: To learn more about the fields, hover over their names. | ||
definitions: | ||
common: | ||
presentation: | ||
topic_tags: | ||
- Cancer | ||
|
||
diag_route: &diag_route |- | ||
<% if route == "Screening" %> | ||
Screening is flagged by the cancer registry as detected via the breast, bowel, or cervical screening programmes. | ||
<% elif route == "USC" %> | ||
USC (Urgent Suspected Cancer) refers to urgent GP referrals with a suspicion of cancer (previously known as two-week wait/TWW). | ||
<% elif route == "GP referral" %> | ||
Routine and urgent referrals where the patient was not referred under the USC (Urgent Suspected Cancer) referral route. | ||
<% elif route == "Emergency presentation" %> | ||
An emergency route via accident and emergency (A&E), emergency GP referral, emergency transfer, emergency admission or attendance. | ||
<% elif route == "Other outpatient" %> | ||
An elective route starting with an outpatient appointment that is either a self-referral, consultant to consultant referral, other or unknown referral (excludes patients originally referred under the USC referral route). | ||
<% elif route == "Inpatient elective" %> | ||
No earlier information can be found prior to admission from a waiting list, booked or planned. | ||
<% elif route == "Unknown route" %> | ||
No relevant data available from Inpatient or Outpatient Hospital Episode Statistics, National Cancer Waiting Times or National Screening Programmes. | ||
<% endif %> | ||
# Learn more about the available fields: | ||
# http://docs.owid.io/projects/etl/architecture/metadata/reference/ | ||
dataset: | ||
update_period_days: 365 | ||
|
||
|
||
|
||
tables: | ||
diagnosis_routes_by_route: | ||
variables: | ||
count_by_route: | ||
title: Number of << stage.lower() >> cancer diagnoses via the << route.lower() >> route | ||
description_key: | ||
- *diag_route | ||
unit: cases | ||
display: | ||
numDecimalPlaces: 0 | ||
name: << route >> | ||
percentage_by_route: | ||
title: Share of << stage.lower() >> cancer diagnoses via the << route.lower() >> route | ||
description_key: | ||
- *diag_route | ||
unit: '%' | ||
short_unit: '%' | ||
display: | ||
numDecimalPlaces: 1 | ||
name: << route >> |
34 changes: 34 additions & 0 deletions
34
etl/steps/data/garden/cancer/2024-09-13/diagnosis_routes_by_route.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
"""Load a meadow dataset and create a garden dataset.""" | ||
|
||
from etl.helpers import PathFinder, create_dataset | ||
|
||
# Get paths and naming conventions for current step. | ||
paths = PathFinder(__file__) | ||
|
||
|
||
def run(dest_dir: str) -> None:
    """Build the garden dataset for cancer diagnoses broken down by route.

    Reads the "diagnosis_routes_by_route" table from the meadow dataset of the
    same name, cleans the ``route`` labels, and saves the result as a new
    dataset that reuses the meadow dataset's metadata as its default.

    Args:
        dest_dir: Destination handed to ``create_dataset`` for the new dataset.
    """
    # Inputs: the meadow dataset and its single table, with the index
    # flattened back into ordinary columns.
    meadow = paths.load_dataset("diagnosis_routes_by_route")
    table = meadow["diagnosis_routes_by_route"].reset_index()

    # Processing: remove a leading run of digits plus one whitespace character
    # from every route label (presumably an ordering prefix in the source data).
    cleaned_route = table["route"].str.replace(r"^\d+\s", "", regex=True)
    table["route"] = cleaned_route
    key_columns = ["country", "year", "site", "stage", "route"]
    table = table.format(key_columns)

    # Outputs: create the garden dataset and persist it.
    garden = create_dataset(
        dest_dir,
        tables=[table],
        check_variables_metadata=True,
        default_metadata=meadow.metadata,
    )
    garden.save()
49 changes: 49 additions & 0 deletions
49
etl/steps/data/garden/cancer/2024-09-13/diagnosis_routes_by_stage.meta.yml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
# NOTE: To learn more about the fields, hover over their names. | ||
definitions: | ||
common: | ||
presentation: | ||
topic_tags: | ||
- Cancer | ||
diag_route: &diag_route |- | ||
<% if route == "Screening" %> | ||
Screening is flagged by the cancer registry as detected via the breast, bowel, or cervical screening programmes. | ||
<% elif route == "USC" %> | ||
USC (Urgent Suspected Cancer) refers to urgent GP referrals with a suspicion of cancer (previously known as two-week wait/TWW). | ||
<% elif route == "GP referral" %> | ||
Routine and urgent referrals where the patient was not referred under the USC (Urgent Suspected Cancer) referral route. | ||
<% elif route == "Emergency presentation" %> | ||
An emergency route via accident and emergency (A&E), emergency GP referral, emergency transfer, emergency admission or attendance. | ||
<% elif route == "Other outpatient" %> | ||
An elective route starting with an outpatient appointment that is either a self-referral, consultant to consultant referral, other or unknown referral (excludes patients originally referred under the USC referral route). | ||
<% elif route == "Inpatient elective" %> | ||
No earlier information can be found prior to admission from a waiting list, booked or planned. | ||
<% elif route == "Unknown route" %> | ||
No relevant data available from Inpatient or Outpatient Hospital Episode Statistics, National Cancer Waiting Times or National Screening Programmes. | ||
<% endif %> | ||
# Learn more about the available fields: | ||
# http://docs.owid.io/projects/etl/architecture/metadata/reference/ | ||
dataset: | ||
update_period_days: 365 | ||
|
||
|
||
|
||
tables: | ||
diagnosis_routes_by_stage: | ||
variables: | ||
count_by_stage: | ||
title: Number of cancer diagnoses via the << route.lower() >> route that are at << stage.lower() >> | ||
description_key: | ||
- *diag_route | ||
display: | ||
numDecimalPlaces: 0 | ||
name: << stage >> | ||
unit: cases | ||
percentage_by_stage: | ||
title: Share of cancer diagnoses via the << route.lower() >> route that are at << stage.lower() >> | ||
description_key: | ||
- *diag_route | ||
unit: '%' | ||
short_unit: '%' | ||
display: | ||
numDecimalPlaces: 1 | ||
name: << stage >> |
34 changes: 34 additions & 0 deletions
34
etl/steps/data/garden/cancer/2024-09-13/diagnosis_routes_by_stage.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
"""Load a meadow dataset and create a garden dataset.""" | ||
|
||
from etl.helpers import PathFinder, create_dataset | ||
|
||
# Get paths and naming conventions for current step. | ||
paths = PathFinder(__file__) | ||
|
||
|
||
def run(dest_dir: str) -> None:
    """Build the garden dataset for cancer diagnoses broken down by stage.

    Reads the "diagnosis_routes_by_stage" table from the meadow dataset of the
    same name, cleans the ``route`` labels, and saves the result as a new
    dataset that reuses the meadow dataset's metadata as its default.

    Args:
        dest_dir: Destination handed to ``create_dataset`` for the new dataset.
    """
    step_name = "diagnosis_routes_by_stage"

    # Inputs: meadow dataset -> table with its index turned into columns.
    source_ds = paths.load_dataset(step_name)
    data = source_ds[step_name].reset_index()

    # Processing: drop the leading "<digits><whitespace>" prefix from each
    # route label, then format the table on its key columns.
    data["route"] = data["route"].str.replace(r"^\d+\s", "", regex=True)
    data = data.format(["country", "year", "site", "stage", "route"])

    # Outputs: build the garden dataset from the processed table and save it.
    create_dataset(
        dest_dir,
        tables=[data],
        check_variables_metadata=True,
        default_metadata=source_ds.metadata,
    ).save()
64 changes: 64 additions & 0 deletions
64
etl/steps/data/garden/cancer/2024-09-13/diagnosis_routes_survival.meta.yml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
# NOTE: To learn more about the fields, hover over their names. | ||
definitions: | ||
common: | ||
presentation: | ||
topic_tags: | ||
- Cancer | ||
grapher_config: | ||
note: The year shown is the first year of each 5-year rolling cohort (from 2006-2010 to 2016-2020) used for the age-standardized survival estimates by cancer site. | ||
|
||
diag_route: &diag_route |- | ||
<% if route == "Screening" %> | ||
Screening is flagged by the cancer registry as detected via the breast, bowel, or cervical screening programmes. | ||
<% elif route == "USC" %> | ||
USC (Urgent Suspected Cancer) refers to urgent GP referrals with a suspicion of cancer (previously known as two-week wait/TWW). | ||
<% elif route == "GP referral" %> | ||
Routine and urgent referrals where the patient was not referred under the USC (Urgent Suspected Cancer) referral route. | ||
<% elif route == "Emergency presentation" %> | ||
An emergency route via accident and emergency (A&E), emergency GP referral, emergency transfer, emergency admission or attendance. | ||
<% elif route == "Other outpatient" %> | ||
An elective route starting with an outpatient appointment that is either a self-referral, consultant to consultant referral, other or unknown referral (excludes patients originally referred under the USC referral route). | ||
<% elif route == "Inpatient elective" %> | ||
No earlier information can be found prior to admission from a waiting list, booked or planned. | ||
<% elif route == "Unknown route" %> | ||
No relevant data available from Inpatient or Outpatient Hospital Episode Statistics, National Cancer Waiting Times or National Screening Programmes. | ||
<% endif %> | ||
sex: |- | ||
<% if gender == "Persons" %>all<% elif gender == "Male" %>male<% elif gender == "Female" %>female<% endif %> | ||
# Learn more about the available fields: | ||
# http://docs.owid.io/projects/etl/architecture/metadata/reference/ | ||
dataset: | ||
update_period_days: 365 | ||
|
||
|
||
|
||
tables: | ||
diagnosis_routes_survival: | ||
variables: | ||
patients: | ||
title: Number of {definitions.sex} patients diagnosed via << route.lower() >> surviving at the end of the << length.lower() >> period | ||
description_short: |- | ||
The number of {definitions.sex} patients diagnosed via << route.lower() >> route surviving at the end of the << length.lower() >> period. | ||
description_key: | ||
- *diag_route | ||
unit: cases | ||
display: | ||
numDecimalPlaces: 0 | ||
name: << route >> | ||
presentation: | ||
title_public: Number of {definitions.sex} patients diagnosed via << route.lower() >> surviving at the end of the << length.lower() >> period | ||
|
||
survival: | ||
title: Age-standardized << length.lower() >> survival rate diagnosed via << route.lower() >> among {definitions.sex} patients | ||
description_short: |- | ||
The age-standardized << length.lower() >> survival rate diagnosed via << route.lower() >> route among {definitions.sex} patients. | ||
description_key: | ||
- *diag_route | ||
unit: '%' | ||
short_unit: '%' | ||
display: | ||
numDecimalPlaces: 1 | ||
name: << route >> | ||
presentation: | ||
title_public: Age-standardized << length.lower() >> survival rate diagnosed via << route.lower() >> among {definitions.sex} patients |
37 changes: 37 additions & 0 deletions
37
etl/steps/data/garden/cancer/2024-09-13/diagnosis_routes_survival.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
"""Load a meadow dataset and create a garden dataset.""" | ||
|
||
from etl.helpers import PathFinder, create_dataset | ||
|
||
# Get paths and naming conventions for current step. | ||
paths = PathFinder(__file__) | ||
|
||
|
||
def run(dest_dir: str) -> None:
    """Build the garden dataset of survival figures by diagnosis route.

    Reads the "diagnosis_routes_survival" table from the meadow dataset of the
    same name, converts the cohort range in ``year`` to a single integer year,
    cleans the ``route`` labels, and saves the result as a new dataset that
    reuses the meadow dataset's metadata as its default.

    Args:
        dest_dir: Destination handed to ``create_dataset`` for the new dataset.
    """
    #
    # Load inputs.
    #
    # Load meadow dataset.
    ds_meadow = paths.load_dataset("diagnosis_routes_survival")

    # Read table from meadow dataset.
    tb = ds_meadow["diagnosis_routes_survival"].reset_index()

    #
    # Process data.
    #
    # 'year' holds cohort ranges in the format '2006-2010'. Keep the FIRST
    # (start) year of each range, i.e. the part before the hyphen.
    tb["year"] = tb["year"].apply(lambda x: int(x.split("-")[0])).astype(int)

    # Remove a leading run of digits plus one whitespace character from the
    # route labels.
    tb["route"] = tb["route"].str.replace(r"^\d+\s", "", regex=True)
    tb = tb.format(["country", "year", "site", "gender", "route", "length"])

    #
    # Save outputs.
    #
    # Create a new garden dataset with the same metadata as the meadow dataset.
    ds_garden = create_dataset(
        dest_dir, tables=[tb], check_variables_metadata=True, default_metadata=ds_meadow.metadata
    )

    # Save changes in the new garden dataset.
    ds_garden.save()
51 changes: 51 additions & 0 deletions
51
etl/steps/data/grapher/cancer/2024-09-13/diagnosis_routes_by_route.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
"""Load a garden dataset and create a grapher dataset.""" | ||
|
||
from etl.helpers import PathFinder, create_dataset | ||
|
||
# Get paths and naming conventions for current step. | ||
paths = PathFinder(__file__) | ||
|
||
|
||
def run(dest_dir: str) -> None:
    """Build the grapher dataset for cancer diagnoses broken down by route.

    Reads the "diagnosis_routes_by_route" table from the garden dataset of the
    same name, drops the original ``country`` column, relabels each cancer
    ``site`` with a descriptive name, and exposes that label as ``country`` so
    the cancer type takes the place of the country in the output table.

    Args:
        dest_dir: Destination handed to ``create_dataset`` for the new dataset.

    Raises:
        ValueError: If the table contains a non-null ``site`` value that has no
            entry in the label mapping (it would otherwise silently become NaN).
    """
    #
    # Load inputs.
    #
    # Load garden dataset.
    ds_garden = paths.load_dataset("diagnosis_routes_by_route")

    # Read table from garden dataset.
    tb = ds_garden["diagnosis_routes_by_route"].reset_index()
    tb = tb.drop(columns=["country"])

    # Define mapping dictionary with only the first word capitalized
    cancer_mapping = {
        "All Malignant Neoplasms (excl. NMSC)": "All malignant neoplasms (excl. NMSC)",
        "Bladder": "Bladder cancer",
        "Breast": "Breast cancer",
        "Cervix": "Cervical cancer",
        "Colorectal": "Colorectal cancer",
        "Kidney": "Kidney cancer",
        "Lung - non-small cell": "Lung cancer (non-small cell)",
        "Lung - small cell": "Lung cancer (small cell)",
        "Ovary": "Ovarian cancer",
        "Pancreas": "Pancreatic cancer",
        "Prostate": "Prostate cancer",
        "Uterus": "Uterine cancer",
    }

    # `Series.map` turns every value missing from the dict into NaN without any
    # warning, so fail loudly if the data contains a site we have no label for.
    unknown_sites = set(tb["site"].dropna().unique()) - set(cancer_mapping)
    if unknown_sites:
        raise ValueError(
            f"Cancer sites missing from cancer_mapping: {sorted(unknown_sites)}"
        )

    # Map cancer types to descriptive labels
    tb["site"] = tb["site"].map(cancer_mapping)

    # Make cancer type appear as country.
    tb = tb.rename(columns={"site": "country"})
    tb = tb.format(["country", "year", "stage", "route"])

    #
    # Save outputs.
    #
    # Create a new grapher dataset with the same metadata as the garden dataset.
    ds_grapher = create_dataset(
        dest_dir, tables=[tb], check_variables_metadata=True, default_metadata=ds_garden.metadata
    )

    # Save changes in the new grapher dataset.
    ds_grapher.save()
Oops, something went wrong.