-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlinkage_prep_SUS.R
110 lines (80 loc) · 4.19 KB
/
linkage_prep_SUS.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
## ==========================================================================##
# Project: DACHA
# Team: Improvement Analytics Unit (IAU) at the Health Foundation
# Script: linkage_prep_SUS.R
# Corresponding author:Freya Tracey ([email protected])
# Description: This will be the final script to prepare a pre linkage data set
# for all SUS data sets, bringing together all the variables we said we would
# include across the three (OP, APC, ECDS)
# Inputs:
# Individual one row per person datasets for OP, APC and ECDS
# (come from derived_variables_x scripts)
# Outputs:
# One row per person dataset with all SUS data
# Notes: To use, need to adjust locations of R scripts and csv files
## ==========================================================================##
# Source relevant scripts -------------------------------------------------
# Set up
library (aws.s3)
library (tidyverse)
project_bucket <- "" # assign project directory

# Import APC data set
# Both empty strings and the literal "NA" are read in as missing values.
APC_clean <- s3read_using(
  read.csv,
  object = "APC.csv",
  bucket = project_bucket,
  na.strings = c("", "NA")
) %>%
  mutate(
    # dmy() lives in lubridate, which library(tidyverse) only attaches from
    # tidyverse 2.0.0 onwards; calling it through its namespace keeps the
    # script working on older tidyverse installs as well.
    activity_date = lubridate::dmy(activity_date),
    cip_end = lubridate::dmy(cip_end)
  )

# Distinct pseudonymised ids present in the APC data. Not referenced again in
# this script; presumably kept for interactive checking (e.g. nrow(test)).
test <- APC_clean %>%
  distinct(pseudonhsno)
# Build the APC extract used for linkage: person id, the 12-month admission
# measures (four of them renamed while selecting) and the comorbidity columns,
# then drop fully duplicated rows.
# death_hosp is deliberately not taken here; it comes from the
# derived_variables_APC script.
APC <- select(
  APC_clean,
  pseudonhsno,                    # pseudo id
  adm_em_12h,                     # emergency adm count, year pre index date
  adm_el_12h,                     # elective adm count, year pre index date
  adm_avoid_12h,                  # avoidable emergency adm count, year pre index date
  adm_bedsore_12h = bedsore_12h,  # bedsore adm count, year pre index date
  adm_lrti_12h = lrti_12h,        # adm with lrti diag, year pre index date
  adm_urti_12h = urti_12h,        # adm with urti diag, year pre index date
  discharge_ch_12h = destch_12h,  # any discharge to a care home, year pre index date
  starts_with("c_"),              # charlson variables
  starts_with("e_"),              # elixhauser variables
  starts_with("ec_"),             # variables in charlson and elixhauser
  starts_with("f_"),              # meaning of this prefix not documented - TODO confirm
  starts_with("nr_elix_")
) %>%
  distinct()
# The three person-level files below each carry an `X` column that is dropped
# straight after reading (presumably the row-name index written by write.csv -
# TODO confirm).

# Read in APC derived variables
APC_CH <- s3read_using(
  read.csv,
  object = "person level data sets for linkage/APC_CH.csv",
  bucket = project_bucket,
  na.strings = c("", "NA")
) %>%
  select(-X)

# Read in OP derived variables - nothing else to get from the OP raw data set
# as it is covered in SUS demographics
OP <- s3read_using(
  read.csv,
  object = "person level data sets for linkage/outpatients.csv",
  bucket = project_bucket,
  na.strings = c("", "NA")
) %>%
  select(-X)

# Read in ECDS derived variables - nothing else to get from the ECDS raw data
# set as it is covered in SUS demographics
ECDS <- s3read_using(
  read.csv,
  object = "person level data sets for linkage/ECDS.csv",
  bucket = project_bucket,
  na.strings = c("", "NA")
) %>%
  select(-X)

# Read in SUS demographics file: demographic information derived by combining
# info across the SUS data sets. No column is dropped from this one.
SUS_demographics <- s3read_using(
  read.csv,
  object = "demographics.csv",
  bucket = project_bucket,
  na.strings = c("", "NA")
)
# Build the combined SUS pre-linkage data set: full-join the APC, APC derived,
# ECDS, OP and demographics tables on pseudonhsno (left to right, in that
# order), then add combined flags needed for the in-text table.
# NOTE(review): the flag conditions use `==` and `|`, so a row where neither
# side equals 1 and at least one side is NA gets NA rather than 0.
# NOTE(review): the n_ prefix on the ngproc inputs suggests counts; `== 1`
# would not flag values above 1 - confirm these are 0/1 indicators.
SUS_pre_linkage <- list(APC, APC_CH, ECDS, OP, SUS_demographics) %>%
  reduce(function(joined, nxt) {
    full_join(joined, nxt, by = "pseudonhsno")
  }) %>%
  mutate(
    # diabetes: complicated or uncomplicated
    diabetes_combined = ifelse(
      e_diab_comp_h36 == 1 | e_diab_uncomp_h36 == 1, 1, 0
    ),
    # anaemia: blood loss or deficiency
    anaemia_combined = ifelse(
      e_anaemia_bloodloss_h36 == 1 | e_anaemia_deficiency_h36 == 1, 1, 0
    ),
    # hypertension: complicated or uncomplicated
    hypertension_combined = ifelse(
      e_ht_comp_h36 == 1 | e_ht_uncomp_h36 == 1, 1, 0
    ),
    # ngproc recorded in either the ED or the OP variable
    ngproc_combined = ifelse(
      n_ED_attendances_ngproc == 1 | n_ngproc_op == 1, 1, 0
    )
  )
# Save the combined pre-linkage data set to the project bucket.
# write.csv is called with its defaults, so row names are written as an extra
# leading column.
s3write_using(
  SUS_pre_linkage,
  FUN = write.csv,
  object = "person level data sets for linkage/SUS_combo_pre_linkage.csv",
  bucket = project_bucket
)