-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathderived_variables_PDS.R
66 lines (44 loc) · 1.67 KB
/
derived_variables_PDS.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
## ==========================================================================##
# Project: DACHA
# Team: Improvement Analytics Unit (IAU) at the Health Foundation
# Script: derived_variables_PDS.R
# Corresponding author: Freya Tracey ([email protected])
# Description: Clean and derive variables for MDS from PDS data and save a
# version with one row per person
#
# Inputs:
# PDS data
# Outputs:
# One row per person data set
# Notes: To use, need to adjust locations of R scripts and csv files
## ==========================================================================##
# Source relevant scripts -------------------------------------------------
# Set up
library (aws.s3)
library (tidyverse)
library(janitor)
# S3 bucket that holds the project data. Left blank on purpose: it must be
# filled in before the script is run (see Notes in the header).
project_bucket <- "" # assign project directory

# Import data sets
# source() needs the script path as a character string; the bare symbol
# `filepaths.R` would be looked up as an object and fail with
# "object 'filepaths.R' not found".
source("filepaths.R") # script with all filepaths in

# Read the PDS extract (stored as an .rds object) from the project bucket.
PDS <- s3read_using(
  FUN = readRDS,
  object = "PDS.rds",
  bucket = project_bucket
)
# For each person (pseudonhsno), keep the PDS record that covers the period
# the index date falls into: the latest record whose Change_Time_Stamp is on
# or before index_date. Columns are then renamed with a _PDS suffix.
# NOTE(review): index_date is not created in this script - presumably a column
# of PDS or an object defined by filepaths.R; confirm before running.
most_recent_PDS <- PDS %>% # was PDS_test, which is never defined in this script
  group_by(pseudonhsno) %>%
  filter(Change_Time_Stamp <= index_date) %>%
  # with_ties = FALSE guarantees a single row per person even when several
  # records share the latest timestamp (the first such row is kept)
  slice_max(order_by = Change_Time_Stamp, n = 1, with_ties = FALSE) %>%
  ungroup() %>%
  select(-Change_Time_Stamp) %>%
  rename(
    practice_code_PDS = Der_Practice_Code,
    CCG_residence_PDS = Der_CCGofResidence,
    gender_PDS = Gender,
    date_of_death_PDS = DateOfDeath,
    LSOA_PDS = Der_Postcode_LSOA_Code,
    DOB_yr_mth_PDS = Der_DOBYearMth
  )
# Write the person-level PDS extract to the project bucket as a CSV, ready
# for linkage with the other data sets.
most_recent_PDS %>%
  s3write_using(
    FUN = write.csv,
    bucket = project_bucket,
    object = "person level data sets for linkage/PDS_pre_linkage.csv"
  )