-
Notifications
You must be signed in to change notification settings - Fork 1
/
0. functions.R
213 lines (177 loc) · 8.26 KB
/
0. functions.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
load_if_needed <- function( object, filename ){
# Object becomes a list in case we need to load multiple file from the same file
if( object %>%
lapply(function(object){
length(ls( pattern=object, envir = .GlobalEnv)) == 0}) %>%
c(recursive = T) %>% any())
load( filename, envir = .GlobalEnv )
}
download_hospitalization <- function(startday, lastday, nationwide = T){
download.file("https://data.rivm.nl/covid-19/COVID-19_ziekenhuisopnames.csv",
destfile = here(runname, "output", "model_data", "hospitalizations.csv"),
mode = "wb")
df_hospital <- read.csv2(here(runname, "output", "model_data", "hospitalizations.csv")) %>%
select("date" = "Date_of_statistics", "hospitalization" = "Hospital_admission",
"municipality" = "Municipality_name") %>%
# There are a few difference in municipality names between the open data
# and our own data
mutate(municipality = str_remove(municipality,"^\'"), # Den Haag & Bosch
municipality = str_remove(municipality," \\(O\\.\\)"), # Hengelo
municipality = str_replace(municipality,",","."), # Nuenen etc.
municipality = str_replace(municipality,"â","a"), # Fryslan
municipality = str_replace(municipality,"ú","u")) %>% # Sudwest Fryslan
mutate(date = as.Date(date)) %>%
filter(between(date,startday,lastday))
# Aggregate the results if we want the nationwide number of hospitalizations
if(nationwide){
df_hospital <- df_hospital %>%
group_by(date) %>%
summarize(hospitalization = log10(sum(hospitalization)), .groups = "drop")
} else {
df_hospital <- df_hospital %>%
group_by(municipality) %>%
arrange(date) %>%
mutate(hospitalization = log10(mean_run(hospitalization,7,-3)))
}
return(df_hospital)
}
download_testresults <- function(startday, lastday){
download.file("https://data.rivm.nl/covid-19/COVID-19_aantallen_gemeente_per_dag.csv",
destfile = here(runname, "output", "model_data", "tests_results.csv"),
mode = "wb")
df_hospital <- read.csv2(here(runname, "output", "model_data", "tests_results.csv")) %>%
select("date" = "Date_of_publication", "test" = "Total_reported",
"municipality" = "Municipality_name") %>%
# There are a few difference in municipality names between the open data
# and our own data
mutate(municipality = str_remove(municipality,"^\'"), # Den Haag & Bosch
municipality = str_remove(municipality," \\(O\\.\\)"), # Hengelo
municipality = str_replace(municipality,",","."), # Nuenen etc.
municipality = str_replace(municipality,"â","a"), # Fryslan
municipality = str_replace(municipality,"ú","u")) %>% # Sudwest Fryslan
mutate(date = as.Date(date)) %>%
filter(between(date,startday,lastday))
return(df_hospital)
}
calc_df_load_municipality <- function(df_posteriors,df_fractions){
# We redistribute the loads per STP over the different municipalities
df_posteriors %>%
group_by(date) %>%
group_split() %>%
future_map(function(df){
left_join(df,df_fractions, by = "rwzi") %>%
mutate(load = frac_municipality2RWZI * 10^load) %>%
group_by(municipality,date,.draw) %>%
summarize(load = log10(sum(load)), .groups = "drop_last") %>%
median_qi(load)
}) %>%
bind_rows()
}
read_df_sewage <- function( df_viralload_human_regions ){
df_viralload_human_regions %>%
mutate(rwzi = as.factor( RWZI ),
date = Datum,
concentration = case_when(
RNA_100000_RWZI != 0 ~ log10(RNA_100000_RWZI),
RNA_100000_RWZI == 0 ~ 0,
is.na(RNA_100000_RWZI) ~ -1),
rwzi_persons = Inwoneraantal,
municipality_persons = Inwoneraantal_municipality,
municipality = as.factor( municipality ),
vr = as.factor( VRname ),
.keep="none") %>%
filter(date >= startday, date <= lastday) %>%
mutate( date=as.factor(date) )
}
# initialize variables
initials = function() {
list(
k = 6.0,
x0 = 12.0,
sigma_observations = 0.35,
RWvar = 0.35,
a_population = c(11, rep(0.2, num_knots + spline_degree - 2 )),
a_individual = matrix(0.0, ncol = length(levels(df_sewage$rwzi)),
nrow = num_knots + spline_degree - 1)
)
}
# colorblind-friendly scheme
cbPalette <- c("#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7")
# the estimated probability of detection
probdetection = function(x, x0, k) {
return(1 / (1 + exp((-k) * (x - x0))))
}
# Split the model fit for future map
stan_split <- function(fit_hospitalization,num_groups,parameters,par_ignore,
startday,lastday){
# We split the fit of the model, fit_hospitalizations in multiple groups along
# the second dimension of the parameters of interest.
# num_groups is the number of groups we create
# parameters is a vector of strings of the parameters of interest, i.e. the
# parameters we want to extract from the model
# par_ignore is a vector of strings of parameters which are also of high dimension.
# To make sure we can parallize spread_draws, we already throw out most of these results.
places <- 1:fit_hospitalization@sim[["n_flatnames"]]
# We create to integer vectors where the results from the parameters are. place_rest
# will be a vector of all other results.
place_parameters <- NULL
place_log_like <- NULL
place_rest <- NULL
# We find the positions of the parameters we want to extract from the model
for(i in seq_len(length(parameters))){
place_parameters[[i]] <- places[str_detect(fit_hospitalization@sim[["fnames_oi"]],
paste0("^",parameters[i],"(\\[|$)"))]
place_rest <- c(place_rest,place_parameters[[i]])
}
# We find the positions of the parameters of which we want to forget most entries
for(i in seq_len(length(par_ignore))){
place_log_like[[i]] <- places[str_detect(fit_hospitalization@sim[["fnames_oi"]],
paste0("^",par_ignore[i],"(\\[|$)"))]
place_rest <- c(place_rest,place_log_like[[i]])
}
# Find the postion of all other variables
place_rest <- sort(setdiff(places,place_rest))
# We divide the results in (almost) equally sized lists
n_chains <- fit_hospitalization@sim[["chains"]]
n_days <- as.numeric(lastday - startday) + 1
n_muni <- floor(fit_hospitalization@par_dims[[parameters[1]]][[2]]/num_groups)
muni_extra <- 0
fit_hosp_list <- NULL
for(counter in 1:num_groups){
# We create a smaller stan-object
fit_hosp_list_new <- fit_hospitalization
if(counter == num_groups){
muni_extra <- fit_hospitalization@par_dims[[parameters[1]]][[2]] - num_groups*n_muni
}
# We only keep a subset of the expected and simulated hospitalizations
places_sub <- ((counter-1)*n_muni*n_days + 1):(counter*n_muni*n_days + muni_extra*n_days)
# In case of the final worker, we have a bit more municipalities
n_muni <- n_muni + muni_extra
# We find the place of this group only
place_hosp <- place_rest
for(i in seq_len(length(parameters))){
place_hosp <- c(place_hosp,place_parameters[[i]][places_sub])
}
for(i in seq_len(length(par_ignore))){
place_hosp <- c(place_hosp,place_log_like[[i]][1])
}
# We find the places of the results we keep and those we discard
place_hosp <- sort(place_hosp)
place_discard <- sort(setdiff(places,place_hosp))
for(i in 1:n_chains){
fit_hosp_list_new@sim[["samples"]][[i]][place_discard] <- NULL
}
for(i in seq_len(length(parameters))){
fit_hosp_list_new@sim[["dims_oi"]][[parameters[i]]][[2]] <- n_muni
}
for(i in seq_len(length(par_ignore))){
fit_hosp_list_new@sim[["dims_oi"]][[par_ignore[i]]] <-
rep(1,length(fit_hosp_list_new@sim[["dims_oi"]][[par_ignore[1]]]))
}
fit_hosp_list_new@sim[["fnames_oi"]] <- fit_hosp_list_new@sim[["fnames_oi"]][place_hosp]
fit_hosp_list_new@sim[["n_flatnames"]] <- length(fit_hosp_list_new@sim[["fnames_oi"]])
fit_hosp_list[[counter]] <- fit_hosp_list_new
}
return(fit_hosp_list)
}
functions_sourced <- T