Skip to content

Commit

Permalink
Initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
MichelleKendall committed Jan 25, 2023
0 parents commit f5b30c6
Show file tree
Hide file tree
Showing 57 changed files with 344,007 additions and 0 deletions.
2 changes: 2 additions & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Auto detect text files and perform LF normalization
* text=auto
12 changes: 12 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
*checkpoint*
*.Rhistory
.Rproj.user
*.DS_Store

data/private/*.csv
plots/*.pdf
plots/*.png
plots/*.zip
results/*.csv
results/*.RData

86 changes: 86 additions & 0 deletions R/compute_TPAEN.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
# This code calculates TPAEN.
# Please note that the calculation can take a long time, especially if `parallel::detectCores()` is 1.

compute_TPAEN <- function(cleaned.app.data){

using("rstan", #v.2.21.5
"splines", #v.4.0.4
"Rcpp") #v.1.0.9

# R code to call Stan
rstan_options(auto_write = TRUE)
options(mc.cores = parallel::detectCores())
sm<-stan_model(file = "R/compute_TPAEN.stan")

df.discrete <- read_csv("data/private/df.discrete.csv", show_col_types = FALSE)

delaydistr <- df.discrete$pmf[1:31]
delaydistr <- delaydistr/sum(delaydistr)
reverse_delaydistr <- rev(delaydistr[1:31])

data.orig <- cleaned.app.data

# Select columns of interest and aggregate to national
data <- data.orig %>%
group_by(date) %>%
summarise(
users = sum(users, na.rm=TRUE),
received_exposure_notification = sum(notifications, na.rm=TRUE),
test_positive = sum(test_positive, na.rm=TRUE),
test_positive_after_exposure = sum(positive_after_EN, na.rm=TRUE)
) %>%
ungroup() %>%
drop_na()

num.days <- nrow(distinct(data, date))
data$day.as.int <- 1:num.days

N <- data$received_exposure_notification # notified
P <- data$test_positive_after_exposure # notified and positive

stopifnot(sort(data$day.as.int)==seq(from=min(data$day.as.int), to=max(data$day.as.int), by=1))

X <- data$day.as.int # generating inputs
knots.dates <- rev(seq(from=max(X)-1, to=min(X), by=-7))

B <- t(ns(X, knots=knots.dates, intercept = TRUE)) # creating the natural B-splines
num_data <- length(X); num_basis <- nrow(B)
#Y_true <- as.vector(a0*X + a%*%B) # generating the output
n_ref <- mean(N) # typical value
s_ref <- mean(P)/n_ref # typical value
data_for_stan<-list(
num_data = num_data,
num_basis = num_basis,
n_ref = n_ref,
s_ref = s_ref,
P = P,
N = N,
X = X,
reverse_delaydistr = reverse_delaydistr,
B = B
)

fit<-sampling(sm, data=data_for_stan, iter=2000,control = list(adapt_delta=0.95))

fit_summary<-summary(fit)$summary
sampled_values<-extract(fit)


tpaen.df <- fit_summary[grepl("S_",rownames(fit_summary)),]
tpaen.df <- as_tibble(tpaen.df)
tpaen.df$date <- data$date

tpaen.df <- tpaen.df %>%
select(
date,
"TPAEN" = mean,
"lower.TPAEN" = `2.5%`,
"upper.TPAEN" = `97.5%`
)

write_csv(tpaen.df, "data/private/TPAEN.csv")
}




52 changes: 52 additions & 0 deletions R/compute_TPAEN.stan
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
data {
int num_data;
int num_basis;
real n_ref;
real s_ref;
int P[num_data];
int N[num_data];
vector[num_data] X;
vector[31] reverse_delaydistr;
matrix[num_basis, num_data] B;
}

parameters {
row_vector[num_basis] n_rel;
real n_abs;
real n0;
row_vector[num_basis] s_rel;
real s_abs;
real s0;
real <lower=0> invphi_noise;
}

transformed parameters {
row_vector[num_basis] n;
row_vector[num_basis] s;
vector[num_data] N_exp;
vector[num_data] P_exp;
vector[num_data] S_exp;
n = n_rel*n_abs;
s = s_rel*s_abs;
N_exp = exp( n0*X + to_vector(n*B) );
S_exp = exp( s0*X + to_vector(s*B) );
for(day in 31:num_data) {
P_exp[day] = dot_product( segment( N_exp .* S_exp, day-31+1, 31), reverse_delaydistr ) ;
}
for(day in 1:30) {
P_exp[day] = P_exp[31] ;
}
}

model {
n_abs ~ cauchy(0, fabs(log(5*n_ref)));
s_abs ~ cauchy(0, fabs(log(5*s_ref)));
n_rel ~ normal(0, 1);
s_rel ~ normal(0, 1);
invphi_noise ~ cauchy(0, 1) T[0,];
for(day in 31:num_data) {
P[day] ~ neg_binomial_2( P_exp[day] , 1/invphi_noise );
N[day] ~ neg_binomial_2( N_exp[day] , 1/invphi_noise );
}
}

100 changes: 100 additions & 0 deletions R/compute_cases_averted_ltlas.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
compute_cases_averted_ltlas <- function(app.and.case.data, recalculate = FALSE) {

if(!file.exists("results/averted.first.year.by.ltla.csv")||recalculate){

by.la <- sapply(sort(unique(app.and.case.data$ltla_name)), function(area) {

area.app.and.case.data <- app.and.case.data %>% filter(ltla_name == area)

wave1 <- compute_cases_averted_per_wave(wave.start.date=as.Date("2020-09-24"),
wave.end.date=as.Date("2021-05-17"),
app.and.case.data = area.app.and.case.data,
wave="pre.alpha",
other_quarantine_reduction = risky.contact.reduction.factor.pre.alpha,
max.proportion.who.know.infected = max.omega.pre.alpha)

wave2 <- compute_cases_averted_per_wave(wave.start.date=as.Date("2020-09-24"),
wave.end.date=as.Date("2021-05-17"),
app.and.case.data = area.app.and.case.data,
wave="alpha",
other_quarantine_reduction = risky.contact.reduction.factor.alpha,
max.proportion.who.know.infected = max.omega.alpha)

wave3 <- compute_cases_averted_per_wave(wave.start.date=as.Date("2021-05-18"),
wave.end.date=as.Date("2021-09-24"),
app.and.case.data = area.app.and.case.data,
wave="delta",
other_quarantine_reduction = risky.contact.reduction.factor.delta,
max.proportion.who.know.infected = max.omega.delta)

wave1$total_averted + wave2$total_averted + wave3$total_averted

})

#stopifnot(signif(sum(by.la),2) == signif(total.cases.averted, 2))
#signif(by.la,3)

by.la.tibble <- tibble(
"ltla_name" = names(by.la),
"averted" = by.la
)

# join with actual cases data - useful for plotting later
by.la.tibble <- left_join(by.la.tibble,
app.and.case.data %>%
filter(date >= as.Date("2020-09-24")) %>%
filter(date <= as.Date("2021-09-24")) %>%
group_by(ltla_name) %>%
summarise("total.actual.cases" = sum(cases)),
by="ltla_name") %>%
mutate("averted.as.perc.of.actual" = averted / total.actual.cases * 100, # how many more cases there would have been as a percent of actual
"averted.as.perc.of.counterfactual" = averted / (averted + total.actual.cases) * 100)


write_csv(by.la.tibble, file="results/averted.first.year.by.ltla.csv")

} # end if

} # end function


# BY REGION - NOT USED

#by.region <- sapply(sort(unique(app.and.case.data$Region)), function(area) {
#
# region.app.and.case.data <- app.and.case.data %>% filter(Region == area)
#
# wave1 <- get_total_cases_averted_per_wave_with_realising(wave.start.date=as.Date("2020-09-24"),
# wave.end.date=as.Date("2021-05-17"),
# app.and.case.data = region.app.and.case.data,
# wave="pre.alpha",
# other_quarantine_reduction = risky.contact.reduction.factor.pre.alpha,
# max.proportion.who.know.infected = max.omega.pre.alpha)
#
# wave2 <- get_total_cases_averted_per_wave_with_realising(wave.start.date=as.Date("2020-09-24"),
# wave.end.date=as.Date("2021-05-17"),
# app.and.case.data = region.app.and.case.data,
# wave="alpha",
# other_quarantine_reduction = risky.contact.reduction.factor.alpha,
# max.proportion.who.know.infected = max.omega.alpha)
#
# wave3 <- get_total_cases_averted_per_wave_with_realising(wave.start.date=as.Date("2021-05-18"),
# wave.end.date=as.Date("2021-09-24"),
# app.and.case.data = region.app.and.case.data,
# wave="delta",
# other_quarantine_reduction = risky.contact.reduction.factor.delta,
# max.proportion.who.know.infected = max.omega.delta)
#
# wave1$total_averted + wave2$total_averted + wave3$total_averted
#})
#
#stopifnot(signif(sum(by.region),2) == signif(total.cases.averted, 2))
#signif(by.region,3)
#
#by.region.tibble <- tibble(
# "region" = names(by.region),
# "averted" = by.region
#)
#
#write_csv(by.region.tibble, file="data/averted.first.year.by.region.csv")

Loading

0 comments on commit f5b30c6

Please sign in to comment.