Skip to content

Commit

Permalink
addings runs
Browse files Browse the repository at this point in the history
  • Loading branch information
quantifish committed Sep 27, 2023
1 parent e4dbcce commit aae8858
Show file tree
Hide file tree
Showing 5 changed files with 76 additions and 81 deletions.
Binary file modified vignettes/fit1.rds
Binary file not shown.
Binary file added vignettes/fit2.rds
Binary file not shown.
2 changes: 1 addition & 1 deletion vignettes/hurdle.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ plot_bubble(df = sim_data, group = c("year", "group"), fill = "green")

```{r echo=TRUE, fig.height=6, fig.width=6, message=FALSE}
m1 <- brm(bf(y ~ year + group, hu ~ 1),
data = sim_data, family = "hurdle_lognormal", cores = 1)
data = sim_data, family = "hurdle_lognormal", cores = 1, file = "m1")
```

```{r echo=TRUE, fig.height=6, fig.width=6, message=FALSE, fig.cap = "The new Bayesian CDI plot from the influ2 package."}
Expand Down
155 changes: 75 additions & 80 deletions vignettes/influ2.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,7 @@ in the data set.

```{r echo=TRUE, message=FALSE}
library(knitr)
library(dplyr)
library(ggplot2)
library(tidyverse)
library(reshape2)
library(brms)
library(influ2)
Expand Down Expand Up @@ -91,70 +90,73 @@ third-order polynomial, a spline, and a random-effect.

```{r echo=TRUE, fig.height=6, fig.width=6, message=FALSE}
fit1 <- brm(lobsters ~ year + month + poly(soak, 3),
data = lobsters_per_pot, family = poisson,
chains = 2, iter = 1500, refresh = 0, seed = 42, file = "fit1")
fit2 <- brm(lobsters ~ year + (1 | month) + s(depth, k = 3),
data = lobsters_per_pot, family = negbinomial(),
chains = 2, iter = 1100, refresh = 0, seed = 42, file = "fit1")
control = list(adapt_delta = 0.99),
chains = 2, iter = 3000, refresh = 0, seed = 1, file = "fit2")
```

```{r echo=TRUE, fig.height=6, fig.width=6, message=FALSE}
fit2 <- brm(lobsters ~ year + (1 | month) + s(depth, k = 2),
data = lobsters_per_pot, family = negbinomial(),
chains = 2, iter = 1100, refresh = 0, seed = 42, file = "fit2")
criterion <- c("loo", "waic", "bayes_R2")
fit1 <- add_criterion(fit1, criterion = criterion)
fit2 <- add_criterion(fit2, criterion = criterion)
```

Fit a model using glm and generate influence statistics using the original influ
package

```{r echo=TRUE, fig.height=6, fig.width=6, message=FALSE}
fit_glm <- glm(lobsters ~ year + month + poly(soak, 3),
data = lobsters_per_pot, family = negbinomial)
myInfl <- Influence$new(model = fit_glm)
myInfl$calc()
data = lobsters_per_pot, family = poisson)
```

```{r echo=TRUE, fig.height=6, fig.width=6, message=FALSE, fig.cap = "The original CDI plot from the influ package."}
myInfl$cdiPlot(term = "Species")
myInfl <- Influence$new(model = fit_glm)
myInfl$calc()
myInfl$cdiPlot(term = "month")
```

Generate a CDI plot for a factor

```{r echo=TRUE, fig.height=6, fig.width=6, message=FALSE, fig.cap = "The new Bayesian CDI plot from the influ2 package."}
get_coefs(fit = fit2, var = "Species")
plot_bayesian_cdi(fit = fit2, xfocus = "Species", yfocus = "Year", xlab = "Species")
cdi_month <- plot_bayesian_cdi(fit = fit1, xfocus = "month", yfocus = "year",
xlab = "Month", ylab = "Year")
cdi_month
```

```{r echo=TRUE, fig.height=6, fig.width=6, message=FALSE, fig.cap = "The original step-plot from the influ package."}
myInfl$stepPlot()
```

```{r echo=TRUE, fig.height=6, fig.width=6, message=FALSE, fig.cap = "The new Bayesian step-plot from the influ2 package. Note that the new step-plot requires that all models/steps be run in brms before the function can be used."}
fits <- list(fit0, fit1, fit2)
plot_step(fits = fits, year = "Year")
fits <- list(fit1, fit2)
plot_step(fits = fits, year = "year")
```

```{r echo=TRUE, fig.height=6, fig.width=6, message=FALSE}
plot_index(fit2, year = "Year")
plot_index(fit2, year = "year")
```

```{r echo=TRUE, fig.height=6, fig.width=6, message=FALSE}
myInfl$stanPlot()
```

```{r echo=TRUE, fig.height=6, fig.width=6, message=FALSE}
plot_influ(fit2, year = "Year")
# plot_influ(fit2, year = "year")
```

```{r echo=TRUE, fig.height=6, fig.width=6, message=FALSE}
myInfl$influPlot()
```

```{r echo=TRUE, fig.height=6, fig.width=6, message=FALSE, fig.cap = "Comparison of influences produced by the original influ package (points) and the new influ2 calculation (lines)."}
# i1 <- myInfl$influences %>%
# pivot_longer(cols = -level, names_to = "variable")
# i_species <- get_influ(fit = fit2, group = c("Year", "Species")) %>%
i1 <- myInfl$influences %>%
pivot_longer(cols = -level, names_to = "variable")
# i_month <- get_influ(fit = fit1, group = c("year", "month")) %>%
# group_by(Year) %>%
# summarise(delta = mean(delta)) %>%
# mutate(variable = "Species")
# i_duration <- get_influ(fit = fit2, group = c("Year", "Duration")) %>%
# mutate(variable = "month")
# i_duration <- get_influ(fit = fit1, group = c("year", "Duration")) %>%
# group_by(Year) %>%
# summarise(delta = mean(delta)) %>%
# mutate(variable = "Duration")
Expand All @@ -169,28 +171,20 @@ myInfl$influPlot()

# Functions for exploring models

```{r echo=TRUE, fig.height=6, fig.width=6, message=FALSE}
# Here I evaluate model fit using loo and waic.
# Other options include kfold, loo_subsample, bayes_R2, loo_R2 and marglik
criterion <- c("loo", "waic", "bayes_R2")
fit0 <- add_criterion(fit0, criterion = criterion)
fit1 <- add_criterion(fit1, criterion = criterion)
fit2 <- add_criterion(fit2, criterion = criterion)
fit3 <- add_criterion(fit3, criterion = criterion)
# fit4 <- add_criterion(fit4, criterion = criterion)
# fit5 <- add_criterion(fit5, criterion = criterion)
# fit6 <- add_criterion(fit6, criterion = criterion)
Here I evaluate model fit using loo and waic. Other options include kfold,
loo_subsample, bayes_R2, loo_R2, and marglik.

fit0$criteria$loo
fit0$criteria$waic
```{r echo=TRUE, fig.height=6, fig.width=6, message=FALSE}
fit1$criteria$loo
fit1$criteria$waic
```

```{r echo=TRUE, fig.height=6, fig.width=6, message=FALSE}
loo_compare(fit0, fit1, fit2, criterion = "loo") %>% kable(digits = 1)
loo_compare(fit1, fit2, criterion = "loo") %>% kable(digits = 1)
```

```{r echo=TRUE, fig.height=6, fig.width=6, message=FALSE}
loo_compare(fit0, fit1, fit2, criterion = "waic") %>% kable(digits = 1)
loo_compare(fit1, fit2, criterion = "waic") %>% kable(digits = 1)
```

Bayesian R2 `bayes_R2` for the three model runs
Expand All @@ -199,78 +193,77 @@ Bayesian R2 `bayes_R2` for the three model runs
get_bayes_R2(fits)
```

```{r echo=TRUE, fig.height=6, fig.width=6, message=FALSE, fig.cap = "CDI plot for a linear variable."}
plot_bayesian_cdi(fit = fit1, xfocus = "Duration", yfocus = "Year", xlab = "Duration")
```

```{r echo=TRUE, fig.height=6, fig.width=6, message=FALSE, fig.cap = "CDI plot for a polynomial."}
plot_bayesian_cdi(fit = fit3, xfocus = "Duration", yfocus = "Year", xlab = "Duration")
```{r echo=TRUE, fig.height=6, fig.width=6, message=FALSE, fig.cap = "CDI plot for a fixed effect."}
cdi_month
```

```{r echo=TRUE, fig.height=6, fig.width=6, message=FALSE, fig.cap = "CDI plot for a spline."}
plot_bayesian_cdi(fit = fit4, xfocus = "Duration", yfocus = "Year", xlab = "Duration")
```{r echo=TRUE, fig.height=6, fig.width=6, message=FALSE, fig.cap = "CDI plot for a random effect."}
plot_bayesian_cdi(fit = fit2, xfocus = "month", yfocus = "year",
xlab = "Month", ylab = "Year")
```

```{r echo=TRUE, fig.height=6, fig.width=6, message=FALSE, fig.cap = "CDI plot for a random-effect."}
plot_bayesian_cdi(fit = fit5, xfocus = "Duration2", yfocus = "Year", xlab = "Duration")
```{r echo=TRUE, fig.height=6, fig.width=6, message=FALSE, fig.cap = "CDI plot for a polynomial."}
plot_bayesian_cdi(fit = fit1, xfocus = "soak", yfocus = "year",
xlab = "Soak time (hours)", ylab = "Year")
```

```{r echo=TRUE, fig.height=6, fig.width=6, message=FALSE, fig.cap = "CDI plot for a fixed-effect."}
plot_bayesian_cdi(fit = fit6, xfocus = "Duration2", yfocus = "Year", xlab = "Duration")
```{r echo=TRUE, fig.height=6, fig.width=6, message=FALSE, fig.cap = "CDI plot for a spline."}
plot_bayesian_cdi(fit = fit2, xfocus = "depth", yfocus = "year",
xlab = "Depth (m)", ylab = "Year")
```

```{r echo=TRUE, fig.height=6, fig.width=6, message=FALSE, fig.cap = "Comparison plot of all model runs."}
fits <- list(fit1, fit3, fit4, fit5, fit6)
plot_compare(fits = fits,
labels = c("linear", "poly", "spline", "random-effect", "fixed-effect"),
year = "Year")
fits <- list(fit1, fit2)
plot_compare(fits = fits, year = "year")
```

```{r echo=TRUE, fig.height=6, fig.width=6, message=FALSE, fig.cap = "Unstandardised versus standardised series."}
plot_index(fit = fit4, year = "Year")
plot_index(fit = fit1, year = "year")
```

# Diagnostics

The `influ2` package is based on the original `influ` package which was developed to generate influence plots. The `influ` package has functions that use outputs from the `glm` function. The `influ2` package was developed to use outputs from `brms` and relies heavily on the R packages `ggplot2` and `dplyr`. This vignette showcases the `influ2` package with a focus on model diagnostics including posterior predictive distributions, residuals, and quantile-quantile plots.

```{r echo=TRUE, fig.height=6, fig.width=6, message=FALSE, fig.cap = "Comparison of the empirical distribution of the data (y) to the distributions of simulated/replicated data (yrep) from the posterior predictive distribution."}
yrep <- posterior_predict(fit4, nsamples = 100)
yrep <- posterior_predict(fit1, ndraws = 100)
ppc_dens_overlay(y = lobsters_per_pot$lobsters, yrep = yrep) +
labs(x = "CPUE", y = "Density")
ppc_dens_overlay(y = iris2$CPUE, yrep = yrep) +
ppc_bars(y = lobsters_per_pot$lobsters, yrep = yrep) +
labs(x = "CPUE", y = "Density")
```

```{r echo=TRUE, fig.height=6, fig.width=6, message=FALSE, fig.cap = "Posterior predictive check."}
ppc_ecdf_overlay(y = iris2$CPUE, yrep = yrep) +
ppc_ecdf_overlay(y = lobsters_per_pot$lobsters, yrep = yrep, discrete = TRUE) +
coord_cartesian(xlim = c(0, 10))
```

```{r echo=TRUE, fig.height=6, fig.width=6, message=FALSE, fig.cap = "Quantile-quantile plot from LOO."}
loo <- loo(fit4, save_psis = TRUE)
psis <- loo$psis_object
lw <- weights(psis)
yrep <- posterior_predict(fit4)
ppc_loo_pit_qq(y = iris2$CPUE, yrep = yrep, lw = lw)
# loo <- loo(fit1, save_psis = TRUE)
# psis <- loo$psis_object
# lw <- weights(psis)
# yrep <- posterior_predict(fit1)
# ppc_loo_pit_qq(y = lobsters_per_pot$lobsters, yrep = yrep, lw = lw)
```

```{r echo=TRUE, fig.height=6, fig.width=6, message=FALSE, fig.cap = "Quantile-quantile plot."}
plot_qq(fit = fit4)
# plot_qq(fit = fit1)
```

```{r echo=TRUE, fig.height=6, fig.width=6, message=FALSE, fig.cap = "Residuals and a loess smoother."}
plot_predicted_residuals(fit = fit4, trend = "loess")
plot_predicted_residuals(fit = fit1, trend = "loess")
```

```{r echo=TRUE, fig.height=6, fig.width=10, message=FALSE, fig.cap = "Residuals and a linear fit by year."}
plot_predicted_residuals(fit = fit4, trend = "lm") +
facet_wrap(Year ~ .)
plot_predicted_residuals(fit = fit1, trend = "lm") +
facet_wrap(year ~ .)
```

```{r echo=TRUE, fig.height=6, fig.width=10, message=FALSE, fig.cap = "Residuals and a loess smoother by year."}
# A new style of residual plot
fit <- fit4
fit <- fit1
# Extract predicted values
pred <- fitted(fit) %>% data.frame()
Expand All @@ -280,12 +273,12 @@ names(pred) <- paste0("pred.", names(pred))
resid <- residuals(fit) %>% data.frame()
names(resid) <- paste0("resid.", names(resid))
df <- cbind(resid, pred, iris2)
df <- cbind(resid, pred, lobsters_per_pot)
ggplot(data = df, aes(x = Year, y = .data$resid.Estimate)) +
ggplot(data = df, aes(x = year, y = .data$resid.Estimate)) +
geom_pointrange(aes(ymin = .data$resid.Q2.5, ymax = .data$resid.Q97.5), alpha = 0.75) +
facet_wrap(Area ~ Species) +
geom_hline(yintercept = 0, linetype = "dashed") +
facet_wrap(month ~ .) +
geom_hline(yintercept = 0, linetype = "dashed") +
# geom_errorbarh(aes(xmax = .data$pred.Q2.5, xmin = .data$pred.Q97.5, height = 0), alpha = 0.75) +
# labs(x = "Predicted values", y = "Residuals") +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
Expand All @@ -296,24 +289,26 @@ ggplot(data = df, aes(x = Year, y = .data$resid.Estimate)) +
Implied coefficients (points) are calculated as the normalised fishing year coefficient (grey line) plus the mean of the standardised residuals in each year for each category of a variable. These values approximate the coefficients obtained when an area x year interaction term is fitted, particularly for those area x year combinations which have a substantial proportion of the records. The error bars indicate one standard error of the standardised residuals. The information at the top of each panel identifies the plotted category, provides the correlation coefficient (rho) between the category year index and the overall model index, and the number of records supporting the category.

```{r echo=TRUE, fig.height=6, fig.width=10, message=FALSE, fig.cap = "Implied residuals by year for the categorical variable Area which is not included in the model."}
plot_implied_residuals(fit = fit2, data = iris2,
year = "Year", groups = "Area") +
plot_implied_residuals(fit = fit2, data = lobsters_per_pot,
year = "year", groups = "month") +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
```

```{r echo=TRUE, fig.height=6, fig.width=10, message=FALSE, fig.cap = "Implied residuals by year for the categorical variable Sepcies which is included in the model."}
plot_implied_residuals(fit = fit2, data = iris2,
year = "Year", groups = "Species") +
plot_implied_residuals(fit = fit2, data = lobsters_per_pot,
year = "year", groups = "depth") +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
```

# Functions for generating outputs for stock assessment

Finally, in order to fit to the data in a stock assessment model then indices can be produced using the `get_index` function. The `Estimate` and `Est.Error` columns are what should be used in stock assessment.
Finally, in order to fit to the data in a stock assessment model then indices
can be produced using the `get_index` function. The `Estimate` and `Est.Error`
columns are what should be used in stock assessment.

```{r echo=TRUE, fig.height=6, fig.width=6, message=FALSE}
unstd <- get_unstandarsied(fit = fit2, year = "Year")
cpue0 <- get_index(fit = fit2, year = "Year")
unstd <- get_unstandarsied(fit = fit2, year = "year")
cpue0 <- get_index(fit = fit2, year = "year")
cpue0 %>%
mutate(Unstandardised = unstd$Median) %>%
Expand Down
Binary file added vignettes/m1.rds
Binary file not shown.

0 comments on commit aae8858

Please sign in to comment.