addings runs

quantifish · Sep 27, 2023 · aae8858 · aae8858
1 parent e4dbcce
commit aae8858
Show file tree

Hide file tree

Showing 5 changed files with 76 additions and 81 deletions.
diff --git a/vignettes/fit1.rds b/vignettes/fit1.rds
diff --git a/vignettes/fit2.rds b/vignettes/fit2.rds
diff --git a/vignettes/hurdle.Rmd b/vignettes/hurdle.Rmd
@@ -62,7 +62,7 @@ plot_bubble(df = sim_data, group = c("year", "group"), fill = "green")
 
 ```{r echo=TRUE, fig.height=6, fig.width=6, message=FALSE}
 m1 <- brm(bf(y ~ year + group, hu ~ 1), 
-          data = sim_data, family = "hurdle_lognormal", cores = 1)
+          data = sim_data, family = "hurdle_lognormal", cores = 1, file = "m1")
 ```
 
 ```{r echo=TRUE, fig.height=6, fig.width=6, message=FALSE, fig.cap = "The new Bayesian CDI plot from the influ2 package."}

diff --git a/vignettes/influ2.Rmd b/vignettes/influ2.Rmd
@@ -46,8 +46,7 @@ in the data set.
 
 ```{r echo=TRUE, message=FALSE}
 library(knitr)
-library(dplyr)
-library(ggplot2)
+library(tidyverse)
 library(reshape2)
 library(brms)
 library(influ2)
@@ -91,70 +90,73 @@ third-order polynomial, a spline, and a random-effect.
 
 ```{r echo=TRUE, fig.height=6, fig.width=6, message=FALSE}
 fit1 <- brm(lobsters ~ year + month + poly(soak, 3), 
+            data = lobsters_per_pot, family = poisson, 
+            chains = 2, iter = 1500, refresh = 0, seed = 42, file = "fit1")
+fit2 <- brm(lobsters ~ year + (1 | month) + s(depth, k = 3), 
             data = lobsters_per_pot, family = negbinomial(), 
-            chains = 2, iter = 1100, refresh = 0, seed = 42, file = "fit1")
+            control = list(adapt_delta = 0.99),
+            chains = 2, iter = 3000, refresh = 0, seed = 1, file = "fit2")
 ```
 
 ```{r echo=TRUE, fig.height=6, fig.width=6, message=FALSE}
-fit2 <- brm(lobsters ~ year + (1 | month) + s(depth, k = 2), 
-            data = lobsters_per_pot, family = negbinomial(), 
-            chains = 2, iter = 1100, refresh = 0, seed = 42, file = "fit2")
+criterion <- c("loo", "waic", "bayes_R2")
+fit1 <- add_criterion(fit1, criterion = criterion)
+fit2 <- add_criterion(fit2, criterion = criterion)
 ```
 
 Fit a model using glm and generate influence statistics using the original influ 
 package
 
 ```{r echo=TRUE, fig.height=6, fig.width=6, message=FALSE}
 fit_glm <- glm(lobsters ~ year + month + poly(soak, 3), 
-               data = lobsters_per_pot, family = negbinomial)
-myInfl <- Influence$new(model = fit_glm)
-myInfl$calc()
+               data = lobsters_per_pot, family = poisson)
 ```
 
 ```{r echo=TRUE, fig.height=6, fig.width=6, message=FALSE, fig.cap = "The original CDI plot from the influ package."}
-myInfl$cdiPlot(term = "Species")
+myInfl <- Influence$new(model = fit_glm)
+myInfl$calc()
+myInfl$cdiPlot(term = "month")
 ```
 
-Generate a CDI plot for a factor
-
 ```{r echo=TRUE, fig.height=6, fig.width=6, message=FALSE, fig.cap = "The new Bayesian CDI plot from the influ2 package."}
-get_coefs(fit = fit2, var = "Species")
-plot_bayesian_cdi(fit = fit2, xfocus = "Species", yfocus = "Year", xlab = "Species")
+cdi_month <- plot_bayesian_cdi(fit = fit1, xfocus = "month", yfocus = "year", 
+                               xlab = "Month", ylab = "Year")
+cdi_month
 ```
 
 ```{r echo=TRUE, fig.height=6, fig.width=6, message=FALSE, fig.cap = "The original step-plot from the influ package."}
 myInfl$stepPlot()
 ```
 
 ```{r echo=TRUE, fig.height=6, fig.width=6, message=FALSE, fig.cap = "The new Bayesian step-plot from the influ2 package. Note that the new step-plot requires that all models/steps be run in brms before the function can be used."}
-fits <- list(fit0, fit1, fit2)
-plot_step(fits = fits, year = "Year")
+fits <- list(fit1, fit2)
+plot_step(fits = fits, year = "year")
 ```
 
 ```{r echo=TRUE, fig.height=6, fig.width=6, message=FALSE}
-plot_index(fit2, year = "Year")
+plot_index(fit2, year = "year")
 ```
 
 ```{r echo=TRUE, fig.height=6, fig.width=6, message=FALSE}
 myInfl$stanPlot()
 ```
 
 ```{r echo=TRUE, fig.height=6, fig.width=6, message=FALSE}
-plot_influ(fit2, year = "Year")
+# plot_influ(fit2, year = "year")
 ```
 
 ```{r echo=TRUE, fig.height=6, fig.width=6, message=FALSE}
 myInfl$influPlot()
 ```
 
 ```{r echo=TRUE, fig.height=6, fig.width=6, message=FALSE, fig.cap = "Comparison of influences produced by the original influ package (points) and the new influ2 calculation (lines)."}
-# i1 <- myInfl$influences %>%
-#   pivot_longer(cols = -level, names_to = "variable")
-# i_species <- get_influ(fit = fit2, group = c("Year", "Species")) %>%
+i1 <- myInfl$influences %>%
+  pivot_longer(cols = -level, names_to = "variable")
+# i_month <- get_influ(fit = fit1, group = c("year", "month")) %>%
 #   group_by(Year) %>%
 #   summarise(delta = mean(delta)) %>%
-#   mutate(variable = "Species")
-# i_duration <- get_influ(fit = fit2, group = c("Year", "Duration")) %>%
+#   mutate(variable = "month")
+# i_duration <- get_influ(fit = fit1, group = c("year", "Duration")) %>%
 #   group_by(Year) %>%
 #   summarise(delta = mean(delta)) %>%
 #   mutate(variable = "Duration")
@@ -169,28 +171,20 @@ myInfl$influPlot()
 
 # Functions for exploring models
 
-```{r echo=TRUE, fig.height=6, fig.width=6, message=FALSE}
-# Here I evaluate model fit using loo and waic. 
-# Other options include kfold, loo_subsample, bayes_R2, loo_R2 and marglik
-criterion <- c("loo", "waic", "bayes_R2")
-fit0 <- add_criterion(fit0, criterion = criterion)
-fit1 <- add_criterion(fit1, criterion = criterion)
-fit2 <- add_criterion(fit2, criterion = criterion)
-fit3 <- add_criterion(fit3, criterion = criterion)
-# fit4 <- add_criterion(fit4, criterion = criterion)
-# fit5 <- add_criterion(fit5, criterion = criterion)
-# fit6 <- add_criterion(fit6, criterion = criterion)
+Here I evaluate model fit using loo and waic. Other options include kfold, 
+loo_subsample, bayes_R2, loo_R2, and marglik.
 
-fit0$criteria$loo
-fit0$criteria$waic
+```{r echo=TRUE, fig.height=6, fig.width=6, message=FALSE}
+fit1$criteria$loo
+fit1$criteria$waic
 ```
 
 ```{r echo=TRUE, fig.height=6, fig.width=6, message=FALSE}
-loo_compare(fit0, fit1, fit2, criterion = "loo") %>% kable(digits = 1)
+loo_compare(fit1, fit2, criterion = "loo") %>% kable(digits = 1)
 ```
 
 ```{r echo=TRUE, fig.height=6, fig.width=6, message=FALSE}
-loo_compare(fit0, fit1, fit2, criterion = "waic") %>% kable(digits = 1)
+loo_compare(fit1, fit2, criterion = "waic") %>% kable(digits = 1)
 ```
 
 Bayesian R2 `bayes_R2` for the three model runs
@@ -199,78 +193,77 @@ Bayesian R2 `bayes_R2` for the three model runs
 get_bayes_R2(fits)
 ```
 
-```{r echo=TRUE, fig.height=6, fig.width=6, message=FALSE, fig.cap = "CDI plot for a linear variable."}
-plot_bayesian_cdi(fit = fit1, xfocus = "Duration", yfocus = "Year", xlab = "Duration")
-```
-
-```{r echo=TRUE, fig.height=6, fig.width=6, message=FALSE, fig.cap = "CDI plot for a polynomial."}
-plot_bayesian_cdi(fit = fit3, xfocus = "Duration", yfocus = "Year", xlab = "Duration")
+```{r echo=TRUE, fig.height=6, fig.width=6, message=FALSE, fig.cap = "CDI plot for a fixed effect."}
+cdi_month
 ```
 
-```{r echo=TRUE, fig.height=6, fig.width=6, message=FALSE, fig.cap = "CDI plot for a spline."}
-plot_bayesian_cdi(fit = fit4, xfocus = "Duration", yfocus = "Year", xlab = "Duration")
+```{r echo=TRUE, fig.height=6, fig.width=6, message=FALSE, fig.cap = "CDI plot for a random effect."}
+plot_bayesian_cdi(fit = fit2, xfocus = "month", yfocus = "year", 
+                  xlab = "Month", ylab = "Year")
 ```
 
-```{r echo=TRUE, fig.height=6, fig.width=6, message=FALSE, fig.cap = "CDI plot for a random-effect."}
-plot_bayesian_cdi(fit = fit5, xfocus = "Duration2", yfocus = "Year", xlab = "Duration")
+```{r echo=TRUE, fig.height=6, fig.width=6, message=FALSE, fig.cap = "CDI plot for a polynomial."}
+plot_bayesian_cdi(fit = fit1, xfocus = "soak", yfocus = "year", 
+                  xlab = "Soak time (hours)", ylab = "Year")
 ```
 
-```{r echo=TRUE, fig.height=6, fig.width=6, message=FALSE, fig.cap = "CDI plot for a fixed-effect."}
-plot_bayesian_cdi(fit = fit6, xfocus = "Duration2", yfocus = "Year", xlab = "Duration")
+```{r echo=TRUE, fig.height=6, fig.width=6, message=FALSE, fig.cap = "CDI plot for a spline."}
+plot_bayesian_cdi(fit = fit2, xfocus = "depth", yfocus = "year", 
+                  xlab = "Depth (m)", ylab = "Year")
 ```
 
 ```{r echo=TRUE, fig.height=6, fig.width=6, message=FALSE, fig.cap = "Comparison plot of all model runs."}
-fits <- list(fit1, fit3, fit4, fit5, fit6)
-
-plot_compare(fits = fits, 
-             labels = c("linear", "poly", "spline", "random-effect", "fixed-effect"), 
-             year = "Year")
+fits <- list(fit1, fit2)
+plot_compare(fits = fits, year = "year")
 ```
 
 ```{r echo=TRUE, fig.height=6, fig.width=6, message=FALSE, fig.cap = "Unstandardised versus standardised series."}
-plot_index(fit = fit4, year = "Year")
+plot_index(fit = fit1, year = "year")
 ```
 
 # Diagnostics
 
 The `influ2` package is based on the original `influ` package which was developed to generate influence plots. The `influ` package has functions that use outputs from the `glm` function. The `influ2` package was developed to use outputs from `brms` and relies heavily on the R packages `ggplot2` and `dplyr`. This vignette showcases the `influ2` package with a focus on model diagnostics including posterior predictive distributions, residuals, and quantile-quantile plots.
 
 ```{r echo=TRUE, fig.height=6, fig.width=6, message=FALSE, fig.cap = "Comparison of the empirical distribution of the data (y) to the distributions of simulated/replicated data (yrep) from the posterior predictive distribution."}
-yrep <- posterior_predict(fit4, nsamples = 100)
+yrep <- posterior_predict(fit1, ndraws = 100)
+
+ppc_dens_overlay(y = lobsters_per_pot$lobsters, yrep = yrep) + 
+  labs(x = "CPUE", y = "Density")
 
-ppc_dens_overlay(y = iris2$CPUE, yrep = yrep) + 
+ppc_bars(y = lobsters_per_pot$lobsters, yrep = yrep) + 
   labs(x = "CPUE", y = "Density")
 ```
 
 ```{r echo=TRUE, fig.height=6, fig.width=6, message=FALSE, fig.cap = "Posterior predictive check."}
-ppc_ecdf_overlay(y = iris2$CPUE, yrep = yrep) + 
+ppc_ecdf_overlay(y = lobsters_per_pot$lobsters, yrep = yrep, discrete = TRUE) + 
   coord_cartesian(xlim = c(0, 10))
 ```
 
 ```{r echo=TRUE, fig.height=6, fig.width=6, message=FALSE, fig.cap = "Quantile-quantile plot from LOO."}
-loo <- loo(fit4, save_psis = TRUE)
-psis <- loo$psis_object
-lw <- weights(psis)
-yrep <- posterior_predict(fit4)
-ppc_loo_pit_qq(y = iris2$CPUE, yrep = yrep, lw = lw)
+# loo <- loo(fit1, save_psis = TRUE)
+# psis <- loo$psis_object
+# lw <- weights(psis)
+# yrep <- posterior_predict(fit1)
+# ppc_loo_pit_qq(y = lobsters_per_pot$lobsters, yrep = yrep, lw = lw)
 ```
 
 ```{r echo=TRUE, fig.height=6, fig.width=6, message=FALSE, fig.cap = "Quantile-quantile plot."}
-plot_qq(fit = fit4)
+# plot_qq(fit = fit1)
 ```
 
 ```{r echo=TRUE, fig.height=6, fig.width=6, message=FALSE, fig.cap = "Residuals and a loess smoother."}
-plot_predicted_residuals(fit = fit4, trend = "loess")
+plot_predicted_residuals(fit = fit1, trend = "loess")
 ```
 
 ```{r echo=TRUE, fig.height=6, fig.width=10, message=FALSE, fig.cap = "Residuals and a linear fit by year."}
-plot_predicted_residuals(fit = fit4, trend = "lm") +
-  facet_wrap(Year ~ .)
+plot_predicted_residuals(fit = fit1, trend = "lm") +
+  facet_wrap(year ~ .)
 ```
 
 ```{r echo=TRUE, fig.height=6, fig.width=10, message=FALSE, fig.cap = "Residuals and a loess smoother by year."}
 # A new style of residual plot
-fit <- fit4
+fit <- fit1
 
 # Extract predicted values
 pred <- fitted(fit) %>% data.frame()
@@ -280,12 +273,12 @@ names(pred) <- paste0("pred.", names(pred))
 resid <- residuals(fit) %>% data.frame()
 names(resid) <- paste0("resid.", names(resid))
 
-df <- cbind(resid, pred, iris2)
+df <- cbind(resid, pred, lobsters_per_pot)
 
-ggplot(data = df, aes(x = Year, y = .data$resid.Estimate)) +
+ggplot(data = df, aes(x = year, y = .data$resid.Estimate)) +
   geom_pointrange(aes(ymin = .data$resid.Q2.5, ymax = .data$resid.Q97.5), alpha = 0.75) +
-  facet_wrap(Area ~ Species) +
-    geom_hline(yintercept = 0, linetype = "dashed") +
+  facet_wrap(month ~ .) +
+  geom_hline(yintercept = 0, linetype = "dashed") +
   # geom_errorbarh(aes(xmax = .data$pred.Q2.5, xmin = .data$pred.Q97.5, height = 0), alpha = 0.75) +
   # labs(x = "Predicted values", y = "Residuals") +
   theme(axis.text.x = element_text(angle = 45, hjust = 1))
@@ -296,24 +289,26 @@ ggplot(data = df, aes(x = Year, y = .data$resid.Estimate)) +
 Implied coefficients (points) are calculated as the normalised fishing year coefficient (grey line) plus the mean of the standardised residuals in each year for each category of a variable. These values approximate the coefficients obtained when an area x year interaction term is fitted, particularly for those area x year combinations which have a substantial proportion of the records. The error bars indicate one standard error of the standardised residuals. The information at the top of each panel identifies the plotted category, provides the correlation coefficient (rho) between the category year index and the overall model index, and the number of records supporting the category.
 
 ```{r echo=TRUE, fig.height=6, fig.width=10, message=FALSE, fig.cap = "Implied residuals by year for the categorical variable Area which is not included in the model."}
-plot_implied_residuals(fit = fit2, data = iris2, 
-                       year = "Year", groups = "Area") +
+plot_implied_residuals(fit = fit2, data = lobsters_per_pot, 
+                       year = "year", groups = "month") +
   theme(axis.text.x = element_text(angle = 45, hjust = 1))
 ```
 
 ```{r echo=TRUE, fig.height=6, fig.width=10, message=FALSE, fig.cap = "Implied residuals by year for the categorical variable Sepcies which is included in the model."}
-plot_implied_residuals(fit = fit2, data = iris2, 
-                       year = "Year", groups = "Species") +
+plot_implied_residuals(fit = fit2, data = lobsters_per_pot, 
+                       year = "year", groups = "depth") +
   theme(axis.text.x = element_text(angle = 45, hjust = 1))
 ```
 
 # Functions for generating outputs for stock assessment
 
-Finally, in order to fit to the data in a stock assessment model then indices can be produced using the `get_index` function. The `Estimate` and `Est.Error` columns are what should be used in stock assessment.
+Finally, in order to fit to the data in a stock assessment model then indices 
+can be produced using the `get_index` function. The `Estimate` and `Est.Error` 
+columns are what should be used in stock assessment.
 
 ```{r echo=TRUE, fig.height=6, fig.width=6, message=FALSE}
-unstd <- get_unstandarsied(fit = fit2, year = "Year")
-cpue0 <- get_index(fit = fit2, year = "Year")
+unstd <- get_unstandarsied(fit = fit2, year = "year")
+cpue0 <- get_index(fit = fit2, year = "year")
 
 cpue0 %>%
   mutate(Unstandardised = unstd$Median) %>%

diff --git a/vignettes/m1.rds b/vignettes/m1.rds