-
Notifications
You must be signed in to change notification settings - Fork 0
/
analysis_04_imputed_data_wrangling.R
101 lines (86 loc) · 3.67 KB
/
analysis_04_imputed_data_wrangling.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
# ---
# title: Imputed data wrangling
# author: Michelle María Early Capistrán
# email: [email protected]
# date: March 2021
# Script and data info:
# - This script creates dataframes that join all imputed values along with
# observed CPUE values used in analyses (mean values per year).
# - Multiply imputed data are generated by "R/cons_mice_analysis.R" and
# "R/cf_mice_analysis.R".
# - Observed data were obtained from ecological monitoring (1995-2018) in
# Bahía de los Ángeles, Baja California, Mexico by Dr. J. Seminoff, Grupo
# Tortuguero de Bahía de los Ángeles, and Comisión Nacional de Áreas
# Naturales Protegidas and LEK-derived data is from Early-Capistrán et al.
# (PeerJ, 2020)
# - - -
# Load libraries
library("devtools")
library("here")
# Load consLettersUtils package
devtools::load_all("consLettersUtils")
# Load data and prepare data -------------------------------------------------
# NOTE(review): `%>%`, filter() and select() are not attached by any
# library() call visible in this script; presumably they are exposed through
# the imports of consLettersUtils by devtools::load_all() -- confirm.

# Raw CPUE table read from disk
cmydas_data <- read.csv("data/cpue_data.csv", header = TRUE)

# Curve-fitting data for the "Conservation" phase. Rows from the Commercial
# Fishery "collapse" stage (stage 4) are kept as well so that values for
# 1983-1994 can be interpolated, but stage-4 rows whose cpue is NA are dropped
# (those are imputed with the other LEK data: "R/cf_mice_analysis.R").
cons_data_curve <- cmydas_data %>%
  filter(
    stage >= 4,
    !(stage == 4 & is.na(cpue))
  ) %>%
  select(yearSerial, cpue)

# Monitoring values for scatterplots: only stage 5 rows are kept, which the
# original author notes is needed for correct color mapping.
cons_data_sp <- cmydas_data %>%
  filter(stage == 5) %>%
  select(yearSerial, cpue)

# Data for the "Commercial Fishing" phase: rows whose type is "LEK"
cf_data <- cmydas_data %>%
  filter(type == "LEK") %>%
  select(yearSerial, cpue)

# Multiply imputed datasets written by the earlier imputation scripts
cons_mice_data <- readRDS("results/cons_mice_data.rds")
cf_mice_data <- readRDS("results/cf_mice_data.rds")
# Prepare data for plotting ---------------------------------------------------
# pivotImp() and getMeanImp() come from the consLettersUtils package loaded
# above. Statement order matters: cf_data is passed to both helpers before the
# "type" indicator column is added to it at the end of this section.

# Stacked imputed values for the LEK data, tagged "imputed"
cf_imp_long <- mutate(
  pivotImp(cf_mice_data, cf_data, "yearSerial", "cpue"),
  type = "imputed"
)

# For monitoring, the "data" argument is cons_data_curve, because those were
# the data used for multiple imputation
cons_imp_long <- mutate(
  pivotImp(cons_mice_data, cons_data_curve, "yearSerial", "cpue"),
  type = "imputed"
)

# Per-year mean imputed values, tagged "imputed_means"
cons_imp_means <- mutate(
  getMeanImp(cons_mice_data, cons_data_curve, "yearSerial", "cpue"),
  type = "imputed_means"
)
cf_imp_means <- mutate(
  getMeanImp(cf_mice_data, cf_data, "yearSerial", "cpue"),
  type = "imputed_means"
)

# Tag the observed values with their own indicator
cons_data_sp <- mutate(cons_data_sp, type = "observed")
cf_data <- mutate(cf_data, type = "observed")
# Make a dataframe with all imputed and observed values ----------------------
# The full_join() calls are given no `by`, so (assuming dplyr's full_join)
# they match on every column the two tables share. Rows containing any NA are
# removed by na.omit() at the end of each chain.

# LEK ("Commercial Fishing") data: imputed values, imputed means and observed
cf_all_data <- cf_imp_long %>%
  select(yearSerial, cpue, type) %>%
  full_join(cf_imp_means) %>%
  full_join(cf_data) %>%
  mutate(dataSource = "LEK") %>%
  na.omit()

# Monitoring data: the observed part uses cons_data_sp (the scatterplot
# subset) for correct color mapping
cons_all_data <- cons_imp_long %>%
  select(yearSerial, cpue, type) %>%
  full_join(cons_imp_means) %>%
  full_join(cons_data_sp) %>%
  mutate(dataSource = "Monitoring") %>%
  na.omit()

# Both sources together, with the two indicator columns stored as factors
all_data <- cf_all_data %>%
  full_join(cons_all_data) %>%
  mutate(
    dataSource = as.factor(dataSource),
    type = as.factor(type)
  )

# Export in RDS format to facilitate plotting.
# saveRDS() returns NULL invisibly, so assigning its result left obs_imp_data
# bound to NULL. Bind the name to the data first, then write it out.
# NOTE(review): the output path has no ".rds" extension, unlike the input
# files; it is kept as-is because other scripts may read it by this name.
obs_imp_data <- all_data
saveRDS(obs_imp_data, "results/obs_imp_data")