Merge branch 'release-2.1.0'

tmatta · Jun 23, 2021 · 2c18508 · 2c18508
2 parents 14a6608 + 12632ff
commit 2c18508
Show file tree

Hide file tree

Showing 106 changed files with 6,458 additions and 252 deletions.
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -1,11 +1,12 @@
-^.*\.Rproj$
-^\.Rproj\.user$
-^NEWS\.md$
-^README\.md$
-^dev\.R$
-^\.travis\.yml$
-^cran-comments\.md$
-^desktop\.ini$
-gendat3*
-.vscode
-doc
+^.*\.Rproj$
+^\.Rproj\.user$
+^NEWS\.md$
+^README\.md$
+^dev\.R$
+^\.travis\.yml$
+^cran-comments\.md$
+^desktop\.ini$
+gendat3*
+.vscode
+^Meta$
+^doc$
diff --git a/.gitignore b/.gitignore
@@ -15,6 +15,7 @@ archive/
 jss/
 .Rproj.user/
 .vscode/
+
 #-----------------#
 #--- TeX stuff ---#
 #-----------------#
@@ -66,4 +67,6 @@ desktop.ini
 
 # Test files
 gendat3*
-doc
+inst/doc
+doc
+Meta
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,7 @@
 Package: lsasim
 Title: Functions to Facilitate the Simulation of Large Scale Assessment Data
-Version: 2.0.2
+Version: 2.1.0
+Date: 2021-06-22
 Authors@R: c(
   person("Tyler", "Matta",
          email = "[email protected]", role = "aut"),
@@ -13,18 +14,27 @@ Authors@R: c(
   person("Kondwani Kajera", "Mughogho",
          email = "[email protected]", role = "ctb"),
   person("Waldir", "Leoncio",
-         email = "[email protected]", role = c("aut", "cre"))
+         email = "[email protected]", role = c("aut", "cre")),
+  person("Sinan", "Yavuz", role = "ctb"),
+  person("Paul", "Bailey", role = "ctb")
   )
-Maintainer: Waldir Leoncio <[email protected]>
 BugReports: https://github.com/tmatta/lsasim/issues
 Description: Provides functions to simulate data from large-scale educational
   assessments, including background questionnaire data and cognitive item
-  responses that adhere to a multiple-matrix sampled design.
-Imports: mvtnorm
+  responses that adhere to a multiple-matrix sampled design. The theoretical
+  foundation can be found in
+  Matta, T.H., Rutkowski, L., Rutkowski, D. et al. (2018)
+  <doi:10.1186/s40536-018-0068-8>.
+Imports: mvtnorm, cli, methods, polycor
 Depends:
-    R (>= 3.3.0)
+    R (>= 3.6.0)
 License: GPL-3
 Encoding: UTF-8
 LazyData: true
 RoxygenNote: 7.1.1
-Suggests: testthat, polycor
+Suggests:
+    testthat,
+    knitr,
+    formatR,
+    rmarkdown
+VignetteBuilder: knitr
diff --git a/NAMESPACE b/NAMESPACE
@@ -1,18 +1,39 @@
 # Generated by roxygen2: do not edit by hand
 
+S3method(anova,lsasimcluster)
+S3method(summary,lsasimcluster)
 export(beta_gen)
 export(block_design)
 export(booklet_design)
 export(booklet_sample)
+export(brr)
+export(calc_replicate_weights)
+export(cluster_gen)
+export(cluster_gen_separate)
+export(cluster_gen_together)
+export(convert_vector_to_list)
 export(cor_gen)
 export(cov_gen)
+export(draw_cluster_structure)
 export(gen_variable_n)
+export(intraclass_cor)
 export(irt_gen)
 export(item_gen)
+export(jackknife)
+export(label_respondents)
 export(proportion_gen)
 export(pt_bis_conversion)
 export(questionnaire_gen)
+export(ranges)
+export(recalc_final_weights)
+export(replicate_var)
 export(response_gen)
+export(sample_from)
+export(select)
+export(trim_sample)
+importFrom(methods,is)
+importFrom(stats,anova)
+importFrom(stats,complete.cases)
 importFrom(stats,cov)
 importFrom(stats,cov2cor)
 importFrom(stats,dnorm)
@@ -23,9 +44,12 @@ importFrom(stats,qnorm)
 importFrom(stats,quantile)
 importFrom(stats,rbeta)
 importFrom(stats,rbinom)
+importFrom(stats,rchisq)
 importFrom(stats,reshape)
 importFrom(stats,rgamma)
 importFrom(stats,rnorm)
 importFrom(stats,rpois)
 importFrom(stats,runif)
+importFrom(stats,sd)
 importFrom(stats,setNames)
+importFrom(stats,weighted.mean)
diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,29 @@
+lsasim 2.1.0
+------------
+
+### API changes
+
+* Adds `cluster_gen`, a function to generate background questionnaires (with `questionnaire_gen`) in a cluster sampling structure.
+* Package description now includes a literature reference for the theoretical framework behind the package (issue #38, pull request #42)
+
+### Bug fixes
+
+* Fixes item parameter generation by `irt_gen` when the guessing parameter is larger than zero (issue #40)
+
+lsasim 2.0.2
+------------
+
+### Bug fixes
+
+* Solves [Issue #11](https://github.com/tmatta/lsasim/issues/11), which was causing `item_gen()` to produce out-of-bounds item difficulties in some situations.
+
+lsasim 2.0.1
+------------
+
+### Bug fixes
+
+* Makes lsasim compliant with changes to be introduced in the next major release of R. lsasim 2.0.1 is functionally identical to 2.0.0, as all changes relate to how `if (class(x) == "matrix")` statements are written. Specifically, such statements were changed to `if (class(x)[1] == "matrix")` to accommodate cases where `x` may be a matrix with complex classes such as `"matrix" "array"`.
+
 lsasim 2.0.0
 ------------
 

diff --git a/R/anova.R b/R/anova.R
@@ -0,0 +1,88 @@
+#' @title Generate an ANOVA table for LSASIM clusters
+#' @description Prints Analysis of Variance tables for `cluster_gen` output, or returns the underlying estimates.
+#' @param object list output of `cluster_gen`
+#' @param print if `TRUE` (default), formatted ANOVA tables are printed; if `FALSE`, a list containing the estimators is returned instead
+#' @param calc.se if `TRUE`, will try to calculate the standard error of the intraclass correlation
+#' @param ... additional objects of the same type (see `help("anova")` for details)
+#' @return Printed ANOVA table (`print = TRUE`) or list of parameters (`print = FALSE`); the list is empty if every level was skipped
+#' @note  If the rhos for different levels are varied in scale, the generated rho will be less accurate.
+#' @references Snijders, T. A. B., & Bosker, R. J. (1999). Multilevel Analysis. Sage Publications.
+#' @importFrom stats anova
+#' @method anova lsasimcluster
+#' @export
+anova.lsasimcluster <- function(object, print = TRUE, calc.se = TRUE, ...) {
+    # Wrap data in a list (for !separate_questionnaires) =======================
+    # inherits() copes with multi-class elements, unlike class(x) != "list"
+    if (!any(vapply(object, inherits, logical(1), what = "list"))) {
+        data <- list(object)
+        names(data) <- gsub("[0-9]", "", names(data[[1]])[1])
+    } else {
+        data <- object
+    }
+    last_level <- names(data)[length(names(data))]
+
+    # Create summary statistics ================================================
+    data_summary <- summary.lsasimcluster(data, print = "none")
+
+    # Create other relevant input ==============================================
+    out_complete <- list()
+    for (n in names(data)) {
+        ds <- data_summary[[n]]
+        if (ds$N == 1) {
+            message("There is only one group of ", pluralize(n),
+                    ". Skipping ANOVA for that level.")
+            next
+        }
+        ## Sample statistics ---------------------------------------------------
+        s2_within <- calc_var_within(ds$n_j, ds$s2_j, ds$M, ds$N)
+        s2_between <- calc_var_between(ds$n_j, ds$y_bar_j, ds$y_bar, ds$n_tilde, ds$N)
+        s2_total <- calc_var_tot(ds$M, ds$N, ds$n_tilde, s2_within, s2_between)
+        out <- list(sample_statistics = c(s2_within = s2_within,
+                                          s2_between = s2_between,
+                                          s2_total = s2_total))
+
+        ## Population parameters and ANOVA table -------------------------------
+        sigma2_hat <- s2_within
+        # The SE is only computed for equally-sized groups; the check does not
+        # depend on the variable, so it is evaluated once per level
+        equal_sizes <- stats::var(ds$n_j) == 0
+        for (x in names(sigma2_hat)) {
+            tau2_hat <- max(s2_between[x] - s2_within[x] / ds$n_tilde, 0)
+            rho_hat <- intraclass_cor(tau2_hat, sigma2_hat[x])
+            se_rho <- NA
+            if (calc.se && equal_sizes) {
+                se_rho <- calc_se_rho(rho_hat, ds$n_j, ds$N)
+            } else if (calc.se) {
+                warning(
+                    "SE not yet implemented for different ",
+                    "sample sizes. You can set calc.se=FALSE ",
+                    "to get rid of this message."
+                )
+            }
+            out$population_estimates[[x]] <- c(sigma2_hat = sigma2_hat[x],
+                                               tau2_hat = tau2_hat,
+                                               rho_hat = rho_hat,
+                                               se_rho = se_rho)
+
+            ### ANOVA table ....................................................
+            if (print) {
+                message("\nANOVA table for ", pluralize(n), ", ", x)
+                print_anova(s2_within[x], s2_between[x], s2_total[x],
+                            sigma2_hat[x], tau2_hat, rho_hat, se_rho,
+                            ds$n_tilde, ds$M, ds$N)
+            }
+        }
+        # Separate consecutive levels with a rule (scalar test, hence &&)
+        if (print && class(data[[1]])[1] == "list" && n != last_level) {
+            cli::cat_rule()
+        }
+        out_complete[[n]] <- out
+    }
+
+    # Return estimates =========================================================
+    # Use out_complete, not `out`: the latter is undefined if all levels skipped
+    if (!print) {
+        if (length(out_complete) == 1) return(out_complete[[1]])
+        return(out_complete)
+    }
+}
diff --git a/R/attribute_cluster_labels.R b/R/attribute_cluster_labels.R
@@ -0,0 +1,27 @@
+#' @title Attribute Labels in Hierarchical Structure
+#' @description Attributes cluster and respondent labels in the context of `cluster_gen`.
+#' @seealso cluster_gen
+#' @param n numeric vector or list. If named, the names are used as labels; otherwise default labels are picked according to `length(n)`, which must then be between 2 and 5.
+#' @return list containing appropriate labels for the clusters (`cl`) and their respondents (`resp`)
+attribute_cluster_labels <- function(n) {
+    if (!is.null(names(n))) {
+        cluster_labels <- names(n)
+        resp_labels <- names(n)[-1]
+    } else {
+        # General lists for all level ranges up to 5 ---------------------------
+        cluster_labels <- list(c("school"),
+                               c("class", "school"),
+                               c("class", "school", "state"),
+                               c("class", "school", "city", "state"))
+        resp_labels    <- list(c("student", "teacher"),
+                               c("student", "teacher", "principal"),
+                               c("student", "teacher", "principal", "mayor"),
+                               c("student", "teacher", "principal", "governor"))
+        # NOTE(review): 5 levels give 4 cluster but 3 respondent labels; confirm
+        # Filtering the labels above according to length(n) --------------------
+        if (!(length(n) %in% 2:5)) stop("unnamed `n` needs 2 to 5 levels")
+        cluster_labels <- rev(cluster_labels[[length(n) - 1]])
+        resp_labels <- rev(resp_labels[[length(n) - 1]])[-1]
+    }
+    return(list(cl = cluster_labels, resp = resp_labels))
+}
diff --git a/R/beta_gen.R b/R/beta_gen.R
@@ -20,6 +20,13 @@
 #'   \code{questionnaire_gen}, when \code{family = "gaussian"}, \code{theta =
 #'   TRUE}, and \code{full_output = TRUE}. However, it can also be directly
 #'   called by the user so they can perform further analysis.
+#'   
+#'   This function primarily calculates the true regression coefficients 
+#'   (\eqn{\beta}) for the linear influence of the background questionnaire 
+#'   variables on \eqn{\theta}. From a statistical perspective, this 
+#'   relationship can be modeled as follows, where \eqn{E(\theta | \boldsymbol{X}, \boldsymbol{W})}{E(\theta | X, W)} is the expectation of \eqn{\theta} given \eqn{\boldsymbol{X} = \{X_1, \ldots, X_P\}}{X = {X_1, ..., X_P}} and \eqn{\boldsymbol{W} = \{W_1, \ldots, W_Q\}}{W = {W_1, ..., W_Q}}:
+#'   
+#'   \deqn{E(\theta | \boldsymbol{X}, \boldsymbol{W}) = \beta_0 + \sum_{p = 1}^P \beta_p X_p + \sum_{q = 1}^Q \beta_{P + q} W_q}{E(theta | X, W) = b_0 + \sum_{p = 1}^P b_p X_p + \sum_{q = 1}^Q b_{P + q} W_q}
 #'
 #'   The regression coefficients are calculated using the true covariance matrix
 #'   either provided by the user upon calling of \code{questionnaire_gen} or
@@ -56,6 +63,7 @@
 #'   the first one, \code{betas}, will contain the same output described in the
 #'   previous paragraph. The second one, called \code{vcov_YXW}, contains
 #'   the covariance matrix of the regression coefficients.
+#' @note The equation in this page is best formatted in PDF. We recommend issuing `help("beta_gen", help_type = "PDF")` in your terminal and opening the `beta_gen.pdf` file generated in your working directory. You may also set `help_type = "HTML"`, but the equations will have degraded formatting.
 #' @seealso questionnaire_gen
 #' @export
 #' @examples