3.10-91

alexanderrobitzsch · Apr 2, 2021 · 0612b1b · 0612b1b
1 parent 213afad
commit 0612b1b
Show file tree

Hide file tree

Showing 24 changed files with 383 additions and 181 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,8 +1,8 @@
 Package: sirt
 Type: Package
 Title: Supplementary Item Response Theory Models
-Version: 3.10-79
-Date: 2021-03-09 13:01:29
+Version: 3.10-91
+Date: 2021-04-02 14:53:21
 Author: Alexander Robitzsch [aut,cre] (<https://orcid.org/0000-0002-8226-3132>)
 Maintainer: Alexander Robitzsch <[email protected]>
 Description: 

diff --git a/R/RcppExports.R b/R/RcppExports.R
@@ -1,5 +1,5 @@
 ## File Name: RcppExports.R
-## File Version: 3.010079
+## File Version: 3.010091
 # Generated by using Rcpp::compileAttributes() -> do not edit by hand
 # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
 

diff --git a/R/data.prep.R b/R/data.prep.R
@@ -1,5 +1,5 @@
 ## File Name: data.prep.R
-## File Version: 1.144
+## File Version: 1.145
 
 #----- data preparations for rasch.jml and rasch.mml
 data.prep <- function( dat, weights=NULL, use.freqpatt=TRUE,
@@ -56,16 +56,6 @@ data.prep <- function( dat, weights=NULL, use.freqpatt=TRUE,
                 "freq.patt"=freq.patt, "I"=I, "n"=n, "dat9"=dat.9 )
 }
 
-#*******************
-    #   OUTPUT:
-    #   dat         ... original data
-    #   dat2        ... reduced original data. Each different item resonse is represented by one row.
-    #   dat2.resp   ... indicator response matrix for each item response pattern
-    #   dat1        ...     --- ADD AN EXPLANATION HERE     ----
-    #   freq.patt   ... absolute frequency of each item response pattern
-    #   I           ... number of items
-    #   n           ... number of subjects
-    #   dat9        ... This is the original data exact from the fact that missings are recoded by 9.
 
 .data.prep <- data.prep
 
@@ -82,9 +72,6 @@ data.prep <- function( dat, weights=NULL, use.freqpatt=TRUE,
 }
 
 
-
-
-
 #-- Function for calculation of a response pattern
 #   for dichotomous responses
 resp.pattern2 <- function(x)

diff --git a/R/dif.strata.variance.R b/R/dif.strata.variance.R
@@ -0,0 +1,37 @@
+## File Name: dif.strata.variance.R
+## File Version: 0.14
+
+
+
+#* stratified DIF variance
+dif.strata.variance <- function( dif, se.dif, itemcluster )
+{
+    # stratified DIF variance
+    # means in differential item functioning
+    # itemcluster is a vector of strata corresponding to items
+    stratadif <- stats::aggregate( 1+0*dif, list(itemcluster), sum, na.rm=TRUE )
+    colnames(stratadif) <- c("strata", "N.Items" )
+    stratadif <- data.frame(stratadif)
+    stratadif$M.DIF <- stats::aggregate( dif, list(itemcluster), mean, na.rm=TRUE )[,2]
+    # DIF in strata
+    SS <- nrow(stratadif)
+    for (ss in 1:SS){
+        items.ss <- which( itemcluster==stratadif[ss,"strata"]  )
+        dif.ss <- dif[ items.ss ]
+        difv.ss <- dif.variance( dif=dif.ss, se.dif=se.dif[ items.ss ] )
+        stratadif$weighted.tau[ss] <- difv.ss$weighted.DIFSD
+        stratadif$unweighted.tau[ss] <- difv.ss$unweighted.DIFSD
+    }
+    stratadif[ is.na(stratadif ) ] <- 0
+
+    sd_ni1 <- stratadif$N.Items-1
+    weighted.DIFSD <- sum(stratadif$N.Items/sum(stratadif$N.Items)*stratadif$weighted.tau)
+    unweighted.DIFSD <- sum( sd_ni1/ (sum(stratadif$N.Items)-1)*stratadif$unweighted.tau)
+
+    #-- output
+    res <- list( stratadif=stratadif, weighted.DIFSD=weighted.DIFSD,
+                    unweighted.DIFSD=unweighted.DIFSD)
+    return(res)
+}
+
+
diff --git a/R/dif.variance.R b/R/dif.variance.R
@@ -1,61 +1,33 @@
 ## File Name: dif.variance.R
-## File Version: 0.18
+## File Version: 0.193
 
 
-#-------------------------------------------------------------------------#
-# Routine for calculating DIF variance   (Camilli & Penfield, 1997)      #
-dif.variance <- function( dif, se.dif, items=paste("item",1:length(dif),sep="") ){
-        # calculating the weights
-        wi <-  1/se.dif^2
-        # mean DIF
-        md <- mean(dif)
-        # weighted tau2 estimator
-        # formula (5) of PENFIELD & ALGINA (2006)
-        weighted.tauq <- sum( wi^2 * ( dif - md )^2 - wi  ) / sum( wi^2 )
-        # unweighted tau2 estimator
-        # formula (5) of PENFIELD & ALGINA (2006)
-        unweighted.tauq <- sum( ( dif - md )^2 - se.dif^2 ) / ( length(items) - 1 )
-        # calculation of variances v_i
-        vi <- weighted.tauq + se.dif^2
-        weighted.tauq[ weighted.tauq < 0 ] <- 0
-        unweighted.tauq[ unweighted.tauq < 0 ] <- 0
-        # Empirical Bayes DIF estimate
-        lambda.i <- weighted.tauq / vi
-        eb.dif <- lambda.i * ( dif - md ) + ( 1 - lambda.i) *md
-        list( weighted.DIFSD=sqrt(weighted.tauq),
-                unweighted.DIFSD=sqrt(unweighted.tauq),
-                mean.se.dif=sqrt( mean( se.dif^2 ) ), eb.dif=eb.dif  )
-        }
-#-------------------------------------------------------------------------#
 
+#*** Routine for calculating DIF variance   (Camilli & Penfield, 1997)      #
+dif.variance <- function( dif, se.dif, items=paste("item",1:length(dif),sep="") )
+{
+    # calculating the weights
+    wi <-  1/se.dif^2
+    # mean DIF
+    md <- mean(dif)
+    # weighted tau2 estimator
+    # formula (5) of PENFIELD & ALGINA (2006)
+    weighted.tauq <- sum( wi^2 * ( dif - md )^2 - wi  ) / sum( wi^2 )
+    # unweighted tau2 estimator
+    # formula (5) of PENFIELD & ALGINA (2006)
+    unweighted.tauq <- sum( ( dif - md )^2 - se.dif^2 ) / ( length(items) - 1 )
+    # calculation of variances v_i
+    vi <- weighted.tauq + se.dif^2
+    weighted.tauq[ weighted.tauq < 0 ] <- 0
+    unweighted.tauq[ unweighted.tauq < 0 ] <- 0
+    # Empirical Bayes DIF estimate
+    lambda.i <- weighted.tauq / vi
+    eb.dif <- lambda.i * ( dif - md ) + ( 1 - lambda.i) *md
 
-#################################################################################
-dif.strata.variance <- function( dif, se.dif, itemcluster ){
-    # stratified DIF variance
-    # means in differential item functioning
-    # itemcluster is a vector of strata corresponding to items
-    stratadif <- stats::aggregate( 1+0*dif, list( itemcluster ), sum, na.rm=T )
-    colnames(stratadif) <- c("strata", "N.Items" )
-    stratadif <- data.frame(stratadif)
-    stratadif$M.DIF <- stats::aggregate( dif, list( itemcluster ), mean, na.rm=T )[,2]
-    # DIF in strata
-    SS <- nrow(stratadif)
-    for (ss in 1:SS){
-        # ss <- 1
-        items.ss <- which( itemcluster==stratadif[ss,"strata"]  )
-        dif.ss <- dif[ items.ss ]
-        difv.ss <- dif.variance( dif=dif.ss, se.dif=se.dif[ items.ss ] )
-        stratadif$weighted.tau[ss] <- difv.ss$weighted.DIFSD
-        stratadif$unweighted.tau[ss] <- difv.ss$unweighted.DIFSD
-                }
-        stratadif[ is.na(stratadif ) ] <- 0
-    res <- list( "stratadif"=stratadif,
-                "weighted.DIFSD"=sum( stratadif$N.Items /  sum( stratadif$N.Items ) *
-                            stratadif$weighted.tau ),
-                "unweighted.DIFSD"=sum(
-                    ( stratadif$N.Items -1 )/  ( sum( stratadif$N.Items ) - 1) *  stratadif$unweighted.tau )
-                        )
+    #* output
+    res <- list( weighted.DIFSD=sqrt(weighted.tauq),
+                unweighted.DIFSD=sqrt(unweighted.tauq),
+                mean.se.dif=sqrt( mean( se.dif^2 ) ), eb.dif=eb.dif  )
     return(res)
-    }
-#################################################################################
+}
 
diff --git a/R/fuzcluster.R b/R/fuzcluster.R
@@ -1,45 +1,42 @@
 ## File Name: fuzcluster.R
-## File Version: 0.15
+## File Version: 0.163
 
-#*********************************************
-# Clustering for continuous fuzzy data
+
+#*** Clustering for continuous fuzzy data
 fuzcluster <- function(dat_m, dat_s, K=2, nstarts=7,
-        seed=NULL, maxiter=100, parmconv=.001,
-        fac.oldxsi=0.75, progress=TRUE ){
+            seed=NULL, maxiter=100, parmconv=.001, fac.oldxsi=0.75, progress=TRUE )
+{
     s1 <- Sys.time()
-    dev0 <- 10^(200)
+    dev0 <- 1e200
     dat_resp <- 1 - is.na(dat_m)
-    if ( ! is.null(seed) ){ nstarts <- 1 }
+    if ( ! is.null(seed) ){
+        nstarts <- 1
+    }
     for ( rr in 1:nstarts ){
         res1 <- fuzcluster_estimate(K, dat_m, dat_s, dat_resp,
-            maxiter=maxiter, parmconv=parmconv, progress=progress,
-            seed=seed, fac.oldxsi=fac.oldxsi)
+                    maxiter=maxiter, parmconv=parmconv, progress=progress,
+                    seed=seed, fac.oldxsi=fac.oldxsi)
         if ( res1$deviance < dev0 ){
             res <- res1
             dev0 <- res1$deviance
-            }
-            }
+        }
+    }
     s2 <- Sys.time()
     ### end random starts
     ### computation of information criteria
     dev <- res$deviance
-    ic <- list( "deviance"=dev, "n"=nrow(dat_m) )
+    ic <- list( deviance=dev, n=nrow(dat_m) )
     I <- ncol(dat_m)
     ic$np <- (K-1) + 2*K*I
-    # AIC
-        ic$AIC <- dev + 2*ic$np
-        # BIC
-        ic$BIC <- dev + ( log(ic$n) )*ic$np
-        # CAIC (conistent AIC)
-        ic$CAIC <- dev + ( log(ic$n) + 1 )*ic$np
-        # corrected AIC
-        ic$AICc <- ic$AIC + 2*ic$np * ( ic$np + 1 ) / ( ic$n - ic$np - 1 )
+    ic$AIC <- dev + 2*ic$np
+    ic$BIC <- dev + ( log(ic$n) )*ic$np
+    ic$CAIC <- dev + ( log(ic$n) + 1 )*ic$np
+    ic$AICc <- ic$AIC + 2*ic$np * ( ic$np + 1 ) / ( ic$n - ic$np - 1 )
     res$ic <- ic
     res$K <- K
     res$s1 <- s1
     res$s2 <- s2
     res$nstarts <- nstarts
     class(res) <- "fuzcluster"
-    ###
     return(res)
-    }
+}
diff --git a/R/fuzcluster_alg.R → R/fuzcluster_estimate.R b/R/fuzcluster_alg.R → R/fuzcluster_estimate.R
@@ -1,8 +1,9 @@
-## File Name: fuzcluster_alg.R
-## File Version: 0.25
+## File Name: fuzcluster_estimate.R
+## File Version: 0.265
+
 
 fuzcluster_estimate <- function(K, dat_m, dat_s, dat_resp,
-    maxiter=1000, parmconv=.0001, progress=TRUE, seed=NULL,    fac.oldxsi=0)
+        maxiter=1000, parmconv=.0001, progress=TRUE, seed=NULL, fac.oldxsi=0)
 {
 
     dat.resp <- dat_resp
@@ -24,11 +25,10 @@ fuzcluster_estimate <- function(K, dat_m, dat_s, dat_resp,
                 stats::runif( I, 1.05*s1, 2*s1 ) } ))
     # initial pi estimate
     pi_est <- stats::runif(K)
-#    pi_est <- rep(1/K, K )
     pi_est <- pi_est / sum( pi_est )
     dev <- 0
     iter <- 0
-    eps <- 10^(-10)
+    eps <- 1e-10
     parmchange <- 1
 
     mu_estmin <- mu_est
@@ -37,7 +37,7 @@ fuzcluster_estimate <- function(K, dat_m, dat_s, dat_resp,
     dev_min <- 1E90
     itermin <- iter
     if (progress){
-       cat("****************************************************************\n")
+        cat("****************************************************************\n")
     }
     #*************************************************
     # begin algorithm
@@ -48,7 +48,6 @@ fuzcluster_estimate <- function(K, dat_m, dat_s, dat_resp,
         dev0 <- dev
 
         #*** E-step
-
         # matrix inits
         t_ik <- matrix( 0, nrow=N, ncol=K)
         phi_ik <- xi_ik <- sig2_ik <- mu_ik <- array( 0, dim=c(N,I,K) )
@@ -79,66 +78,61 @@ fuzcluster_estimate <- function(K, dat_m, dat_s, dat_resp,
 
         # pi estimate
         pi_est <- colMeans(t_ik )+eps
-#        pi_est[ pi_est < .001 ] <- .001
         pi_est <- pi_est / sum( pi_est )
         if ( fac.oldxsi > 0 ){
-            # fac.oldxsi <- .75
             pi_est <- (1-fac.oldxsi) * pi_est + fac.oldxsi *pi_est0
             mu_est <- (1-fac.oldxsi) * mu_est + fac.oldxsi *mu_est0
             sd_est <- (1-fac.oldxsi) * sd_est + fac.oldxsi *sd_est0
-                    }
+        }
         # mu estimate
         for (kk in 1:K){
-            # kk <- 1
             mu_est[kk,] <- colSums( mu_ik[,,kk] * t_ik[,kk] ) / sum_tik[kk]
-                        }
+        }
 
         # sd estimate
         for (kk in 1:K){
-            # kk <- 1
             mu_k <- matrix( mu_est[kk,], nrow=N, ncol=I, byrow=TRUE )
             h1 <- colSums( t_ik[,kk] * ( xi_ik[,,kk] - 2*mu_k*mu_ik[,,kk] + mu_k^2 ) ) / sum_tik[kk]
             h1 <- ifelse( h1 < eps, eps, sqrt(h1) )
             sd_est[kk,] <- h1
-                        }
+        }
         sd_est[ sd_est < .01 ] <- .01
         # calculate deviance
         dev <- sum( colSums( t_ik ) * log( pi_est +eps) )
         dev <- dev - N*I / 2 * log(2*pi)
         for (kk in 1:K){
             dev <- dev - sum( t_ik[,kk] * sum( log( sd_est[kk,] + eps) ) )
-                }
+        }
         for (kk in 1:K){
             sd2_k <- matrix( sd_est[kk,]^2, nrow=N, ncol=I, byrow=TRUE )
             mu_k <- matrix( mu_est[kk,], nrow=N, ncol=I, byrow=TRUE )
-            dev <- dev - .5 * sum( t_ik[,kk] * rowSums( ( xi_ik[,,kk] - 2*mu_k*mu_ik[,,kk] + mu_k^2 ) / (sd2_k+eps) ) )
-                        }
+            dev <- dev - .5 * sum( t_ik[,kk] * rowSums( ( xi_ik[,,kk] -
+                                    2*mu_k*mu_ik[,,kk] + mu_k^2 ) / (sd2_k+eps) ) )
+        }
         dev <- -2*dev
         iter <- iter+1
         devchange <- abs( dev - dev0 )
         devchange1 <- - ( dev - dev0 )
         parmchange <- max( abs( mu_est - mu_est0 ), abs( sd_est - sd_est0) )
         if (progress){
-           cat("............ seed=", seed, " | Iteration",iter, ".............\n")
-           cat(paste("Deviance=", round( dev,3) ))
-           cat(paste(" | Deviance Change=", round(devchange1,3), "\n"))
-           cat("Class Probabilities=", paste(round( pi_est,4) ))
-           cat(paste(" | Maximum Parameter Change=", round(parmchange,3),"\n"))
-           utils::flush.console()
-                }
+            cat("............ seed=", seed, " | Iteration",iter, ".............\n")
+            cat(paste("Deviance=", round( dev,3) ))
+            cat(paste(" | Deviance Change=", round(devchange1,3), "\n"))
+            cat("Class Probabilities=", paste(round( pi_est,4) ))
+            cat(paste(" | Maximum Parameter Change=", round(parmchange,3),"\n"))
+            utils::flush.console()
+        }
         if ( dev < dev_min){
             t_ikmin <- t_ik
             mu_estmin <- mu_est
             sd_estmin <- sd_est
             pi_estmin <- pi_est
             itermin <- iter
             dev_min <- dev
-            }
+        }
     }
-    #*************** end algorithm
-    res <- list( "deviance"=dev_min, "iter"=itermin, "pi_est"=pi_estmin,
-        "mu_est"=mu_estmin, "sd_est"=sd_estmin, "posterior"=t_ikmin,
-        "seed"=seed
-         )
+    #** end algorithm
+    res <- list( deviance=dev_min, iter=itermin, pi_est=pi_estmin,
+        mu_est=mu_estmin, sd_est=sd_estmin, posterior=t_ikmin, seed=seed )
     return(res)
 }