#################################################################################### # L10 - DIF and simulation study # # NMST570 # # Last update: 10/12/2018 # #################################################################################### # Slides available at http://www.cs.cas.cz/martinkova/NMST570.html #################################################################################### # Loading packages ################################################################# #################################################################################### library(difNLR) library(difR) library(ltm) #################################################################################### # Generate data #################################################################### #################################################################################### # item parameters # first column for reference group, second column for focal group # when columns equal, no DIF a <- rep(seq(0.8, 1.2, 0.1), each = 3); (a <- cbind(a, a)) b <- rep(c(-1, 0, 1), 5); (b <- cbind(b, b)) c <- rep(0, 15); (c <- cbind(c, c)) d <- rep(1, 15); (d <- cbind(d, d)) # introduce DIF ### here we introduce uniform DIF (i.e., in parameter b) for item 1 ### we change the value of b in the second columns b[1, 2] <- b[1, 2] + 1 ### we save DIF items into trueDIF trueDIF <- 1 # sample size N <- 1000 # number of items I <- nrow(a) # always use set.seed() for reproducibility set.seed(1234) # generating data x <- genNLR(N = N, itemtype = "dich", a = a, b = b, c = c, d = d) head(x) data <- x[, colnames(x) != "group"] group <- x$group #################################################################################### # DIF detection methods ############################################################ #################################################################################### # save DIFitems output ### Mantel-Haenszel test (DIFMH <- difMH(data, group, focal.name = 1)$DIFitems) ### logistic regression (DIFLR <- difLogistic(data, group, focal.name = 1)$DIFitems) ### SIBTEST (DIFSibtest <- difSIBTEST(data, group, focal.name = 1)$DIFitems) ### Lord's test (DIFLord <- difLord(data, group, focal.name = 1, model = "2PL")$DIFitems) ### Raju's test (DIFRaju <- difRaju(data, group, focal.name = 1, model = "2PL")$DIFitems) #################################################################################### # Evaluation ####################################################################### #################################################################################### # we need to calculate whether DIF detection methods: ### correctly identify trueDIF item as DIF (TRUE POSSITIVE) ### correctly identify non DIF items as non DIF (TRUE NEGATIVE) ### incorrectly marked trueDIF item as non DIF (FALSE NEGATIVE) ### incorrectly marked non DIF items as DIF (FALSE POSSITIVE) # for this, we can use TFPN() function TFPN <- function(DIF, trueDIF, n){ # true possitive TP <- length(intersect(trueDIF, DIF)) # true negative TN <- length(intersect(setdiff(1:n, trueDIF), setdiff(1:n, DIF))) # false negative - type II error FN <- length(setdiff(trueDIF, DIF)) # false possitive - type I error FP <- length(setdiff(DIF, trueDIF)) # output res <- data.frame(TP = TP, TN = TN, FN = FN, FP = FP) return(res) } # and use it as follows: TFPN(DIFMH, trueDIF, n = I) TFPN(DIFLR, trueDIF, n = I) TFPN(DIFSibtest, trueDIF, n = I) TFPN(DIFLord, trueDIF, n = I) TFPN(DIFRaju, trueDIF, n = I) #################################################################################### # Simulation ####################################################################### #################################################################################### #--------------------------------------------- # Run the simulation #--------------------------------------------- # set number of simulation runs numSim <- 10 # always use set.seed() for reproducibility set.seed(1234) # preparing for saving results res <- matrix(0, nrow = 5, ncol = 4) # running simulation for (i in 1:numSim){ # generating data x <- genNLR(N = N, itemtype = "dich", a = a, b = b, c = c, d = d) data <- x[, colnames(x) != "group"] group <- x$group # DIF detection methods ### Mantel-Haenszel test (DIFMH <- difMH(data, group, focal.name = 1)$DIFitems) ### logistic regression (DIFLR <- difLogistic(data, group, focal.name = 1)$DIFitems) ### SIBTEST (DIFSibtest <- difSIBTEST(data, group, focal.name = 1)$DIFitems) ### Lord's test (DIFLord <- difLord(data, group, focal.name = 1, model = "2PL")$DIFitems) ### Raju's test (DIFRaju <- difRaju(data, group, focal.name = 1, model = "2PL")$DIFitems) # saving results res <- res + rbind(TFPN(DIFMH, trueDIF, n = I), TFPN(DIFLR, trueDIF, n = I), TFPN(DIFSibtest, trueDIF, n = I), TFPN(DIFLord, trueDIF, n = I), TFPN(DIFRaju, trueDIF, n = I)) } # print results rownames(res) <- c("MH", "LR", "SIBTEST", "Lord", "Raju") res #--------------------------------------------- # Evaluate the simulation #--------------------------------------------- # calculate power = probability of rejecting H0 when H1 is true # TP / (TP + FN) power <- res$TP / (res$TP + res$FN) names(power) <- c("MH", "LR", "SIBTEST", "Lord", "Raju") power # calculate type I error = probability of rejecting H0 when H0 is true # FP / (FP + TN) tie <- res$FP / (res$FP + res$TN) names(tie) <- c("MH", "LR", "SIBTEST", "Lord", "Raju") tie #################################################################################### # Examples ######################################################################### #################################################################################### #--------------------------------------------- # Effect of item purification in MH #--------------------------------------------- # always use set.seed() for reproducibility set.seed(1234) # set number of simulations numSim <- 100 # preparing for saving results res <- matrix(0, nrow = 2, ncol = 4) # running simulation for (i in 1:numSim){ # generating data x <- genNLR(N = N, itemtype = "dich", a = a, b = b, c = c, d = d) data <- x[, colnames(x) != "group"] group <- x$group # DIF detection methods ### Mantel-Haenszel test - without purification (DIFMH1 <- difMH(data, group, focal.name = 1)$DIFitems) ### Mantel-Haenszel test - with purification (DIFMH2 <- difMH(data, group, focal.name = 1, purify = T, nrIter = 20)$DIFitems) # saving results res <- res + rbind(TFPN(DIFMH1, trueDIF, n = I), TFPN(DIFMH2, trueDIF, n = I)) } # print results rownames(res) <- c("MH - without purification", "MH - with purification") res # power res$TP / (res$TP + res$FN) # type I error res$FP / (res$FP + res$TN) #--------------------------------------------- # Effect of sample size in LR #--------------------------------------------- # always use set.seed() for reproducibility set.seed(1234) # set number of simulations numSim <- 100 # preparing for saving results res <- matrix(0, nrow = 4, ncol = 4) j <- 0 # running simulation for (N in c(100, 200, 500, 1000)){ j <- j + 1 for (i in 1:numSim){ # generating data x <- genNLR(N = N, itemtype = "dich", a = a, b = b, c = c, d = d) data <- x[, colnames(x) != "group"] group <- x$group # DIF detection methods ### logistic regression (DIFLR <- difLogistic(data, group, focal.name = 1)$DIFitems) # saving results res[j, ] <- res[j, ] + unlist(TFPN(DIFLR, trueDIF, n = I)) } } # print results rownames(res) <- c("LR 100", "LR 200", "LR 500", "LR 1000") colnames(res) <- c("TP", "TN", "FN", "FP") (res <- as.data.frame(res)) # power (power <- res$TP / (res$TP + res$FN)) plot(power ~ c(100, 200, 500, 1000), type = "b", xlab = "Sample size", ylab = "Power") abline(h = 0.8, col = "red", lty = 2) # type I error (tie <- res$FP / (res$FP + res$TN)) plot(tie ~ c(100, 200, 500, 1000), type = "b", xlab = "Sample size", ylab = "Type I error", ylim = c(0, 0.1)) abline(h = 0.05, col = "red", lty = 2) #--------------------------------------------- # Uniform vs non-uniform DIF in MH #--------------------------------------------- # item parameters a1 <- rep(seq(0.8, 1.2, 0.1), each = 3); (a1 <- cbind(a1, a1)) b1 <- rep(c(-1, 0, 1), 5); (b1 <- cbind(b1, b1)) c1 <- rep(0, 15); (c1 <- cbind(c1, c1)) d1 <- rep(1, 15); (d1 <- cbind(d1, d1)) # set similar DIF effect size ### introduce non-uniform DIF a2 <- a1; c2 <- c1; d2 <- d1 b2 <- b1 a2[1, 2] <- a2[1, 2] + 0.424 ### introduce uniform DIF b1[1, 2] <- b1[1, 2] + 0.6 ### checking area between characteristic curves area <- function(a1, b1, a2, b2){ if (a1 == a2){ b2 - b1 } else { abs((2*(a2 - a1) / (a1 * a2)) * log(1 + exp(a1 * a2 * (b2 - b1) / (a2 - a1))) - (b2 - b1)) } } area(a1[1, 1], b1[1, 1], a1[1, 2], b1[1, 2]) area(a2[1, 1], b2[1, 1], a2[1, 2], b2[1, 2]) ### we save DIF items into trueDIF trueDIF <- 1 # sample size N <- 500 # number of items I <- nrow(a) # preparing for saving results res <- matrix(0, nrow = 2, ncol = 4) # always use set.seed() for reproducibility set.seed(1234) # running simulation for (i in 1:numSim){ # generating data with uniform DIF x1 <- genNLR(N = N, itemtype = "dich", a = a1, b = b1, c = c1, d = d1) data1 <- x1[, colnames(x) != "group"] group1 <- x1$group ### Mantel-Haenszel test (DIFMH1 <- difMH(data1, group1, focal.name = 1)$DIFitems) # generating data with non-uniform DIF x2 <- genNLR(N = N, itemtype = "dich", a = a2, b = b2, c = c2, d = d2) data2 <- x2[, colnames(x) != "group"] group2 <- x2$group ### Mantel-Haenszel test (DIFMH2 <- difMH(data2, group2, focal.name = 1)$DIFitems) # saving results res <- res + rbind(TFPN(DIFMH1, trueDIF, n = I), TFPN(DIFMH2, trueDIF, n = I)) } # print results rownames(res) <- c("MH - uniform DIF", "MH - non-uniform DIF") res # power res$TP / (res$TP + res$FN) # type I error res$FP / (res$FP + res$TN) #################################################################################### # Try by yourself ################################################################## #################################################################################### #--------------------------------------------- # Effect of correction method for multiple comparison #--------------------------------------------- # Create simulation study to evaluate effect of DIF effect size # Prepare 1 set of parameters with 1 DIF item: a <- rep(seq(0.8, 1.2, 0.1), each = 3); (a <- cbind(a, a)) b <- rep(c(-1, 0, 1), 5); (b <- cbind(b, b)) c <- rep(0, 15); (c <- cbind(c, c)) d <- rep(1, 15); (d <- cbind(d, d)) b1[1, 2] <- b1[1, 2] + 1 # in each run of the simulation study, create 1 dataset # and use 1 DIF detection method - first, with no correction method, # second, with choosen correction method via argument p.adjust.method # save results # evaluate results - calculate power and type I error #--------------------------------------------- # DIF effect size #--------------------------------------------- # Create simulation study to evaluate effect of DIF effect size # Prepare 3 sets of parameters with 1 uniform DIF item: a <- rep(seq(0.8, 1.2, 0.1), each = 3); (a <- cbind(a, a)) b <- rep(c(-1, 0, 1), 5); (b <- cbind(b, b)) c <- rep(0, 15); (c <- cbind(c, c)) d <- rep(1, 15); (d <- cbind(d, d)) a1 <- a2 <- a3 <- a b1 <- b2 <- b3 <- b c1 <- c2 <- c3 <- c d1 <- d2 <- d3 <- d # first set with difference in difficulty between groups = 0.5 b1[1, 2] # first set with difference in difficulty between groups = 1 b2[1, 2] # first set with difference in difficulty between groups = 1.5 b3[1, 2] # in each run of the simulation study, create 3 different datasets # and use one DIF detection method for each dataset # save results # evaluate results - calculate power and type I error