####################################################################################
#                          L10 - DIF and simulation study                          #
#                                    NMST570                                       #
#                             Last update: 10/12/2018                              #
####################################################################################

# Slides available at http://www.cs.cas.cz/martinkova/NMST570.html

####################################################################################
# Loading packages #################################################################
####################################################################################

library(difNLR)
library(difR)
library(ltm)

####################################################################################
# Generate data ####################################################################
####################################################################################

# item parameters
# first column for reference group, second column for focal group
# when columns equal, no DIF
a <- rep(seq(0.8, 1.2, 0.1), each = 3); (a <- cbind(a, a))
b <- rep(c(-1, 0, 1), 5); (b <- cbind(b, b))
c <- rep(0, 15); (c <- cbind(c, c))
d <- rep(1, 15); (d <- cbind(d, d))

# introduce DIF
### here we introduce uniform DIF (i.e., in parameter b) for item 1
### we change the value of b in the second columns
b[1, 2] <- b[1, 2] + 1
### we save DIF items into trueDIF
trueDIF <- 1

# sample size
N <- 1000
# number of items
I <- nrow(a)

# always use set.seed() for reproducibility
set.seed(1234)
# generating data
x <- genNLR(N = N, itemtype = "dich", a = a, b = b, c = c, d = d)
head(x)

data  <- x[, colnames(x) != "group"]
group <- x$group

####################################################################################
# DIF detection methods ############################################################
####################################################################################

# save DIFitems output

### Mantel-Haenszel test
(DIFMH <- difMH(data, group, focal.name = 1)$DIFitems)

### logistic regression
(DIFLR <- difLogistic(data, group, focal.name = 1)$DIFitems)

### SIBTEST
(DIFSibtest <- difSIBTEST(data, group, focal.name = 1)$DIFitems)

### Lord's test
(DIFLord <- difLord(data, group, focal.name = 1, model = "2PL")$DIFitems)
  
### Raju's test
(DIFRaju <- difRaju(data, group, focal.name = 1, model = "2PL")$DIFitems)

####################################################################################
# Evaluation #######################################################################
####################################################################################

# we need to calculate whether DIF detection methods:
### correctly identify trueDIF item as DIF (TRUE POSSITIVE)
### correctly identify non DIF items as non DIF (TRUE NEGATIVE)
### incorrectly marked trueDIF item as non DIF (FALSE NEGATIVE)
### incorrectly marked non DIF items as DIF (FALSE POSSITIVE)

# for this, we can use TFPN() function 
TFPN <- function(DIF, trueDIF, n){
  # true possitive
  TP <- length(intersect(trueDIF, DIF))
  
  # true negative
  TN <- length(intersect(setdiff(1:n, trueDIF), setdiff(1:n, DIF)))
    
  # false negative - type II error
  FN <- length(setdiff(trueDIF, DIF))
  
  # false possitive - type I error
  FP <- length(setdiff(DIF, trueDIF))
  
  # output
  res <- data.frame(TP = TP, TN = TN, FN = FN, FP = FP)
  return(res)
}

# and use it as follows:
TFPN(DIFMH, trueDIF, n = I)
TFPN(DIFLR, trueDIF, n = I)
TFPN(DIFSibtest, trueDIF, n = I)
TFPN(DIFLord, trueDIF, n = I)
TFPN(DIFRaju, trueDIF, n = I)

####################################################################################
# Simulation #######################################################################
####################################################################################

#---------------------------------------------
# Run the simulation
#---------------------------------------------

# set number of simulation runs
numSim <- 10

# always use set.seed() for reproducibility
set.seed(1234)

# preparing for saving results
res <- matrix(0, nrow = 5, ncol = 4)

# running simulation
for (i in 1:numSim){
  # generating data
  x <- genNLR(N = N, itemtype = "dich", a = a, b = b, c = c, d = d)
  
  data  <- x[, colnames(x) != "group"]
  group <- x$group
  
  # DIF detection methods
  ### Mantel-Haenszel test
  (DIFMH <- difMH(data, group, focal.name = 1)$DIFitems)
  
  ### logistic regression
  (DIFLR <- difLogistic(data, group, focal.name = 1)$DIFitems)
  
  ### SIBTEST
  (DIFSibtest <- difSIBTEST(data, group, focal.name = 1)$DIFitems)
  
  ### Lord's test
  (DIFLord <- difLord(data, group, focal.name = 1, model = "2PL")$DIFitems)
  
  ### Raju's test
  (DIFRaju <- difRaju(data, group, focal.name = 1, model = "2PL")$DIFitems)
  
  # saving results
  res <- res + rbind(TFPN(DIFMH, trueDIF, n = I),
                     TFPN(DIFLR, trueDIF, n = I),
                     TFPN(DIFSibtest, trueDIF, n = I),
                     TFPN(DIFLord, trueDIF, n = I),
                     TFPN(DIFRaju, trueDIF, n = I))
}

# print results
rownames(res) <- c("MH", "LR", "SIBTEST", "Lord", "Raju")
res

#---------------------------------------------
# Evaluate the simulation
#---------------------------------------------

# calculate power = probability of rejecting H0 when H1 is true
# TP / (TP + FN)
power <- res$TP / (res$TP + res$FN)
names(power) <- c("MH", "LR", "SIBTEST", "Lord", "Raju")
power

# calculate type I error = probability of rejecting H0 when H0 is true
# FP / (FP + TN)
tie <- res$FP / (res$FP + res$TN)
names(tie) <- c("MH", "LR", "SIBTEST", "Lord", "Raju")
tie


####################################################################################
# Examples #########################################################################
####################################################################################

#---------------------------------------------
# Effect of item purification in MH
#---------------------------------------------

# always use set.seed() for reproducibility
set.seed(1234)

# set number of simulations
numSim <- 100

# preparing for saving results
res <- matrix(0, nrow = 2, ncol = 4)

# running simulation
for (i in 1:numSim){
  # generating data
  x <- genNLR(N = N, itemtype = "dich", a = a, b = b, c = c, d = d)
  
  data  <- x[, colnames(x) != "group"]
  group <- x$group
  
  # DIF detection methods
  ### Mantel-Haenszel test - without purification
  (DIFMH1 <- difMH(data, group, focal.name = 1)$DIFitems)
  ### Mantel-Haenszel test - with purification
  (DIFMH2 <- difMH(data, group, focal.name = 1, purify = T, nrIter = 20)$DIFitems)

  # saving results
  res <- res + rbind(TFPN(DIFMH1, trueDIF, n = I),
                     TFPN(DIFMH2, trueDIF, n = I))
}

# print results
rownames(res) <- c("MH - without purification", "MH - with purification")
res

# power
res$TP / (res$TP + res$FN)
# type I error
res$FP / (res$FP + res$TN)

#---------------------------------------------
# Effect of sample size in LR
#---------------------------------------------

# always use set.seed() for reproducibility
set.seed(1234)

# set number of simulations
numSim <- 100

# preparing for saving results
res <- matrix(0, nrow = 4, ncol = 4)
j <- 0

# running simulation
for (N in c(100, 200, 500, 1000)){
  j <- j + 1
  for (i in 1:numSim){
    # generating data
    x <- genNLR(N = N, itemtype = "dich", a = a, b = b, c = c, d = d)
    
    data  <- x[, colnames(x) != "group"]
    group <- x$group
    
    # DIF detection methods
    ### logistic regression
    (DIFLR <- difLogistic(data, group, focal.name = 1)$DIFitems)
    
    # saving results
    res[j, ] <- res[j, ] + unlist(TFPN(DIFLR, trueDIF, n = I))
  }
}

# print results
rownames(res) <- c("LR 100", "LR 200", "LR 500", "LR 1000")
colnames(res) <- c("TP", "TN", "FN", "FP")
(res <- as.data.frame(res))

# power
(power <- res$TP / (res$TP + res$FN))

plot(power ~ c(100, 200, 500, 1000), 
     type = "b",
     xlab = "Sample size", ylab = "Power")
abline(h = 0.8, col = "red", lty = 2)

# type I error
(tie <- res$FP / (res$FP + res$TN))

plot(tie ~ c(100, 200, 500, 1000), 
     type = "b",
     xlab = "Sample size", ylab = "Type I error",
     ylim = c(0, 0.1))
abline(h = 0.05, col = "red", lty = 2)

#---------------------------------------------
# Uniform vs non-uniform DIF in MH
#---------------------------------------------

# item parameters
a1 <- rep(seq(0.8, 1.2, 0.1), each = 3); (a1 <- cbind(a1, a1))
b1 <- rep(c(-1, 0, 1), 5); (b1 <- cbind(b1, b1))
c1 <- rep(0, 15); (c1 <- cbind(c1, c1))
d1 <- rep(1, 15); (d1 <- cbind(d1, d1))

# set similar DIF effect size
### introduce non-uniform DIF
a2 <- a1; c2 <- c1; d2 <- d1
b2 <- b1
a2[1, 2] <- a2[1, 2] + 0.424

### introduce uniform DIF
b1[1, 2] <- b1[1, 2] + 0.6

### checking area between characteristic curves
area <- function(a1, b1, a2, b2){
  if (a1 == a2){
    b2 - b1
  } else {
    abs((2*(a2 - a1) / (a1 * a2)) * log(1 + exp(a1 * a2 * (b2 - b1) / (a2 - a1))) - (b2 - b1))
  }
}

area(a1[1, 1], b1[1, 1], a1[1, 2], b1[1, 2])
area(a2[1, 1], b2[1, 1], a2[1, 2], b2[1, 2])

### we save DIF items into trueDIF
trueDIF <- 1

# sample size
N <- 500
# number of items
I <- nrow(a)

# preparing for saving results
res <- matrix(0, nrow = 2, ncol = 4)

# always use set.seed() for reproducibility
set.seed(1234)

# running simulation
for (i in 1:numSim){
  # generating data with uniform DIF
  x1 <- genNLR(N = N, itemtype = "dich", a = a1, b = b1, c = c1, d = d1)
  
  data1  <- x1[, colnames(x) != "group"]
  group1 <- x1$group
  
  ### Mantel-Haenszel test
  (DIFMH1 <- difMH(data1, group1, focal.name = 1)$DIFitems)
  
  # generating data with non-uniform DIF
  x2 <- genNLR(N = N, itemtype = "dich", a = a2, b = b2, c = c2, d = d2)
  
  data2  <- x2[, colnames(x) != "group"]
  group2 <- x2$group
  
  ### Mantel-Haenszel test
  (DIFMH2 <- difMH(data2, group2, focal.name = 1)$DIFitems)
  
  # saving results
  res <- res + rbind(TFPN(DIFMH1, trueDIF, n = I),
                     TFPN(DIFMH2, trueDIF, n = I))
}
# print results
rownames(res) <- c("MH - uniform DIF", "MH - non-uniform DIF")
res

# power
res$TP / (res$TP + res$FN)
# type I error
res$FP / (res$FP + res$TN)


####################################################################################
# Try by yourself ##################################################################
####################################################################################

#---------------------------------------------
# Effect of correction method for multiple comparison
#---------------------------------------------

# Create simulation study to evaluate effect of DIF effect size

# Prepare 1 set of parameters with 1 DIF item:
a <- rep(seq(0.8, 1.2, 0.1), each = 3); (a <- cbind(a, a))
b <- rep(c(-1, 0, 1), 5); (b <- cbind(b, b))
c <- rep(0, 15); (c <- cbind(c, c))
d <- rep(1, 15); (d <- cbind(d, d))

b1[1, 2] <- b1[1, 2] + 1 

# in each run of the simulation study, create 1 dataset
# and use 1 DIF detection method - first, with no correction method,
# second, with choosen correction method via argument p.adjust.method
# save results

# evaluate results - calculate power and type I error

#---------------------------------------------
# DIF effect size
#---------------------------------------------

# Create simulation study to evaluate effect of DIF effect size

# Prepare 3 sets of parameters with 1 uniform DIF item:
a <- rep(seq(0.8, 1.2, 0.1), each = 3); (a <- cbind(a, a))
b <- rep(c(-1, 0, 1), 5); (b <- cbind(b, b))
c <- rep(0, 15); (c <- cbind(c, c))
d <- rep(1, 15); (d <- cbind(d, d))

a1 <- a2 <- a3 <- a
b1 <- b2 <- b3 <- b
c1 <- c2 <- c3 <- c
d1 <- d2 <- d3 <- d

# first set with difference in difficulty between groups = 0.5
b1[1, 2] 
# first set with difference in difficulty between groups = 1
b2[1, 2] 
# first set with difference in difficulty between groups = 1.5
b3[1, 2] 


# in each run of the simulation study, create 3 different datasets
# and use one DIF detection method for each dataset
# save results

# evaluate results - calculate power and type I error