#################################################################################### # L3 - VALIDITY # # NMST570 # # Last update: 12/10/2018 # #################################################################################### # Slides available at http://www.cs.cas.cz/martinkova/documents/NMST570_L3slides.pdf # HW assignment available at http://www.cs.cas.cz/drabinova/documents/NMST570_HW3.pdf #################################################################################### # Ex. 2.1. ######################################################################### #################################################################################### # load data load("test_theory_practice.RData") # examine dataset head(data) summary(data) attach(data) #----------------------------------------------------------------------------------- # 2.1.1 # How many examinees passed theoretical test? # What is the pass rate of the theoretical test? sum(score_theory >= 52) # 1500 sum(score_theory >= 52)/length(score_theory) # 0.6654836 #----------------------------------------------------------------------------------- # 2.1.2 # How many examinees passed practical test? # What is the pass rate of the practical test inrestricted sample # (i.e., examinees who passed theoretical test)? sum(score_practice == 5, na.rm = T) # 1039 sum(score_practice == 5, na.rm = T)/length(score_practice[score_theory >= 52]) # 0.6926667 #----------------------------------------------------------------------------------- # 2.1.3 # Calculate observed calculation between scores of theoretical and practicle tests # in restricted sample (i.e., examinees who passed theoretical test) (rxy <- cor(score_theory[score_theory >= 52], score_practice[score_theory >= 52])) # alternatively # (rxy <- cor(score_theory, score_practice, use = "complete.obs")) # 0.609573 #----------------------------------------------------------------------------------- # 2.1.4 # Use formula formula to correct of restriction of range ### restricted sd sx <- sd(score_theory[score_theory >= 52]) ### unrestricted sd SX <- sd(score_theory) ### formula (rXY <- SX * rxy / (SX^2 * rxy^2 + sx^2 - sx^2 * rxy^2)^(1/2)) # 0.8686929 #################################################################################### # Ex. 2.2. ######################################################################### #################################################################################### #----------------------------------------------------------------------------------- # 2.2.1 # Using R create script to generate X and Y of sample size 1000. Use set.seed(123) # for reproducibility. set.seed(123) N <- 1000 X <- rnorm(N, 5, 1) Y <- 2*X - 1 + rnorm(N, 0, 2) plot(X, Y, xlim = c(2, 8), ylim = c(0, 20)) # theoretical correlation is 2/sqrt(8) = 0.7071068 #----------------------------------------------------------------------------------- # 2.2.2 # Calculate estimate of correlation between generated scores X and Y . Compare to # theoretical correlation. cor(X, Y) # 0.731456 #----------------------------------------------------------------------------------- # 2.2.3 # Consider only those respondents who are # at least in 85th percentile # at most in 15th percentile # at least in 15th percentile but at most in 85th percentile # at most in 15th percentile or at least in 85th percentile # Recalculate the estimate of correlation, apply formula for correction of restriction of the range, compare # results and briefy comment. #---------------- ### at least in 85th percentile x <- X[X >= quantile(X, probs = 0.85)] y <- Y[X >= quantile(X, probs = 0.85)] plot(x, y, xlim = c(2, 8), ylim = c(0, 20)) ### correlation in restricted sample before correction (rxy <- cor(x, y)) # 0.5263605 ### applying correction sx <- sd(x) SX <- sd(X) SX * rxy / (SX^2 * rxy^2 + sx^2 - sx^2 * rxy^2)^(1/2) # 0.8002039 #---------------- ### at most in 15th percentile x <- X[X <= quantile(X, probs = 0.15)] y <- Y[X <= quantile(X, probs = 0.15)] plot(x, y, xlim = c(2, 8), ylim = c(0, 20)) ### correlation in restricted sample before correction (rxy <- cor(x, y)) # 0.5647845 ### applying correction sx <- sd(x) SX <- sd(X) SX * rxy / (SX^2 * rxy^2 + sx^2 - sx^2 * rxy^2)^(1/2) # 0.8671302 #---------------- ### at least in 15th percentile but at most in 85th percentile x <- X[X >= quantile(X, probs = 0.15) & X <= quantile(X, probs = 0.85)] y <- Y[X >= quantile(X, probs = 0.15) & X <= quantile(X, probs = 0.85)] plot(x, y, xlim = c(2, 8), ylim = c(0, 20)) ### correlation in restricted sample before correction (rxy <- cor(x, y)) # 0.5200838 ### applying correction sx <- sd(x) SX <- sd(X) SX * rxy / (SX^2 * rxy^2 + sx^2 - sx^2 * rxy^2)^(1/2) # 0.7444003 #---------------- ### at most in 15th percentile or at least in 85th percentile x <- X[X <= quantile(X, probs = 0.15) | X >= quantile(X, probs = 0.85)] y <- Y[X <= quantile(X, probs = 0.15) | X >= quantile(X, probs = 0.85)] plot(x, y, xlim = c(2, 8), ylim = c(0, 20)) ### correlation in restricted sample before correction (rxy <- cor(x, y)) # 0.8667387 ### applying correction sx <- sd(x) SX <- sd(X) SX * rxy / (SX^2 * rxy^2 + sx^2 - sx^2 * rxy^2)^(1/2) # 0.7302225