## Chapter 3 R code.
## Part 1: inspect the wide-format MPLS data frame, recode two variables,
## compute descriptive statistics for columns 2:5, and reshape to long format.

## ddply() and .() are plyr functions, so plyr is loaded explicitly instead of
## relying on another package to attach it. library() stops with an error when
## a package is missing, whereas require() only returns FALSE.
library(plyr)
library(ggplot2)

## Tailor the following for your system:
## MPLS <- read.table("C:\\Mine\\MPLSdata.txt", header = TRUE, na.strings = "-99")
## MPLS <- read.table(file.choose(), header = TRUE, na.strings = "-99")
## MPLS <- read.table("C:\\Mine\\MPLSdata.txt", header = TRUE, na.strings = "-99", sep = ",")
## MPLS <- read.csv("C:\\Mine\\MPLSdata.txt", header = TRUE, na.strings = "-99")

## First look at the data.
head(MPLS)
tail(MPLS)
MPLS[1:2, 1:2]
MPLS[1:5, ]
str(MPLS)

## Labelled factor version of the 0/1 variable ell.
MPLS$ell2 <- factor(MPLS$ell, levels = c(0, 1), labels = c("No", "Yes"))
str(MPLS)

## Collapse risk into two categories: "HHM" and "POV" both become "DADV".
MPLS$riskC[MPLS$risk == "HHM"] <- "DADV"  # Note "==" rather than "=".
MPLS$riskC[MPLS$risk == "POV"] <- "DADV"
MPLS$riskC[MPLS$risk == "ADV"] <- "ADV"
MPLS$risk2 <- factor(MPLS$riskC)
str(MPLS)
summary(MPLS)

## Descriptives for columns 2:5 (the columns stacked into "read" below).
## mean() and sd() no longer accept a data frame, so column-wise versions
## are used: colMeans() for the means and sd() applied to each column.
mymeans <- colMeans(MPLS[, 2:5], na.rm = TRUE)
mymeans
mysds <- vapply(MPLS[, 2:5], sd, numeric(1), na.rm = TRUE)
mysds
cor(MPLS[, 2:5], use = "complete.obs")
cov(MPLS[, 2:5], use = "complete.obs")

## Proportion of missing values in each column.
mymiss <- colMeans(is.na(MPLS[, 2:5]))
mymiss

## Column means by risk group. Each piece handed to .fun is a four-column
## data frame, hence colMeans() rather than mean().
myrisk <- ddply(.data = MPLS[, 2:5], .variables = .(MPLS$risk),
                .fun = colMeans, na.rm = TRUE)
myrisk

## Median split on att (1 = above the median, 0 = otherwise), then column
## means within each half.
msplit <- ifelse(test = MPLS$att > median(MPLS$att), yes = 1, no = 0)
msplit
ddply(.data = MPLS[, 2:5], .variables = .(msplit),
      .fun = colMeans, na.rm = TRUE)

## Wide to long: stack columns 2:5 into "read", indexed by grade 5:8.
MPLS.L <- reshape(data = MPLS, varying = 2:5, v.names = "read",
                  timevar = "grade", times = 5:8, idvar = "subid",
                  direction = "long")
head(MPLS.L, n = 10)

## Sort by subject and then grade, reset the row names, and drop the
## intermediate character variable riskC (risk2 is kept).
MPLS.LS <- MPLS.L[order(MPLS.L$subid, MPLS.L$grade), ]
head(MPLS.LS, n = 10)
rownames(MPLS.LS) <- NULL
MPLS.LS <- subset(MPLS.LS, select = -riskC)
## save(MPLS.LS, file = "C:/Mine/MPLS.LS.Rdata")  ## Tailor to your system.
## load(file = "C:/Mine/MPLS.LS.Rdata")

## Part 2: long-to-wide reshape, summaries by grade computed from the
## long-format data, a na.omit() demonstration, and simulated data for the
## missing-data comparisons.

## Long to wide: one read.<grade> column per grade, dropping ell2.
## ("riskC" in drop is harmless if that column is not present.)
MPLS.W <- reshape(data = MPLS.LS, v.names = "read", idvar = "subid",
                  drop = c("riskC", "ell2"), timevar = "grade",
                  direction = "wide")
MPLS.W

## Mean of read by grade. summarise evaluates mean() on the read column of
## each piece; mean() applied to a whole data frame piece returns NA.
ddply(.data = MPLS.LS, .variables = .(grade), .fun = summarise,
      read.mean = mean(read, na.rm = TRUE))

## Rebuild a matrix with one column per grade to get covariances and
## correlations among grades from the long-format data.
mylist <- split(MPLS.LS$read, MPLS.LS$grade)
myread <- do.call(cbind, mylist)
colnames(myread) <- paste0("grade.", 5:8)
myread
cov(myread, use = "pairwise.complete.obs")
cor(myread, use = "pairwise.complete.obs")

## Proportion of missing read values by grade.
ddply(.data = MPLS.LS, .variables = .(grade), .fun = summarise,
      prop.miss = mean(is.na(read)))

## Mean of read by grade and gender.
mgrge <- ddply(MPLS.LS, .(grade, gender = gen), summarise,
               mean.read = mean(read, na.rm = TRUE))
mgrge

## Mean of read by grade and by att cut into two equal-sized groups.
mgrat <- ddply(MPLS.LS, .(grade, att.range = cut_number(att, n = 2)),
               summarise, read.mean = mean(read, na.rm = TRUE))
mgrat
mgrat$att.range <- factor(mgrat$att.range, labels = c("Low", "High"))
mgrat

## Proportion of missing read values by grade and gender.
ddply(MPLS.LS, .(grade, gender = gen), summarise,
      prop.miss = mean(is.na(read)))

## na.omit() demonstration on subjects with subid < 4.
MPLS.LS3 <- subset(MPLS.LS, subid < 4, select = c(subid, read, grade, gen))
MPLS.LS3
omit1 <- na.omit(MPLS.LS3)
omit1

## Setting column 4 (gen) to NA in rows 1:4 shows that na.omit() drops a row
## when any selected variable is missing.
MPLS.LS3a <- MPLS.LS3
MPLS.LS3a[1:4, 4] <- NA
MPLS.LS3a
omit2 <- na.omit(MPLS.LS3a)
omit2

## Simulated data: 1000 sorted normal scores (lowest 500 labelled "DADV",
## highest 500 "ADV") plus a uniform random number for each row.
set.seed(222)
mydat <- data.frame(risk2 = c(rep("DADV", 500), rep("ADV", 500)),
                    read.7 = sort(rnorm(1000, mean = 200, sd = 40)))
set.seed(333)
mydat$unif <- runif(1000, min = 0, max = 100)
head(mydat)

## MCAR: delete 250 scores chosen by the random uniform ordering.
MCAR <- mydat                         # Copy data frame.
MCAR$read.7m <- MCAR$read.7           # Copy complete variable.
MCAR <- MCAR[order(MCAR$unif), ]      # Sort by random numbers.
MCAR$read.7m[1:250] <- NA             # Assign NAs.
## Part 3: compare the complete-data mean of read.7 with two estimates based
## on the incomplete variable read.7m under three deletion schemes, then
## reshape two further data sets.
##   wt   = mean of all observed read.7m values
##   unwt = mean of the two risk2 group means of read.7m
## The group means use summarise so that mean() sees the read.7m column;
## mean() applied to a whole data frame piece returns NA.

## MCAR results.
wt <- mean(MCAR$read.7m, na.rm = TRUE)
unwt <- ddply(MCAR, .(risk2), summarise,
              Mean = mean(read.7m, na.rm = TRUE))
unwt <- mean(unwt$Mean)
myresults1 <- data.frame(complete = mean(MCAR$read.7),
                         wt.mean = wt, unwt.mean = unwt)
rownames(myresults1) <- "MCAR"
round(myresults1, 1)

## MAR: deletion depends on risk2 only.
MAR <- mydat
MAR$read.7m <- MAR$read.7
## Sort by risk2 (decreasing) and unif (increasing).
MAR <- MAR[order(MAR$risk2, -MAR$unif, decreasing = TRUE), ]
MAR$read.7m[1:250] <- NA
head(MAR)
wt <- mean(MAR$read.7m, na.rm = TRUE)
unwt <- ddply(MAR, .(risk2), summarise,
              Mean = mean(read.7m, na.rm = TRUE))
unwt <- mean(unwt$Mean)
myresults2 <- data.frame(complete = mean(MAR$read.7),
                         wt.mean = wt, unwt.mean = unwt)
rownames(myresults2) <- "MAR"
round(myresults2, 1)

## NMAR: read.7 was sorted when mydat was built, so rows 1:250 hold the 250
## lowest scores and deletion depends on the score itself.
NMAR <- mydat
NMAR$read.7m <- NMAR$read.7
NMAR$read.7m[1:250] <- NA
head(NMAR)
wt <- mean(NMAR$read.7m, na.rm = TRUE)
unwt <- ddply(NMAR, .(risk2), summarise,
              Mean = mean(read.7m, na.rm = TRUE))
unwt <- mean(unwt$Mean)
myresults3 <- data.frame(complete = mean(NMAR$read.7),
                         wt.mean = wt, unwt.mean = unwt)
rownames(myresults3) <- "NMAR"
round(myresults3, 1)

## All three scenarios side by side.
round(rbind(myresults1, myresults2, myresults3), 1)

## Tailor for your own system:
## MPLSc1 <- read.table("C:\\Mine\\MPLScomp1.txt", header = TRUE, na.strings = c("-99"))

## Two repeated measures at once: columns 2:5 are stacked into "read" and
## columns 6:9 into "math", indexed by grade 5:8.
MPLSc1.L <- reshape(data = MPLSc1, varying = list(2:5, 6:9),
                    idvar = "subid", v.names = c("read", "math"),
                    timevar = "grade", times = 5:8, direction = "long")
MPLSc1.L <- MPLSc1.L[order(MPLSc1.L$subid, MPLSc1.L$grade), ]
head(MPLSc1.L, n = 8)

## Tailor for your own system:
## MPLSc2 <- read.table("C:\\Mine\\MPLScomp2.txt", header = TRUE, na.strings = c("-99"))

## Here grade is itself a stacked variable (columns 6:9), so the time index
## is a wave counter 1:4 rather than the grade.
MPLSc2.L <- reshape(data = MPLSc2, varying = list(2:5, 6:9),
                    idvar = "subid", v.names = c("read", "grade"),
                    timevar = "waves", times = 1:4, direction = "long")
MPLSc2.L <- MPLSc2.L[order(MPLSc2.L$subid, MPLSc2.L$grade), ]
head(MPLSc2.L, n = 8)

## Back to wide format keyed on grade, dropping the wave counter.
MPLSc2.W <- reshape(data = MPLSc2.L, v.names = "read", idvar = "subid",
                    timevar = "grade", direction = "wide", drop = "waves")
MPLSc2.W