## Chapter 4 R code. ## Tailor to fit your system: ## load("C:/Mine/MPLS.LS.Rdata") require(ggplot2) g1 <- ggplot(data = MPLS.LS, aes(x = grade, y = read , group = subid)) + geom_line() print(g1) g1 <- ggplot(data = MPLS.LS , aes(x = grade, y = read, group = subid)) + geom_line () g2 <- g1 + theme_bw() + scale_x_continuous(breaks = 5:8, name = "Grade") g3 <- g2 + scale_y_continuous(name = "Reading Score") print(g3) theme_set(theme_bw()) myX <- scale_x_continuous(breaks = 5:8, name = "Grade") myY <- scale_y_continuous(name = " Reading Score ") ## png(filename = "C:/Mine/graph1.png") # Tailor to your system. g1 <- ggplot(data = MPLS.LS, aes(x = grade, y = read, group = subid)) + geom_line () print(g1) graphics.off() g1 <- ggplot(data = MPLS.LS, aes(x = grade, y = read, group = subid)) g2 <- g1 + geom_line() + geom_point() + facet_wrap(~ subid) + myX + myY print(g2) g1 <- ggplot(data = MPLS.LS, aes(x = grade, y = read, group = subid)) g2 <- g1 + geom_line() + geom_point() + myX + myY g3 <- g2 + facet_wrap(~ subid, nrow = 2, as.table = FALSE) print(g3) mysub6 <- subset(MPLS.LS, subid < 7) ## Allows reproduction of results (omit in practice). set.seed(123) ## Select random subset. mysub4 <- subset(MPLS.LS, subid %in% sample(unique(MPLS.LS$subid), size = 4)) ## Graph random subset. g1 <- ggplot(data = mysub4, aes(x = grade, y = read, group = subid)) + geom_line() g2 <- g1 + geom_point() + facet_wrap(~ subid) + myX + myY print(g2) g1 <- ggplot(data = mysub6, aes(x = grade, y = read, group = subid)) + geom_point() g2 <- g1 + stat_smooth(method = "lm", se = FALSE) + facet_wrap(~ subid) + myX + myY print(g2) g1 <- ggplot(data = mysub6, aes(x = grade, y = read, group = subid)) + geom_point() g2 <- g1 + stat_smooth(method = "lm", se = FALSE, formula = y ~ poly(x, 2)) g3 <- g2 + facet_wrap(~ subid) + myX + myY print(g3) mysel <- ddply(.data = data.frame(as.numeric(is.na(MPLS.LS$read))), .variables = .(subid = MPLS.LS$subid), each(missing = sum)) mysel ## Select subjects with no missing values. myids <- with(mysel, subid[missing == 0]) ## Create subset data frame. mysub.comp <- subset(MPLS.LS, subid %in% myids) head(mysub.comp) MPLS.LS2 <- merge(MPLS.LS, mysel, by = "subid") head(MPLS.LS2) MPLS.LS2$missing.f <- factor(MPLS.LS2$missing, labels = c("Not Missing", "Missing")) g1 <- ggplot(MPLS.LS2, aes(x = grade, y = read, group = subid)) + geom_point() g2 <- g1 + geom_line() + facet_grid(. ~ missing.f) + myX print(g2) g1 <- ggplot(data = mysub6, aes(x = grade, y = read, group = subid)) + geom_point() g2 <- g1 + stat_smooth(se = FALSE) + facet_wrap(~ subid) + myX + myY print(g2) g1 <- ggplot(data = MPLS.LS, aes(x = grade, y = read)) + geom_point(shape = 1) + myX + myY g2 <- g1 + stat_summary(fun.y = mean, geom = "line", lwd = 1.5, linetype = 5) g3 <- g2 + stat_summary(fun.y = mean, geom = "point", size=3, shape = 19) print(g3) ## Jitter grade. MPLS.LS$jgrade <- jitter(MPLS.LS$grade) ## Print variables. with(MPLS.LS, head(cbind(subid, grade, jgrade), n = 8)) g1 <- ggplot(data = MPLS.LS, aes(x = jgrade, y = read)) + geom_point() print(g1) g2 <- g1 + stat_summary(fun.y = mean, geom = "line") print(g2) g1 <- ggplot(data = MPLS.LS, aes(x = grade, y = read)) + geom_point(shape = 1) g2 <- g1 + stat_smooth(method = "lm", se = FALSE, lwd = 2) + myX + myY g3 <- g2 + stat_summary(fun.y = "mean", geom = "point", size = 4, shape = 19) g4 <- g3 + opts("aspect.ratio" = 1) print(g4) g1 <- ggplot(data = MPLS.LS, aes(x = grade, y = read)) + geom_point(shape = 1) + myX + myY g2 <- g1 + stat_smooth(method = "lm", formula = y ~ poly(x,2), se = FALSE, lwd = 2) g3 <- g2 + stat_summary(fun.y = "mean", geom = "point", size = 4, shape = 19) g4 <- g3 + opts("aspect.ratio" = 1) print(g4) g1 <- ggplot(data = MPLS.LS, aes(x = grade, y = read)) + geom_point(shape = 1) + myX + myY g2 <- g1 + stat_summary(fun.y = "mean", geom = "point", size = 4, shape = 19) g3 <- g2 + stat_smooth(se = FALSE, lwd = 2) print(g3) g3 <- g2 + stat_smooth(se = FALSE, lwd = 2, span = .5) print(g3) g3 <- g2 + stat_smooth(se = FALSE, lwd = 2, span = .9) print(g3) g1 <- ggplot(data = MPLS.LS, aes(x = jgrade, y = read)) + geom_point() g2 <- g1 + stat_smooth(se = FALSE, lwd = 2) + myX + myY print(g2) g1 <- ggplot(data = mysub6, aes(x = grade, y = read, group = subid)) + geom_line() g2 <- g1 + stat_summary(aes(data = MPLS.LS, group = 1), fun.y = "mean", geom = "line", lwd = 2) g3 <- g2 + stat_summary(aes(data = MPLS.LS, group = 1), fun.y = "mean", geom = "point", size = 4) + myX + myY print(g3) g1 <- ggplot(data = MPLS.LS, aes(x = grade, y = read, line = risk2)) + geom_point() g2 <- g1 + stat_summary(fun.y = mean, aes(line = risk2), geom="line") + myX + myY g3 <- g2 + scale_shape_manual(values = c(1, 19)) print(g3) ## New variable. MPLS.LS$Risk <- MPLS.LS$risk2 levels(MPLS.LS$Risk) <- c("Advantaged", "Disadvantaged") ## Custom graph. g1 <- ggplot(data = MPLS.LS, aes(x = grade, y = read, shape = Risk)) + geom_point() g2 <- g1 + stat_summary(fun.y = mean, aes(line = Risk), geom = "line") + myX + myY g3 <- g2 + stat_summary(aes(shape = Risk), fun.y = mean, geom = "point", size = 3) g4 <- g3 + opts(legend.position = c(.7, .22), legend.background = theme_rect()) g5 <- g4 + scale_shape_manual(values = c(1, 19)) print(g5) ## Dichotomous ethnicity. MPLS.LS$eth2 <- factor(ifelse(MPLS.LS$eth == "Whi", yes = "W", no = "NW")) ## Save the data frame. ## save(MPLS.LS, file="C:/Mine/MPLS.LS.Rdata") ## Tailor to your system. ## Cross-tabulation. with(MPLS.LS[MPLS.LS$grade == 5, ], table(gen, eth2)) g1 <- ggplot(data = MPLS.LS, aes(x = grade, y = read)) + myX + myY g2 <- g1 + stat_summary(fun.y = mean, aes(line = gen : eth2), geom = "line") g3 <- g2 + stat_summary(fun.y = mean, aes(shape = gen : eth2), geom = "point", size = 3) g4 <- g3 + opts(legend.position = c(.55, .35), legend.background = theme_rect()) print(g4) g1 <- ggplot(data = MPLS.LS, aes(x = grade, y = read, group = subid)) + geom_line() g2 <- g1 + stat_summary(fun.y = mean, aes(group = 1), geom = "line", lwd = 3) g3 <- g2 + facet_grid(. ~ gen) + myX + myY print(g3) ## Make copy of gen2. MPLS.LS$gen2 <- MPLS.LS$gen levels(MPLS.LS$gen2) <- c("Female", "Male") ## New graph. g3 <- g2 + facet_grid(. ~ gen2, margins = TRUE) print(g3) levels(MPLS.LS$eth2) <- c("Non-White", "White") g1 <- ggplot(data = MPLS.LS, aes(x = grade, y = read, group = subid)) + geom_line() g2 <- g1 + stat_summary(fun.y = mean, geom = "line", lwd = 2, aes(group = 1)) g3 <- g2 + facet_grid(eth2 ~ gen2) + myX + myY print(g3) g1 <- ggplot(data = MPLS.LS, aes(x = grade, y = read)) + myY + myX g2 <- g1 + stat_summary(fun.y = mean, aes(line = gen2), geom = "line") g3 <- g2 + stat_summary(fun.y = mean, aes(shape = gen2), geom = "point", size = 3) g4 <- g3 + opts(legend.position = c(.78,.3), legend.background = theme_rect()) g5 <- g4 + scale_linetype(name = "Gender") + scale_shape(name = "Gender", solid = FALSE) g6 <- g5 + facet_grid(. ~ eth2) print(g6) ## For "(all)" panel: MPLS.LSa <- MPLS.LS # Copy data frame. MPLS.LSa$gen2 <- "(all)" # Relabel gen2. plotdata <- rbind(MPLS.LS, MPLS.LSa) # Stack data frames. set.seed(123) # Enables replication of results. x <- rnorm(n = 100, mean = 100, sd = 15) # 100 scores from normal distribution. table(cut_interval(x, n = 4)) table(cut_number(x, n = 4)) with(MPLS.LS[MPLS.LS$grade == 5,], median(att)) with(MPLS.LS[MPLS.LS$grade == 5,], table(att)) with(MPLS.LS[MPLS.LS$grade == 5,], table(cut_number(att, n = 2))) MPLS.LS$att2 <- cut_number(MPLS.LS$att, n = 2) # Create groups. levels(MPLS.LS$att2) <- c("Low Attendance", "High Attendance") # Name levels. g1 <- ggplot(data = MPLS.LS, aes(x = grade, y = read, group = subid)) + geom_line() g2 <- g1 + stat_summary(fun.y = mean, geom = "line", lwd = 3, aes(group = 1)) g3 <- g2 + facet_grid(. ~ att2) + myY + myX print(g3) MPLS.LS$att4 <- cut_number(MPLS.LS$att, n = 4) levels(MPLS.LS$att4) <- c("Attend 1", "Attend 2", "Attend 3", "Attend 4") g1 <- ggplot(data = MPLS.LS, aes(x = grade, y = read, group = subid)) + geom_line() g2 <- g1 + stat_summary(fun.y = mean, geom = "line", lwd = 3, aes(group = 1)) g3 <- g2 + facet_grid(. ~ att4) + myY + myX print(g3) g1 <- ggplot(data = MPLS.LS, aes(x = grade, y = read, group = subid)) + geom_points() g2 <- g1 + stat_summary(fun.y = mean, geom = "line", lwd = 3, aes(group = 1)) g3 <- g2 + scale_y_continuous(limits = c(200, 220)) g1 <- ggplot(data = MPLS.LS, aes(x = grade, y = read, group = subid)) + geom_points() g2 <- g1 + stat_summary(fun.y = mean, geom = "line", lwd = 3, aes(group = 1)) g3 <- g2 + coord_cartesian(ylim = c(200, 220)) g1 <- ggplot(data = MPLS.LS, aes(x = grade, y = read)) g2 <- g1 + stat_summary(fun.y = mean, geom = "line", lwd = 3, aes(colour = gen)) g3 <- g2 + scale_colour_brewer(palette = 3) g1 <- ggplot(data = MPLS.LS, aes(x = grade, y = read, shape = risk2)) + geom_point() g2 <- g1 + stat_summary(fun.y = mean, aes(line = risk2), geom = "line") + myX + myY #################################################################################### g3 <- g2 + scale_linetype_manual(name = "Risk Group", values = c(1, 2), breaks = c("ADV", "DADV"), labels = c("Advantaged", "Disadvantaged")) g4 <- g3 + scale_shape_manual(name = "Risk Group", values = c(1, 19), breaks = c("ADV", "DADV"), labels = c("Advantaged", "Disadvantaged")) #################################################################################### g5 <- g4 + opts(legend.position = c(.7,.22), legend.background = theme_rect()) print(g5)