R version 2.9.2 (2009-08-24) Copyright (C) 2009 The R Foundation for Statistical Computing ISBN 3-900051-07-0 R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. Natural language support but running in an English locale R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > # import basic r functions including stuff i've written > source("~/r/my.r") > source("~/r/useful.r") > > # import car which contains linear.hypothesis and the plotting library > library(car) > library(xtable) > library(ggplot2) > > # set the theme > theme_set(theme_bw()) > > # import the dataset > students <- read.table("CAREER_ACADS.txt") > > # set the column names > names(students) <- c("id", "earnings", "careeracad", "female", + "hisp", "black", "white", "math", "office", + "sc02", "sc03", "sc04", "sc05", "sc06", "sc07", + "sc08", "sc09", "sc10", "sc11", "sc12", "sc13", + "sc14", "sc15", "sc16", "sc17", "sc18") > > # the names of the variables for the sc > sc.vars <- colnames(students)[grep('^sc[[:digit:]]{2}$', + colnames(students))] > > # DAM QUESTION 1 > > # print out a random subset of cases to check > students[sort(as.integer(sample(rownames(students), 10))),] id earnings careeracad female hisp black white math office sc02 sc03 sc04 4 4 994.03 0 1 0 1 0 40 1 0 0 0 45 45 1626.86 1 0 0 1 0 91 0 1 0 0 186 186 1547.34 1 1 0 1 0 42 0 0 0 0 201 201 2142.47 1 1 0 1 0 8 0 0 0 0 238 238 4517.96 0 1 0 0 1 72 0 0 0 0 315 315 1774.84 0 1 1 0 0 9 0 0 0 0 346 346 2657.33 0 0 1 0 0 12 1 0 0 0 348 348 791.53 1 1 1 0 0 15 0 0 0 0 438 438 575.68 1 1 1 0 0 44 0 0 0 0 445 445 1834.84 0 1 1 0 0 15 0 0 0 0 sc05 sc06 sc07 sc08 sc09 sc10 sc11 sc12 
sc13 sc14 sc15 sc16 sc17 sc18 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 45 0 0 0 0 0 0 0 0 0 0 0 0 0 0 186 0 1 0 0 0 0 0 0 0 0 0 0 0 0 201 0 1 0 0 0 0 0 0 0 0 0 0 0 0 238 0 0 0 0 1 0 0 0 0 0 0 0 0 0 315 0 0 0 0 0 0 0 0 1 0 0 0 0 0 346 0 0 0 0 0 0 0 0 0 1 0 0 0 0 348 0 0 0 0 0 0 0 0 0 1 0 0 0 0 438 0 0 0 0 0 0 0 0 0 0 0 0 1 0 445 0 0 0 0 0 0 0 0 0 0 0 0 1 0 > > # transform natural log after adding $500 > students$ln.earnings <- log(students$earnings + 500) > > # generate univariate stats > describe.full(students$ln.earnings) Min. 1st Qu. Median Mean 3rd Qu. Max. 6.215 7.434 7.792 7.713 8.040 9.351 V n mean sd median min max range skew se 1 1 487 7.71 0.57 7.79 6.21 9.35 3.14 -0.56 0.03 0% 25% 50% 75% 100% 6.214608 7.434083 7.792188 8.040234 9.350979 The decimal point is 1 digit(s) to the left of the | 62 | 111111111811344899 64 | 0223511369 66 | 0047880012399 68 | 01556680123578889 70 | 011445668000013456678 72 | 025558888899901122333344566788999 74 | 000122233334455667788888900011112223334444555666788888999 76 | 00001122223333344555666777778888990111222222333444556666677777777788 78 | 00000011111122333455566666666777777777888889999900000011112222222233+19 80 | 0000001111122222333334445556666667777899001123344445556778888999 82 | 000112233346778880113455567889 84 | 00123334456677788912234567889 86 | 01788990136888 88 | 27784 90 | 7 92 | 5 > > # DAM QUESTION 2 > > # create a categorical variable from the three dichotomies for race > students[students$black == 1, "race"] <- 1 > students[students$hisp == 1, "race"] <- 2 > students[students$white == 1, "race"] <- 3 > > # create a new "SC" categorical variable that produces > students[students$sc02 == 1, "sc"] <- 2 > students[students$sc03 == 1, "sc"] <- 3 > students[students$sc04 == 1, "sc"] <- 4 > students[students$sc05 == 1, "sc"] <- 5 > students[students$sc06 == 1, "sc"] <- 6 > students[students$sc07 == 1, "sc"] <- 7 > students[students$sc08 == 1, "sc"] <- 8 > students[students$sc09 == 1, "sc"] <- 9 > students[students$sc10 == 
1, "sc"] <- 10 > students[students$sc11 == 1, "sc"] <- 11 > students[students$sc12 == 1, "sc"] <- 12 > students[students$sc13 == 1, "sc"] <- 13 > students[students$sc14 == 1, "sc"] <- 14 > students[students$sc15 == 1, "sc"] <- 15 > students[students$sc16 == 1, "sc"] <- 16 > students[students$sc17 == 1, "sc"] <- 17 > students[students$sc18 == 1, "sc"] <- 18 > students[is.na(students$sc), "sc"] <- 1 > > # create two tables showing the tabulation > table(students$race) 1 2 3 201 247 39 > table(students$sc) 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 37 45 26 44 27 24 10 18 9 13 24 25 41 36 24 20 30 34 > > # TODO cleanup and latexify these for submission > > # DAM QUESTION 3 > > # provide a function to run the model > # create model function > run.reg <- function(d, dropped.obs=c()) { + d <- d[!rownames(d) %in% dropped.obs,] + + tmp.m <- lm(ln.earnings ~ sc02 + sc03 + sc04 + sc05 + sc06 + sc07 + sc08 + + sc09 + sc10 + sc11 + sc12 + sc13 + sc14 + sc15 + + sc16 + sc17 + sc18 + female + black + hisp + + math + office + careeracad + careeracad*office, + data=d) + return(tmp.m) + } > > # refit the final model from DAM 1 > m <- run.reg(students) > summary(m) Call: lm(formula = ln.earnings ~ sc02 + sc03 + sc04 + sc05 + sc06 + sc07 + sc08 + sc09 + sc10 + sc11 + sc12 + sc13 + sc14 + sc15 + sc16 + sc17 + sc18 + female + black + hisp + math + office + careeracad + careeracad * office, data = d) Residuals: Min 1Q Median 3Q Max -1.72763 -0.27657 0.06318 0.34029 1.51418 Coefficients: Estimate Std. 
Error t value Pr(>|t|) (Intercept) 7.984132 0.181686 43.945 < 2e-16 *** sc02 -0.050279 0.119511 -0.421 0.67416 sc03 -0.032973 0.139740 -0.236 0.81357 sc04 0.065403 0.121350 0.539 0.59017 sc05 -0.197298 0.149717 -1.318 0.18822 sc06 -0.064877 0.160985 -0.403 0.68713 sc07 -0.055034 0.209078 -0.263 0.79250 sc08 -0.428153 0.156844 -2.730 0.00658 ** sc09 0.095136 0.228841 0.416 0.67780 sc10 0.043460 0.207491 0.209 0.83419 sc11 -0.395744 0.176346 -2.244 0.02530 * sc12 -0.185573 0.162935 -1.139 0.25532 sc13 -0.104051 0.165915 -0.627 0.53088 sc14 -0.147089 0.170201 -0.864 0.38792 sc15 -0.126564 0.181940 -0.696 0.48701 sc16 0.068455 0.189033 0.362 0.71742 sc17 -0.002024 0.173056 -0.012 0.99067 sc18 -0.132753 0.169722 -0.782 0.43451 female -0.157611 0.052159 -3.022 0.00265 ** black -0.328443 0.137706 -2.385 0.01748 * hisp -0.152257 0.100218 -1.519 0.12938 math 0.003794 0.001200 3.162 0.00167 ** office -0.462190 0.097346 -4.748 2.75e-06 *** careeracad 0.039827 0.054778 0.727 0.46755 office:careeracad 0.420557 0.132884 3.165 0.00165 ** --- Signif. 
codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 Residual standard error: 0.5369 on 462 degrees of freedom Multiple R-squared: 0.1598, Adjusted R-squared: 0.1161 F-statistic: 3.66 on 24 and 462 DF, p-value: 2.602e-08 > > # make sure the model has 487 fitted values > dim(m$model)[1] [1] 487 > > # estimate normal influence statistics > students$hat.resids <- hatvalues(m) > students$press.resids <- residuals(m)/(1-hatvalues(m)) > students$cook.resids <- cooks.distance(m) > > # DAM QUESTION 4 > > # (a) Shapiro-Wilk test > shapiro.test(students$press.resids) Shapiro-Wilk normality test data: students$press.resids W = 0.9753, p-value = 2.518e-07 > > # (b) histogram of unstandardized press resids > pdf("dam2_writeup/figures/q4_hist_press_resids.pdf", width = 6, height = 6) > plot.1 <- qplot(press.resids, data=students, geom="histogram") > plot.1 <- plot.1 + xlab("press residuals") > print(plot.1) > dev.off() pdf 2 > > # (c) plot qq normal probability plot > pdf("dam2_writeup/figures/q4_qq_plot.pdf", width = 6, height = 6) > plot.2 <- qplot(sample=students$press.resids, stat="qq") + + geom_abline(slope=gen.qq.points(students$press.resids)["slope"], + intercept=gen.qq.points(students$press.resids)["int"], + colour="blue") > print(plot.2) > dev.off() pdf 2 > > # DAM QUESTION 5 > students$predicted <- predict(m) > > # render the plot along with lines showing the two standard deviation > # mark > pdf("dam2_writeup/figures/q5_press_by_predicted.pdf", width = 9.5, height = 6) > plot.3 <- qplot(predicted, press.resids, data=students) + + geom_hline(y=sd(students$ln.earnings)*c(2,-2), colour="red") > plot.3 <- plot.3 + ylab("Raw PRESS residuals") + xlab("Predicted LN.EARNINGS") > print(plot.3) > dev.off() pdf 2 > > # DAM QUESTION 6 > > # create standardized press resids (press resids scaled by the sd of ln.earnings) > students$std.press.resids <- students$press.resids / sd(students$ln.earnings) > > # univariate stats on each of the three variables > describe.full(students$hat.resids) Min. 1st Qu. Median Mean 3rd Qu. Max. 
0.02576 0.03381 0.04831 0.05133 0.06114 0.16030 V n mean sd median min max range skew se 1 1 487 0.05 0.02 0.05 0.03 0.16 0.13 1.6 0 0% 25% 50% 75% 100% 0.02576403 0.03381038 0.04831061 0.06113535 0.16027615 The decimal point is 2 digit(s) to the left of the | 2 | 66666666666777788888888888888999999999 3 | 00000000000000000000000111111111111111111111122222222222222333333333+58 4 | 00000000001111222222223333344444445555555566666666667777777778888888+1 5 | 00000000011111111111111222222222222222223333333333334444444445555555+15 6 | 0000000001111111122222222222333344444445555566666666777778888 7 | 001111122222334555667777889 8 | 000011223333444445556779 9 | 26 10 | 2677 11 | 13579 12 | 0013334 13 | 019 14 | 5 15 | 16 | 0 > describe.full(students$press.resids) Min. 1st Qu. Median Mean 3rd Qu. Max. -1.826e+00 -2.925e-01 6.882e-02 -1.578e-05 3.575e-01 1.562e+00 V n mean sd median min max range skew se 1 1 487 0 0.55 0.07 -1.83 1.56 3.39 -0.58 0.03 0% 25% 50% 75% 100% -1.82635488 -0.29245318 0.06881915 0.35751308 1.56235217 The decimal point is 1 digit(s) to the left of the | -18 | 3 -16 | 274 -14 | 70861 -12 | 933217210 -10 | 74308887433 -8 | 7664444399642110 -6 | 98887533210088776432211 -4 | 9665431111000999988775554422222221000 -2 | 987766544444420009888777765554433311100 -0 | 99999988888777554433322211111100000999988777766665555554444333222211 0 | 00112222222333344445555556677889999900111222233333444445666677777777 2 | 00000000111122233333444555566666677888889999000011111122233344444555+1 4 | 0000001111223333444555667778888889990013345555566667779 6 | 0011222334566667777881223378 8 | 0134600233555899 10 | 012636778 12 | 1 14 | 6 > describe.full(students$cook.resids) Min. 1st Qu. Median Mean 3rd Qu. Max. 
9.474e-08 1.627e-04 7.446e-04 2.313e-03 2.616e-03 5.502e-02 V n mean sd median min max range skew se 1 1 487 0 0 0 0 0.06 0.06 5.31 0 0% 25% 50% 75% 100% 9.474196e-08 1.627355e-04 7.445982e-04 2.616373e-03 5.501629e-02 The decimal point is 2 digit(s) to the left of the | 0 | 00000000000000000000000000000000000000000000000000000000000000000000+333 0 | 5555555555555566666666666666777777778888999999 1 | 0000001122223333444 1 | 568 2 | 02 2 | 568 3 | 3 | 4 | 4 | 5 | 5 | 5 > > # list the 30 high and 30 low on each set of resids > extremes.hat <-rbind(students[sort.list(students$hat.resids)[0:30],], + students[rev(sort.list(students$hat.resids))[0:30],]) > extremes.press <- rbind(students[sort.list(students$press.resids)[0:30],], + students[rev(sort.list(students$press.resids))[0:30],]) > extremes.cook <- rbind(students[sort.list(students$cook.resid)[0:30],], + students[rev(sort.list(students$cook.resids))[0:30],]) > > #output the three versions dropping the sc variables > extremes.hat[,!colnames(extremes.hat) %in% sc.vars] id earnings careeracad female hisp black white math office ln.earnings 133 133 2158.76 1 1 0 1 0 30 0 7.885615 110 110 1658.73 1 1 0 1 0 34 0 7.677275 38 38 2574.78 1 1 0 1 0 48 0 8.030989 69 69 2571.82 1 1 0 1 0 48 0 8.030025 72 72 1813.45 1 1 0 1 0 48 0 7.746495 117 117 1500.40 1 1 0 1 0 24 0 7.601102 46 46 2095.56 1 1 0 1 0 55 0 7.861558 76 76 1954.22 1 1 0 1 0 55 0 7.805564 68 68 2580.49 1 1 0 1 0 38 0 8.032844 71 71 1865.12 1 1 0 1 0 38 0 7.768584 109 109 3113.84 1 1 0 1 0 20 0 8.192526 148 148 1730.05 1 1 0 1 0 17 0 7.709779 55 55 34.75 1 1 0 1 0 61 0 6.281799 116 116 2205.38 1 1 0 1 0 48 0 7.902998 137 137 1674.38 1 1 0 1 0 48 0 7.684499 119 119 2216.25 1 1 0 1 0 13 0 7.907008 123 123 392.43 1 1 0 1 0 51 0 6.793948 48 48 1968.34 1 1 0 1 0 65 0 7.811301 44 44 604.97 0 1 0 1 0 41 0 7.007573 80 80 1595.98 0 1 0 1 0 41 0 7.647777 58 58 1928.50 1 1 0 1 0 27 0 7.795029 136 136 2195.21 1 1 0 1 0 10 0 7.899231 42 42 909.33 0 1 0 1 0 55 0 7.250870 70 70 
331.16 0 1 0 1 0 55 0 6.722822 311 311 1005.52 1 1 1 0 0 24 0 7.316894 317 317 2407.63 1 1 1 0 0 24 0 7.975094 324 324 2330.57 1 1 1 0 0 24 0 7.948233 339 339 525.82 1 1 1 0 0 24 0 6.933248 318 318 1984.24 1 1 1 0 0 18 0 7.817722 122 122 2485.27 0 1 0 1 0 38 0 8.001445 232 232 0.00 0 1 0 0 1 20 1 6.214608 210 210 117.49 0 1 0 1 0 43 1 6.425663 238 238 4517.96 0 1 0 0 1 72 0 8.520779 206 206 1411.43 0 0 0 0 1 74 0 7.555607 234 234 5354.68 1 0 1 0 0 77 0 8.674997 207 207 3540.76 0 1 0 0 1 39 0 8.304188 209 209 1733.07 0 1 0 1 0 10 0 7.711133 204 204 1036.17 0 1 0 1 0 13 0 7.337048 236 236 2566.01 0 0 1 0 0 10 0 8.028132 240 240 2238.59 1 1 1 0 0 15 0 7.915198 205 205 3736.49 1 1 1 0 0 72 0 8.351490 237 237 1934.55 0 1 1 0 0 65 0 7.797517 235 235 8212.65 0 0 1 0 0 33 0 9.072531 239 239 1379.52 0 1 1 0 0 13 0 7.538772 233 233 2096.85 0 1 1 0 0 39 0 7.862054 211 211 2606.43 1 1 1 0 0 23 0 8.041229 208 208 1761.08 0 1 1 0 0 2 0 7.723598 213 213 2396.39 0 1 1 0 0 20 0 7.971220 212 212 1396.29 0 1 1 0 0 27 0 7.547655 248 248 102.45 0 0 1 0 0 46 1 6.401005 158 158 1482.06 0 0 0 0 1 16 1 7.591892 414 414 2473.72 1 0 0 0 1 4 1 7.997569 330 330 3110.32 0 1 0 1 0 51 1 8.191552 345 345 1493.82 1 0 0 1 0 34 1 7.597808 215 215 1873.23 1 1 0 1 0 51 1 7.772007 250 250 1787.16 0 0 1 0 0 6 0 7.735066 221 221 753.66 0 1 0 1 0 39 1 7.133823 43 43 6300.01 0 0 0 0 1 84 0 8.824679 251 251 2541.89 1 1 1 0 0 57 0 8.020234 249 249 6598.71 1 1 1 0 0 57 0 8.867668 race sc hat.resids press.resids cook.resids predicted std.press.resids 133 1 4 0.02576403 0.17293964 1.069211e-04 7.717131 0.30282430 110 1 4 0.02576880 -0.05648790 1.140947e-05 7.732308 -0.09891259 38 1 2 0.02596608 0.37087527 4.955901e-04 7.669744 0.64941760 69 1 2 0.02596608 0.36988646 4.929510e-04 7.669744 0.64768615 72 1 2 0.02596608 0.07879773 2.237146e-05 7.669744 0.13797801 117 1 4 0.02605659 -0.09575904 3.315414e-05 7.694366 -0.16767796 46 1 2 0.02629289 0.16971748 1.050877e-04 7.696302 0.29718218 76 1 2 0.02629289 0.11221220 
4.593874e-05 7.696302 0.19648810 68 1 2 0.02634842 0.41189450 6.202792e-04 7.631802 0.72124392 71 1 2 0.02634842 0.14048333 7.215473e-05 7.631802 0.24599199 109 1 4 0.02645146 0.52728382 1.020467e-03 7.679190 0.92329529 148 1 4 0.02685251 0.04313002 6.931142e-06 7.667807 0.07552242 55 1 2 0.02696266 -1.47709429 8.162812e-03 7.719067 -2.58645182 116 1 4 0.02704436 0.12084024 5.479747e-05 7.785425 0.21159615 137 1 4 0.02704436 -0.10373199 4.037967e-05 7.785425 -0.18163891 119 1 4 0.02752711 0.26157711 2.613489e-04 7.652631 0.45803209 123 1 4 0.02757247 -1.03129513 4.069135e-03 7.796808 -1.80583948 48 1 2 0.02760900 0.07924526 2.405790e-05 7.734244 0.13876165 44 1 2 0.02775979 -0.61279485 1.446462e-03 7.603357 -1.07302856 80 1 2 0.02775979 0.04568752 8.040293e-06 7.603357 0.08000069 58 1 2 0.02792296 0.21084983 1.722537e-04 7.590067 0.36920657 136 1 4 0.02813797 0.26545219 2.751225e-04 7.641248 0.46481753 42 1 2 0.02815284 -0.41735513 6.804471e-04 7.656475 -0.73080571 70 1 2 0.02815284 -0.96069919 3.605435e-03 7.656475 -1.68222313 311 2 13 0.02844965 -0.39545643 6.173547e-04 7.701099 -0.69246021 317 2 13 0.02844965 0.28201742 3.139709e-04 7.701099 0.49382391 324 2 13 0.02844965 0.25437067 2.554298e-04 7.701099 0.44541333 339 2 13 0.02844965 -0.79033670 2.465822e-03 7.701099 -1.38391152 318 2 13 0.02848802 0.14347468 8.137169e-05 7.678335 0.25122997 122 1 4 0.02851136 0.30241082 3.618039e-04 7.707657 0.52953357 232 3 9 0.16027615 -1.57282690 5.501629e-02 7.535348 -2.75408350 210 1 7 0.14457594 -0.83974418 1.414655e-02 7.144000 -1.47042602 238 3 9 0.13867723 0.37842388 2.755642e-03 8.194834 0.66263552 206 3 7 0.13055824 -0.75250173 1.025841e-02 8.209863 -1.31766096 234 2 9 0.13029654 0.47833577 4.136749e-03 8.258986 0.83758527 207 3 7 0.12383745 0.43910868 3.313268e-03 7.919457 0.76889706 209 1 7 0.12315083 0.26247182 1.177235e-03 7.480984 0.45959877 204 1 7 0.12277142 -0.17705672 5.340509e-04 7.492367 -0.31003347 236 2 9 0.12272951 0.07201896 8.832899e-05 7.964952 
0.12610811 240 2 9 0.12089811 0.05580642 5.224544e-05 7.866139 0.09771930 205 2 7 0.12009342 0.47647764 3.783248e-03 7.932235 0.83433162 237 2 9 0.11955384 -0.24817067 1.021704e-03 8.016018 -0.43455688 235 2 9 0.11903109 1.15817259 2.215479e-02 8.052217 2.02800704 239 2 9 0.11679322 -0.31697177 1.628244e-03 7.818723 -0.55503038 233 2 9 0.11479661 -0.06248990 6.220271e-05 7.917371 -0.10942234 211 2 7 0.11285340 0.33242227 1.730435e-03 7.746322 0.58208483 208 2 7 0.11099305 0.10886283 1.825222e-04 7.626818 0.19062322 213 2 7 0.10738303 0.30932418 1.425686e-03 7.695112 0.54163915 212 2 7 0.10685334 -0.19483551 5.628399e-04 7.721671 -0.34116486 248 2 10 0.10562834 -1.32681940 2.580267e-02 7.587674 -2.32331442 158 3 5 0.10218668 0.23005039 7.504138e-04 7.385350 0.40282754 414 3 16 0.09558544 -0.07561555 7.583585e-05 8.065957 -0.13240589 330 1 13 0.09185219 1.17405469 1.756818e-02 7.125336 2.05581724 345 1 14 0.08893257 -0.04169369 2.145171e-05 7.635793 -0.07300733 215 1 8 0.08726765 0.55918849 3.786433e-03 7.261618 0.97916166 250 2 10 0.08687453 -0.17854439 3.842779e-04 7.898100 -0.31263844 221 1 8 0.08639526 0.41387516 2.053474e-03 6.755704 0.72471214 43 3 2 0.08464998 0.62502855 4.588673e-03 8.252559 1.09445026 251 2 10 0.08452807 0.05070389 3.015394e-05 7.973816 0.08878456 249 2 10 0.08452807 0.97638388 1.118157e-02 7.973816 1.70968765 > extremes.press[,!colnames(extremes.press) %in% sc.vars] id earnings careeracad female hisp black white math office ln.earnings 21 21 0.00 1 0 0 1 0 76 1 6.214608 258 258 0.00 1 1 0 0 1 92 0 6.214608 338 338 0.00 1 0 1 0 0 18 0 6.214608 183 183 97.06 1 0 1 0 0 37 0 6.392018 232 232 0.00 0 1 0 0 1 20 1 6.214608 403 403 0.00 1 1 1 0 0 15 0 6.214608 55 55 34.75 1 1 0 1 0 61 0 6.281799 319 319 0.00 1 1 1 0 0 6 0 6.214608 104 104 112.40 1 0 0 1 0 27 0 6.417386 2 2 0.00 0 1 0 1 0 14 0 6.214608 427 427 173.09 0 1 1 0 0 32 0 6.511879 248 248 102.45 0 0 1 0 0 46 1 6.401005 422 422 299.05 1 1 1 0 0 38 0 6.683424 161 161 58.88 1 1 1 0 0 13 0 
6.325935 120 120 310.02 1 1 0 1 0 82 0 6.697059 454 454 133.28 1 1 1 0 0 13 0 6.450913 366 366 112.07 1 1 1 0 0 6 0 6.416847 179 179 96.04 0 0 0 1 0 15 0 6.390308 257 257 49.38 1 1 0 1 0 63 0 6.308790 228 228 0.00 1 0 0 1 0 10 1 6.214608 436 436 0.00 0 1 1 0 0 18 1 6.214608 362 362 172.73 1 1 1 0 0 1 0 6.511344 352 352 443.26 0 0 1 0 0 54 0 6.849342 163 163 67.38 0 1 0 1 0 16 0 6.341029 176 176 234.75 1 1 1 0 0 27 0 6.599530 296 296 295.86 1 1 1 0 0 43 0 6.679423 302 302 204.13 0 1 1 0 0 11 0 6.556963 245 245 647.42 1 0 1 0 0 20 0 7.045271 123 123 392.43 1 1 0 1 0 51 0 6.793948 59 59 90.48 0 1 0 1 0 82 1 6.380936 141 141 11010.09 1 0 0 1 0 20 0 9.350979 482 482 4128.71 0 0 1 0 0 18 1 8.440033 3 3 6020.09 1 1 0 1 0 28 0 8.782643 330 330 3110.32 0 1 0 1 0 51 1 8.191552 273 273 4846.32 1 1 1 0 0 40 0 8.584164 235 235 8212.65 0 0 1 0 0 33 0 9.072531 275 275 6030.95 1 0 0 0 1 29 0 8.784308 425 425 5394.50 0 0 0 0 1 47 1 8.681775 280 280 7159.54 1 0 1 0 0 92 1 8.943707 260 260 5842.34 1 0 0 0 1 49 0 8.755003 164 164 4823.12 1 1 0 1 0 78 0 8.579815 168 168 4992.02 0 1 1 0 0 52 0 8.611051 87 87 4231.27 1 1 0 1 0 4 0 8.461949 249 249 6598.71 1 1 1 0 0 57 0 8.867668 487 487 6002.78 1 0 1 0 0 32 0 8.779985 39 39 3297.58 0 0 0 1 0 51 1 8.242119 294 294 4503.95 0 1 1 0 0 34 0 8.517983 476 476 4944.08 1 1 1 0 0 32 0 8.602284 331 331 5592.27 1 0 1 0 0 12 0 8.714776 428 428 6626.98 0 1 1 0 0 88 0 8.871643 157 157 4721.92 0 1 1 0 0 61 0 8.560620 323 323 5407.29 1 0 1 0 0 12 0 8.683942 170 170 4174.27 1 1 1 0 0 30 0 8.449828 346 346 2657.33 0 0 1 0 0 12 1 8.057482 429 429 5443.73 1 0 1 0 0 7 0 8.690092 47 47 2352.50 0 1 0 1 0 51 1 7.955951 243 243 5667.45 0 0 1 0 0 32 0 8.727041 263 263 4378.59 0 1 0 0 1 89 0 8.492612 268 268 4788.24 1 0 0 0 1 60 0 8.573241 185 185 3630.82 0 1 1 0 0 7 0 8.326231 race sc hat.resids press.resids cook.resids predicted std.press.resids 21 1 1 0.05405701 -1.8263549 0.025019749 7.942236 -3.198021 258 3 11 0.06810324 -1.7223535 0.028033218 7.819664 
-3.015911 338 2 13 0.03073212 -1.6727449 0.011931978 7.835946 -2.929044 183 2 6 0.05372747 -1.6434907 0.020136845 7.947208 -2.877819 232 3 9 0.16027615 -1.5728269 0.055016292 7.535348 -2.754084 403 2 15 0.04652682 -1.4996029 0.014518323 7.644439 -2.625865 55 1 2 0.02696266 -1.4770943 0.008162812 7.719067 -2.586452 319 2 13 0.02964378 -1.4615220 0.008786276 7.632805 -2.559184 104 1 3 0.04373518 -1.4092312 0.012051914 7.764984 -2.467621 2 1 1 0.03715193 -1.3881595 0.009933920 7.551195 -2.430723 427 2 17 0.03858077 -1.3332087 0.009515415 7.793652 -2.334502 248 2 10 0.10562834 -1.3268194 0.025802670 7.587674 -2.323314 422 2 16 0.05618148 -1.3173080 0.013527839 7.926723 -2.306660 161 2 5 0.05495458 -1.3122993 0.013131983 7.566117 -2.297889 120 1 4 0.03829473 -1.2658421 0.008514490 7.914426 -2.216541 454 2 18 0.03654338 -1.2244964 0.007602988 7.630662 -2.144143 366 2 14 0.03256712 -1.2124049 0.006642556 7.589767 -2.122970 179 1 5 0.06324703 -1.2009516 0.012657610 7.515303 -2.102915 257 1 11 0.08356187 -1.1701830 0.015877287 7.381191 -2.049038 228 1 8 0.07824700 -1.1381157 0.014063748 7.263670 -1.992887 436 2 17 0.06257731 -1.1347443 0.011180815 7.278343 -1.986983 362 2 14 0.03362095 -1.0963114 0.005607100 7.570796 -1.919686 352 2 14 0.04085973 -1.0846456 0.006670092 7.889669 -1.899258 163 1 5 0.05715694 -1.0823183 0.009290509 7.361486 -1.895183 176 2 5 0.05168297 -1.0752783 0.008291820 7.619235 -1.882856 296 2 12 0.05207311 -1.0678485 0.008239359 7.691666 -1.869846 302 2 12 0.06107704 -1.0367869 0.009109983 7.530426 -1.815456 245 2 10 0.08429527 -1.0328368 0.012477489 7.991045 -1.808539 123 1 4 0.02757247 -1.0312951 0.004069135 7.796808 -1.805839 59 1 2 0.05143493 -0.9654482 0.006652374 7.296726 -1.690539 141 1 4 0.03083424 1.5623522 0.010443635 7.836801 2.735742 482 2 18 0.06208995 1.2099322 0.012612580 7.305226 2.118640 3 1 1 0.03263183 1.1769081 0.006271727 7.644140 2.060814 330 1 13 0.09185219 1.1740547 0.017568178 7.125336 2.055817 273 2 11 0.05086985 1.1737604 
0.009724784 7.470112 2.055302 235 2 9 0.11903109 1.1581726 0.022154785 8.052217 2.028007 275 3 11 0.07205024 1.1272840 0.012704648 7.738245 1.973920 425 3 17 0.07580609 1.0642075 0.011912895 7.698241 1.863470 280 2 12 0.06741621 1.0188380 0.009710357 7.993555 1.784027 260 3 11 0.06471883 1.0059816 0.009088063 7.814127 1.761514 164 1 5 0.06096411 1.0045046 0.008535690 7.636549 1.758928 168 2 5 0.05124029 0.9873843 0.006931776 7.674261 1.728950 87 1 3 0.04477948 0.9859936 0.006040708 7.520108 1.726515 249 2 10 0.08452807 0.9763839 0.011181569 7.973816 1.709688 487 2 18 0.03449104 0.9524753 0.004341845 7.860362 1.667823 39 1 2 0.04838542 0.9514355 0.006077623 7.336719 1.666002 294 2 12 0.05249794 0.9501740 0.006576716 7.617691 1.663793 476 2 18 0.03437250 0.9315536 0.004138923 7.702750 1.631188 331 2 13 0.03136445 0.9307885 0.003770512 7.813181 1.629848 428 2 17 0.05439755 0.9153107 0.006323784 8.006123 1.602746 157 2 5 0.05256632 0.8994954 0.005901550 7.708408 1.575053 323 2 13 0.03136445 0.8989566 0.003517027 7.813181 1.574109 170 2 5 0.05123668 0.8634512 0.005300504 7.630617 1.511938 346 2 14 0.05627804 0.8364293 0.005463338 7.268125 1.464622 429 2 17 0.04734009 0.8333040 0.004561383 7.896237 1.459149 47 1 2 0.04549593 0.8138708 0.004181615 7.179108 1.425121 243 2 10 0.08333154 0.7966825 0.007339062 7.996747 1.395023 263 3 11 0.07020657 0.7788370 0.005909237 7.768454 1.363775 268 3 11 0.06239001 0.7651133 0.005067890 7.855863 1.339744 185 2 6 0.05462342 0.7301705 0.004040992 7.635945 1.278558 > extremes.cook[,!colnames(extremes.cook) %in% sc.vars] id earnings careeracad female hisp black white math office ln.earnings 52 52 1322.20 0 1 0 1 0 17 0 7.507800 328 328 1241.54 0 0 1 0 0 51 1 7.462525 97 97 1748.85 0 0 0 1 0 24 0 7.718174 107 107 1450.63 1 1 0 1 0 20 0 7.575908 61 61 1848.37 1 1 0 1 0 74 0 7.761477 274 274 1591.68 0 0 1 0 0 57 0 7.645723 114 114 1498.68 1 1 0 1 0 2 0 7.600242 480 480 2336.68 1 0 1 0 0 53 0 7.950390 186 186 1547.34 1 1 0 1 0 42 0 7.624297 
336 336 1838.59 0 0 1 0 0 4 0 7.757303 326 326 2194.03 0 0 1 0 0 41 0 7.898793 60 60 1678.89 0 1 0 1 0 68 0 7.686571 85 85 1757.54 1 0 0 1 0 20 0 7.722031 129 129 2375.63 1 0 0 1 0 48 0 7.964027 443 443 2246.14 0 0 1 0 0 28 0 7.917952 321 321 1572.00 1 1 1 0 0 1 0 7.636270 485 485 2176.41 1 0 1 0 0 46 0 7.892232 407 407 2576.04 0 0 1 0 0 30 0 8.031398 171 171 1270.21 0 1 0 1 0 42 0 7.478853 402 402 1526.49 0 1 1 0 0 12 0 7.614061 474 474 2798.23 1 0 0 0 1 50 0 8.101141 138 138 2311.77 1 0 0 1 0 55 0 7.941569 445 445 1834.84 0 1 1 0 0 15 0 7.755699 88 88 1569.21 1 1 0 1 0 27 0 7.634922 190 190 1621.58 0 1 1 0 0 7 0 7.659916 380 380 1931.22 0 0 1 0 0 31 0 7.796148 399 399 1782.04 0 0 1 0 0 15 0 7.732825 369 369 1838.50 1 0 1 0 0 18 0 7.757265 148 148 1730.05 1 1 0 1 0 17 0 7.709779 231 231 958.97 1 1 0 1 0 39 0 7.285486 232 232 0.00 0 1 0 0 1 20 1 6.214608 258 258 0.00 1 1 0 0 1 92 0 6.214608 248 248 102.45 0 0 1 0 0 46 1 6.401005 21 21 0.00 1 0 0 1 0 76 1 6.214608 235 235 8212.65 0 0 1 0 0 33 0 9.072531 183 183 97.06 1 0 1 0 0 37 0 6.392018 330 330 3110.32 0 1 0 1 0 51 1 8.191552 257 257 49.38 1 1 0 1 0 63 0 6.308790 403 403 0.00 1 1 1 0 0 15 0 6.214608 210 210 117.49 0 1 0 1 0 43 1 6.425663 228 228 0.00 1 0 0 1 0 10 1 6.214608 422 422 299.05 1 1 1 0 0 38 0 6.683424 161 161 58.88 1 1 1 0 0 13 0 6.325935 275 275 6030.95 1 0 0 0 1 29 0 8.784308 179 179 96.04 0 0 0 1 0 15 0 6.390308 482 482 4128.71 0 0 1 0 0 18 1 8.440033 245 245 647.42 1 0 1 0 0 20 0 7.045271 104 104 112.40 1 0 0 1 0 27 0 6.417386 338 338 0.00 1 0 1 0 0 18 0 6.214608 425 425 5394.50 0 0 0 0 1 47 1 8.681775 249 249 6598.71 1 1 1 0 0 57 0 8.867668 436 436 0.00 0 1 1 0 0 18 1 6.214608 141 141 11010.09 1 0 0 1 0 20 0 9.350979 206 206 1411.43 0 0 0 0 1 74 0 7.555607 2 2 0.00 0 1 0 1 0 14 0 6.214608 273 273 4846.32 1 1 1 0 0 40 0 8.584164 280 280 7159.54 1 0 1 0 0 92 1 8.943707 427 427 173.09 0 1 1 0 0 32 0 6.511879 163 163 67.38 0 1 0 1 0 16 0 6.341029 302 302 204.13 0 1 1 0 0 11 0 6.556963 race sc 
hat.resids press.resids cook.resids predicted std.press.resids 52 1 2 0.03164183 -0.004645261 9.474196e-08 7.512298 -0.008134039 328 2 13 0.05430270 0.003585286 9.685671e-08 7.459134 0.006277981 97 1 3 0.04601777 0.004612377 1.358425e-07 7.713774 0.008076459 107 1 3 0.04255240 -0.005124037 1.550277e-07 7.580814 -0.008972396 61 1 2 0.02964771 -0.007125443 2.088697e-07 7.768391 -0.012476939 274 2 11 0.05231643 -0.007042083 3.599987e-07 7.652397 -0.012330972 114 1 4 0.03020653 -0.010985025 5.057823e-07 7.610895 -0.019235223 480 2 18 0.03546779 0.010731898 5.668234e-07 7.940038 0.018791988 186 1 6 0.06220807 -0.008619954 6.413826e-07 7.632380 -0.015093887 336 2 13 0.03351691 0.014798688 1.018523e-06 7.743001 0.025913100 326 2 13 0.03299513 0.015935667 1.162655e-06 7.883384 0.027903997 60 1 2 0.03027123 -0.019828166 1.651413e-06 7.705799 -0.034719920 85 1 3 0.04417861 -0.017151698 1.803378e-06 7.738425 -0.030033317 129 1 4 0.03033377 0.021646933 1.972331e-06 7.943037 0.037904655 443 2 17 0.04210385 -0.018931800 2.093950e-06 7.936086 -0.033150347 321 2 13 0.03054995 0.023142137 2.270273e-06 7.613834 0.040522817 485 2 18 0.03465265 -0.022010556 2.329480e-06 7.913479 -0.038541374 407 2 16 0.06034142 0.018351584 2.819832e-06 8.014154 0.032134366 171 1 5 0.05564053 0.019823472 3.033970e-06 7.460133 0.034711701 402 2 15 0.04908608 0.021906292 3.268562e-06 7.593230 0.038358804 474 3 18 0.05697663 0.021450854 3.637864e-06 8.080913 0.037561312 138 1 4 0.03143254 -0.028935743 3.651820e-06 7.969596 -0.050667656 445 2 17 0.03997860 0.027652895 4.241988e-06 7.729151 0.048421337 88 1 3 0.04238232 0.028768873 4.867333e-06 7.607373 0.050375460 190 2 6 0.05462342 0.025356286 4.873161e-06 7.635945 0.044399883 380 2 15 0.04832981 -0.028140866 5.310685e-06 7.822929 -0.049275793 399 2 15 0.04951858 -0.030929734 6.573264e-06 7.762223 -0.054159214 369 2 14 0.03521676 -0.036943988 6.669565e-06 7.792908 -0.064690417 148 1 4 0.02685251 0.043130024 6.931142e-06 7.667807 0.075522416 231 1 8 
0.06218052 0.029604751 7.562019e-06 7.257722 0.051839116 232 3 9 0.16027615 -1.572826901 5.501629e-02 7.535348 -2.754083503 258 3 11 0.06810324 -1.722353533 2.803322e-02 7.819664 -3.015910680 248 2 10 0.10562834 -1.326819399 2.580267e-02 7.587674 -2.323314418 21 1 1 0.05405701 -1.826354879 2.501975e-02 7.942236 -3.198021243 235 2 9 0.11903109 1.158172590 2.215479e-02 8.052217 2.028007036 183 2 6 0.05372747 -1.643490690 2.013684e-02 7.947208 -2.877818653 330 1 13 0.09185219 1.174054693 1.756818e-02 7.125336 2.055817240 257 1 11 0.08356187 -1.170182997 1.587729e-02 7.381191 -2.049037746 403 2 15 0.04652682 -1.499602916 1.451832e-02 7.644439 -2.625865343 210 1 7 0.14457594 -0.839744184 1.414655e-02 7.144000 -1.470426022 228 1 8 0.07824700 -1.138115713 1.406375e-02 7.263670 -1.992886634 422 2 16 0.05618148 -1.317308024 1.352784e-02 7.926723 -2.306659616 161 2 5 0.05495458 -1.312299309 1.313198e-02 7.566117 -2.297889153 275 3 11 0.07205024 1.127283954 1.270465e-02 7.738245 1.973919786 179 1 5 0.06324703 -1.200951576 1.265761e-02 7.515303 -2.102914771 482 2 18 0.06208995 1.209932160 1.261258e-02 7.305226 2.118640136 245 2 10 0.08429527 -1.032836806 1.247749e-02 7.991045 -1.808539010 104 1 3 0.04373518 -1.409231203 1.205191e-02 7.764984 -2.467620820 338 2 13 0.03073212 -1.672744865 1.193198e-02 7.835946 -2.929043897 425 3 17 0.07580609 1.064207515 1.191289e-02 7.698241 1.863470392 249 2 10 0.08452807 0.976383878 1.118157e-02 7.973816 1.709687652 436 2 17 0.06257731 -1.134744340 1.118082e-02 7.278343 -1.986983223 141 1 4 0.03083424 1.562352169 1.044364e-02 7.836801 2.735741823 206 3 7 0.13055824 -0.752501733 1.025841e-02 8.209863 -1.317660962 2 1 1 0.03715193 -1.388159518 9.933920e-03 7.551195 -2.430723448 273 2 11 0.05086985 1.173760411 9.724784e-03 7.470112 2.055301942 280 2 12 0.06741621 1.018838001 9.710357e-03 7.993555 1.784026537 427 2 17 0.03858077 -1.333208668 9.515415e-03 7.793652 -2.334502288 163 1 5 0.05715694 -1.082318301 9.290509e-03 7.361486 -1.895183109 302 
2 12 0.06107704 -1.036786921 9.109983e-03 7.530426 -1.815455822 > > # plot the three major things > pdf("dam2_writeup/figures/q6_hat_by_id.pdf", width = 9.5, height = 6) > plot.4 <- qplot(id, hat.resids, data=extremes.hat) > > outliers.hat <- students[c(158,248),] > labels <- rownames(extremes.hat) > labels[!extremes.hat$id %in% outliers.hat$id] <- "" > plot.4 <- plot.4 + geom_point(data=outliers.hat, colour="red") > > outliers.groups <- extremes.hat[extremes.hat$sc == 7 | extremes.hat$sc ==9,] > plot.4 <- plot.4 + geom_point(data=outliers.hat, colour="red") > plot.4 <- plot.4 + geom_point(data=outliers.groups, colour="red") > plot.4 <- plot.4 + geom_text(label=labels, hjust=-.2, size=4) > print(plot.4) > dev.off() pdf 2 > > pdf("dam2_writeup/figures/q6_press_by_id.pdf", width = 9.5, height = 6) > plot.5 <- qplot(id, std.press.resids, data=extremes.press) > plot.5 <- plot.5 + geom_hline(y=c(2.5,-2.5), colour="red") > > labels <- as.character(extremes.press$id) > labels[abs(extremes.press$std.press.resids) < 2.5] <- "" > plot.5 <- plot.5 + geom_text(label=labels, hjust=-.2, size=4) > print(plot.5) > dev.off() pdf 2 > > pdf("dam2_writeup/figures/q6_cook_by_id.pdf", width = 9.5, height = 6) > plot.6 <- qplot(id, cook.resids, data=extremes.cook) > > outliers.cook <- students[c(330, 248, 258, 21, 183, 232, 235),] > labels <- rownames(extremes.cook) > labels[!extremes.cook$id %in% outliers.cook$id] <- "" > plot.6 <- plot.6 + geom_point(data=outliers.cook, colour="red") > plot.6 <- plot.6 + geom_text(label=labels, hjust=-.2, size=4) > print(plot.6) > dev.off() pdf 2 > > # DAM QUESTION 7 > > # select the major outlier (id=232) and print information > major.outlier <- students[rev(sort.list(students$cook.resids))[1],] > major.outlier id earnings careeracad female hisp black white math office sc02 sc03 sc04 232 232 0 0 1 0 0 1 20 1 0 0 0 sc05 sc06 sc07 sc08 sc09 sc10 sc11 sc12 sc13 sc14 sc15 sc16 sc17 sc18 232 0 0 0 0 1 0 0 0 0 0 0 0 0 0 ln.earnings race sc hat.resids 
press.resids cook.resids predicted 232 6.214608 3 9 0.1602761 -1.572827 0.05501629 7.535348 std.press.resids 232 -2.754084 > > # rerun model > m.decap <- run.reg(students, major.outlier$id) > > # DAM QUESTION 8 > > # remove groups 7 and 9 > students.trimmed <- students[!students$sc %in% c(7,9),] > # refit model > m.trimmed <- run.reg(students.trimmed) > summary(m.trimmed) Call: lm(formula = ln.earnings ~ sc02 + sc03 + sc04 + sc05 + sc06 + sc07 + sc08 + sc09 + sc10 + sc11 + sc12 + sc13 + sc14 + sc15 + sc16 + sc17 + sc18 + female + black + hisp + math + office + careeracad + careeracad * office, data = d) Residuals: Min 1Q Median 3Q Max -1.71629 -0.28185 0.06844 0.32960 1.52546 Coefficients: (2 not defined because of singularities) Estimate Std. Error t value Pr(>|t|) (Intercept) 7.995981 0.187210 42.711 < 2e-16 *** sc02 -0.052689 0.119493 -0.441 0.65947 sc03 -0.035762 0.139805 -0.256 0.79823 sc04 0.064511 0.121381 0.531 0.59536 sc05 -0.180820 0.150917 -1.198 0.23150 sc06 -0.042623 0.162659 -0.262 0.79341 sc07 NA NA NA NA sc08 -0.425708 0.156907 -2.713 0.00692 ** sc09 NA NA NA NA sc10 0.078337 0.210208 0.373 0.70958 sc11 -0.371533 0.179422 -2.071 0.03896 * sc12 -0.162759 0.165016 -0.986 0.32451 sc13 -0.073632 0.169108 -0.435 0.66347 sc14 -0.117565 0.173448 -0.678 0.49824 sc15 -0.091227 0.185105 -0.493 0.62237 sc16 0.094066 0.192211 0.489 0.62481 sc17 0.019565 0.176257 0.111 0.91166 sc18 -0.100317 0.173080 -0.580 0.56248 female -0.145841 0.052900 -2.757 0.00608 ** black -0.347409 0.144714 -2.401 0.01678 * hisp -0.206289 0.105437 -1.957 0.05103 . math 0.003647 0.001229 2.967 0.00317 ** office -0.399901 0.100086 -3.996 7.55e-05 *** careeracad 0.039495 0.055712 0.709 0.47874 office:careeracad 0.365542 0.134965 2.708 0.00702 ** --- Signif. 
codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 Residual standard error: 0.5367 on 445 degrees of freedom Multiple R-squared: 0.1444, Adjusted R-squared: 0.1021 F-statistic: 3.414 on 22 and 445 DF, p-value: 4.702e-07 > > # DAM QUESTION 9 > # identify a set of atypical groups of points and create a table > > # draw hat resids and take a look at outliers > # 221?, 330, 158, 248, 414, 189?, 410? > > # draw press residuals and then pick out outliers > # results in: > # 141 (one group, high); 21, 183, 258, 338 (low), 55, 319, 403 > > # draw the cook resids and identify potential outliers > # results in: > # 330, 248, 258, 21, 183 > > # raw list of suspicious points > suspicious.obs <- + sort(unique(c(330, 158, 248, 21, 183, 258, + 338, 141, 248, 258, 21, 183, 55, 403, 319))) > > outliers <- students[suspicious.obs,!colnames(students) %in% sc.vars] > > # 141 has unusually high earnings > group.1 <- 141 > > # 21, 258, 338, 183, 248 all have decent math scores and, in some cases, > # other predictors of success, but have essentially zero earnings. 
21, 258, > # and 338 have 0 income and 183 and 248 have roughly 100 or less > # they are all completely overpredicted > group.2 <- c(21, 258, 319, 338, 403, 55, 183) > > # group 3 is similar to group.2 > # but has not been in the career academies > group.3 <- c(248) > > # 330 and 158 are both badly behaving kids outside of the career academy but > # who are making more than the average > group.4 <- c(330, 158) > > # print the table of outliers > xtable(students[c(group.1, group.2, group.3, group.4), + c("id", "earnings","careeracad","female", + "math","office","race","sc")]) % latex table generated in R 2.9.2 by xtable 1.5-5 package % Thu Sep 24 00:23:55 2009 \begin{table}[ht] \begin{center} \begin{tabular}{rrrrrrrrr} \hline & id & earnings & careeracad & female & math & office & race & sc \\ \hline 141 & 141 & 11010.09 & 1 & 0 & 20 & 0 & 1.00 & 4.00 \\ 21 & 21 & 0.00 & 1 & 0 & 76 & 1 & 1.00 & 1.00 \\ 258 & 258 & 0.00 & 1 & 1 & 92 & 0 & 3.00 & 11.00 \\ 319 & 319 & 0.00 & 1 & 1 & 6 & 0 & 2.00 & 13.00 \\ 338 & 338 & 0.00 & 1 & 0 & 18 & 0 & 2.00 & 13.00 \\ 403 & 403 & 0.00 & 1 & 1 & 15 & 0 & 2.00 & 15.00 \\ 55 & 55 & 34.75 & 1 & 1 & 61 & 0 & 1.00 & 2.00 \\ 183 & 183 & 97.06 & 1 & 0 & 37 & 0 & 2.00 & 6.00 \\ 248 & 248 & 102.45 & 0 & 0 & 46 & 1 & 2.00 & 10.00 \\ 330 & 330 & 3110.32 & 0 & 1 & 51 & 1 & 1.00 & 13.00 \\ 158 & 158 & 1482.06 & 0 & 0 & 16 & 1 & 3.00 & 5.00 \\ \hline \end{tabular} \end{center} \end{table} > > # DAM QUESTION 10 > > # generate the new models and see how they line up > m1 <- run.reg(students.trimmed, group.1) > m2 <- run.reg(students.trimmed, group.2) > m3 <- run.reg(students.trimmed, group.3) > m4 <- run.reg(students.trimmed, group.4) > > #output the table > library(apsrtable) > apsrtable(m, m.decap, m.trimmed, m1, m2, m3, stars="default") \begin{table}[!ht] \caption{} \label{} \begin{tabular}{lD{.}{.}{2}D{.}{.}{2}D{.}{.}{2}D{.}{.}{2}D{.}{.}{2}D{.}{.}{2}} \hline & \multicolumn{ 1 }{ c }{ Model 1 } & \multicolumn{ 1 }{ c }{ Model 2 } & 
\multicolumn{ 1 }{ c }{ Model 3 } & \multicolumn{ 1 }{ c }{ Model 4 } & \multicolumn{ 1 }{ c }{ Model 5 } & \multicolumn{ 1 }{ c }{ Model 6 } \\ \hline % & Model 1 & Model 2 & Model 3 & Model 4 & Model 5 & Model 6 \\ (Intercept) & 7.98 ^{***} & 8.02 ^{***} & 8.00 ^{***} & 7.99 ^{***} & 8.07 ^{***} & 7.99 ^{***} \\ & (0.18) & (0.18) & (0.19) & (0.19) & (0.18) & (0.19) \\ sc02 & -0.05 & -0.05 & -0.05 & -0.05 & -0.07 & -0.05 \\ & (0.12) & (0.12) & (0.12) & (0.12) & (0.11) & (0.12) \\ sc03 & -0.03 & -0.04 & -0.04 & -0.03 & -0.08 & -0.03 \\ & (0.14) & (0.14) & (0.14) & (0.14) & (0.13) & (0.14) \\ sc04 & 0.07 & 0.06 & 0.06 & 0.03 & 0.02 & 0.07 \\ & (0.12) & (0.12) & (0.12) & (0.12) & (0.11) & (0.12) \\ sc05 & -0.20 & -0.20 & -0.18 & -0.18 & -0.25 ^\dagger & -0.18 \\ & (0.15) & (0.15) & (0.15) & (0.15) & (0.14) & (0.15) \\ sc06 & -0.06 & -0.07 & -0.04 & -0.04 & -0.05 & -0.04 \\ & (0.16) & (0.16) & (0.16) & (0.16) & (0.15) & (0.16) \\ sc07 & -0.06 & -0.06 & & & & \\ & (0.21) & (0.21) & & & & \\ sc08 & -0.43 ^{**} & -0.43 ^{**} & -0.43 ^{**} & -0.42 ^{**} & -0.47 ^{**} & -0.42 ^{**} \\ & (0.16) & (0.16) & (0.16) & (0.16) & (0.15) & (0.16) \\ sc09 & 0.10 & 0.26 & & & & \\ & (0.23) & (0.24) & & & & \\ sc10 & 0.04 & 0.04 & 0.08 & 0.08 & 0.01 & 0.19 \\ & (0.21) & (0.21) & (0.21) & (0.21) & (0.20) & (0.21) \\ sc11 & -0.40 ^* & -0.40 ^* & -0.37 ^* & -0.37 ^* & -0.40 ^* & -0.37 ^* \\ & (0.18) & (0.18) & (0.18) & (0.18) & (0.17) & (0.18) \\ sc12 & -0.19 & -0.19 & -0.16 & -0.17 & -0.25 & -0.16 \\ & (0.16) & (0.16) & (0.17) & (0.16) & (0.16) & (0.16) \\ sc13 & -0.10 & -0.11 & -0.07 & -0.07 & -0.07 & -0.07 \\ & (0.17) & (0.16) & (0.17) & (0.17) & (0.16) & (0.17) \\ sc14 & -0.15 & -0.15 & -0.12 & -0.12 & -0.19 & -0.12 \\ & (0.17) & (0.17) & (0.17) & (0.17) & (0.16) & (0.17) \\ sc15 & -0.13 & -0.13 & -0.09 & -0.09 & -0.11 & -0.09 \\ & (0.18) & (0.18) & (0.19) & (0.18) & (0.18) & (0.18) \\ sc16 & 0.07 & 0.06 & 0.09 & 0.10 & 0.00 & 0.10 \\ & (0.19) & (0.19) & (0.19) & (0.19) & (0.18) & 
(0.19) \\ sc17 & -0.00 & -0.01 & 0.02 & 0.02 & -0.06 & 0.02 \\ & (0.17) & (0.17) & (0.18) & (0.17) & (0.17) & (0.18) \\ sc18 & -0.13 & -0.13 & -0.10 & -0.10 & -0.19 & -0.10 \\ & (0.17) & (0.17) & (0.17) & (0.17) & (0.16) & (0.17) \\ female & -0.16 ^{**} & -0.15 ^{**} & -0.15 ^{**} & -0.13 ^* & -0.14 ^{**} & -0.15 ^{**} \\ & (0.05) & (0.05) & (0.05) & (0.05) & (0.05) & (0.05) \\ black & -0.33 ^* & -0.37 ^{**} & -0.35 ^* & -0.35 ^* & -0.42 ^{**} & -0.34 ^* \\ & (0.14) & (0.14) & (0.14) & (0.14) & (0.14) & (0.14) \\ hisp & -0.15 & -0.20 ^\dagger & -0.21 ^\dagger & -0.21 ^* & -0.24 ^* & -0.20 ^\dagger \\ & (0.10) & (0.10) & (0.11) & (0.10) & (0.10) & (0.10) \\ math & 0.00 ^{**} & 0.00 ^{**} & 0.00 ^{**} & 0.00 ^{**} & 0.00 ^{***} & 0.00 ^{**} \\ & (0.00) & (0.00) & (0.00) & (0.00) & (0.00) & (0.00) \\ office & -0.46 ^{***} & -0.42 ^{***} & -0.40 ^{***} & -0.40 ^{***} & -0.40 ^{***} & -0.36 ^{***} \\ & (0.10) & (0.10) & (0.10) & (0.10) & (0.09) & (0.10) \\ careeracad & 0.04 & 0.05 & 0.04 & 0.03 & 0.08 & 0.04 \\ & (0.05) & (0.05) & (0.06) & (0.06) & (0.05) & (0.06) \\ office:careeracad & 0.42 ^{**} & 0.38 ^{**} & 0.37 ^{**} & 0.38 ^{**} & 0.38 ^{**} & 0.33 ^* \\ & (0.13) & (0.13) & (0.13) & (0.13) & (0.13) & (0.14) \\ $N$ & 487 & 486 & 468 & 467 & 461 & 467 \\ $R^2$ & 0.16 & 0.16 & 0.14 & 0.15 & 0.18 & 0.15 \\ adj. $R^2$ & 0.12 & 0.12 & 0.10 & 0.10 & 0.14 & 0.10 \\ Resid. sd & 0.54 & 0.53 & 0.54 & 0.53 & 0.50 & 0.53 \\ \hline \multicolumn{7}{l}{\footnotesize{Standard errors in parentheses}}\\ \multicolumn{7}{l}{\footnotesize{$^\dagger$ significant at $p<.10$; $^* p<.05$; $^{**} p<.01$; $^{***} p<.001$}} \end{tabular} \end{table} > >