// ---------------------------------------------------------------------------
// Simulated example: educational attainment (grade) by skin color and race.
// Builds a 200-observation dataset, saves it, fits a regression, and then
// draws several alternative graphs of the same relationship.
// ---------------------------------------------------------------------------

// Data setup
clear                       // start from an empty dataset so set obs cannot fail
set seed 12345              // fixed seed so the simulated data are reproducible
set obs 200

generate resid = rnormal(0,5)
generate color = runiformint(1,10)

// Mutually exclusive race indicators, assigned sequentially:
// each later group is drawn only from observations not yet assigned.
generate black  = runiform() < 0.5
generate white  = runiform() < 0.5 & black == 0
generate latino = runiform() < 0.5 & black == 0 & white == 0
generate asian  = black == 0 & white == 0 & latino == 0

// Single categorical race variable built from the indicators
generate race = 1 if white
replace  race = 2 if black
replace  race = 3 if latino
replace  race = 4 if asian

// Label the categories so by(race) panels show names rather than 1-4
label define racelbl 1 "White" 2 "Black" 3 "Latino" 4 "Asian"
label values race racelbl

// Outcome: linear in color and race plus noise, rounded and clamped to 0-20
generate grade = 12 - color - 2*black + 2*asian - latino + resid
replace grade = round(grade)
replace grade = 0 if grade < 0
replace grade = 20 if grade > 20

// replace lets the script be rerun without a "file already exists" error
save "grade race color.dta", replace

use "grade race color.dta", clear
regress grade color i.race

// This graph emphasizes the relation between color and attainment,
// while showing individual variation.
// Individual points overplot each other.
// Differences between races are not particularly clear in this example.
twoway (scatter grade color) (lfit grade color), by(race)

// Collapse to one row per grade/color/race combination, keeping the cell
// frequency in count, in order to weight markers by size.
contract grade color race, freq(count)

// This helps a little with the overplotting, but won't strike most people
// as much different.
twoway (scatter grade color [fw=count]) (lfit grade color [fw=count]), by(race)

// Box plots show variation as well as descriptive summary information.
// However, they hide the trend.
// Reload the saved data because contract replaced the dataset in memory.
use "grade race color.dta", clear
graph hbox grade, over(color) by(race)

// Dot plot of grade (mean by default) for each color level, by race
graph dot grade, over(color) by(race)

// One grade variable per color level so each level gets its own marker style.
// separate returns the names it created in r(varlist); using that instead of
// a hard-coded grade1-grade10 keeps this working if a color level is absent.
separate grade, by(color)
scatter `r(varlist)' color, by(race, legend(off))