/* This is quite a long Stata program file that provides a number of examples
   of visualizing data and statistics using the General Social Survey. Many
   commands, options, and sub-options can be used in different graphics.
   These examples are not intended to be exhaustive, just illustrative. You
   will need to read the online help file for more details. */

/* Read in a subset of the GSS data set. Only the listed variables are
   loaded; -clear- discards whatever data are currently in memory. */
#delimit ;
use year marital spkath spkrac spkhomo tvhours wordsum occ80 abany sex race
    age educ degree prestige sexfreq partners nummen numwomen
    using http://terpconnect.umd.edu/~smilex3/GSS-Cumulative-72-12.dta, clear;
#delimit cr

/* Create two histograms, one bare-bones minimum, the other publication
   ready. */

/* Histogram #1--bare-bones */
hist marital, name(hist1, replace)

/* Histogram #2--publication ready */
#delimit ;
hist marital, discrete percent gap(15)
    title("Distribution of Marital Status, 1972-2012", color(black))
    ytitle("Percent")
    xtitle("")
    note("Created for SOCY699C")
    caption("Source: General Social Survey")
    xlabel(1 "Married" 2 "Widowed" 3 "Separated" 4 "Divorced" 5 "NM", angle(0))
    ylabel(0(10)50, angle(0) nogrid)
    ymtick(0(5)50)
    color(black) fintensity(35) lwidth(vthin) lcolor(black)
    graphregion(color(white))
    name(hist2, replace);
#delimit cr

/* Bar graph showing averages, which in this case are equal to percentages
   due to the recoding (1=100, 2=0). Colors were determined using advice from
   colorbrewer (colorbrewer.org).
   You can see the permanent link at:
   http://colorbrewer2.org/index.php?type=qualitative&scheme=Set1&n=3 */
capture drop pctspkath pctspkrac pctspkhomo
recode spkath spkrac spkhomo (1=100) (2=0), pre(pct)

/* The note reports the survey year that the -if- qualifier actually
   selects (year==2012). */
#delimit ;
graph bar pctspkath pctspkrac pctspkhomo if race<3 & year==2012, over(race)
    title("Percentage of Respondents Tolerant of Different Types"
          "of People Speaking in Their Community", color(black))
    note("Source: General Social Survey, 2012")
    ylabel(, angle(0))
    bar(1, color(228 26 28))
    bar(2, color( 55 126 184))
    bar(3, color( 77 175 74))
    legend(label(1 "Anti-religionist") label(2 "Racist") label(3 "Homosexual")
           rows(1))
    graphregion(color(white))
    name(bar1, replace);
#delimit cr

/* Bar graph using a gray scale color scheme. Note the colors -gs14-. The
   -gs- series runs from -gs0- (black) to -gs16- (white). Also, the -glp-
   suboption (grid line pattern) uses a dashed line for the y-grid. Also,
   note the use of the -gap(#)- suboption to the -over- option to control
   the distance between the bars.
   NOTE(review): the note below says 1975-2010, but the data file name
   suggests coverage through 2012 and no year filter is applied -- confirm
   the intended range. */
#delimit ;
graph bar tvhours if race<3,
    over(race, gap(20))
    over(sex, gap(80) relabel(1 "Men" 2 "Women"))
    title("Average Hours of TV Watched by Gender and Race", color(black))
    ytitle("Hours")
    note("Source: General Social Survey, 1975-2010")
    ylabel(, angle(0) glcolor(gs14) glp(dash))
    bar(1, color(gs14) lcolor(black))
    outergap(*.8)
    graphregion(color(white))
    name(bar2, replace);
#delimit cr

/* This example demonstrates how to manipulate the bar overlap, the legend,
   showing the values of each bar, and using the -asyvars- option, which
   treats the first over() group as yvars. */

// NOTE(review): the command for this example is truncated in this copy of
// the file. Everything between the opening quote of the first relabel()
// string and the tail of a later -replace- command is missing. The
// surviving fragment is kept here, commented out, so that it cannot
// swallow the commands that follow it:
//
//   #delimit ;
//   graph bar wordsum if race<3, asy over(race) over(degree, relabel(1 "
//
// NOTE(review): the variables -rs-, -racedum- and -abanydum- are used later
// in this file but are not created anywhere in the visible code; presumably
// they were generated in the missing portion. Restore those definitions
// before running the sections that depend on them.

/* Build the combined number-of-partners measure. Values above 989 are set
   to missing before summing.
   NOTE(review): the -nummen- line is reconstructed from the surviving
   " 989" fragment and the parallel -numwomen- line -- confirm against the
   original file. */
replace nummen=. if nummen > 989
replace numwomen=. if numwomen > 989
gen partners1=nummen+numwomen
label variable partners1 "Number of sexual partners since age 18"

/* Collapse to one mean per year-by-group cell for plotting. -preserve- /
   -restore- bring the respondent-level data back afterwards.
   NOTE(review): ymtick(0(25)30) yields minor ticks at 0 and 25 only --
   confirm the intended step. */
preserve
collapse partners1, by(year rs)
drop if partners1==.
list

#delimit ;
graph twoway
    (line partners1 year if rs==1, lw(thick))
    (line partners1 year if rs==2, lw(thick))
    (line partners1 year if rs==3, lw(thick))
    (line partners1 year if rs==4, lw(thick)),
    title("Number of sexual partners since age 18")
    subtitle("Comparing race and gender")
    xtitle("")
    ytitle("# of partners")
    ylabel(, angle(0))
    ymtick(0(25)30)
    legend(region(lcolor(white)) row(1)
           label(1 "nw men") label(2 "nw women")
           label(3 "w men") label(4 "w women"))
    graphregion(color(white))
    name(partners1, replace);
#delimit cr
/* The delimiter must be switched back to carriage return here; otherwise
   -restore- and the commands after it would be read as a single command. */
restore

/* Some graphics related to regression analyses. Here are linear and
   quadratic fitted lines with 95% confidence intervals for the predicted
   values. The first example creates two graphs but suppresses their output.
   Then, they are combined into a single graph to allow better comparison. */
capture drop sexfreq1
recode sexfreq (0=0) (1=2) (2=12) (3=36) (4=52) (5=156) (6=208), gen(sexfreq1)
regress sexfreq1 age

#delimit ;
graph twoway
    (lfitci sexfreq1 age if year==2010)
    (lfit sexfreq1 age if year==2010),
    title("Linear Fit")
    ylabel(0(25)100)
    yscale(range(0 100))
    legend(off)
    aspectratio(1)
    nodraw
    name(regress1, replace);

graph twoway
    (qfitci sexfreq1 age if year==2010)
    (qfit sexfreq1 age if year==2010),
    title("Quadratic Fit")
    ylabel(0(25)100)
    yscale(range(0 100))
    legend(off)
    aspectratio(1)
    nodraw
    name(regress2, replace);

graph combine regress1 regress2, nocopies;
#delimit cr

/* Estimate a multiple regression model and show a number of plots of
   predicted values. */
regress sexfreq1 c.age##c.age i.marital if year==2010
margins, over(marital)
marginsplot, recast(scatter) name(margins1, replace)

regress sexfreq1 c.age##c.age i.marital
margins, over(year)
marginsplot, recastci(rarea) recast(line) name(margins2, replace)

margins, over(year marital)
marginsplot, recastci(rarea) recast(line) name(margins3, replace)

/* Margins can be used to estimate the expected probabilities in a logistic
   regression model. */
logistic abanydum racedum educ age i.marital
margins, over(year racedum)
marginsplot, recast(line) recastci(rarea) xlabel(1980(10)2010) name(margins4, replace)