/********************************************* * Kleinbaum, Chapter 5, Problem #2, p. 65 * *********************************************/ /* Create the Stata dataset */ capture clear input person sbp quet age smk 1 135 2.876 45 0 2 122 3.251 41 0 3 130 3.100 49 0 4 148 3.768 52 0 5 146 2.979 54 1 6 129 2.790 47 1 7 162 3.668 60 1 8 160 3.612 48 1 9 144 2.368 44 1 10 180 4.637 64 1 11 166 3.877 59 1 12 138 4.032 51 1 13 152 4.116 64 0 14 138 3.673 56 0 15 140 3.562 54 1 16 134 2.998 50 1 17 145 3.360 49 1 18 142 3.024 46 1 19 135 3.171 57 0 20 142 3.401 56 0 21 150 3.628 56 1 22 144 3.751 58 0 23 137 3.296 53 0 24 132 3.210 50 0 25 149 3.301 54 1 26 132 3.017 48 1 27 120 2.789 43 0 28 126 2.956 43 1 29 161 3.800 63 0 30 170 4.132 63 1 31 152 3.962 62 0 32 164 4.010 65 0 end format quet %9.3f format smk %-9.0g label data "Klenbaum Chapter 5, Problem #2, p. 65" label variable person "ID" label variable sbp "Systolic Blood Pressure" label variable quet "Body Size" label variable age "Age" label variable smk "Smoker (0=no; 1=yes)" label define yesno 0 no 1 yes label values smk yesno list, noobs sep(11) /* Problem 2a */ #delimit ; graph twoway (lfit sbp quet) (scatter sbp quet), ytitle("Systolic Blood Pressure") xlabel( ,format(%4.1f)) name(g1, replace) nodraw ; #delimit cr #delimit ; graph twoway (lfit quet age) (scatter quet age), ytitle("Body Size") ylabel( ,format(%4.1f)) name(g2, replace) nodraw ; #delimit cr #delimit ; graph twoway (lfit sbp age) (scatter sbp age), ytitle("Systolic Blood Pressure") name(g3, replace) nodraw ; #delimit cr #delimit ; graph twoway (lfit sbp smk) (scatter sbp smk), ytitle("Systolic Blood Pressure") xlabel(0 1) name(g4, replace) nodraw ; #delimit cr #delimit ; graph twoway (scatteri 1 1 2 2 3 3 4 4 5 5, msymbol(i)), text(4.5 1.25 "sbp=70.6+21.5(quet)+e", placement(east) size(huge)) text(3.5 1.25 "quet=0.39+0.06(age)+e", placement(east) size(huge)) text(2.5 1.25 "sbp=50.1+1.6(age)+e", placement(east) size(huge)) xlabel("", nogrid) ylabel("", nogrid) xtitle("") ytitle("") yscale(lstyle(none)) xscale(lstyle(none)) plotregion(color("225 230 240")) name(g5, replace) nodraw ; #delimit cr graph combine g1 g2 g3 g5, title("Kleinbaum et al., Chapter 5 Problem 2") name(final1, replace) graph combine g1 g2 g3 g4, title("Kleinbaum et al., Chapter 5 Problem 2") name(final2, replace) scheme(alan) graph drop g1 g2 g3 g4 g5 /* Problem 2.b.1, p. 66 */ regress sbp quet #delimit ; graph twoway (lfitci sbp quet, stdf) (lfit sbp quet, pstyle(p2)) (scatter sbp quet, pstyle(p2)), ytitle("Systolic Blood Pressure") xlabel( ,format(%4.1f)) ylabel(100(10)200) title("Kleinbaum et al., Chapter 5 Problem 2.b.5") text(100 3.93 "sbp=70.6+21.5(quet)+e", placement(east)) ; #delimit cr /* Problem 2.b.5--Prediction Interval, p. 66 */ quietly { regress sbp quet di (e(rss)/(e(N)-2))^.5 /* Calculate s_y|x */ di invttail(32,.05/2) /* calculate t-critical */ noisily di _n _col(2) "Lower 95% prediction band: " %5.2f 144.5313-(2.036933*9.8115967)*((1+1/32)^.5) /* Display lower limit */ noisily di _col(2) "Upper 95% prediction band: " %5.2f 144.5313+(2.036933*9.8115967)*((1+1/32)^.5) /* Display upper limit */ } /* Problem 2.c and 2.d */ /* Problem 2.e.1, 2.e.2, 2.e.3, and 2.e.4 */ regress sbp smk #delimit ; graph twoway (lfit sbp smk) (scatter sbp smk), ytitle("Systolic Blood Pressure") aspectratio(1) ylabel(120(10)180) ytick(120(5)180) ymtick(120(1)180) xlabel(0 1, nogrid) plotregion(lcolor(black)) ; #delimit cr /* Run a t-test to show relationship to regression */ ttest sbp, by(smk)