/*---------------------------------------------------------------------------
  Lecture 5: heteroskedasticity examples.

  Sections:
    1. S&P 500 regression on its own lagged value
    2. Box-Cox transformation, worked example
    3. Weighted least squares, state education expenditure data
    4. White's test, housing expenditure data

  Note: TITLE is a global statement and stays in effect until it is changed
  or cancelled, so every step that sets title "BOX-COX" is followed by a
  bare "title;" to keep the heading off the output of later steps.
---------------------------------------------------------------------------*/


/*===========================================================================
  1. Heteroskedasticity example with S&P500 data
===========================================================================*/
proc import datafile="g:\Teaching\ECN410\ECN410.Beamers\Lecture5\SP5006.xlsx"
    dbms=xlsx
    out=work.sandp
    replace;
    getnames=yes;
run;

/* Create a lagged value: x is S_P from the previous observation. */
data sandp;
    set sandp;
    n = _n_;          /* observation number */
    x = lag(S_P);     /* missing for the first observation */
run;

/* Current stock index value as a function of the value in the previous
   time period. Studentized residuals, raw residuals and fitted values are
   written back to sandp. */
proc reg data=sandp;
    model s_p = x;
    output out=sandp rstudent=studentized r=residual p=fitted;
run;
quit;


/*===========================================================================
  2. A baked example for using and understanding Box-Cox
===========================================================================*/
proc import datafile="g:\Teaching\ECN410\ECN410.Beamers\Lecture5\boxcox.xlsx"
    dbms=xlsx
    out=work.boxcox_example
    replace;
    getnames=yes;
run;

proc reg data=boxcox_example;
    model y1 = x;
run;

proc reg data=boxcox_example;
    model y2 = x;
run;

proc reg data=boxcox_example;
    model y2sq = x;
run;

/* Box-Cox analysis of y2.
   A class variable could be added as: class(classvar) */
proc transreg data=boxcox_example PBOXCOXTABLE;
    model boxcox(y2) = identity(x);
    title "BOX-COX";
run;
title;

/* Box-Cox analysis of y3 */
proc transreg data=boxcox_example PBOXCOXTABLE;
    model boxcox(y3) = identity(x);
    title "BOX-COX";
run;
title;

proc reg data=boxcox_example;
    model y3 = x;
run;

proc reg data=boxcox_example;
    model lny3 = x;
run;


/*===========================================================================
  3. Example of Weighted Least Squares
===========================================================================*/
proc import datafile="g:\Teaching\ECN410\ECN410.Beamers\Lecture5\p198.xlsx"
    dbms=xlsx
    out=work.educ
    replace;
    getnames=yes;
run;

/* Data on state expenditures on education from Chatterjee and Hadi
   (page 198-199)
     Y      : per capita expenditure on education projected for 1975
     X1     : per capita income in 1973
     X2     : number of residents per thousand under 18 years of age in 1974
     X3     : number of residents per thousand living in urban areas in 1970
     region : (1) northeast (2) north central (3) south and (4) west
*/

/* Initial regression; hcc requests heteroskedasticity-consistent
   (robust) standard errors. */
proc reg data=educ;
    model y = x1 x2 x3 / hcc;
run;

/* Box-Cox analysis.
   Region could be added as a class variable: class(region) */
proc transreg data=educ PBOXCOXTABLE;
    model boxcox(y) = identity(x1 x2 x3);
    title "BOX-COX";
run;
title;   /* cancel the title so the regressions below are not labelled BOX-COX */

/* Create log of y (and of x3) */
data educ;
    set educ;
    lny  = log(y);
    lnx3 = log(x3);
run;

proc reg data=educ;
    model lny = x1 x2 lnx3 / hcc;   /* run with x3 or lnx3 */
run;

/* Alaska is an influential observation, omit from the regression */
data educ_49;
    set educ;
    where state ne 'AK';
run;

/* Refit without Alaska and keep the residuals for the weights. */
proc reg data=educ_49;
    model y = x1 x2 x3 / hcc;
    output out=educ_49 r=residuals;
run;

/* Create weights.
   weights      : per-region residual variance, SSR_region / (n_region - 1)
   mle_variance : overall residual variance, SSR / n (one row) */
proc sql;
    create table weights as
    select
        region,
        sum(residuals*residuals) / (count(region) - 1) as region_variance
    from educ_49
    group by region;
quit;

proc sql;
    create table mle_variance as
    select
        sum(residuals*residuals) / count(region) as error_variance
    from educ_49;
quit;

/* Attach the regional variance (matched on region) and the single overall
   variance (cross join against the one-row table) to every observation. */
proc sql;
    create table educ_49a as
    select
        a.*,
        b.region_variance,
        c.error_variance
    from educ_49 as a
    inner join weights as b
        on a.region = b.region
    cross join mle_variance as c;
quit;

/* Create weighted versions of the dependent and independent variables.
   c is the transformed intercept column, so the WLS model is fitted with
   noint. */
data educ_49a;
    set educ_49a;
    weight_wls = sqrt(region_variance / error_variance);
    c   = 1  / weight_wls;
    wy  = y  / weight_wls;
    wx1 = x1 / weight_wls;
    wx2 = x2 / weight_wls;
    wx3 = x3 / weight_wls;
run;

/* WLS */
proc reg data=educ_49a;
    model wy = c wx1 wx2 wx3 / noint hcc;
run;

/* Optional check of the weighted data:
proc print data=educ_49a;
run;
*/


/*===========================================================================
  4. White's test for heteroskedasticity

  Housing expenditure data and example from Pindyck and Rubinfeld (1991)
  chapter 6 page 131.
===========================================================================*/
proc import datafile="G:\Teaching\ECN410\ECN410.Beamers\Lecture5\PR.housing.xlsx"
    dbms=xlsx
    out=work.housing
    replace;
    getnames=yes;
run;

proc means data=housing;
run;

/* Use the hcc option to provide heteroskedasticity consistent (robust)
   standard errors. Residuals are written back to housing for the test
   below. */
proc reg data=housing;
    model housing_expenditure = income / hcc;
    output out=housing r=residuals;
run;

/* Box-Cox analysis (no class variable used) */
proc transreg data=housing PBOXCOXTABLE;
    model boxcox(housing_expenditure) = identity(income);
    title "BOX-COX";
run;
title;   /* cancel the title so the White's test output is not labelled BOX-COX */

/* Perform White's test for heteroskedasticity: regress the squared
   residuals on income and income squared. */
data housing;
    set housing;
    resid_sq  = residuals**2;
    income_sq = income**2;
run;

/* Take n*R^2 from the regression below; this is distributed chi-square
   with p-1 degrees of freedom. */
proc reg data=housing;
    model resid_sq = income income_sq;
run;
quit;