/*
 * Regression examples with indicator (dummy) variables.
 *
 * Part 1: Harris Bank starting salaries -- regressions with a male
 *         indicator, Cook's Distance influence checks, model selection,
 *         and an interaction term.
 * Part 2: Credit card balances -- indicator variables for a categorical
 *         variable with more than one category, plus a Box-Cox check.
 * Part 3: ABX sales -- trend and quarterly seasonal indicators.
 *
 * The two Excel paths and the URL below are environment specific; change
 * them to match where the data files are stored.
 */

/* ------------------------------------------------------------------ */
/* Part 1: Harris Bank data                                           */
/* ------------------------------------------------------------------ */

/* Import the Harris Bank data set */
proc import datafile="g:\Teaching\ECN410\ECN410.Beamers\Lecture7\HARRIS7.xlsx"
    dbms=xlsx
    out=work.bank
    replace;
    getnames=yes;
run;

proc contents data=bank;
run;

/*
   EDUCAT : years of schooling at time of hire
   EXPER  : months of previous work experience
   MALES  : indicator for male, male=1
   MONTHS : number of months after Jan 1 1969 that the individual was hired
   SALARY : starting salary
*/

/* initial regression; bank2 keeps the predicted values and Cook's D */
proc reg data=bank;
    model salary = educat males / vif;
    output out=bank2 p=salary_hat cookd=cd;
run;
quit;

/* sort by Cook's Distance so the most influential observations print first */
proc sort data=bank2;
    by descending cd;
run;

proc print data=bank2 (obs=10);
run;

proc means data=bank2;
run;

/* re-estimate the equation without the high influence observation:
   only observations with Cook's D of at most 0.1 are kept */
proc reg data=bank2;
    where cd le .1;
    model salary = educat males;
run;
quit;

/* run a regression adding experience before and after joining the bank */
proc reg data=bank;
    model salary = educat exper months males;
    output out=bank3 p=salary_hat cookd=cd;
run;
quit;

/* sort by influence measure */
proc sort data=bank3;
    by descending cd;
run;

proc print data=bank3 (obs=10);
run;

/* run a regression using a model selection procedure,
   more examples will follow */
proc glmselect data=bank;
    model salary = educat exper males months / select=cp;
run;
quit;

/** example involving an interaction term **/
data bank;
    set bank;
    maleeducat = MALES*EDUCAT; /* create the interaction variable */
run;

proc reg data=bank;
    model salary = educat males maleeducat;
    output out=bank4 p=salary_hat cookd=cd;
    F1:TEST MALES=MALEEDUCAT=0; /* use a partial F test, H0:MALES=MALEEDUCAT=0 */
run;
quit;

/* sort by Cook's Distance */
proc sort data=bank4;
    by descending cd;
run;

proc print data=bank4 (obs=10);
run;

/* ------------------------------------------------------------------ */
/* Part 2: BALANCES data                                              */
/* example with more than one category for the categorical variable,  */
/* in this example there are multiple dummy variables                 */
/* ------------------------------------------------------------------ */

/* Import the BALANCES data set */
proc import datafile="g:\Teaching\ECN410\ECN410.Beamers\Lecture7\balances.xlsx"
    dbms=xlsx
    out=work.balances
    replace;
    getnames=yes;
run;

proc contents data=balances;
run;

/*
   TENURE   : number of months since the credit card account was opened
   AVGPAY12 : average payment over the last 12 months
   Balance  : current balance on the account
   Card     : type of credit card
   Income   : income of the account holder
   all dollar variables are measured in thousands of dollars
*/

/* run preliminary regression */
proc reg data=balances;
    model balance = tenure avgpay12 income;
run;
quit;

/**** NOTICE THE RESULTS OF THE F TEST AND COMPARE TO THE RESULTS OF THE T TESTS ****/

/* create indicator variables for red and white cards; any other value of
   card leaves both indicators at 0 and so acts as the baseline category.
   NOTE(review): the comparison is case sensitive -- confirm the values of
   card are stored in lower case. */
data balances;
    set balances;
    red = 0;
    white = 0;
    if card='red' then red = 1;
    if card='white' then white = 1;
run;

/* run regression with indicator variables */
proc reg data=balances;
    model balance = tenure avgpay12 income red white;
run;
quit;

/* check for possible transformation */
proc transreg data=balances PBOXCOXTABLE;
    model boxcox(balance) = identity(tenure avgpay12 income);
run;

/* create new variable as the square root of balance */
data balances;
    set balances;
    sqrt_balance = balance**(.5);
run;

proc reg data=balances;
    model sqrt_balance = tenure avgpay12 income red white;
run;
quit;

/* ------------------------------------------------------------------ */
/* Part 3: Seasonal effects with time-series data                     */
/* ------------------------------------------------------------------ */

/* read data from internet or save file and change path */
filename ts url "http://www.public.asu.edu/~rgcox2/ECN410/data/ABXSALES7.csv";

/* firstobs=2 starts reading at the second line of the file
   (presumably to skip a header row -- confirm against the CSV) */
DATA abx_sales;
    INFILE ts dsd firstobs=2;
    input SALES;
run;

proc means data=abx_sales;
run;

/* Build a linear trend and quarterly indicators. qcalc is the observation
   number modulo 4, so it cycles 1, 2, 3, 0. Q2, Q3 and Q4 flag qcalc values
   2, 3 and 0; qcalc=1 has no indicator and is the baseline.
   NOTE(review): this assumes the first observation is a first quarter --
   confirm against the data. */
data abx_sales;
    set abx_sales;
    trend = _n_;          /* create a trend variable */
    qcalc = mod(trend,4); /* use the modulo operator to make first step in creating quarterly indicators */
    Q2 = 0;
    Q3 = 0;
    Q4 = 0;
    if qcalc=2 then Q2 = 1;
    if qcalc=3 then Q3 = 1;
    if qcalc=0 then Q4 = 1;
run;

/* fixed: the OBS= data set option was missing its closing parenthesis */
proc print data=abx_sales (obs=10);
run;

/* regress sales on trend and seasonal indicators */
proc reg data=abx_sales;
    model sales = trend Q2 Q3 Q4;
    T1:TEST Q2=Q3=Q4=0;
    T2:TEST Q2=Q3;
    T3:TEST Q2=Q3=0;
run;
QUIT;