/* ECN410 students: use this file to practice using SAS.                     */
/* These are data described in Chatterjee and Hadi (2012),                   */
/* Regression Analysis by Example.                                           */
/* You can find them on Dr. Hadi's website:                                  */
/* http://www1.aucegypt.edu/faculty/hadi/RABE5/#Download                     */

/* Read the Excel workbook into work.milk; the first row holds the variable names. */
proc import datafile="g:\Teaching\ECN410\ECN410.DATA\milk.production.xlsx"
    dbms=xlsx
    out=work.milk
    replace;
    getnames=yes;
run;

/* get summary statistics */
proc means data=milk;
run;

/* Check the definitions of the variables at:
   http://www.public.asu.edu/~rgcox2/ECN410/data/about.milk.production.txt */

/* Verify that I79 means what the variable definition says. */
data milk;
    set milk;
    I79_mine = 0;                     /* create new variable and set it to 0 */
    if days gt 79 then I79_mine = 1;  /* set my new variable equal to 1 if Days is > 79 */
    I79_check = I79 - I79_mine;       /* difference between the original I79 in the data
                                         and the variable I created; 0 means they agree */
run;

/* If the two indicators agree on every row, min and max of I79_check are both 0. */
proc means data=milk;
    var I79_check;
run;

/* What is another way I could have checked this? */

/* If you want to take a better look at the current milk production,
   you can use the PROC UNIVARIATE command. */
proc univariate data=milk;
    var currentmilk;
    histogram;
run;

data milk;
    set milk;
    constant = 1;  /* create a constant which I will need for the boxplot */
run;

/* PROC BOXPLOT expects its input ordered by the group variable, so sort by I79
   first. The sorted copy goes to a separate data set so that the row order of
   work.milk is left untouched for the steps that follow. */
proc sort data=milk out=milk_by_I79;
    by I79;
run;

/* Create boxplots for current milk production: one box per I79 group, and one
   box for all cows together (grouped by the constant). */
proc boxplot data=milk_by_I79;
    plot currentmilk*I79;
    plot currentmilk*constant;
run;

/* Test whether there is a statistically significant difference in milk
   production between cows that have been lactating for more than 79 days
   (I79 = 1) and those that have not (I79 = 0). */
proc ttest data=milk;
    class I79;
    var currentmilk;
run;

/* Regress the current milk production on the number of days and the number of
   lactations; save predicted values and residuals in work.a. */
proc reg data=milk;
    model currentmilk = days lactation;
    output out=a p=predicted r=residual;
run;
quit;  /* PROC REG is interactive: end it explicitly instead of leaving it running */

/* Summarize the fitted values and residuals from the regression. */
proc means data=a mean sum std min max;
    var predicted residual;
run;