/*
** Heckman's two-step estimation
** Step 1: probit ML for the employment (selection) equation.
** Step 2: OLS wage equation on the employed subsample, augmented with the
**         inverse Mills ratio; standard errors corrected by Greene's and
**         Ahn's methods.
*/
new ;

@ LOADING DATA @
load dat[1962,25] = mwpsid82.db ;   @ Data on married women from PSID 82 @

@ OPEN OUTPUT FILE @
output file = heckman1.out reset ;

@ DEFINE VARIABLES @
nlf    = dat[.,1]  ;   @ dummy variable for non-labor-force @
emp    = dat[.,2]  ;   @ dummy variable for employed workers @
wrate  = dat[.,3]  ;   @ hourly wage rate @
lrate  = dat[.,4]  ;   @ log of (hourly wage rate ($)+1) @
edu    = dat[.,5]  ;   @ years of schooling @
urb    = dat[.,6]  ;   @ dummy variable for urban resident @
minor  = dat[.,7]  ;   @ dummy variable for minority race @
age    = dat[.,8]  ;   @ age @
tenure = dat[.,9]  ;   @ # of months under current employer @
expp   = dat[.,10] ;   @ years of experience since age 18 @
regs   = dat[.,11] ;   @ dummy variable for South @
occw   = dat[.,12] ;   @ dummy variable for white collar @
occb   = dat[.,13] ;   @ dummy variable for blue collar @
indumg = dat[.,14] ;   @ dummy variable for manufacturing industry @
indumn = dat[.,15] ;   @ dummy variable for non-manufacturing industry @
unionn = dat[.,16] ;   @ dummy variable for union membership @
unempr = dat[.,17] ;   @ local unemployment rate @
ofinc  = dat[.,18] ;   @ other family income in 1980 ($) @
lofinc = dat[.,19] ;   @ log of (other family income + 1) @
kids   = dat[.,20] ;   @ # of children of age <= 17 @
wune80 = dat[.,21] ;   @ NOTE(review): meaning unknown -- confirm with codebook @
hwork  = dat[.,22] ;   @ hours of house work per week @
uspell = dat[.,23] ;   @ # of unemployed weeks in 1980 @
search = dat[.,24] ;   @ # of weeks looking for job in 1980 @
kids5  = dat[.,25] ;   @ # of children of age <= 5 @

@ DEFINE # of OBSERVATIONS @
n = rows(dat) ;

@ DEFINE DEPENDENT VARIABLE AND REGRESSORS @
@ Dependent variables @
yy1 = lrate ;   vny1 = {"lrate"} ;   @ wage equation (log wage) @
yy2 = emp ;     vny2 = {"emp"} ;     @ selection equation (employment) @

@ Regressors @
xx1 = ones(n,1)~edu~urb~minor~age~regs~unempr~expp~occw~occb~unionn~tenure ;
vnx1 = {"cons", "edu", "urb", "minor", "age", "regs", "unempr", "expp",
        "occw", "occb", "unionn", "tenure" };
xx2 = ones(n,1)~edu~urb~minor~age~regs~unempr~expp~lofinc~kids5 ;
vnx2 = {"cons", "edu", "urb", "minor", "age", "regs", "unempr", "expp",
        "lofinc", "kids5" };

/*
** DO NOT CHANGE FROM HERE
*/

/* Begin the first Step */

@ DEFINE INITIAL VALUE: linear-probability OLS coefficients as start values @
bb = invpd(xx2'xx2)*(xx2'yy2) ;

library optmum;
#include optmum.ext;
optset ;

@ Negative probit log-likelihood (optmum minimizes) @
proc f(b) ;
    local xb, ltt ;
    xb  = xx2*b ;
    @ cdfn(-xb) instead of 1-cdfn(xb): avoids ln(0) underflow for large xb @
    ltt = yy2.*ln( cdfn(xb) ) + (1-yy2).*ln( cdfn(-xb) ) ;
    retp( -sumc(ltt) ) ;
endp ;

b0 = bb ;
__title  = "Heckman's Two-Step Estimator";
_opgtol  = 1e-4;
_opstmth = "newton, half";
__output = 0 ;
{b,func,grad,retcode} = optmum(&f,b0) ;

@ value of likelihood function @
logl = -func ;

@ likelihood ratio test against the intercept-only probit @
rlogl = sumc(yy2)*ln(sumc(yy2)/n) + (n-sumc(yy2))*ln(1-sumc(yy2)/n) ;
lrt   = 2*(logl-rlogl);
lrdf  = cols(xx2)-1 ;

@ Covariance matrix by BHHH: outer product of per-observation scores @
proc ff(b) ;
    local xb, ltt ;
    xb  = xx2*b ;
    @ same stable form as in f(b): cdfn(-xb) = 1-cdfn(xb) without underflow @
    ltt = yy2.*ln( cdfn(xb) ) + (1-yy2).*ln( cdfn(-xb) ) ;
    retp( ltt ) ;
endp ;
gtt     = gradp(&ff,b);
covbhhh = invpd(gtt'gtt) ;

@ Computing Standard errors @
sebhhh   = sqrt(diag(covbhhh));
statbhhh = b~sebhhh~(b./sebhhh);
yybhhh   = vnx2~statbhhh;

let mask[1,4] = 0 1 1 1;
let fmt[4,3] = "-*.*s" 8 8 "*.*lf" 10 4 "*.*lf" 10 4 "*.*lf" 10 4;
format /rd 10,4 ;

"" ;
"Probit Estimation Result" ;
"------------------------" ;
" dependent variable: " $vny2 ;
"" ;
" # of observations: " rows(yy2) ;
"" ;
" log likelihood: " logl ;
"" ;
" Pseudo R-square: " (1-logl/rlogl) ;
"" ;
" LR test, df, p-val: " lrt lrdf cdfchic(lrt,lrdf) ;
"" ;
" Using BHHH ";
"" ;
"variable coeff. std. err. t-st " ;
yyprin = printfm(yybhhh,mask,fmt);
"" ;

/* Begin the second Step */

@ Data selection: keep only observations with emp = 1 @
yyy  = yy2 ;
zzz1 = yy1~xx1;
zzz1 = delif(zzz1,yyy .== 0);
yy1  = zzz1[.,1];
xx1  = zzz1[.,2:cols(zzz1)];
zzz2 = yy2~xx2;
zzz2 = delif(zzz2,yyy .== 0);
yy2  = zzz2[.,1];
xx2  = zzz2[.,2:cols(zzz2)];

@ Generating the selectivity regressor (inverse Mills ratio) @
xb2 = xx2*b ;
lam = pdfn(xb2)./cdfn(xb2) ;

@ Define the list of regressors at the second step @
zz = xx1~lam ;

@ OLS regression with the selectivity regressor @
gam  = invpd(zz'zz)*(zz'yy1) ;
sele = yy1 - zz*gam;
rsq  = 1 - (sele'sele)/( yy1'yy1 - rows(yy1)*meanc(yy1)^2 ) ;

@ Residual from two-step (identical to sele; reuse instead of recomputing) @
vv = sele ;

@ Calculating corrected variance of errors in eq. 1 @
@ Define sig12 = c: the coefficient on the Mills-ratio regressor @
sig12 = gam[rows(gam)] ;
hh  = sig12*xx2.*(-xb2.*lam - lam^2 );
xi  = sig12^2*(xb2.*lam + lam^2) ;
vv2 = vv^2 ;

@ Corrected variance of error in the first equation @
sig11 = meanc(xi) + meanc(vv2) ;
pai   = sig11 - xi ;

@ Covariance Matrices: cov1 = Greene's correction, cov2 = Ahn's correction @
cov1 = invpd(zz'zz)*(zz'hh)*covbhhh*(hh'zz)*invpd(zz'zz)
     + invpd(zz'zz)*( zz'(zz.*pai) )*invpd(zz'zz) ;
cov2 = invpd(zz'zz)*(zz'hh)*covbhhh*(hh'zz)*invpd(zz'zz)
     + invpd(zz'zz)*( zz'(zz.*vv2) )*invpd(zz'zz) ;

@ Standard errors @
se1 = sqrt(diag(cov1)) ;
se2 = sqrt(diag(cov2)) ;

@ Statistics @
res1  = gam~se1~(gam./se1);
res2  = gam~se2~(gam./se2);
vnxx1 = {"sigma12"} ;
vnx1  = vnx1|vnxx1;
res1  = vnx1~res1 ;
res2  = vnx1~res2 ;

"" ;
"Two-Step Estimation Result" ;
"------------------------" ;
" dependent variable: " $vny1 ;
"" ;
" # of observations : " rows(yy1) ;
"" ;
" R-Square: " rsq ;
"" ;
" sigma_11 " sig11 ;
"" ;
"Estimation results with Greene's methods";
"";
"variable coeff. std. err. t-st " ;
yyprin = printfm(res1,mask,fmt);
"" ;
"Estimation results with Ahn's methods";
"" ;
"variable coeff. std. err. t-st " ;
yyprin = printfm(res2,mask,fmt);
"" ;

output off ;