# This code is used to examine the performance of GMM method for 3-level correlated binary simulated data. Here we use balanced data for simulation study. # Key parameters for this code: # n=number of primary cluster; # ni=number of secondary cluster within each primary cluster; # nij=number of observations within each secondary cluster; # beta=covariates of interest; # sigma1=variance of random effect at level 3; # sigma2=variance of random effect at level 2; ################################################################ library(rootSolve) # Package used for root-finding by Newton Raphson method library(compiler) # Package used for speeding up the code enableJIT(3) # From the library "complier" library(foreach) # Package used for parallel processing library(doParallel) # Package used for parallel processing # Find out how many cores are available (if you don't already know) detectCores() ## [1] 4 # Create cluster with desired number of cores cl <- makeCluster(3) # Register cluster registerDoParallel(cl) # Find out how many cores are being used getDoParWorkers() random1<-function(n){ # Function used to generate realizations of standard normal distribution for random effect at level 3. r<-matrix(0,n,2500) for (i in 1:n){ r[i,]<-rnorm(2500) } return(r) } random2<-function(n,ni){ # Function used to generate realizations of standard normal distribution for random effect at level 2. r<-rep(0,n*ni*2500) dim(r)<-c(n,ni,2500) for (i in 1:n){ for (j in 1:ni){ r[i,j,]<-rnorm(2500) } } return (r) } gendata<-function(n,ni,nij,sigma1,sigma2,seed){ # Function used to generate simulated data. set.seed(seed) N<-n*ni*nij x1<-vector(length=N) x2<-vector(length=N) x3<-vector(length=N) x4<-vector(length=N) x5<-vector(length=N) xmatrix<-cbind(x1,x2,x3,x4,x5) for (i in 1:n){ xmatrix[((i-1)*ni*nij+1):(i*ni*nij),1]<-i } x21<-vector(length=ni*nij) for (i in 1:ni){ x21[((i-1)*nij+1):(i*nij)]<-i } xmatrix[,2]<-rep(x21,n) for (i in 1:N){ xmatrix[i,3]<-i%%nij if (xmatrix[i,3]==0){ xmatrix[i,3]<-nij } } for (i in 1:N){ if ((xmatrix[i,2]<=(ni/2))&(xmatrix[i,3]<=(nij/2))){ xmatrix[i,4]<-1 xmatrix[i,5]<-0.5 }else if ((xmatrix[i,2]<=(ni/2))&(xmatrix[i,3]>(nij/2))){ xmatrix[i,4]<-0 xmatrix[i,5]<--0.5 }else if ((xmatrix[i,2]>(ni/2))&(xmatrix[i,3]<=(nij/2))){ xmatrix[i,4]<-0 xmatrix[i,5]<--1.5 }else if ((xmatrix[i,2]>(ni/2))&(xmatrix[i,3]>(nij/2))){ xmatrix[i,4]<-1 xmatrix[i,5]<--1 } } r1<-rnorm(n) r2<-rnorm(n*ni) ri1<-vector(length=N) ri2<-vector(length=N) p<-vector(length=N) for (i in 1:n){ ri1[((i-1)*ni*nij+1):(i*ni*nij)]<-r1[i] } for (i in 1:(n*ni)){ ri2[((i-1)*nij+1):(i*nij)]<-r2[i] } for (i in 1:N){ p[i]<-exp(xmatrix[i,4]+xmatrix[i,5]+sigma1*ri1[i]+sigma2*ri2[i])/(1+exp(xmatrix[i,4]+xmatrix[i,5]+sigma1*ri1[i]+sigma2*ri2[i])) # We generate simulated probabilities here. } y<-rbinom(N,1,p) # We generate our responses based on simulated probabilities. return (list(y=y,X=xmatrix)) } solve1<-function(data,beta,n,ni,nij,sigma1,sigma2,seed){ # This function corresponds to the first GMM estimating eqaution mentioned in the model. It will be used to update beta. set.seed(seed) r1<-random1(n) r2<-random2(n,ni) miu<-rep(0,n*ni*nij) dim(miu)<-c(n,ni,nij) devy<-rep(0,n*ni*(ncol(data$X)-3)*nij) dim(devy)<-c(n,ni,(ncol(data$X)-3),nij) miu1<-rep(0,2500) devyy<-rep(0,n*ni*nij) dim(devyy)<-c(n,ni,nij) result<-foreach(i=1:n, .combine='+') %dopar% { # We use parallel processing to increase the efficiency of the code. faiy<-matrix(0,ncol(data$X)-3,ncol(data$X)-2) for (j in 1:ni){ for (k in 1:nij){ miu1<-1-1/(1+exp(data$X[(i-1)*ni*nij+(j-1)*nij+k,-(1:3)]%*%beta+sigma1*r1[i,]+sigma2*r2[i,j,])) miu[i,j,k]<-sum(miu1)/2500 devyy[i,j,k]<-sum(miu1*(1-miu1)*(1-miu1)*(2-1/(1-miu1)))/2500 devy[i,j,,k]<-sum(miu1*(1-miu1))/2500*data$X[(i-1)*ni*nij+(j-1)*nij+k,-(1:3)] } faiy[,1]<-faiy[,1]+devy[i,j,,]%*%(data$y[((i-1)*ni*nij+(j-1)*nij+1):((i-1)*ni*nij+j*nij)]-miu[i,j,]) for (k in 1:nij){ faiy[,-1]<-faiy[,-1]-t(t(devy[i,j,,k]))%*%t(devy[i,j,,k])+devyy[i,j,k]*t(t(data$X[(i-1)*ni*nij+(j-1)*nij+k,-(1:3)]))%*%t(data$X[(i-1)*ni*nij+(j-1)*nij+k,-(1:3)])*(data$y[(i-1)*ni*nij+(j-1)*nij+k]-miu[i,j,k]) } } return(faiy) } return(beta-solve(result[,-1])%*%result[,1]) } solve2<-function(data,beta,n,ni,nij,sigma1,sigma2,seed){ # This function corresponds to the second GMM estimating eqaution mentioned in the model. It will be used to update sigma2. set.seed(seed) r1<-random1(n) r2<-random2(n,ni) miu<-rep(0,n*ni*nij) dim(miu)<-c(n,ni,nij) dijk<-rep(0,n*ni*nij) dim(dijk)<-c(n,ni,nij) thoij<-rep(NA,n*ni*nij*nij) dim(thoij)<-c(n,ni,nij,nij) dijj<-rep(NA,n*ni*nij*nij) dim(dijj)<-c(n,ni,nij,nij) theta<-vector(length=n*ni*nij*(nij+1)/2) devg<-rep(0,n*ni*nij*(nij+1)/2) dim(devg)<-c(n,ni,nij*(nij+1)/2) g<-vector(length=n*ni*nij*(nij+1)/2) corg<-matrix(0,n*ni*nij*(nij+1)/2,4) miu1<-rep(0,2500) miu2<-rep(0,2500) key<-rep(0,ni*nij*2500) dim(key)<-c(ni,nij,2500) devgg<-rep(0,n*ni*nij*(nij+1)/2) dim(devgg)<-c(n,ni,nij*(nij+1)/2) dgijj<-rep(NA,n*ni*nij*nij) dim(dgijj)<-c(n,ni,nij,nij) dgijk<-rep(0,n*ni*nij) dim(dgijk)<-c(n,ni,nij) seq<-seq(nij,1,by=-1) for (i in 1:(nij-1)){ seq[i+1]<-seq[i]+seq[i+1] } result<-foreach(i=1:n, .combine='+') %dopar% { faig<-c(0,0) for (j in 1:ni){ for (k in 1:nij){ key[j,k,]<-1-1/(1+exp(data$X[(i-1)*ni*nij+(j-1)*nij+k,-(1:3)]%*%beta+sigma1*r1[i,]+sigma2*r2[i,j,])) } } for (j in 1:ni){ rij<-r2[i,j,] for (k in 1:nij){ miu1<-key[j,k,] dijk[i,j,k]<-sum(miu1*(1-miu1)*rij)/2500 dgijk[i,j,k]<-sum(miu1*(1-miu1)*(1-miu1)*(2-1/(1-miu1))*rij*rij)/2500 miu[i,j,k]<-sum(miu1)/2500 for (l in 1:nij){ if (kseq[l-1])){ g[(i-1)*ni*nij*(nij+1)/2+(j-1)*nij*(nij+1)/2+k]<-data$y[(i-1)*ni*nij+(j-1)*nij+(l-1)]*data$y[(i-1)*ni*nij+(j-1)*nij+(k-seq[l-1]+l-1)] corg[k,]<-c(i,j,(l-1),(k-seq[l-1]+l-1)) break } } } if(corg[k,3]==corg[k,4]){ theta[(i-1)*ni*nij*(nij+1)/2+(j-1)*nij*(nij+1)/2+k]<-miu[i,j,corg[k,3]] devg[i,j,k]<-dijk[i,j,k] devgg[i,j,k]<-dgijk[i,j,k] } else { theta[(i-1)*ni*nij*(nij+1)/2+(j-1)*nij*(nij+1)/2+k]<-thoij[i,j,corg[k,3],corg[k,4]] devg[i,j,k]<-dijj[i,j,corg[k,3],corg[k,4]] devgg[i,j,k]<-dgijj[i,j,corg[k,3],corg[k,4]] } } faig[1]<-faig[1]+t(devg[i,j,])%*%(g[((i-1)*ni*nij*(nij+1)/2+(j-1)*nij*(nij+1)/2+1):((i-1)*ni*nij*(nij+1)/2+j*nij*(nij+1)/2)]-theta[((i-1)*ni*nij*(nij+1)/2+(j-1)*nij*(nij+1)/2+1):((i-1)*ni*nij*(nij+1)/2+j*nij*(nij+1)/2)]) faig[2]<-faig[2]-t(devg[i,j,])%*%t(t(devg[i,j,]))+t(devgg[i,j,])%*% (g[((i-1)*ni*nij*(nij+1)/2+(j-1)*nij*(nij+1)/2+1):((i-1)*ni*nij*(nij+1)/2+j*nij*(nij+1)/2)]-theta[((i-1)*ni*nij*(nij+1)/2+(j-1)*nij*(nij+1)/2+1):((i-1)*ni*nij*(nij+1)/2+j*nij*(nij+1)/2)]) } return(faig) } return(sigma2-result[1]/result[2]) } solve3<-function(data,beta,n,ni,nij,sigma1,sigma2,seed){ # This function corresponds to the third GMM estimating eqaution mentioned in the model. It will be used to update sigma1. set.seed(seed) r1<-random1(n) r2<-random2(n,ni) miu<-rep(0,n*ni*nij) dim(miu)<-c(n,ni,nij) dijk<-rep(0,n*ni*nij) dim(dijk)<-c(n,ni,nij) miu1<-rep(0,2500) miu2<-rep(0,2500) thoijj<-rep(NA,n*ni*ni*nij*nij) dim(thoijj)<-c(n,ni,ni,nij,nij) dijj<-rep(NA,n*ni*ni*nij*nij) dim(dijj)<-c(n,ni,ni,nij,nij) theta<-vector(length=n*ni*(ni+1)/2) devg<-rep(0,n*ni*(ni+1)/2) dim(devg)<-c(n,ni*(ni+1)/2) g<-vector(length=n*ni*(ni+1)/2) corg<-matrix(0,n*ni*(ni+1)/2,3) key<-rep(0,ni*nij*2500) dim(key)<-c(ni,nij,2500) devgg<-rep(0,n*ni*(ni+1)/2) dim(devgg)<-c(n,ni*(ni+1)/2) dgijj<-rep(NA,n*ni*ni*nij*nij) dim(dgijj)<-c(n,ni,ni,nij,nij) dgijk<-rep(0,n*ni*nij) dim(dgijk)<-c(n,ni,nij) seq<-seq(ni,1,by=-1) for (i in 1:(ni-1)){ seq[i+1]<-seq[i]+seq[i+1] } result<-foreach(i=1:n, .combine='+') %dopar% { faig<-c(0,0) for (j in 1:ni){ for (k in 1:nij){ key[j,k,]<-1-1/(1+exp(data$X[(i-1)*ni*nij+(j-1)*nij+k,-(1:3)]%*%beta+sigma1*r1[i,]+sigma2*r2[i,j,])) } } ri<-r1[i,] for (j in 1:ni){ for (l in 1:nij){ miu1<-key[j,l,] dijk[i,j,l]<-sum(miu1*(1-miu1)*ri)/2500 dgijk[i,j,l]<-sum(miu1*(1-miu1)*(1-miu1)*(2-1/(1-miu1))*ri*ri)/2500 miu[i,j,l]<-sum(miu1)/2500 for (k in 1:ni){ if (j<=k){ for (m in 1:nij){ miu2<-key[k,m,] thoijj[i,j,k,l,m]<-sum(miu1*miu2)/2500 dijj[i,j,k,l,m]<-sum(miu1*miu2*(1-miu1+1-miu2)*ri)/2500 dgijj[i,j,k,l,m]<-sum((miu1*(1-miu1)*miu2*(1-miu1+1-miu2)+miu1*(1-miu2)*miu2*(1-miu1+1-miu2)-miu1*miu2*(miu1*(1-miu1)+miu2*(1-miu2)))*ri*ri)/2500 } } } } } for (j in 1:(ni*(ni+1)/2)){ if (j<=ni){ g[(i-1)*ni*(ni+1)/2+j]<-sum(data$y[((i-1)*ni*nij+(j-1)*nij+1):((i-1)*ni*nij+j*nij )])/nij*sum(data$y[((i-1)*ni*nij+(j-1)*nij+1):((i-1)*ni*nij+j*nij )])/nij corg[j,]<-c(i,j,j) } else { for (l in 2:ni){ if ((j<=seq[l])&(j>seq[l-1])){ g[(i-1)*ni*(ni+1)/2+j]<-sum(data$y[((i-1)*ni*nij+(l-2)*nij+1):((i-1)*ni*nij+(l-1)*nij )])/nij*sum(data$y[((i-1)*ni*nij+(j-seq[l-1]+l-2)*nij+1):((i-1)*ni*nij+(j-seq[l-1]+l-1)*nij )])/nij corg[j,]<-c(i,(l-1),(j-seq[l-1]+l-1)) break } } } if(corg[j,2]==corg[j,3]){ theta[(i-1)*ni*(ni+1)/2+j]<-(sum(miu[i,j,])+sum(thoijj[i,j,j,,]*(rep(1,nij)%*%t(rep(1,nij))-diag(nij))))/nij/nij devg[i,j]<-(sum(dijk[i,j,])+sum(dijj[i,j,j,,]*(rep(1,nij)%*%t(rep(1,nij))-diag(nij))))/nij/nij devgg[i,j]<-(sum(dgijk[i,j,])+sum(dgijj[i,j,j,,]*(rep(1,nij)%*%t(rep(1,nij))-diag(nij))))/nij/nij } else { theta[(i-1)*ni*(ni+1)/2+j]<-sum(thoijj[i,corg[j,2],corg[j,3],,])/nij/nij devg[i,j]<-sum(dijj[i,corg[j,2],corg[j,3],,])/nij/nij devgg[i,j]<-sum(dgijj[i,corg[j,2],corg[j,3],,])/nij/nij } } faig[1]<-t(devg[i,])%*%(g[((i-1)*ni*(ni+1)/2+1):(i*ni*(ni+1)/2)]-theta[((i-1)*ni*(ni+1)/2+1):(i*ni*(ni+1)/2)]) faig[2]<--t(devg[i,])%*%t(t(devg[i,]))+t(devgg[i,])%*%(g[((i-1)*ni*(ni+1)/2+1):(i*ni*(ni+1)/2)]-theta[((i-1)*ni*(ni+1)/2+1):(i*ni*(ni+1)/2)]) return(faig) } return(sigma1-result[1]/result[2]) } root1<-function(data,beta,n,ni,nij,sigma1,sigma2,seed){ #Here we use Newton's method to find roots. diff<-1 niter<-0 while ((diff>=0.01) & (niter<=100)){ niter=niter+1 answer<-solve1(data,beta,n,ni,nij,sigma1,sigma2,seed) diff<-norm(answer-beta,type="2") beta<-answer print (beta) flush.console() } return(beta) } root2<-function(data,beta,n,ni,nij,sigma1,sigma2,seed){ diff<-1 niter<-0 while ((diff>=0.01) & (niter<=100)){ niter=niter+1 answer<-solve2(data,beta,n,ni,nij,sigma1,sigma2,seed) diff<-abs(answer-sigma2) sigma2<-answer print (sigma2) flush.console() } return(sigma2) } root3<-function(data,beta,n,ni,nij,sigma1,sigma2,seed){ diff<-1 niter<-0 while ((diff>=0.01) & (niter<=100)){ niter=niter+1 answer<-solve3(data,beta,n,ni,nij,sigma1,sigma2,seed) diff<-abs(answer-sigma1) sigma1<-answer print (sigma1) flush.console() } return(sigma1) } gmm<-function(data,beta0,n,ni,nij,sigma10,sigma20,seed,maxiter=300,tol=.Machine$double.eps^0.5){ # We use this function to get final estimates. We keep updating one parameter each time until they converge. niter1<-0 diff1<-tol+1 diff2<-tol+1 diff3<-tol+1 while ((max(diff1,diff2,diff3)>=tol) & (niter1<=maxiter)){ niter1<-niter1+1 solution1<-root1(data=data,beta=beta0,n=n,ni=ni,nij=nij,sigma1=sigma10,sigma2=sigma20,seed=seed) diff1<-norm(solution1-beta0,type="2") beta0<-solution1 time1<-proc.time() - ptm ptm <- proc.time() print (time1) flush.console() solution2<-root2(data=data,beta=beta0,n=n,ni=ni,nij=nij,sigma1=sigma10,sigma2=sigma20,seed=seed) diff2<-abs(solution2-sigma20) sigma20<-solution2 time2<-proc.time() - ptm ptm <- proc.time() print (time2) flush.console() solution3<-root3(data=data,beta=beta0,n=n,ni=ni,nij=nij,sigma1=sigma10,sigma2=sigma20,seed=seed) diff3<-abs(solution3-sigma10) sigma10<-solution3 time3<-proc.time() - ptm ptm <- proc.time() print (time3) flush.console() } if (niter1>maxiter){ max<-"Maximum number of iterations was reached." } else { max<-"OK" } return(list(rootbeta=solution1,rootsigma2=solution2,rootsigma1=solution3,niter1=niter1,diff1=diff1,diff2=diff2,diff3=diff3,max=max)) } final<-function(seed,num){ # Here we set the parameters for the simulations. For instance, in the following code, we want to perform 100 simulations. And for each simulation we want to obtain the estimates when n=100, ni=6, nij=6, sigma1=0.8 and sigma2=1.2. result<-matrix(0,num,4) for (i in 1:num){ data1<-gendata(100,6,6,0.8,1.2,seed+i) r.try<-try(gmm(data1,c(1,1),100,6,6,0.8,1.2,32,20,0.01)) if ('try-error' %in% class(r.try)){ r.try<-try(gmm(data1,c(1,1),100,6,6,-0.8,-1.2,32,20,0.01)) # We want to explore different starting values to make the estimates converge correctly. if ('try-error' %in% class(r.try)) next else r<-r.try } else r<-r.try result[i,]<-c(r$rootbeta,abs(r$rootsigma1),abs(r$rootsigma2)) print (result[i,]) flush.console() } return(result) } i<-NULL ptm<-proc.time() all<-final(1,100) stopCluster(cl)