# This code is used to examine the performance of GQL method for 3-level correlated binary simulated data. Here we use balanced data for simulation study. # Key parameters for this code: # n=number of primary cluster; # ni=number of secondary cluster within each primary cluster; # nij=number of observations within each secondary cluster; # beta=covariates of interest; # sigma1=variance of random effect at level 3; # sigma2=variance of random effect at level 2; ################################################################ library(rootSolve) # Package used for root-finding by Newton Raphson method library(compiler) # Package used for speeding up the code enableJIT(3) # From the library "complier" library(foreach) library(doParallel) # Find out how many cores are available (if you don't already know) detectCores() ## [1] 4 # Create cluster with desired number of cores cl <- makeCluster(3) # Register cluster registerDoParallel(cl) # Find out how many cores are being used getDoParWorkers() random1<-function(n){ # Function used to generate realizations of standard normal distribution for random effect at level 3. r<-matrix(0,n,2500) for (i in 1:n){ r[i,]<-rnorm(2500) } return(r) } random2<-function(n,ni){ # Function used to generate realizations of standard normal distribution for random effect at level 2. r<-rep(0,n*ni*2500) dim(r)<-c(n,ni,2500) for (i in 1:n){ for (j in 1:ni){ r[i,j,]<-rnorm(2500) } } return (r) } gendata<-function(n,ni,nij,sigma1,sigma2,seed){ # Function used to generate simulated data. set.seed(seed) N<-n*ni*nij x1<-vector(length=N) x2<-vector(length=N) x3<-vector(length=N) x4<-vector(length=N) x5<-vector(length=N) xmatrix<-cbind(x1,x2,x3,x4,x5) for (i in 1:n){ xmatrix[((i-1)*ni*nij+1):(i*ni*nij),1]<-i } x21<-vector(length=ni*nij) for (i in 1:ni){ x21[((i-1)*nij+1):(i*nij)]<-i } xmatrix[,2]<-rep(x21,n) for (i in 1:N){ xmatrix[i,3]<-i%%nij if (xmatrix[i,3]==0){ xmatrix[i,3]<-nij } } for (i in 1:N){ if ((xmatrix[i,2]<=(ni/2))&(xmatrix[i,3]<=(nij/2))){ xmatrix[i,4]<-1 xmatrix[i,5]<-0.5 }else if ((xmatrix[i,2]<=(ni/2))&(xmatrix[i,3]>(nij/2))){ xmatrix[i,4]<-0 xmatrix[i,5]<--0.5 }else if ((xmatrix[i,2]>(ni/2))&(xmatrix[i,3]<=(nij/2))){ xmatrix[i,4]<-0 xmatrix[i,5]<--1.5 }else if ((xmatrix[i,2]>(ni/2))&(xmatrix[i,3]>(nij/2))){ xmatrix[i,4]<-1 xmatrix[i,5]<--1 } } r1<-rnorm(n) r2<-rnorm(n*ni) ri1<-vector(length=N) ri2<-vector(length=N) p<-vector(length=N) for (i in 1:n){ ri1[((i-1)*ni*nij+1):(i*ni*nij)]<-r1[i] } for (i in 1:(n*ni)){ ri2[((i-1)*nij+1):(i*nij)]<-r2[i] } for (i in 1:N){ p[i]<-exp(xmatrix[i,4]+xmatrix[i,5]+sigma1*ri1[i]+sigma2*ri2[i])/(1+exp(xmatrix[i,4]+xmatrix[i,5]+sigma1*ri1[i]+sigma2*ri2[i])) # We generate simulated probabilities here. } y<-rbinom(N,1,p) # We generate our responses based on simulated probabilities. return (list(y=y,X=xmatrix)) } solve1<-function(data,beta,n,ni,nij,sigma1,sigma2,seed){ # This function corresponds to the first GQL estimating eqaution mentioned in the model. It will be used to update beta. set.seed(seed) r1<-random1(n) r2<-random2(n,ni) miu1<-rep(0,2500) miu<-rep(0,n*ni*nij) dim(miu)<-c(n,ni,nij) devy<-rep(0,n*ni*(ncol(data$X)-3)*nij) dim(devy)<-c(n,ni,(ncol(data$X)-3),nij) covy<-rep(0,n*ni*nij*nij) dim(covy)<-c(n,ni,nij,nij) thoij<-rep(0,n*ni*nij*nij) dim(thoij)<-c(n,ni,nij,nij) key<-rep(0,ni*nij*2500) dim(key)<-c(ni,nij,2500) result<-foreach(i=1:n, .combine='+') %dopar% { # We use parallel processing to increase the efficiency of the code. fun<-matrix(0,ncol(data$X)-3,1) for (j in 1:ni){ for (k in 1:nij){ key[j,k,]<-1-1/(1+exp(data$X[(i-1)*ni*nij+(j-1)*nij+k,-(1:3)]%*%beta+sigma1*r1[i,]+sigma2*r2[i,j,])) } } for (j in 1:ni){ for (k in 1:nij){ miu1<-key[j,k,] miu[i,j,k]<-sum(miu1)/2500 for (l in 1:nij){ if (kseq[l-1])){ g[(i-1)*ni*nij*(nij+1)/2+(j-1)*nij*(nij+1)/2+k]<-data$y[(i-1)*ni*nij+(j-1)*nij+(l-1)]*data$y[(i-1)*ni*nij+(j-1)*nij+(k-seq[l-1]+l-1)] corg[k,]<-c(i,j,(l-1),(k-seq[l-1]+l-1)) break } } } if(corg[k,3]==corg[k,4]){ theta[(i-1)*ni*nij*(nij+1)/2+(j-1)*nij*(nij+1)/2+k]<-miu[i,j,corg[k,3]] devg[i,j,k]<-dijk[i,j,k] } else { theta[(i-1)*ni*nij*(nij+1)/2+(j-1)*nij*(nij+1)/2+k]<-thoij[i,j,corg[k,3],corg[k,4]] devg[i,j,k]<-dijj[i,j,corg[k,3],corg[k,4]] } for (l in 1:(nij*(nij+1)/2)){ if (l<=k){ length<-length(unique(c(corg[k,3],corg[k,4],corg[l,3],corg[l,4]))) length1<-length(unique(c(corg[k,3],corg[k,4]))) length2<-length(unique(c(corg[l,3],corg[l,4]))) order<-sort(unique(c(corg[k,3],corg[k,4],corg[l,3],corg[l,4]))) if (length==1){ covg[i,j,k,l]<-miu[i,j,corg[k,3]]-miu[i,j,corg[k,3]]*miu[i,j,corg[k,3]] }else if ((length==2)&(length1==2)&(length2==2)){ covg[i,j,k,l]<-thoij[i,j,corg[k,3],corg[k,4]]-thoij[i,j,corg[k,3],corg[k,4]]*thoij[i,j,corg[l,3],corg[l,4]] }else if ((length==2)&(length1==1)&(length2==2)){ covg[i,j,k,l]<-thoij[i,j,corg[l,3],corg[l,4]]-miu[i,j,corg[k,3]]*thoij[i,j,corg[l,3],corg[l,4]] }else if ((length==2)&(length1==2)&(length2==1)){ covg[i,j,k,l]<-thoij[i,j,corg[k,3],corg[k,4]]-thoij[i,j,corg[k,3],corg[k,4]]*miu[i,j,corg[l,3]] }else if ((length==2)&(length1==1)&(length2==1)){ covg[i,j,k,l]<-thoij[i,j,order[1],order[2]]-miu[i,j,corg[k,3]]*miu[i,j,corg[l,3]] }else if ((length==3)&(length1==1)&(length2==2)){ covg[i,j,k,l]<-thoijk[i,j,order[1],order[2],order[3]]-miu[i,j,corg[k,3]]*thoij[i,j,corg[l,3],corg[l,4]] }else if ((length==3)&(length1==2)&(length2==1)){ covg[i,j,k,l]<-thoijk[i,j,order[1],order[2],order[3]]-thoij[i,j,corg[k,3],corg[k,4]]*miu[i,j,corg[l,3]] }else if ((length==3)&(length1==2)&(length2==2)){ covg[i,j,k,l]<-thoijk[i,j,order[1],order[2],order[3]]-thoij[i,j,corg[k,3],corg[k,4]]*thoij[i,j,corg[l,3],corg[l,4]] }else if (length==4){ covg[i,j,k,l]<-thoijkl[i,j,order[1],order[2],order[3],order[4]]-thoij[i,j,corg[k,3],corg[k,4]]*thoij[i,j,corg[l,3],corg[l,4]] } } } } covg[i,j,,][upper.tri(covg[i,j,,])]<-t(covg[i,j,,])[upper.tri(covg[i,j,,])] fun<-fun+devg[i,j,]%*%solve(covg[i,j,,])%*%(g[((i-1)*ni*nij*(nij+1)/2+(j-1)*nij*(nij+1)/2+1):((i-1)*ni*nij*(nij+1)/2+j*nij*(nij+1)/2)]-theta[((i-1)*ni*nij*(nij+1)/2+(j-1)*nij*(nij+1)/2+1):((i-1)*ni*nij*(nij+1)/2+j*nij*(nij+1)/2)]) } return(fun) } return(result) } solve3<-function(data,beta,n,ni,nij,sigma1,sigma2,seed){ # This function corresponds to the third GQL estimating eqaution mentioned in the model. It will be used to update sigma2. set.seed(seed) r1<-random1(n) r2<-random2(n,ni) miu<-rep(0,n*ni*nij) dim(miu)<-c(n,ni,nij) dijk<-rep(0,n*ni*nij) dim(dijk)<-c(n,ni,nij) miu1<-rep(0,2500) miu3<-rep(0,2500) miu2<-rep(0,n*ni*2500) dim(miu2)<-c(n,ni,2500) thoij1<-rep(0,2500) thoijk1<-rep(0,2500) thoijkl1<-rep(0,2500) thoij2<-rep(0,n*ni*2500) dim(thoij2)<-c(n,ni,2500) thoijk2<-rep(0,n*ni*2500) dim(thoijk2)<-c(n,ni,2500) thoijkl2<-rep(0,n*ni*2500) dim(thoijkl2)<-c(n,ni,2500) thoijj<-rep(NA,n*ni*ni*nij*nij) dim(thoijj)<-c(n,ni,ni,nij,nij) thoijj1<-rep(0,2500) dijj<-rep(NA,n*ni*ni*nij*nij) dim(dijj)<-c(n,ni,ni,nij,nij) thokj<-rep(NA,n*ni*ni) dim(thokj)<-c(n,ni,ni) thokj1<-rep(0,2500) thoj<-rep(NA,n*ni) dim(thoj)<-c(n,ni) thoj1<-rep(0,2500) thojj<-rep(NA,n*ni*ni) dim(thojj)<-c(n,ni,ni) thojj1<-rep(0,2500) thojjk<-rep(NA,n*ni*ni*ni) dim(thojjk)<-c(n,ni,ni,ni) thojjk1<-rep(0,2500) thojjkl<-rep(NA,n*ni*ni*ni*ni) dim(thojjkl)<-c(n,ni,ni,ni,ni) thojjkl1<-rep(0,2500) theta<-vector(length=n*ni*(ni+1)/2) covg<-rep(0,n*ni*(ni+1)/2*ni*(ni+1)/2) dim(covg)<-c(n,ni*(ni+1)/2,ni*(ni+1)/2) devg<-rep(0,n*ni*(ni+1)/2) dim(devg)<-c(n,ni*(ni+1)/2) g<-vector(length=n*ni*(ni+1)/2) corg<-matrix(0,n*ni*(ni+1)/2,3) key<-rep(0,ni*nij*2500) dim(key)<-c(ni,nij,2500) seq<-seq(ni,1,by=-1) for (i in 1:(ni-1)){ seq[i+1]<-seq[i]+seq[i+1] } result<-foreach(i=1:n, .combine='+') %dopar% { fun<-0 for (j in 1:ni){ for (k in 1:nij){ key[j,k,]<-1-1/(1+exp(data$X[(i-1)*ni*nij+(j-1)*nij+k,-(1:3)]%*%beta+sigma1*r1[i,]+sigma2*r2[i,j,])) } } for (j in 1:ni){ rij<-r2[i,j,] for (k in 1:nij){ miu1<-key[j,k,] dijk[i,j,k]<-sum(miu1*(1-miu1)*rij)/2500 miu[i,j,k]<-sum(miu1)/2500 miu2[i,j,]<-miu2[i,j,]+miu1 for (l in 1:nij){ for (m in 1:ni){ if(j<=m){ rim<-r2[i,m,] miu3<-key[m,l,] thoijj[i,j,m,k,l]<-sum(miu1*miu3)/2500 dijj[i,j,m,k,l]<-sum(miu1*miu3*((1-miu1)*rij+(1-miu3)*rim))/2500 if((j==m)&(kseq[l-1])){ g[(i-1)*ni*(ni+1)/2+j]<-sum(data$y[((i-1)*ni*nij+(l-2)*nij+1):((i-1)*ni*nij+(l-1)*nij )])/nij*sum(data$y[((i-1)*ni*nij+(j-seq[l-1]+l-2)*nij+1):((i-1)*ni*nij+(j-seq[l-1]+l-1)*nij )])/nij corg[j,]<-c(i,(l-1),(j-seq[l-1]+l-1)) break } } } if(corg[j,2]==corg[j,3]){ theta[(i-1)*ni*(ni+1)/2+j]<-(sum(miu[i,j,])+sum(thoijj[i,j,j,,]*(rep(1,nij)%*%t(rep(1,nij))-diag(nij))))/nij/nij devg[i,j]<-(sum(dijk[i,j,])+sum(dijj[i,j,j,,]*(rep(1,nij)%*%t(rep(1,nij))-diag(nij))))/nij/nij } else { theta[(i-1)*ni*(ni+1)/2+j]<-sum(thoijj[i,corg[j,2],corg[j,3],,])/nij/nij devg[i,j]<-sum(dijj[i,corg[j,2],corg[j,3],,])/nij/nij } for (k in 1:(ni*(ni+1)/2)){ if (k<=j){ length<-length(unique(c(corg[j,2],corg[j,3],corg[k,2],corg[k,3]))) order<- sort(unique(c(corg[j,2],corg[j,3],corg[k,2],corg[k,3]))) length1<-length(unique(c(corg[j,2],corg[j,3]))) length2<-length(unique(c(corg[k,2],corg[k,3]))) order1<- sort(unique(c(corg[j,2],corg[j,3]))) order2<- sort(unique(c(corg[k,2],corg[k,3]))) if (length==1){ covg[i,j,k]<-thoj[i,order[1]]-theta[(i-1)*ni*(ni+1)/2+j]*theta[(i-1)*ni*(ni+1)/2+k] }else if ((length==2)&(length1==2)&(length2==2)){ covg[i,j,k]<-thojj[i,order[1],order[2]]-theta[(i-1)*ni*(ni+1)/2+j]*theta[(i-1)*ni*(ni+1)/2+k] }else if ((length==2)&(length1==1)&(length2==2)){ covg[i,j,k]<-thokj[i,intersect(order1,order2),order[order!=intersect(order1,order2)]]-theta[(i-1)*ni*(ni+1)/2+j]*theta[(i-1)*ni*(ni+1)/2+k] }else if ((length==2)&(length1==2)&(length2==1)){ covg[i,j,k]<-thokj[i,intersect(order1,order2),order[order!=intersect(order1,order2)]]-theta[(i-1)*ni*(ni+1)/2+j]*theta[(i-1)*ni*(ni+1)/2+k] }else if ((length==2)&(length1==1)&(length2==1)){ covg[i,j,k]<-thojj[i,order[1],order[2]]-theta[(i-1)*ni*(ni+1)/2+j]*theta[(i-1)*ni*(ni+1)/2+k] }else if ((length==3)&(length1==1)&(length2==2)){ covg[i,j,k]<-thojjk[i,order1[1],order2[1],order2[2]]-theta[(i-1)*ni*(ni+1)/2+j]*theta[(i-1)*ni*(ni+1)/2+k] }else if ((length==3)&(length1==2)&(length2==1)){ covg[i,j,k]<-thojjk[i,order2[1],order1[1],order1[2]]-theta[(i-1)*ni*(ni+1)/2+j]*theta[(i-1)*ni*(ni+1)/2+k] }else if ((length==3)&(length1==2)&(length2==2)){ covg[i,j,k]<-thojjk[i,intersect(order1,order2),order[order!=intersect(order1,order2)][1],order[order!=intersect(order1,order2)][2]]-theta[(i-1)*ni*(ni+1)/2+j]*theta[(i-1)*ni*(ni+1)/2+k] }else if (length==4){ covg[i,j,k]<-thojjkl[i,order[1],order[2],order[3],order[4]]-theta[(i-1)*ni*(ni+1)/2+j]*theta[(i-1)*ni*(ni+1)/2+k] } } } } covg[i,,][upper.tri(covg[i,,])]<-t(covg[i,,])[upper.tri(covg[i,,])] fun<-devg[i,]%*%solve(covg[i,,])%*%(g[((i-1)*ni*(ni+1)/2+1):(i*ni*(ni+1)/2)]-theta[((i-1)*ni*(ni+1)/2+1):(i*ni*(ni+1)/2)]) return(fun) } return(result) } root1<-function(data,beta,n,ni,nij,sigma1,sigma2,seed){ #Here we use the secant method to find roots. diff<-1 niter<-0 while ((diff>=0.01) & (niter<=100)){ niter=niter+1 if (diff==1){ beta1=beta-0.01 beta2=beta beta3=beta-c(0,0.01) beta4=beta-c(0.01,0) rst1<-solve1(data,beta2,n,ni,nij,sigma1,sigma2,seed) rst2<-solve1(data,beta3,n,ni,nij,sigma1,sigma2,seed) rst3<-solve1(data,beta4,n,ni,nij,sigma1,sigma2,seed) answer<-beta2-solve(matrix(c((rst1[1]-rst3[1])/(beta2[1]-beta4[1]),(rst1[1]-rst2[1])/(beta2[2]-beta3[2]),(rst1[2]-rst3[2])/(beta2[1]-beta4[1]),(rst1[2]-rst2[2])/(beta2[2]-beta3[2])),2,2))%*%rst1 diff<-norm(answer-beta2,type="2") beta1<-beta2 beta2<-answer print (beta2) flush.console() } else { beta3=c(beta2[1],beta1[2]) beta4=c(beta1[1],beta2[2]) rst1<-solve1(data,beta2,n,ni,nij,sigma1,sigma2,seed) rst2<-solve1(data,beta3,n,ni,nij,sigma1,sigma2,seed) rst3<-solve1(data,beta4,n,ni,nij,sigma1,sigma2,seed) answer<-beta2-solve(matrix(c((rst1[1]-rst3[1])/(beta2[1]-beta4[1]),(rst1[1]-rst2[1])/(beta2[2]-beta3[2]),(rst1[2]-rst3[2])/(beta2[1]-beta4[1]),(rst1[2]-rst2[2])/(beta2[2]-beta3[2])),2,2))%*%rst1 diff<-norm(answer-beta2,type="2") beta1<-beta2 beta2<-answer print (beta2) flush.console() } } return(beta2) } root2<-function(data,beta,n,ni,nij,sigma1,sigma2,seed){ diff<-1 niter<-0 while ((diff>=0.01) & (niter<=100)){ niter=niter+1 if (diff==1){ sigma21=sigma2-0.01 sigma22=sigma2 rst1<-solve3(data,beta,n,ni,nij,sigma1,sigma21,seed) rst2<-solve3(data,beta,n,ni,nij,sigma1,sigma22,seed) answer<-sigma22-rst2*(sigma22-sigma21)/(rst2-rst1) diff<-abs(answer-sigma22) sigma21<-sigma22 sigma22<-answer rst1<-rst2 print (sigma22) flush.console() } else { rst2<-solve3(data,beta,n,ni,nij,sigma1,sigma22,seed) answer<-sigma22-rst2*(sigma22-sigma21)/(rst2-rst1) diff<-abs(answer-sigma22) sigma21<-sigma22 sigma22<-answer rst1<-rst2 print (sigma22) flush.console() } } return(sigma22) } root3<-function(data,beta,n,ni,nij,sigma1,sigma2,seed){ diff<-1 niter<-0 while ((diff>=0.01) & (niter<=100)){ niter=niter+1 if (diff==1){ sigma11=sigma1-0.01 sigma12=sigma1 rst1<-solve2(data,beta,n,ni,nij,sigma11,sigma2,seed) rst2<-solve2(data,beta,n,ni,nij,sigma12,sigma2,seed) answer<-sigma12-rst2*(sigma12-sigma11)/(rst2-rst1) diff<-abs(answer-sigma12) sigma11<-sigma12 sigma12<-answer rst1<-rst2 print (sigma12) flush.console() } else { rst2<-solve2(data,beta,n,ni,nij,sigma12,sigma2,seed) answer<-sigma12-rst2*(sigma12-sigma11)/(rst2-rst1) diff<-abs(answer-sigma12) sigma11<-sigma12 sigma12<-answer rst1<-rst2 print (sigma12) flush.console() } } return(sigma12) } gql<-function(data,beta0,n,ni,nij,sigma10,sigma20,seed,maxiter=50,tol=.Machine$double.eps^0.5){ # We use this function to get final estimates. We keep updating one parameter each time until they converge. niter1<-0 diff1<-tol+1 diff2<-tol+1 diff3<-tol+1 while ((max(diff1,diff2,diff3)>=tol) & (niter1<=maxiter)){ niter1<-niter1+1 solution1<-root1(data=data,beta=beta0,n=n,ni=ni,nij=nij,sigma1=sigma10,sigma2=sigma20,seed=seed) diff1<-norm(solution1-beta0,type="2") beta0<-solution1 time1<-proc.time() - ptm ptm <- proc.time() print (time1) flush.console() solution3<-root3(data=data,beta=beta0,n=n,ni=ni,nij=nij,sigma1=sigma10,sigma2=sigma20,seed=seed) diff3<-abs(solution3-sigma10) sigma10<-solution3 time3<-proc.time() - ptm ptm <- proc.time() print (time3) flush.console() solution2<-root2(data=data,beta=beta0,n=n,ni=ni,nij=nij,sigma1=sigma10,sigma2=sigma20,seed=seed) diff2<-abs(solution2-sigma20) sigma20<-solution2 time2<-proc.time() - ptm ptm <- proc.time() print (time2) flush.console() } if (niter1>maxiter){ max<-"Maximum number of iterations was reached." } else { max<-"OK" } return(list(rootbeta=solution1,rootsigma2=solution2,rootsigma1=solution3,niter1=niter1,diff1=diff1,diff2=diff2,diff3=diff3,max=max)) } final<-function(seed,num){ # Here we set the parameters for the simulations. For instance, in the following code, we want to perform 100 simulations. And for each simulation we want to obtain the estimates when n=100, ni=6, nij=6, sigma1=0.8 and sigma2=1.2. result<-matrix(0,num,4) for (i in 1:num){ data1<-gendata(100,6,6,0.8,1.2,seed+i) r.try<-try(gql(data1,c(1,1),100,6,6,0.8,1.2,32,20,0.01)) if ('try-error' %in% class(r.try)){ r.try<-try(gql(data1,c(1,1),100,6,6,-0.8,-1.2,32,20,0.01)) # We want to explore different starting values to make the estimates converge correctly. if ('try-error' %in% class(r.try)) next else r<-r.try } else r<-r.try result[i,]<-c(r$rootbeta,abs(r$rootsigma1),abs(r$rootsigma2)) print (result[i,]) flush.console() } return(result) } i<-NULL ptm <- proc.time() all<-final(1,100) stopCluster(cl)