R Program

 

#R functions for exact clustered logistic regression

 

###################################################

#s1calc function

#Calculates the value of sufficient statistic s1 for a given vector zvec

 

#Input: zvec, vector of observed z-values (sum of successses in cluster)

#Output: s1, real number

###################################################

s1calc=function(zvec){

s1 = sum(zvec);

s1;

} #end s1calc

 

###################################################

#s2calc function

#Calculates the value of the sufficient statistic s2 for given vectors

zvec and nvec

 

#Input: zvec, vector of observed z-values (sum of successes in cluster)

# nvec, number of observations per cluster

#Output: s2, real number

###################################################

s2calc=function(zvec, nvec){

s2 = sum(zvec*(nvec-zvec));

 

s2;

} #end s2calc

 

###################################################

#s3calc function

#Calculates the value of the sufficient statistic s3 for given vectors

zvec, nvec and jvec

 

#Input: zvec, vector of observed z-values (sum of successes in inner

cluster)

# nvec, number of observations per inner cluster

# jvec, number of inner clusters per outer cluster

#Output: s3, real number

###################################################

s3calc=function(zvec,nvec,jvec){

k = length(jvec); #number of outer clusters

s3 = 0;

 

count = 1;

 

for(i in 1:k) #run through all inner

{

nsum = 0;

zsum = 0;

 

for(j in count:(count+jvec[i]-1))

{

nsum = nsum + nvec[j];

zsum = zsum + zvec[j];

}

 

s3i = 0;

 

for(j in count:(count+jvec[i]-1))

{

s3i = s3i + zvec[j]*(nsum - zsum);

}

 

s3 = s3 + s3i;

count = count + jvec[i];

} #end for

 

s3;

} # end s3calc function

 

###################################################

#tcalc function

#Calculates the value of sufficient statistic t for a given vector zvec

 

#Input: zvec, a vector of observed counts for each cluster

# x, a vector of predictor values for each cluster

#Output: t, teh sufficient statsitic for the parameter of interest

###################################################

tcalc=function(zvec,x){

t = sum(zvec*x);

 

t;

}

###################################################

#admis function

#Initializes the admissibility test recursion

#Matrix of admissible vectors starts as observed values

#Final output is a matrix whose columns are z-vectors that give the same

sufficient statstics as those observed

 

#Input: nvec, vector of observations per cluster

# zvec, the vector of observed z-values

#Output: zmatfinal, a matrix whose columns represent admissible vectors

###################################################

admis=function(nvec,zvec){

#N is the number of clusters

N = length(nvec);

 

#s1 and s2 are the observed values of the sufficient statistics

s1 = s1calc(zvec);

s2 = s2calc(zvec,nvec);

 

#begin with first cluster

k = 1;

n1 = nvec[1];

 

#initially zmat is the observed vector of values

zmat = zvec;

 

#run through all possible numbers of successes in first cluster

for(z in 0:n1)

{

znew=as.vector(z);

 

if(N==1) #check case of one cluster

{

s1current = s1calc(znew);

s2current = s2calc(znew,nvec);

 

if(s1current==s1 && s2current==s2)

{

zmat = cbind(zmat,znew);

}

 

} #end one cluster check

 

else #more than one cluster

{

#apply recursive function to find full admissible vectors

zmat = admisrec(nvec,zmat,znew,s1,s2);

} #zmat will have all admissible vectors beginning with znew added

as columns

 

} #end for

 

#zmat will have all admissable vectors for all values of z added as

columns

#observed z-vector will occur twice due to starting the process with

this vector

#zmatfinal has this first vector removed

ncols=dim(zmat)[2];

zmatfinal=zmat[,2:ncols];

 

zmatfinal

 

} #end admis function

 

###################################################

#admisrec function

#Recursive function runs through all possible values of z, finds admissible

vectors

#Takes initialized zmat from admisinit and returns zmatfinal with all

admissible vectors

 

#Input: nvec, vector of observations per cluster

# zmat, the vector of observed z-values

# zvec, a (possibly incomplete) vector of z-values

# s1,s2, observed values of sufficient statistics

#Output: zmatfinal, a matrix whose columns represent admissible vectors

###################################################

admisrec=function(nvec,zmat,zvec,s1,s2){

N = length(nvec);

 

#k is the current cluster to be analyzed

k = length(zvec) + 1;

ni = nvec[k];

 

zmatfinal = zmat; #will be updated throughout function

 

#run through all possible numbers of successes in cluster i

for(z in 0:ni)

{

#add new z-value to vector

zvecnew = rbind(zvec,z);

 

#if this is not a full vector of potential observations, call again

if(length(zvecnew) < N)

{

zmatfinal = admisrec(nvec,zmatfinal,zvecnew,s1,s2);

}

 

else #length=N, zvecnew is a full vector of potential observations

{

s1current = s1calc(zvecnew);

s2current = s2calc(zvecnew,nvec);

 

if(s1current==s1 && s2current==s2)

{

zmatfinal = cbind(zmatfinal,zvecnew);

}

} #end else

#zmatfinal contains all admissable vectors with z in component i

} #end for

 

#zmatfinal contains all admissable vectors for all values of z in component i

zmatfinal

 

} #end admisrec function

 

###################################################

#extremez function

#Calculates all admissible z-vectors with corresponding t-statistics at

least as large as

# the observed value of t

 

#Input: zadmis, the matrix of admissible vectors

# nvec, the vector of number of observations per cluster

# z, the observed vector of z-values

# x, the predictor values

#Output: zextremefinal, matrix of desired z-vectors

###################################################

extremez=function(zadmis,nvec,z,x){

ncols = dim(zadmis)[2];

tobs = tcalc(z,x);

 

#initialize answer to include observed values

zextreme = z;

 

for(i in 1:ncols) #run through all admissible vectors

{

zcurrent = zadmis[,i];

tcurrent = tcalc(zcurrent,x);

 

if(tcurrent >= tobs)

{

zextreme = cbind(zextreme,zcurrent);

}

} #end for

 

#remove first column; observed z-values are recorded twice

zextremefinal = zextreme[,2:dim(zextreme)[2]];

 

zextremefinal;

} #end extremez function

 

###################################################

#comprod function

#Calculates product of combinations used in conditional probability

distribution

 

#Input: nvec, vector of observations per cluster

# zvec, a vector of counts for each cluster

#Output: prod, the desired product

###################################################

comprod=function(nvec,zvec){

prod=1;

N = length(nvec);

 

for(i in 1:N)

{

prod = prod*(choose(nvec[i],zvec[i]));

}

 

prod;

} #end comprod function

 

###################################################

#pcalc function

#Calculates the p-value for test of beta=0 vs beta>0

#Input: zadmis, matrix of admissible z-values

# nvec, number of observations for each cluster

# z, counts for each cluster

# x, observed predictor values for each cluster

#Output: p, p-value for the hypothesis test above

###################################################

pcalc=function(zadmis,nvec,z,x){

#calculate denominator

denom=0;

nadmis = dim(zadmis)[2];

 

for(i in 1:nadmis)

{

denom = denom + comprod(nvec, zadmis[,i]);

}

 

#calculate numberator

zextreme = extremez(zadmis,nvec,z,x);

num=0;

nextreme = dim(zextreme)[2];

 

for(i in 1:nextreme)

{

num = num + comprod(nvec,zextreme[,i]);

}

 

p = num/denom;

 

p;

} #end pcalc function

 

###################################################

#admis2 function

#

########### For two-level problem #################

#

#Initializes the admissibility test recursion

#Matrix of admissible vectors starts as observed values

#Final output is a matrix whose columns are z-vectors that give the

same sufficient statistics as those observed

 

#Input: nvec, vector of observations per cluster

# zvec, the vector of observed z-values

# jvec, number of inner clusters in each outer cluster

#Output: zmatfinal, a matrix whose columns represent admissible vectors

###################################################

admis2=function(nvec,zvec,jvec){

#N is the number of inner clusters

N = length(nvec);

 

#s1, s2, and s3 are the observed values of the sufficient statistics

s1 = s1calc(zvec);

s2 = s2calc(zvec,nvec);

s3 = s3calc(zvec,nvec,jvec);

 

#begin with first cluster

k = 1;

n1 = nvec[1];

 

#initially zmat is the observed vector of values

zmat = zvec;

 

#run through all possible numbers of successes in first cluster

for(z in 0:n1)

{

znew=as.vector(z);

 

if(N==1) #check case of one cluster

{

s1current = s1calc(znew);

s2current = s2calc(znew,nvec);

s3current = s3calc(znew,nvec,jvec);

 

if(s1current==s1 && s2current==s2 && s3current==s3)

{

zmat = cbind(zmat,znew);

}

 

} #end one cluster check

 

else #more than one cluster

{

#apply recursive function to find full admissible vectors

zmat = admisrec2(nvec,zmat,znew,jvec,s1,s2,s3);

} #zmat will have all admissible vectors beginning with znew added

as columns

 

} #end for

 

#zmat will have all admissible vectors for all values of z added

as columns

#observed z-vector will occur twice due to starting the process

with this vector

#zmatfinal has this first vector removed

ncols=dim(zmat)[2];

zmatfinal=zmat[,2:ncols];

 

zmatfinal

 

} #end admis2 function

 

###################################################

#admisrec2 function

#

########### For two-level problem #################

#

#Recursive function runs through all possible values of z, finds

admissible vectors

#Takes initialized zmat from admisinit and returns zmatfinal with

all admissible vectors

 

#Input: nvec, vector of observations per cluster

# zmat, the vector of observed z-values

# zvec, a (possibly incomplete) vector of z-values

# jvec, number of inner clusters in each outer cluster

# s1,s2,s3, observed values of sufficient statistics

#Output: zmatfinal, a matrix whose columns represent admissible vectors

###################################################

admisrec2=function(nvec,zmat,zvec,jvec,s1,s2,s3){

N = length(nvec);

 

#k is the current cluster to be analyzed

k = length(zvec) + 1;

ni = nvec[k];

 

zmatfinal = zmat; #will be updated throughout function

 

#run through all possible numbers of successes in cluster i

for(z in 0:ni)

{

#add new z-value to vector

zvecnew = rbind(zvec,z);

 

#if this is not a full vector of potential observations, call again

if(length(zvecnew) < N)

{

zmatfinal = admisrec2(nvec,zmatfinal,zvecnew,jvec,s1,s2,s3);

}

 

else #length=N, zvecnew is a full vector of potential observations

{

s1current = s1calc(zvecnew);

s2current = s2calc(zvecnew,nvec);

s3current = s3calc(zvecnew,nvec,jvec);

 

if(s1current==s1 && s2current==s2 && s3current==s3)

{

zmatfinal = cbind(zmatfinal,zvecnew);

}

} #end else

#zmatfinal contains all admissable vectors with z in component i

} #end for

 

#zmatfinal contains all admissable vectors for all values of z in component i

zmatfinal

 

} #end admisrec2 function