R Program
#R functions for exact clustered logistic regression
###################################################
#s1calc function
#Calculates the value of sufficient statistic s1 for a given vector zvec
#Input: zvec, vector of observed z-values (sum of successses in cluster)
#Output: s1, real number
###################################################
s1calc=function(zvec){
s1 = sum(zvec);
s1;
} #end s1calc
###################################################
#s2calc function
#Calculates the value of the sufficient statistic s2 for given vectors
zvec and nvec
#Input: zvec, vector of observed z-values (sum of successes in cluster)
# nvec, number of observations per cluster
#Output: s2, real number
###################################################
s2calc=function(zvec, nvec){
s2 = sum(zvec*(nvec-zvec));
s2;
} #end s2calc
###################################################
#s3calc function
#Calculates the value of the sufficient statistic s3 for given vectors
zvec, nvec and jvec
#Input: zvec, vector of observed z-values (sum of successes in inner
cluster)
# nvec, number of observations per inner cluster
# jvec, number of inner clusters per outer cluster
#Output: s3, real number
###################################################
s3calc=function(zvec,nvec,jvec){
k = length(jvec); #number of outer clusters
s3 = 0;
count = 1;
for(i in 1:k) #run through all inner
{
nsum = 0;
zsum = 0;
for(j in count:(count+jvec[i]-1))
{
nsum = nsum + nvec[j];
zsum = zsum + zvec[j];
}
s3i = 0;
for(j in count:(count+jvec[i]-1))
{
s3i = s3i + zvec[j]*(nsum - zsum);
}
s3 = s3 + s3i;
count = count + jvec[i];
} #end for
s3;
} # end s3calc function
###################################################
#tcalc function
#Calculates the value of sufficient statistic t for a given vector zvec
#Input: zvec, a vector of observed counts for each cluster
# x, a vector of predictor values for each cluster
#Output: t, teh sufficient statsitic for the parameter of interest
###################################################
tcalc=function(zvec,x){
t = sum(zvec*x);
t;
}
###################################################
#admis function
#Initializes the admissibility test recursion
#Matrix of admissible vectors starts as observed values
#Final output is a matrix whose columns are z-vectors that give the same
sufficient statstics as those observed
#Input: nvec, vector of observations per cluster
# zvec, the vector of observed z-values
#Output: zmatfinal, a matrix whose columns represent admissible vectors
###################################################
admis=function(nvec,zvec){
#N is the number of clusters
N = length(nvec);
#s1 and s2 are the observed values of the sufficient statistics
s1 = s1calc(zvec);
s2 = s2calc(zvec,nvec);
#begin with first cluster
k = 1;
n1 = nvec[1];
#initially zmat is the observed vector of values
zmat = zvec;
#run through all possible numbers of successes in first cluster
for(z in 0:n1)
{
znew=as.vector(z);
if(N==1) #check case of one cluster
{
s1current = s1calc(znew);
s2current = s2calc(znew,nvec);
if(s1current==s1 && s2current==s2)
{
zmat = cbind(zmat,znew);
}
} #end one cluster check
else #more than one cluster
{
#apply recursive function to find full admissible vectors
zmat = admisrec(nvec,zmat,znew,s1,s2);
} #zmat will have all admissible vectors beginning with znew added
as columns
} #end for
#zmat will have all admissable vectors for all values of z added as
columns
#observed z-vector will occur twice due to starting the process with
this vector
#zmatfinal has this first vector removed
ncols=dim(zmat)[2];
zmatfinal=zmat[,2:ncols];
zmatfinal
} #end admis function
###################################################
#admisrec function
#Recursive function runs through all possible values of z, finds admissible
vectors
#Takes initialized zmat from admisinit and returns zmatfinal with all
admissible vectors
#Input: nvec, vector of observations per cluster
# zmat, the vector of observed z-values
# zvec, a (possibly incomplete) vector of z-values
# s1,s2, observed values of sufficient statistics
#Output: zmatfinal, a matrix whose columns represent admissible vectors
###################################################
admisrec=function(nvec,zmat,zvec,s1,s2){
N = length(nvec);
#k is the current cluster to be analyzed
k = length(zvec) + 1;
ni = nvec[k];
zmatfinal = zmat; #will be updated throughout function
#run through all possible numbers of successes in cluster i
for(z in 0:ni)
{
#add new z-value to vector
zvecnew = rbind(zvec,z);
#if this is not a full vector of potential observations, call again
if(length(zvecnew) < N)
{
zmatfinal = admisrec(nvec,zmatfinal,zvecnew,s1,s2);
}
else #length=N, zvecnew is a full vector of potential observations
{
s1current = s1calc(zvecnew);
s2current = s2calc(zvecnew,nvec);
if(s1current==s1 && s2current==s2)
{
zmatfinal = cbind(zmatfinal,zvecnew);
}
} #end else
#zmatfinal contains all admissable vectors with z in component i
} #end for
#zmatfinal contains all admissable vectors for all values of z in component i
zmatfinal
} #end admisrec function
###################################################
#extremez function
#Calculates all admissible z-vectors with corresponding t-statistics at
least as large as
# the observed value of t
#Input: zadmis, the matrix of admissible vectors
# nvec, the vector of number of observations per cluster
# z, the observed vector of z-values
# x, the predictor values
#Output: zextremefinal, matrix of desired z-vectors
###################################################
extremez=function(zadmis,nvec,z,x){
ncols = dim(zadmis)[2];
tobs = tcalc(z,x);
#initialize answer to include observed values
zextreme = z;
for(i in 1:ncols) #run through all admissible vectors
{
zcurrent = zadmis[,i];
tcurrent = tcalc(zcurrent,x);
if(tcurrent >= tobs)
{
zextreme = cbind(zextreme,zcurrent);
}
} #end for
#remove first column; observed z-values are recorded twice
zextremefinal = zextreme[,2:dim(zextreme)[2]];
zextremefinal;
} #end extremez function
###################################################
#comprod function
#Calculates product of combinations used in conditional probability
distribution
#Input: nvec, vector of observations per cluster
# zvec, a vector of counts for each cluster
#Output: prod, the desired product
###################################################
comprod=function(nvec,zvec){
prod=1;
N = length(nvec);
for(i in 1:N)
{
prod = prod*(choose(nvec[i],zvec[i]));
}
prod;
} #end comprod function
###################################################
#pcalc function
#Calculates the p-value for test of beta=0 vs beta>0
#Input: zadmis, matrix of admissible z-values
# nvec, number of observations for each cluster
# z, counts for each cluster
# x, observed predictor values for each cluster
#Output: p, p-value for the hypothesis test above
###################################################
pcalc=function(zadmis,nvec,z,x){
#calculate denominator
denom=0;
nadmis = dim(zadmis)[2];
for(i in 1:nadmis)
{
denom = denom + comprod(nvec, zadmis[,i]);
}
#calculate numberator
zextreme = extremez(zadmis,nvec,z,x);
num=0;
nextreme = dim(zextreme)[2];
for(i in 1:nextreme)
{
num = num + comprod(nvec,zextreme[,i]);
}
p = num/denom;
p;
} #end pcalc function
###################################################
#admis2 function
#
########### For two-level problem #################
#
#Initializes the admissibility test recursion
#Matrix of admissible vectors starts as observed values
#Final output is a matrix whose columns are z-vectors that give the
same sufficient statistics as those observed
#Input: nvec, vector of observations per cluster
# zvec, the vector of observed z-values
# jvec, number of inner clusters in each outer cluster
#Output: zmatfinal, a matrix whose columns represent admissible vectors
###################################################
admis2=function(nvec,zvec,jvec){
#N is the number of inner clusters
N = length(nvec);
#s1, s2, and s3 are the observed values of the sufficient statistics
s1 = s1calc(zvec);
s2 = s2calc(zvec,nvec);
s3 = s3calc(zvec,nvec,jvec);
#begin with first cluster
k = 1;
n1 = nvec[1];
#initially zmat is the observed vector of values
zmat = zvec;
#run through all possible numbers of successes in first cluster
for(z in 0:n1)
{
znew=as.vector(z);
if(N==1) #check case of one cluster
{
s1current = s1calc(znew);
s2current = s2calc(znew,nvec);
s3current = s3calc(znew,nvec,jvec);
if(s1current==s1 && s2current==s2 && s3current==s3)
{
zmat = cbind(zmat,znew);
}
} #end one cluster check
else #more than one cluster
{
#apply recursive function to find full admissible vectors
zmat = admisrec2(nvec,zmat,znew,jvec,s1,s2,s3);
} #zmat will have all admissible vectors beginning with znew added
as columns
} #end for
#zmat will have all admissible vectors for all values of z added
as columns
#observed z-vector will occur twice due to starting the process
with this vector
#zmatfinal has this first vector removed
ncols=dim(zmat)[2];
zmatfinal=zmat[,2:ncols];
zmatfinal
} #end admis2 function
###################################################
#admisrec2 function
#
########### For two-level problem #################
#
#Recursive function runs through all possible values of z, finds
admissible vectors
#Takes initialized zmat from admisinit and returns zmatfinal with
all admissible vectors
#Input: nvec, vector of observations per cluster
# zmat, the vector of observed z-values
# zvec, a (possibly incomplete) vector of z-values
# jvec, number of inner clusters in each outer cluster
# s1,s2,s3, observed values of sufficient statistics
#Output: zmatfinal, a matrix whose columns represent admissible vectors
###################################################
admisrec2=function(nvec,zmat,zvec,jvec,s1,s2,s3){
N = length(nvec);
#k is the current cluster to be analyzed
k = length(zvec) + 1;
ni = nvec[k];
zmatfinal = zmat; #will be updated throughout function
#run through all possible numbers of successes in cluster i
for(z in 0:ni)
{
#add new z-value to vector
zvecnew = rbind(zvec,z);
#if this is not a full vector of potential observations, call again
if(length(zvecnew) < N)
{
zmatfinal = admisrec2(nvec,zmatfinal,zvecnew,jvec,s1,s2,s3);
}
else #length=N, zvecnew is a full vector of potential observations
{
s1current = s1calc(zvecnew);
s2current = s2calc(zvecnew,nvec);
s3current = s3calc(zvecnew,nvec,jvec);
if(s1current==s1 && s2current==s2 && s3current==s3)
{
zmatfinal = cbind(zmatfinal,zvecnew);
}
} #end else
#zmatfinal contains all admissable vectors with z in component i
} #end for
#zmatfinal contains all admissable vectors for all values of z in component i
zmatfinal
} #end admisrec2 function