My implementation of Berry and Berry’s hierarchical Bayes algorithm for adverse events

November 20, 2009
By

(This article was first published on Realizations in Biostatistics, and kindly contributed to R-bloggers)

I've been working on this for quite some time (see here for a little background), so I'm pleased that it looks close to done at least as far as the core algorithm. It uses global variables for now, and I'm sure there are a couple of other bugs lurking, but here it is, after the jump.



const.sqrt2pi <- sqrt(2*3.14159265358979)
logit <- function(x) return(x/(1-x))
thetacond.log <- function(b,j,thet=theta[b,j]) {
    res <- y[b,j]*(thet+gamm[b,j])-nt*log(1+exp(thet+gamm[b,j]))+
        log(pi[b]*(thet==0) +(1-pi[b])/
            (const.sqrt2pi*sigma.theta[b])*exp(-0.5*(thet-mu.theta[b])^2/sigma.theta[b]^2))
    return(res)
}
thetabj.update <- function() {
    # this is a mixture MH step, as described in Berry and Berry (2004)
    for (b in 1:nbodysys) {
        for (j in 1:nae[b]) {
                                        # simulate new candidate - point mass at 0 and normal mixture
            if (runif(1)<0.5) {
                thetabj.cand <- 0
            } else {
                thetabj.cand <- rnorm(1,mean=theta[b,j],sd=sigma.mh.theta)
            }
            #cat("b=",b,", j=",j,sep="")
            ccond.ratio <- exp(thetacond.log(b,j,thet=thetabj.cand)-thetacond.log(b,j))
            if (theta[b,j]==0 && thetabj.cand==0 && runif(1)
                theta[b,j]<<-thetabj.cand
                #cat("cond=1, acceptance probability=",ccond.ratio,'\n',sep="")
            }
            else if (thetabj.cand==0 && theta[b,j]!=0 && runif(1)<
                     ccond.ratio*exp(-0.5*theta[b,j]^2/sigma.mh.theta^2)/sigma.mh.theta/const.sqrt2pi)
            {
                theta[b,j]<<-thetabj.cand
                #cat("cond=2, acc prob=",ccond.ratio*exp(-0.5*theta[b,j]^2/sigma.mh.theta^2)/sigma.mh.theta/const.sqrt2pi,'\n',sep="")
            }
            else if (thetabj.cand!=0 && theta[b,j]==0 &&
                     runif(1)
            {
                theta[b,j]<<-thetabj.cand
                #cat("cond=3, acc prob=",ccond.ratio/exp(-0.5*theta[b,j]^2/sigma.mh.theta^2)*sigma.mh.theta*const.sqrt2pi,'\n',sep="")
            }
            else if (thetabj.cand!=0 && theta[b,j]!=0 && runif(1)
            {
                theta[b,j]<<-thetabj.cand
                #cat("cond=4, acceptance probability=",ccond.ratio,'\n',sep="")
            }
            #else cat('\n')
        }
    }
}
gammabj.cond.log <- function(b,j,g=gamm[b,j]) {
    tmp1 <- exp(theta[b,j]+g)
    tmp2 <- exp(g)
    return(y[b,j]*(theta[b,j]+g)-nt*log(1+tmp1)+x[b,j]*g-nc*log(1+tmp2)
           -0.5/sigma.gamma^2*(g-mu.gamma[b])^2)
}
gammabj.update <- function() {
    for (b in 1:nbodysys) {
        for (j in 1:nae[b]) {
            cand <- rnorm(1,mean=gamm[b,j],sd=sigma.mh.gamma)
            acc.prob <- exp(gammabj.cond.log(b,j,g=cand)-gammabj.cond.log(b,j))
                                        #if (acc.prob>1) then acc.prob<-1 # not sure we need this for program
            if (runif(1)<<-cand
        }
    }
}
pib.update <- function() {
    for (b in 1:nbodysys) {
        pi[b] <<- rbeta(1,alpha.pi+sum(theta[b,1:nae[b]]==0,na.rm=T),beta.pi+nae[b]-sum(theta[b,1:nae[b]]==0,na.rm=T))
    }
}
sigma.theta.update <- function() {
    for (b in 1:nbodysys) {
        sigma2.rand <- rgamma(1,nae[b]/2+alpha.theta,
                              scale=1/beta.theta+0.5*sum((theta[b,1:nae[b]]-mu.theta[b])^2,na.rm=T)) # fixed
        sigma.theta[b] <<- 1/sqrt(sigma2.rand)
    }
}
sigma.gamma.update <- function()
{
    tmp<-0
    for (b in 1:nbodysys) {
        tmp <- tmp+sum((gamm[b,1:nae[b]]-mu.gamma[b])^2,na.rm=T)
    }
    sigma2.rand <- rgamma(1,0.5*sum(nae)+alpha.sigma.gamma,scale=1/beta.sigma.gamma+
                          0.5*tmp) #fixed
    sigma.gamma <<- 1/sqrt(sigma.gamma)
}
tau.gamma0.update <- function()
{
    tau2.gamma0 <- rgamma(1,0.5*nbodysys+alpha.tau.gamma,scale=1/beta.tau.gamma+0.5*
                          sum((mu.gamma-mu.gamma0)^2)) #fixed
    tau.gamma0 <<- 1/sqrt(tau2.gamma0)
}
tau.theta0.update <- function()
{
    tau2.theta0 <- rgamma(1,0.5*nbodysys+alpha.tau.theta,scale=1/beta.tau.theta+0.5*
                          sum((mu.theta-mu.theta0)^2)) #fixed
    tau.theta0 <<- 1/sqrt(tau2.theta0)
}
mu.gamma.update <- function() {
    for (b in 1:nbodysys) {
        gibbs.mu <- (tau.gamma0^2*sum(gamm[b,1:nae[b]],na.rm=T)+sigma.gamma^2*mu.gamma0)/
            (tau.gamma0^2*nae[b]+sigma.gamma^2)
        gibbs.sigma2 <- tau.gamma0^2*sigma.gamma^2/(tau.gamma0^2*nae[b]+sigma.gamma^2)
        mu.gamma[b] <<- rnorm(1,mean=gibbs.mu,sd=sqrt(gibbs.sigma2))
    }
}
mu.theta.update <- function() {
    for (b in 1:nbodysys) {
        gibbs.mu <- (tau.theta0^2*sum(theta[b,1:nae[b]],na.rm=T)+sigma.theta^2*mu.theta0)/
            (tau.theta0^2*nae[b]+sigma.theta^2)
        gibbs.sigma2 <- tau.theta0^2*sigma.theta^2/(tau.theta0^2*nae[b]+sigma.theta^2)
        mu.theta[b] <<- rnorm(1,mean=gibbs.mu,sd=sqrt(gibbs.sigma2))
    }
}
mu.gamma0.update <- function()
{
    gibbs.mu <- (tau.gamma00^2*sum(mu.gamma)+tau.gamma0^2*mu.gamma00) /
        (tau.gamma00^2*nbodysys+tau.gamma0^2)
    gibbs.sigma2 <- tau.gamma00^2*tau.gamma0^2/(tau.gamma00^2*nbodysys +
                                                tau.gamma0^2)
    mu.gamma0 <<- rnorm(1,mean=gibbs.mu,sd=sqrt(gibbs.sigma2))
}
mu.theta0.update <- function()
{
    gibbs.mu <- (tau.theta00^2*sum(mu.theta)+tau.theta0^2*mu.theta00) /
        (tau.theta00^2*nbodysys+tau.theta0^2)
    gibbs.sigma2 <- tau.theta00^2*tau.theta0^2/(tau.theta00^2*nbodysys +
                                                tau.theta0^2)
    mu.theta0 <<- rnorm(1,mean=gibbs.mu,sd=sqrt(gibbs.sigma2))
}
alpha.pi.logcond <- function(alpha=alpha.pi) {
    if (alpha<=1) return(-Inf)
    else {
        return(nbodysys*(lgamma(alpha+beta.pi)-lgamma(alpha))+
               (alpha+1)*sum(log(pi))-alpha*lambda.alpha)
    }
}
alpha.pi.update <- function() {
    cand <- rnorm(1,mean=alpha.pi,sd=sigma.mh.theta)
    acc.prob <- exp(alpha.pi.logcond(alpha=cand)-alpha.pi.logcond())
    if (runif(1)<<-cand
}
beta.pi.logcond <- function(beta=beta.pi) {
    if (beta<=1) return(-Inf)
    else {
        return(nbodysys*(lgamma(beta+alpha.pi)-lgamma(beta))+
               (beta+1)*sum(log(1-pi))-beta*lambda.beta)
    }
}
beta.pi.update <- function() {
    cand <- rnorm(1,mean=beta.pi,sd=sigma.mh.theta)
    acc.prob <- exp(beta.pi.logcond(beta=cand)-beta.pi.logcond())
    if (runif(1)<<-cand
}
                                        # set up constants as in sec 4
mu.theta00<-0
tau.theta00 <- sqrt(10)
mu.gamma00 <- 0
tau.gamma00 <- sqrt(10)
alpha.theta <- 3
beta.theta <- 1
alpha.theta0 <- 3
beta.theta0 <- 1
alpha.tau.gamma<-3
beta.tau.gamma <- 1
alpha.tau.theta <- 3
beta.tau.theta <- 1
alpha.sigma.gamma<-3
beta.sigma.gamma <- 1
lambda.alpha<-1
lambda.beta <- 1
                                        # parms for the example in berry and berry
ae.dat <- read.table(file="ae.dat",header=T)
ae.dat$bodysysf <- factor(ae.dat$bodysys)
nt <- 148
nc <- 132
nae <- tapply(ae.dat$aenum,ae.dat$bodysysf,length)
nbodysys <- length(levels(ae.dat$bodysysf))
y <- matrix(NA,nrow=nbodysys,ncol=max(nae))
x <- matrix(NA,nrow=nbodysys,ncol=max(nae))
for (i in 1:nbodysys) {
y[i,1:nae[i]] <- ae.dat$trtct[ae.dat$bodysysf==as.numeric(levels(ae.dat$bodysysf)[i])]
x[i,1:nae[i]] <- ae.dat$contct[ae.dat$bodysysf==as.numeric(levels(ae.dat$bodysysf)[i])]
}
sigma.mh.theta <- 2
sigma.mh.gamma <- 2
                                        # set up containers
theta <- matrix(NA,nrow=nbodysys,ncol=max(nae))
gamm <- matrix(NA,nrow=nbodysys,ncol=max(nae))
pi <- rep(NA,length=nbodysys)
sigma.theta <- rep(NA,length=nbodysys)
sigma.gamma <- NA
tau.gamma0 <- NA
tau.theta0 <- NA
mu.gamma <- rep(NA,length=nbodysys)
mu.theta <- rep(NA,length=nbodysys)
mu.gamma0 <- NA
mu.theta0 <- NA
alpha.pi <- NA
beta.pi <- NA


                                        # initial values
gamm <- logit(x/nc)
theta <- logit(y/nt)-gamm
pi <- rep(.5,length=nbodysys)
sigma.theta <- rep(1,length=nbodysys)
sigma.gamma <- 1
tau.gamma0 <- 1
tau.theta0 <- 1
mu.gamma <- rep(0,length=nbodysys)
mu.theta <- rep(0,length=nbodysys)
mu.gamma0 <- 0
mu.theta0 <- 1
alpha.pi <- 1.5
beta.pi <- 1.5
                                        # chains
n.iter <- 11000
n.burnin <- 1000
theta.chain <- array(NA,dim=c(nbodysys,max(nae),n.iter))
gammabj.chain <- array(NA,dim=c(nbodysys,max(nae),n.iter))
for (i in 1:n.iter)
{
    thetabj.update()
    theta.chain[,,i] <- theta
    gammabj.update()
    gammabj.chain[,,i] <- gamm
    pib.update()
    sigma.theta.update()
    sigma.gamma.update()
    tau.gamma0.update()
    tau.theta0.update()
    mu.gamma.update()
    mu.theta.update()
    mu.gamma0.update()
    mu.theta0.update()
    alpha.pi.update()
    beta.pi.update()
}
for (b in 1:nbodysys)
{
    for (j in 1:nae[b])
    {
        if (b==1 && j==1) {
            cat(sprintf("%5s%5s%10s%10s\n","b","j","P(th=0)","P(th>0)"))
            cat(sprintf("%5s%5s%10s%10s\n","---","---","------","------"))
        }
        cat(sprintf("%5s%5d%10.3f%10.3f\n",names(nae)[b],j,sum(theta.chain[b,j,(n.burnin+1):n.iter]==0)/sum(!is.na(theta.chain[b,j,(n.burnin+1):n.iter])),sum(theta.chain[b,j,(n.burnin+1):n.iter]>0)/sum(!is.na(theta.chain[b,j,(n.burnin+1):n.iter]))))
    }
}
#mean(theta.chain[1,1,(n.burnin+1):n.iter],na.rm=T)
plot(theta.chain[1,1,(n.burnin+1):n.iter],type='b',pch=0)
plot(theta.chain[2,4,(n.burnin+1):n.iter],type='b')
plot(theta.chain[2,4,1:n.iter],type='b')
sum(theta.chain[2,4,])/n.iter
gammabj.chain[1,1,(n.burnin+1):n.iter]
plot(theta.chain[6,2,(n.burnin+1):n.iter],type='b')
hist(theta.chain[1,1,(n.burnin+1):n.iter])
#acceptance rate
foo <- theta.chain[1,1,(n.burnin+1):n.iter]
sum(foo[2:(n.iter-n.burnin)]!=foo[1:(n.iter-n.burnin-1)])/(n.iter-n.burnin+1)

The datafile ae.dat copies the vaccine data from the article and looks like the following:

# data for Berry and Berry AE model
bodysys aenum ae trtct contct
1 1 astenia/fatigue 57 40
1 2 fever 34 26
1 3 infection/fungal 2 0
1 4 infection/viral 3 1
1 5 malaise 27 20
3 1 anorexia 7 2
3 2 cendisiasis/oral 2 0
3 3 constipation 2 0
3 4 diarrhea 24 10
3 5 castroenteritis 3 1
3 6 nausea 2 7
3 7 vomiting 19 19
5 1 lymphadenopathy 3 2
6 1 dehydration 0 2
8 1 crying 2 0
8 2 insomnia 2 2
8 3 irritability 75 43
9 1 bronchitis 4 1
9 2 congestion/nasal 4 2
9 3 congestion/respiratory 1 2
9 4 cough 13 8
9 5 infection/upper_respiratory 28 20
9 6 laryngotracheobronchitis 2 1
9 7 pharyngitis 13 8
9 8 rhinorrhea 15 14
9 9 sinusitis 3 1
9 10 tonsillitis 2 1
9 11 wheezing 3 1
10 1 bite/sting 4 0
10 2 eczema 2 0
10 3 pruritis 2 1
10 4 rash 13 3
10 5 rash/diaper 6 2
10 6 rash/measles/rubella-like 8 1
10 7 rash/varicella-like 4 2
10 8 urticaria 0 2
10 9 viral/exanthema 1 2
11 1 conjunctivitis 0 2
11 2 otitis/media 18 14
11 3 otorrhea 2 1

I make no claims to the correctness of this code, but I have gotten results that are close to the Berry and Berry article.

To leave a comment for the author, please follow the link and comment on his blog: Realizations in Biostatistics.

R-bloggers.com offers daily e-mail updates about R news and tutorials on topics such as: visualization (ggplot2, Boxplots, maps, animation), programming (RStudio, Sweave, LaTeX, SQL, Eclipse, git, hadoop, Web Scraping) statistics (regression, PCA, time series, trading) and more...



If you got this far, why not subscribe for updates from the site? Choose your flavor: e-mail, twitter, RSS, or facebook...

Comments are closed.