My implementation of Berry and Berry’s hierarchical Bayes algorithm for adverse events

November 20, 2009
By

[This article was first published on Realizations in Biostatistics, and kindly contributed to R-bloggers]. (You can report issue about the content on this page here)
Want to share your content on R-bloggers? click here if you have a blog, or here if you don't.

I’ve been working on this for quite some time (see here for a little background), so I’m pleased that it looks close to done at least as far as the core algorithm. It uses global variables for now, and I’m sure there are a couple of other bugs lurking, but here it is, after the jump.

`const.sqrt2pi <- sqrt(2*3.14159265358979)logit <- function(x) return(x/(1-x))thetacond.log <- function(b,j,thet=theta[b,j]) {    res <- y[b,j]*(thet+gamm[b,j])-nt*log(1+exp(thet+gamm[b,j]))+        log(pi[b]*(thet==0) +(1-pi[b])/            (const.sqrt2pi*sigma.theta[b])*exp(-0.5*(thet-mu.theta[b])^2/sigma.theta[b]^2))    return(res)}thetabj.update <- function() {    # this is a mixture MH step, as described in Berry and Berry (2004)    for (b in 1:nbodysys) {        for (j in 1:nae[b]) {                                        # simulate new candidate - point mass at 0 and normal mixture            if (runif(1)<0.5) {                thetabj.cand <- 0            } else {                thetabj.cand <- rnorm(1,mean=theta[b,j],sd=sigma.mh.theta)            }            #cat("b=",b,", j=",j,sep="")            ccond.ratio <- exp(thetacond.log(b,j,thet=thetabj.cand)-thetacond.log(b,j))            if (theta[b,j]==0 && thetabj.cand==0 && runif(1)                theta[b,j]<<-thetabj.cand                #cat("cond=1, acceptance probability=",ccond.ratio,'\n',sep="")            }            else if (thetabj.cand==0 && theta[b,j]!=0 && runif(1)<                     ccond.ratio*exp(-0.5*theta[b,j]^2/sigma.mh.theta^2)/sigma.mh.theta/const.sqrt2pi)            {                theta[b,j]<<-thetabj.cand                #cat("cond=2, acc prob=",ccond.ratio*exp(-0.5*theta[b,j]^2/sigma.mh.theta^2)/sigma.mh.theta/const.sqrt2pi,'\n',sep="")            }            else if (thetabj.cand!=0 && theta[b,j]==0 &&                     runif(1)            {                theta[b,j]<<-thetabj.cand                #cat("cond=3, acc prob=",ccond.ratio/exp(-0.5*theta[b,j]^2/sigma.mh.theta^2)*sigma.mh.theta*const.sqrt2pi,'\n',sep="")            }            else if (thetabj.cand!=0 && theta[b,j]!=0 && runif(1)            {                theta[b,j]<<-thetabj.cand                #cat("cond=4, acceptance probability=",ccond.ratio,'\n',sep="")            }            #else cat('\n')        }    }}gammabj.cond.log <- function(b,j,g=gamm[b,j]) {    tmp1 <- exp(theta[b,j]+g)    tmp2 <- exp(g)    return(y[b,j]*(theta[b,j]+g)-nt*log(1+tmp1)+x[b,j]*g-nc*log(1+tmp2)           -0.5/sigma.gamma^2*(g-mu.gamma[b])^2)}gammabj.update <- function() {    for (b in 1:nbodysys) {        for (j in 1:nae[b]) {            cand <- rnorm(1,mean=gamm[b,j],sd=sigma.mh.gamma)            acc.prob <- exp(gammabj.cond.log(b,j,g=cand)-gammabj.cond.log(b,j))                                        #if (acc.prob>1) then acc.prob<-1 # not sure we need this for program            if (runif(1)<<-cand        }    }}pib.update <- function() {    for (b in 1:nbodysys) {        pi[b] <<- rbeta(1,alpha.pi+sum(theta[b,1:nae[b]]==0,na.rm=T),beta.pi+nae[b]-sum(theta[b,1:nae[b]]==0,na.rm=T))    }}sigma.theta.update <- function() {    for (b in 1:nbodysys) {        sigma2.rand <- rgamma(1,nae[b]/2+alpha.theta,                              scale=1/beta.theta+0.5*sum((theta[b,1:nae[b]]-mu.theta[b])^2,na.rm=T)) # fixed        sigma.theta[b] <<- 1/sqrt(sigma2.rand)    }}sigma.gamma.update <- function(){    tmp<-0    for (b in 1:nbodysys) {        tmp <- tmp+sum((gamm[b,1:nae[b]]-mu.gamma[b])^2,na.rm=T)    }    sigma2.rand <- rgamma(1,0.5*sum(nae)+alpha.sigma.gamma,scale=1/beta.sigma.gamma+                          0.5*tmp) #fixed    sigma.gamma <<- 1/sqrt(sigma.gamma)}tau.gamma0.update <- function(){    tau2.gamma0 <- rgamma(1,0.5*nbodysys+alpha.tau.gamma,scale=1/beta.tau.gamma+0.5*                          sum((mu.gamma-mu.gamma0)^2)) #fixed    tau.gamma0 <<- 1/sqrt(tau2.gamma0)}tau.theta0.update <- function(){    tau2.theta0 <- rgamma(1,0.5*nbodysys+alpha.tau.theta,scale=1/beta.tau.theta+0.5*                          sum((mu.theta-mu.theta0)^2)) #fixed    tau.theta0 <<- 1/sqrt(tau2.theta0)}mu.gamma.update <- function() {    for (b in 1:nbodysys) {        gibbs.mu <- (tau.gamma0^2*sum(gamm[b,1:nae[b]],na.rm=T)+sigma.gamma^2*mu.gamma0)/            (tau.gamma0^2*nae[b]+sigma.gamma^2)        gibbs.sigma2 <- tau.gamma0^2*sigma.gamma^2/(tau.gamma0^2*nae[b]+sigma.gamma^2)        mu.gamma[b] <<- rnorm(1,mean=gibbs.mu,sd=sqrt(gibbs.sigma2))    }}mu.theta.update <- function() {    for (b in 1:nbodysys) {        gibbs.mu <- (tau.theta0^2*sum(theta[b,1:nae[b]],na.rm=T)+sigma.theta^2*mu.theta0)/            (tau.theta0^2*nae[b]+sigma.theta^2)        gibbs.sigma2 <- tau.theta0^2*sigma.theta^2/(tau.theta0^2*nae[b]+sigma.theta^2)        mu.theta[b] <<- rnorm(1,mean=gibbs.mu,sd=sqrt(gibbs.sigma2))    }}mu.gamma0.update <- function(){    gibbs.mu <- (tau.gamma00^2*sum(mu.gamma)+tau.gamma0^2*mu.gamma00) /        (tau.gamma00^2*nbodysys+tau.gamma0^2)    gibbs.sigma2 <- tau.gamma00^2*tau.gamma0^2/(tau.gamma00^2*nbodysys +                                                tau.gamma0^2)    mu.gamma0 <<- rnorm(1,mean=gibbs.mu,sd=sqrt(gibbs.sigma2))}mu.theta0.update <- function(){    gibbs.mu <- (tau.theta00^2*sum(mu.theta)+tau.theta0^2*mu.theta00) /        (tau.theta00^2*nbodysys+tau.theta0^2)    gibbs.sigma2 <- tau.theta00^2*tau.theta0^2/(tau.theta00^2*nbodysys +                                                tau.theta0^2)    mu.theta0 <<- rnorm(1,mean=gibbs.mu,sd=sqrt(gibbs.sigma2))}alpha.pi.logcond <- function(alpha=alpha.pi) {    if (alpha<=1) return(-Inf)    else {        return(nbodysys*(lgamma(alpha+beta.pi)-lgamma(alpha))+               (alpha+1)*sum(log(pi))-alpha*lambda.alpha)    }}alpha.pi.update <- function() {    cand <- rnorm(1,mean=alpha.pi,sd=sigma.mh.theta)    acc.prob <- exp(alpha.pi.logcond(alpha=cand)-alpha.pi.logcond())    if (runif(1)<<-cand}beta.pi.logcond <- function(beta=beta.pi) {    if (beta<=1) return(-Inf)    else {        return(nbodysys*(lgamma(beta+alpha.pi)-lgamma(beta))+               (beta+1)*sum(log(1-pi))-beta*lambda.beta)    }}beta.pi.update <- function() {    cand <- rnorm(1,mean=beta.pi,sd=sigma.mh.theta)    acc.prob <- exp(beta.pi.logcond(beta=cand)-beta.pi.logcond())    if (runif(1)<<-cand}                                        # set up constants as in sec 4mu.theta00<-0tau.theta00 <- sqrt(10)mu.gamma00 <- 0tau.gamma00 <- sqrt(10)alpha.theta <- 3beta.theta <- 1alpha.theta0 <- 3beta.theta0 <- 1alpha.tau.gamma<-3beta.tau.gamma <- 1alpha.tau.theta <- 3beta.tau.theta <- 1alpha.sigma.gamma<-3beta.sigma.gamma <- 1lambda.alpha<-1lambda.beta <- 1                                        # parms for the example in berry and berryae.dat <- read.table(file="ae.dat",header=T)ae.dat\$bodysysf <- factor(ae.dat\$bodysys)nt <- 148nc <- 132nae <- tapply(ae.dat\$aenum,ae.dat\$bodysysf,length)nbodysys <- length(levels(ae.dat\$bodysysf))y <- matrix(NA,nrow=nbodysys,ncol=max(nae))x <- matrix(NA,nrow=nbodysys,ncol=max(nae))for (i in 1:nbodysys) {y[i,1:nae[i]] <- ae.dat\$trtct[ae.dat\$bodysysf==as.numeric(levels(ae.dat\$bodysysf)[i])]x[i,1:nae[i]] <- ae.dat\$contct[ae.dat\$bodysysf==as.numeric(levels(ae.dat\$bodysysf)[i])]}sigma.mh.theta <- 2sigma.mh.gamma <- 2                                        # set up containerstheta <- matrix(NA,nrow=nbodysys,ncol=max(nae))gamm <- matrix(NA,nrow=nbodysys,ncol=max(nae))pi <- rep(NA,length=nbodysys)sigma.theta <- rep(NA,length=nbodysys)sigma.gamma <- NAtau.gamma0 <- NAtau.theta0 <- NAmu.gamma <- rep(NA,length=nbodysys)mu.theta <- rep(NA,length=nbodysys)mu.gamma0 <- NAmu.theta0 <- NAalpha.pi <- NAbeta.pi <- NA                                        # initial valuesgamm <- logit(x/nc)theta <- logit(y/nt)-gammpi <- rep(.5,length=nbodysys)sigma.theta <- rep(1,length=nbodysys)sigma.gamma <- 1tau.gamma0 <- 1tau.theta0 <- 1mu.gamma <- rep(0,length=nbodysys)mu.theta <- rep(0,length=nbodysys)mu.gamma0 <- 0mu.theta0 <- 1alpha.pi <- 1.5beta.pi <- 1.5                                        # chainsn.iter <- 11000n.burnin <- 1000theta.chain <- array(NA,dim=c(nbodysys,max(nae),n.iter))gammabj.chain <- array(NA,dim=c(nbodysys,max(nae),n.iter))for (i in 1:n.iter){    thetabj.update()    theta.chain[,,i] <- theta    gammabj.update()    gammabj.chain[,,i] <- gamm    pib.update()    sigma.theta.update()    sigma.gamma.update()    tau.gamma0.update()    tau.theta0.update()    mu.gamma.update()    mu.theta.update()    mu.gamma0.update()    mu.theta0.update()    alpha.pi.update()    beta.pi.update()}for (b in 1:nbodysys){    for (j in 1:nae[b])    {        if (b==1 && j==1) {            cat(sprintf("%5s%5s%10s%10s\n","b","j","P(th=0)","P(th>0)"))            cat(sprintf("%5s%5s%10s%10s\n","---","---","------","------"))        }        cat(sprintf("%5s%5d%10.3f%10.3f\n",names(nae)[b],j,sum(theta.chain[b,j,(n.burnin+1):n.iter]==0)/sum(!is.na(theta.chain[b,j,(n.burnin+1):n.iter])),sum(theta.chain[b,j,(n.burnin+1):n.iter]>0)/sum(!is.na(theta.chain[b,j,(n.burnin+1):n.iter]))))    }}#mean(theta.chain[1,1,(n.burnin+1):n.iter],na.rm=T)plot(theta.chain[1,1,(n.burnin+1):n.iter],type='b',pch=0)plot(theta.chain[2,4,(n.burnin+1):n.iter],type='b')plot(theta.chain[2,4,1:n.iter],type='b')sum(theta.chain[2,4,])/n.itergammabj.chain[1,1,(n.burnin+1):n.iter]plot(theta.chain[6,2,(n.burnin+1):n.iter],type='b')hist(theta.chain[1,1,(n.burnin+1):n.iter])#acceptance ratefoo <- theta.chain[1,1,(n.burnin+1):n.iter]sum(foo[2:(n.iter-n.burnin)]!=foo[1:(n.iter-n.burnin-1)])/(n.iter-n.burnin+1)`

The datafile ae.dat copies the vaccine data from the article and looks like the following:

`# data for Berry and Berry AE modelbodysys aenum ae trtct contct1 1 astenia/fatigue 57 401 2 fever 34 261 3 infection/fungal 2 01 4 infection/viral 3 11 5 malaise 27 203 1 anorexia 7 23 2 cendisiasis/oral 2 03 3 constipation 2 03 4 diarrhea 24 103 5 castroenteritis 3 13 6 nausea 2 73 7 vomiting 19 195 1 lymphadenopathy 3 26 1 dehydration 0 28 1 crying 2 08 2 insomnia 2 28 3 irritability  75 439 1 bronchitis 4 19 2 congestion/nasal 4 29 3 congestion/respiratory 1 29 4 cough 13 89 5 infection/upper_respiratory 28 209 6 laryngotracheobronchitis 2 19 7 pharyngitis 13 89 8 rhinorrhea 15 149 9 sinusitis 3 19 10 tonsillitis 2 19 11 wheezing 3 110 1 bite/sting 4 010 2 eczema 2 010 3 pruritis 2 110 4 rash 13 310 5 rash/diaper 6 210 6 rash/measles/rubella-like 8 110 7 rash/varicella-like 4 210 8 urticaria 0 210 9 viral/exanthema 1 211 1 conjunctivitis 0 211 2 otitis/media 18 1411 3 otorrhea 2 1`

I make no claims to the correctness of this code, but I have gotten results that are close to the Berry and Berry article.

R-bloggers.com offers daily e-mail updates about R news and tutorials about learning R and many other topics. Click here if you're looking to post or find an R/data-science job.
Want to share your content on R-bloggers? click here if you have a blog, or here if you don't.

If you got this far, why not subscribe for updates from the site? Choose your flavor: e-mail, twitter, RSS, or facebook...