# Additive vs. dominance two-allele genetic model by DIC

August 7, 2009
By

(This article was first published on Gregor Gorjanc (gg), and kindly contributed to R-bloggers)

Two-allele model has alleles A1 and A2 and genotypes A1A1, A1A2, and A2A2. This model can be fitted using only additive effect (restricted model) or additve+dominance effect. (full model) The later model has one parameter more than the former. I was using this model lately and got weird DIC results – the full model had less number of parameters. Bellow is a simulation (using R and BUGS), that shows expected DIC results.
`## Needed packages and functionslibrary(package="e1071")library(package="doBy")library(package="R2WinBUGS")rvalue <- function(n, p, value, mean=0, sdE=0){  if(!is.matrix(value)) value <- as.matrix(value)   ## Frequencies  k <- length(p)  q <- 1 - p  P <- p * p  Q <- q * q  H <- 2 * p * q  ## Allocate matrix of arbitrary values by loci  x <- matrix(data=1, nrow=n, ncol=k)  y <- matrix(data=1, nrow=n, ncol=k)  ## Simulate arbitrary values  for(i in 1:k) {    x[, i] <- rdiscrete(n=n, probs=c(P[i], H[i], Q[i]))    y[, i] <- value[x[, i], i]  }  x <- x - 1 ## convert 1:3 to 0:2, i.e., number of A1 alleles  ## Sum all values  y <- rowSums(y)  ## Add mean and normal deviate (error)  y <- y + rnorm(n=n, mean=mean, sd=sdE)  return(as.data.frame(cbind(y, x)))}prepairData <- function(x, dominance=FALSE){  ret <- list()  ret\$n <- nrow(x)  ret\$y <- x\$y  if(!dominance) {    ret\$x <- x\$V2 - 1 ## scale to the mean  } else {    ret\$x <- x\$V2 + 1 ## BUGS does not allow zero indexing!  }  ret}### SIMULATE DATA###-----------------------------------------------------------------------## --- Purely additive ---set.seed(seed=19791123)## Model parametersn <- 1000mu <- 100sigma <- 1value <- c(-1, 0, 1)p <- 0.5## SimulatepodatkiA <- rvalue(n=n, p=p, value=value, mean=mu, sdE=sigma)par(mfrow=c(2, 1))hist(podatkiA\$y, xlab="Phenotype")plot(podatkiA\$y ~ jitter(podatkiA\$V2), xlab="Genotype", ylab="Phenotype")tmpA1 <- lm(podatkiA\$y ~ podatkiA\$V2)abline(coef(tmpA1), lwd="2", col="blue")tmpA2 <- summaryBy(y ~ V2, data=podatkiA)points(tmpA2\$y.mean ~ tmpA2\$V2, pch=19, col="red")## --- Additive + Dominance ---set.seed(seed=19791123)## Model parametersn <- 1000mu <- 100sigma <- 1value <- c(-1, 1, -1)p <- 0.5## SimulatepodatkiD <- rvalue(n=n, p=p, value=value, mean=mu, sdE=sigma)par(mfrow=c(2, 1))hist(podatkiD\$y, xlab="Phenotype")plot(podatkiD\$y ~ jitter(podatkiD\$V2), xlab="Genotype", ylab="Phenotype")tmpD1 <- lm(podatkiD\$y ~ podatkiD\$V2)abline(coef(tmpD1), lwd="2", col="blue")tmpD2 <- summaryBy(y ~ V2, data=podatkiD)points(tmpD2\$y.mean ~ tmpD2\$V2, pch=19, col="red")### ESTIMATE MODEL PARAMETERS AND DIC###-----------------------------------------------------------------------modelA <- function(){  ## --- Additive model (regression on gene content) ---  ## Phenotypes  for(i in 1:n) {    y[i] ~ dnorm(mu[i], tau2)    mu[i] <- a + b * x[i]  }  ## Location prior  a ~ dnorm(0, 1.0E-6)    b ~ dnorm(0, 1.0E-6)  ## Variance prior  lsigma ~ dunif(-10, 10) ## sigma between 4.539993e-05 and 2.202647e+04  tau2 <- pow(sigma, -2)  sigma <- exp(lsigma)}modelD <- function(){  ## --- Additive + Dominance model (genotype as a factor) ---  ## Phenotypes  for(i in 1:n) {    y[i] ~ dnorm(mu[i], tau2)    mu[i] <- g[x[i]]  }  ## Location prior  g[1] ~ dnorm(0, 1.0E-6)  g[2] ~ dnorm(0, 1.0E-6)  g[3] ~ dnorm(0, 1.0E-6)    ## Variance prior  lsigma ~ dunif(-10, 10) ## sigma between 4.539993e-05 and 2.202647e+04  tau2 <- pow(sigma, -2)  sigma <- exp(lsigma)}tmpAA <- prepairData(x=podatkiA)tmpAD <- prepairData(x=podatkiA, dominance=TRUE)tmpDA <- prepairData(x=podatkiD)tmpDD <- prepairData(x=podatkiD, dominance=TRUE)initsA <- function() list(a=rnorm(n=1, mean=mu), b=rnorm(n=1), lsigma=0)initsD <- function() list(g=rnorm(n=3, mean=mu),               lsigma=0)(fitAA <- bugs(model=modelA, data=tmpAA, inits=initsA,               n.chains=3, n.burnin=5000, n.iter=10000, n.thin=10,               bugs.seed=19791123, parameters=c("a", "b", "sigma"),               bugs.directory="C:/Programs/BUGS/WinBUGS14_orig"))##            mean  sd   2.5%    25%  50%  75%  97.5% Rhat n.eff## a          99.9 0.0   99.9   99.9  100  100  100.0    1  1500## b           1.0 0.0    0.9    1.0    1    1    1.1    1  1500## sigma       1.0 0.0    1.0    1.0    1    1    1.0    1   400## deviance 2828.3 2.4 2826.0 2826.0 2828 2829 2834.0    1  1500## pD = 2.9 and DIC = 2831.2(fitAD <- bugs(model=modelD, data=tmpAD, inits=initsD,               n.chains=3, n.burnin=5000, n.iter=10000, n.thin=10,               bugs.seed=19791123, parameters=c("g", "sigma"),               bugs.directory="C:/Programs/BUGS/WinBUGS14_orig"))##            mean  sd   2.5%    25%    50%  75%  97.5% Rhat n.eff## g[1]       98.9 0.1   98.8   98.9   98.9   99   99.1    1   660## g[2]       99.9 0.0   99.8   99.9   99.9  100  100.0    1  1500## g[3]      101.0 0.1  100.8  100.9  101.0  101  101.1    1  1500## sigma       1.0 0.0    1.0    1.0    1.0    1    1.0    1  1300## deviance 2829.2 2.9 2826.0 2827.0 2829.0 2831 2837.0    1  1500## pD = 4.0 and DIC = 2833.2## pD seem OK and DIC agrees with simulated data - it is worse## for AD model for data following additive model(fitDA <- bugs(model=modelA, data=tmpDA, inits=initsA,               n.chains=3, n.burnin=5000, n.iter=10000, n.thin=10,               bugs.seed=19791123, parameters=c("a", "b", "sigma"),               bugs.directory="C:/Programs/BUGS/WinBUGS14_orig"))##            mean  sd   2.5%    25%    50%    75%  97.5% Rhat n.eff## a         100.0 0.0   99.9  100.0  100.0  100.0  100.1    1   570## b           0.0 0.1   -0.1    0.0    0.0    0.1    0.2    1  1500## sigma       1.4 0.0    1.3    1.4    1.4    1.4    1.5    1  1300## deviance 3512.9 2.6 3510.0 3511.0 3512.0 3514.0 3520.0    1  1500## pD = 3.0 and DIC = 3515.9(fitDD <- bugs(model=modelD, data=tmpDD, inits=initsD,               n.chains=3, n.burnin=5000, n.iter=10000, n.thin=10,               bugs.seed=19791123, parameters=c("g", "sigma"),               bugs.directory="C:/Programs/BUGS/WinBUGS14_orig"))##            mean  sd   2.5%    25%    50%  75%  97.5% Rhat n.eff## g[1]       98.9 0.1   98.8   98.9   99.0   99   99.1    1  1500## g[2]      100.9 0.1  100.8  100.9  100.9  101  101.0    1  1500## g[3]       99.0 0.1   98.8   98.9   99.0   99   99.1    1  1500## sigma       1.0 0.0    1.0    1.0    1.0    1    1.0    1   530## deviance 2829.2 2.9 2826.0 2827.0 2829.0 2831 2837.0    1  1500## pD = 4.1 and DIC = 2833.3## pD seems OK and DIC also agrees with the model --> dominance## model is better for this dataset`

R-bloggers.com offers daily e-mail updates about R news and tutorials on topics such as: Data science, Big Data, R jobs, visualization (ggplot2, Boxplots, maps, animation), programming (RStudio, Sweave, LaTeX, SQL, Eclipse, git, hadoop, Web Scraping) statistics (regression, PCA, time series, trading) and more...