Additive vs. dominance two-allele genetic model by DIC

August 7, 2009
By

(This article was first published on Gregor Gorjanc (gg), and kindly contributed to R-bloggers)

Two-allele model has alleles A1 and A2 and genotypes A1A1, A1A2, and A2A2. This model can be fitted using only additive effect (restricted model) or additve+dominance effect. (full model) The later model has one parameter more than the former. I was using this model lately and got weird DIC results - the full model had less number of parameters. Bellow is a simulation (using R and BUGS), that shows expected DIC results.
## Needed packages and functions
library(package="e1071")
library(package="doBy")
library(package="R2WinBUGS")

rvalue <- function(n, p, value, mean=0, sdE=0)
{
if(!is.matrix(value)) value <- as.matrix(value)

## Frequencies
k <- length(p)
q <- 1 - p
P <- p * p
Q <- q * q
H <- 2 * p * q

## Allocate matrix of arbitrary values by loci
x <- matrix(data=1, nrow=n, ncol=k)
y <- matrix(data=1, nrow=n, ncol=k)

## Simulate arbitrary values
for(i in 1:k) {
x[, i] <- rdiscrete(n=n, probs=c(P[i], H[i], Q[i]))
y[, i] <- value[x[, i], i]
}
x <- x - 1 ## convert 1:3 to 0:2, i.e., number of A1 alleles

## Sum all values
y <- rowSums(y)

## Add mean and normal deviate (error)
y <- y + rnorm(n=n, mean=mean, sd=sdE)

return(as.data.frame(cbind(y, x)))
}

prepairData <- function(x, dominance=FALSE)
{
ret <- list()
ret$n <- nrow(x)
ret$y <- x$y
if(!dominance) {
ret$x <- x$V2 - 1 ## scale to the mean
} else {
ret$x <- x$V2 + 1 ## BUGS does not allow zero indexing!
}
ret
}

### SIMULATE DATA
###-----------------------------------------------------------------------

## --- Purely additive ---

set.seed(seed=19791123)

## Model parameters
n <- 1000
mu <- <- 100
sigma <- 1
value <- c(-1, 0, 1)
p <- 0.5

## Simulate
podatkiA <- rvalue(n=n, p=p, value=value, mean=mu, sdE=sigma)

par(mfrow=c(2, 1))
hist(podatkiA$y, xlab="Phenotype")

plot(podatkiA$y ~ jitter(podatkiA$V2), xlab="Genotype", ylab="Phenotype")

tmpA1 <- lm(podatkiA$y ~ podatkiA$V2)
abline(coef(tmpA1), lwd="2", col="blue")
tmpA2 <- summaryBy(y ~ V2, data=podatkiA)
points(tmpA2$y.mean ~ tmpA2$V2, pch=19, col="red")

## --- Additive + Dominance ---

set.seed(seed=19791123)

## Model parameters
n <- 1000
mu <- 100
sigma <- 1
value <- c(-1, 1, -1)
p <- 0.5

## Simulate
podatkiD <- rvalue(n=n, p=p, value=value, mean=mu, sdE=sigma)

par(mfrow=c(2, 1))
hist(podatkiD$y, xlab="Phenotype")

plot(podatkiD$y ~ jitter(podatkiD$V2), xlab="Genotype", ylab="Phenotype")

tmpD1 <- lm(podatkiD$y ~ podatkiD$V2)
abline(coef(tmpD1), lwd="2", col="blue")
tmpD2 <- summaryBy(y ~ V2, data=podatkiD)
points(tmpD2$y.mean ~ tmpD2$V2, pch=19, col="red")

### ESTIMATE MODEL PARAMETERS AND DIC
###-----------------------------------------------------------------------

modelA <- function()
{
## --- Additive model (regression on gene content) ---
## Phenotypes
for(i in 1:n) {
y[i] ~ dnorm(mu[i], tau2)
mu[i] <- a + b * x[i]
}
## Location prior
a ~ dnorm(0, 1.0E-6)
b ~ dnorm(0, 1.0E-6)
## Variance prior
lsigma ~ dunif(-10, 10) ## sigma between 4.539993e-05 and 2.202647e+04
tau2 <- pow(sigma, -2)
sigma <- exp(lsigma)
}

modelD <- function()
{
## --- Additive + Dominance model (genotype as a factor) ---
## Phenotypes
for(i in 1:n) {
y[i] ~ dnorm(mu[i], tau2)
mu[i] <- g[x[i]]
}
## Location prior
g[1] ~ dnorm(0, 1.0E-6)
g[2] ~ dnorm(0, 1.0E-6)
g[3] ~ dnorm(0, 1.0E-6)
## Variance prior
lsigma ~ dunif(-10, 10) ## sigma between 4.539993e-05 and 2.202647e+04
tau2 <- pow(sigma, -2)
sigma <- exp(lsigma)
}

tmpAA <- prepairData(x=podatkiA)
tmpAD <- prepairData(x=podatkiA, dominance=TRUE)
tmpDA <- prepairData(x=podatkiD)
tmpDD <- prepairData(x=podatkiD, dominance=TRUE)

initsA <- function() list(a=rnorm(n=1, mean=mu), b=rnorm(n=1), lsigma=0)
initsD <- function() list(g=rnorm(n=3, mean=mu), lsigma=0)

(fitAA <- bugs(model=modelA, data=tmpAA, inits=initsA,
n.chains=3, n.burnin=5000, n.iter=10000, n.thin=10,
bugs.seed=19791123, parameters=c("a", "b", "sigma"),
bugs.directory="C:/Programs/BUGS/WinBUGS14_orig"))
## mean sd 2.5% 25% 50% 75% 97.5% Rhat n.eff
## a 99.9 0.0 99.9 99.9 100 100 100.0 1 1500
## b 1.0 0.0 0.9 1.0 1 1 1.1 1 1500
## sigma 1.0 0.0 1.0 1.0 1 1 1.0 1 400
## deviance 2828.3 2.4 2826.0 2826.0 2828 2829 2834.0 1 1500
## pD = 2.9 and DIC = 2831.2

(fitAD <- bugs(model=modelD, data=tmpAD, inits=initsD,
n.chains=3, n.burnin=5000, n.iter=10000, n.thin=10,
bugs.seed=19791123, parameters=c("g", "sigma"),
bugs.directory="C:/Programs/BUGS/WinBUGS14_orig"))
## mean sd 2.5% 25% 50% 75% 97.5% Rhat n.eff
## g[1] 98.9 0.1 98.8 98.9 98.9 99 99.1 1 660
## g[2] 99.9 0.0 99.8 99.9 99.9 100 100.0 1 1500
## g[3] 101.0 0.1 100.8 100.9 101.0 101 101.1 1 1500
## sigma 1.0 0.0 1.0 1.0 1.0 1 1.0 1 1300
## deviance 2829.2 2.9 2826.0 2827.0 2829.0 2831 2837.0 1 1500
## pD = 4.0 and DIC = 2833.2

## pD seem OK and DIC agrees with simulated data - it is worse
## for AD model for data following additive model

(fitDA <- bugs(model=modelA, data=tmpDA, inits=initsA,
n.chains=3, n.burnin=5000, n.iter=10000, n.thin=10,
bugs.seed=19791123, parameters=c("a", "b", "sigma"),
bugs.directory="C:/Programs/BUGS/WinBUGS14_orig"))
## mean sd 2.5% 25% 50% 75% 97.5% Rhat n.eff
## a 100.0 0.0 99.9 100.0 100.0 100.0 100.1 1 570
## b 0.0 0.1 -0.1 0.0 0.0 0.1 0.2 1 1500
## sigma 1.4 0.0 1.3 1.4 1.4 1.4 1.5 1 1300
## deviance 3512.9 2.6 3510.0 3511.0 3512.0 3514.0 3520.0 1 1500
## pD = 3.0 and DIC = 3515.9

(fitDD <- bugs(model=modelD, data=tmpDD, inits=initsD,
n.chains=3, n.burnin=5000, n.iter=10000, n.thin=10,
bugs.seed=19791123, parameters=c("g", "sigma"),
bugs.directory="C:/Programs/BUGS/WinBUGS14_orig"))
## mean sd 2.5% 25% 50% 75% 97.5% Rhat n.eff
## g[1] 98.9 0.1 98.8 98.9 99.0 99 99.1 1 1500
## g[2] 100.9 0.1 100.8 100.9 100.9 101 101.0 1 1500
## g[3] 99.0 0.1 98.8 98.9 99.0 99 99.1 1 1500
## sigma 1.0 0.0 1.0 1.0 1.0 1 1.0 1 530
## deviance 2829.2 2.9 2826.0 2827.0 2829.0 2831 2837.0 1 1500
## pD = 4.1 and DIC = 2833.3

## pD seems OK and DIC also agrees with the model --> dominance
## model is better for this dataset

To leave a comment for the author, please follow the link and comment on his blog: Gregor Gorjanc (gg).

R-bloggers.com offers daily e-mail updates about R news and tutorials on topics such as: visualization (ggplot2, Boxplots, maps, animation), programming (RStudio, Sweave, LaTeX, SQL, Eclipse, git, hadoop, Web Scraping) statistics (regression, PCA, time series, trading) and more...



If you got this far, why not subscribe for updates from the site? Choose your flavor: e-mail, twitter, RSS, or facebook...

Comments are closed.