**Wiekvoet**, and kindly contributed to R-bloggers)

I wanted to extend my car weight distribution calculation of June 16 from only 2000 to years 2000 to 2013. Unfortunately, come Sunday afternoon the code seemed too slow and not even the beginning of a post. So, I went on to another calculation I was working on; scraping Diesel usage data from the web and examining how it varied. The background of this is the repeated news items one reads, which state that actual fuel usage is much higher than reported. Since I may be in the market for an Opel Corsa Diesel, this was the car of choice.

### Sourcing the data

### Model

### Engines

### Months

### Drivers

### Factory results and driver results

### R code

library(R2jags)

data_list <- list(NDriver=nlevels(usage$Driver),

NBY=nlevels(usage$buildyear),

NEngine=nlevels(usage$engine),

NMonth=nlevels(usage$Month),

N=nrow(usage))

data_list$Driver <- (1:data_list$NDriver)[usage$Driver]

data_list$BY <- (1:data_list$NBY)[usage$buildyear]

data_list$Engine<- (1:data_list$NEngine)[usage$engine]

data_list$Month <- (1:data_list$NMonth)[usage$Month]

data_list$lp100 <- usage$lp100

uu <- unique(data.frame(engine=usage$engine,cDriver=usage$Driver,Driver=data_list$Driver,Engine=data_list$Engine))

data_list$uDriver <- uu$Driver

data_list$uEngine <- uu$Engine

data_list$Driver1 <- uu$Driver[uu$Engine==1]

data_list$Driver2 <- uu$Driver[uu$Engine==2]

data_list$Driver3 <- uu$Driver[uu$Engine==3]

data_list$NDriver1 <- length(data_list$Driver1)

data_list$NDriver2 <- length(data_list$Driver2)

data_list$NDriver3 <- length(data_list$Driver3)

mymod <- function() {

for (i in 1:N) {

fit[i] <- grandmean +

MMonth[Month[i]] +

MEngine[Engine[i]] +

MBY[BY[i]] +

MDriver[Driver[i]]

lp100[i] ~dnorm(fit[i],tauOfDriver[Driver[i]])

}

grandmean ~ dnorm(0,.001)

MMonth[1] <- 0

for (i in 2:NMonth) {

MMonth[i] ~ dnorm(0,tauMonth)

}

tauMonth ~ dgamma(0.001,0.001)

sdMonth <- sqrt(1/tauMonth)

MBY[1] <- 0

for (i in 2:NBY) {

MBY[i] ~ dnorm(0,tauBY)

}

tauBY ~ dgamma(0.001,0.001)

sdBY <- sqrt(1/tauBY)

MEngine[1] <- 0

for (i in 2:NEngine) {

MEngine[i] ~ dnorm(0,tauEngine)

}

tauEngine ~ dgamma(0.001,0.001)

sdEngine <- sqrt(1/tauEngine)

MDriver[1] <- 0

for (i in 2:NDriver) {

MDriver[i] ~ dnorm(0,tauDriver)

}

tauDriver ~ dgamma(0.001,0.001)

sdDriver <- sqrt(1/tauDriver)

for (i in 1:NDriver) {

tauOfDriver[i] <- pow(sdOfDriver[i],-2)

sdOfDriver[i] ~ dlnorm(mu.lsdD,tau.lsdD)

}

mu.lsdD ~ dnorm(0,.0001)

tau.lsdD <- pow(sigma.lsdD,-2)

sigma.lsdD ~ dunif(0,100)

for (i in 1:NDriver) {

meanDriver[i] <- grandmean + MDriver[uDriver[i]] + mean(MMonth[1:NMonth]) + mean(MBY[1:NBY]) + MEngine[uEngine[i]]

}

for (i in 1:NMonth) {

meanMonth[i] <- grandmean + mean(MDriver[1:NDriver]) + MMonth[i] + mean(MBY[1:NBY]) + mean(MEngine[1:NEngine])

}

for (i in 1:NBY) {

meanBY[i] <- grandmean + mean(MDriver[1:NDriver]) + mean(MMonth[1:NMonth]) +MBY[i]+ mean(MEngine[1:NEngine])

}

for ( i in 1:NDriver1) {

MDriver1[i] <- MDriver[Driver1[i]]

}

for ( i in 1:NDriver2) {

MDriver2[i] <- MDriver[Driver2[i]]

}

for ( i in 1:NDriver3) {

MDriver3[i] <- MDriver[Driver3[i]]

}

meanEngine[1] <- grandmean + mean(MDriver1) + mean(MMonth[1:NMonth])+ mean(MBY[1:NBY]) +MEngine[1]

meanEngine[2] <- grandmean + mean(MDriver2) + mean(MMonth[1:NMonth])+ mean(MBY[1:NBY]) +MEngine[2]

meanEngine[3] <- grandmean + mean(MDriver3) + mean(MMonth[1:NMonth])+ mean(MBY[1:NBY]) +MEngine[3]

}

model.file <- file.path(tempdir(),’mijnmodel.txt’)

write.model(mymod,con=model.file)

inits <- function() list(

grandmean = rnorm(1,5,1),

MDriver = c(NA,rnorm(data_list$NDriver-1)) ,

MBY = c(NA,rnorm(data_list$NBY-1)) ,

MEngine = c(NA,rnorm(data_list$NEngine-1)) ,

MMonth = c(NA,rnorm(data_list$NMonth-1)) ,

tauDriver = runif(1,1,3),

tauBY = runif(1,1,3),

tauEngine=runif(1,1,3),

tauMonth=runif(1,1,3),

sdOfDriver = runif(data_list$NDriver,1,3)

)

parameters <- c(‘sdOfDriver’,’meanMonth’,’meanDriver’,’meanBY’,

‘meanEngine’,’sdDriver’,’sdMonth’,’sdEngine’)

jagsfit <- jags(data=data_list,inits=inits,model.file=model.file,

parameters.to.save=parameters,n.chains=4,DIC=FALSE,n.iter=20000,

progress.bar=’none’)

png(‘jagsfit.png’)

plot(jagsfit)

dev.off()

jagsfit.mc <- as.mcmc(jagsfit)

fitsummary <- summary(jagsfit.mc)

stats <- fitsummary$statistics

quants <- fitsummary$quantiles

library(ggplot2)

Engines <- as.data.frame(cbind(stats[grepl(‘meanEngine’,rownames(stats)),],

quants[grepl(‘meanEngine’,rownames(quants)),]))

Engines$type <- levels(usage$engine)

Engines

limits <- aes(ymax = `25%`, ymin=`75%`)

p <- ggplot(Engines, aes(y=Mean, x=type))

dodge <- position_dodge(width=0.9)

png(‘engines.png’)

p + geom_point(position=dodge) +

geom_errorbar(limits, position=dodge, width=0.25) +

coord_flip()

dev.off()

Months <- as.data.frame(cbind(stats[grepl(‘meanMonth’,rownames(stats)),],

quants[grepl(‘meanMonth’,rownames(quants)),]))

Months$Month <- levels(usage$Month)

limits <- aes(ymax = `25%`, ymin=`75%`)

p <- ggplot(Months, aes(y=Mean, x=Month))

dodge <- position_dodge(width=0.9)

png(‘Months.png’)

p + geom_point(position=dodge) +

geom_errorbar(limits, position=dodge, width=0.25) +

coord_flip()

dev.off()

Driver <- as.data.frame(cbind(stats[grepl(‘meanDriver’,rownames(stats)),],

quants[grepl(‘meanDriver’,rownames(quants)),]))

Driver <- cbind(uu,Driver)

limits <- aes(ymax = `25%`, ymin=`75%`)

p <- ggplot(Driver, aes(y=Mean, x=Driver,colour=engine))

dodge <- position_dodge(width=0.9)

png(‘driver.png’)

p + geom_point(position=dodge) +

geom_errorbar(limits, position=dodge, width=0.25) +

coord_flip()

dev.off()

ag <- aggregate(usage$lp100,by=list(Driver=usage$Driver),mean)

min(ag$x)

**leave a comment**for the author, please follow the link and comment on their blog:

**Wiekvoet**.

R-bloggers.com offers

**daily e-mail updates**about R news and tutorials on topics such as: Data science, Big Data, R jobs, visualization (ggplot2, Boxplots, maps, animation), programming (RStudio, Sweave, LaTeX, SQL, Eclipse, git, hadoop, Web Scraping) statistics (regression, PCA, time series, trading) and more...