This was supposed to be a post in which General Social Surveys (GSS) data were used to understand a bit more about the causation of differences between states. Thus it was to give additioanl insight than my previous post; Guns are Cool – Differences between states. Unfortunately, that did not work so good, and it ended as a kind of investigation of regions.
The GSS have data by region rather than by state. Hence incidences by region were calculated. After aggregation the following results were obtained. From analysis point of view, New England forms somewhat an outlier. It is so much lower that any model should cater to this feature. Since there are only nine regions, which is quite low to do an analysis, and potentially tens of independent variables, I gave up on linking this incidence to GSS.
StateRegion shootings Population Incidence
5 New England 12 14618806 0.05148008
4 Mountain 30 22881245 0.08222644
8 West North Central 30 20885710 0.09008280
9 West South Central 58 37883604 0.09601666
3 Middle Atlantic 68 41970716 0.10160906
6 Pacific 91 51373178 0.11108997
7 South Atlantic 110 61137198 0.11283843
2 East South Central 36 18716202 0.12062981
1 East North Central 101 46662180 0.13574575
I had been wondering if the week day effect observed before would vary between states. But spreading the data over all states would thin the data too much and make plots unappealing. The nine regions are much better. While the Sunday/Weekend effect is present in all regions, its size differs markedly. Sundays seem particularly bad in West North Central. Three regions, New England, West South Central and Mountain have no shootings on one weekday.
r14 <- readLines('raw14.txt')
r1 <- c(r13,r14)
r2 <- gsub('\\[[a-zA-Z0-9]*\\]','',r1)
r3 <- gsub('^ *$','',r2)
r4 <- r3[r3!='']
r5 <- gsub('\\t$','',r4)
r6 <- gsub('\\t References$','',r5)
r7 <- read.table(textConnection(r6),
sep=’\t’,
header=TRUE,
stringsAsFactors=FALSE)
r7$Location[r7$Location==’Washington DC’] <-
‘WashingtonDC, DC’
r8 <- read.table(textConnection(as.character(r7$Location)),
sep=’,’,
col.names=c(‘Location’,’State’),
stringsAsFactors=FALSE)
r8$State <- gsub(' ','',r8$State)
r8$State[r8$State==’Tennessee’] <- 'TN'
r8$State[r8$State==’Ohio’] <- 'OH'
r8$State[r8$State %in% c(‘Kansas’,’KA’)] <- 'KS'
r8$State[r8$State==’Louisiana’] <- 'LA'
r8$State[r8$State==’Illinois’] <- 'IL'
r8$State <- toupper(r8$State)
table(r8$State)
r7$StateAbb <- r8$State
r7$Location <- r8$Location
r7 <- r7[! (r7$State %in% c( 'PUERTORICO','PR')),]
r7$Date <- gsub('/13$','/2013',r7$Date)
r7$date <- as.Date(r7$Date,format="%m/%d/%Y")
states <- data.frame(
StateAbb=as.character(state.abb),
StateRegion=state.division,
State=as.character(state.name)
)
states <- rbind(states,data.frame(StateAbb='DC',
State=’District of Columbia’,
StateRegion= ‘Middle Atlantic’))
# http://www.census.gov/popest/data/state/totals/2013/index.html
inhabitants <- read.csv('NST-EST2013-01.treated.csv')
#put it all together
states <- merge(states,inhabitants)
r9 <- merge(r7,states)
Sys.setlocale(category = “LC_TIME”, locale = “C”)
r9$weekday <- factor(format(r9$date,'%a'),
levels=format(as.Date(’07/07/2014′,format=”%m/%d/%Y”)+0:6,’%a’))
alldays <- data.frame(
date=seq(min(r9$date),max(r9$date),by=1))
alldays$weekday <- factor(format(alldays$date,'%a'),
levels=levels(r9$weekday))
agg1 <- as.data.frame(xtabs(~StateRegion+weekday,data=r9))
names(agg1)[names(agg1)==’Freq’] <- 'shootings'
agg2 <- with(states,
aggregate(Population,
by=list(StateRegion=StateRegion),
FUN=sum))
names(agg2)[names(agg2)==’x’] <- 'population'
agg12 <- merge(agg1,agg2,all=TRUE)
agg3 <- as.data.frame(table(alldays$weekday))
names(agg3) <- c('weekday','ndays')
agg123 <- merge(agg12,agg3)
agg123$incidence <- 100000*agg123$shootings/agg123$population*365/agg123$ndays
#########################
agg4 <- as.data.frame(xtabs(~StateRegion,data=r9))
names(agg4)[names(agg4)==’Freq’] <- 'shootings'
agg5 <- as.data.frame(xtabs(Population ~StateRegion,data=states))
names(agg5)[names(agg5)==’Freq’] <- 'Population'
agg45 <- merge(agg4,agg5)
agg45$Incidence <- 100000*365*
agg45$shootings/agg45$Population/
as.numeric((max(r7$date)-min(r7$date)))
agg45[order(agg45$Incidence),c(1:4)]
#########################
library(ggplot2)
agg123$Region <- agg123$StateRegion
levels(agg123$Region) <- gsub(' ','\n',levels(agg123$Region))
ggplot(data=agg123,
aes(y=incidence,x=weekday)) +
geom_bar(stat=’identity’) +
ylab(‘Incidence’) +
xlab(”) +
coord_flip() +
facet_grid(~Region )
########
r10 <- merge(as.data.frame(xtabs(~StateAbb,data=r9)),states,all=TRUE)
r10$Freq[is.na(r10$Freq)] <- 0
r10$Incidence <- r10$Freq*100000*365/r10$Population/
as.numeric((max(r7$date)-min(r7$date)))
library(R2jags)
datain <- list(
count=r10$Freq,
Population = r10$Population,
n=nrow(r10),
nregion =nlevels(r10$StateRegion),
Region=(1:nlevels(r10$StateRegion))[r10$StateRegion],
scale=100000*365/
as.numeric((max(r7$date)-min(r7$date))))
model1 <- function() {
for (i in 1:n) {
count[i] ~ dbin(p[i],Population[i])
p[i] ~ dbeta(a[Region[i]],b[Region[i]])
pp[i] <- p[i]*scale
}
for (i in 1:nregion) {
a[i] ~ dnorm(aa,tauaa) %_% T(0,)
b[i] ~ dnorm(bb,taubb) %_% T(0,)
betamean[i] <- scale * a[i]/(a[i]+b[i])
}
tauaa <- pow(sigmaaa,-2)
sigmaaa ~dunif(0,1e5)
taubb <- pow(sigmabb,-2)
sigmabb ~dunif(0,1e8)
aa ~ dunif(0,1e5)
bb ~ dunif(0,1e8)
}
parameters <- c('a','b','betamean','pp','aa','bb','sigmaaa','sigmabb')
jagsfit <- jags(datain,
model=model1,
parameters=parameters,
n.iter=250000,
n.burnin=100000,
n.chain=4,
progress.bar=”gui”)
jagsfit
traceplot(jagsfit)
#dimnames(jagsfit$BUGSoutput$sims.matrix)[2]
samples <- jagsfit$BUGSoutput$sims.matrix[,31:81]
r10$pred <- apply(samples,2,mean)
r10$l05 <- apply(samples,2,function(x) quantile(x,.05))
r10$l95 <- apply(samples,2,function(x) quantile(x,.95))
r11 <- subset(r10,,c(State,l05,l95,pred,Incidence,StateRegion))
r11 <- r11[order(r11$pred),]
r11$State <- factor(r11$State,levels=r11$State)
p <- ggplot(r11, aes(y=pred, x=State,col=StateRegion))
limits <- aes(ymin =l05, ymax=l95)
dodge <- position_dodge(width=0.9)
p +geom_point(position=dodge, stat=”identity”) +
geom_errorbar(limits, position=dodge, width=0.25) +
theme(legend.position = “bottom”) +
geom_point(aes(y=Incidence,x=State),
position=dodge, stat=”identity”,col=’black’) +
ylab(‘Incidence’) +
guides(colour=guide_legend(title=’Region’,nrow=3)) +
coord_flip()