# Unemployment revisited

February 23, 2014
By

(This article was first published on Wiekvoet, and kindly contributed to R-bloggers)

Approximately a year ago I made a post graphing unemployment in Europe and other locations. I have always wanted to do this again, not because the R code would be so interesting, but simply because I wanted to see the plots. As time progressed I tried to do this in Julia rather than in R. I could not get it to work well enough in Julia, so this is, alas, the R version.

### Data

The data are from Eurostat. Or, if you are lazy, Google une_rt_m, which is the name of the table. There is a bit of pre-processing of the data, mostly shortening the country names so that they are suitable for plotting. The plots shown are unemployment and its first derivative, both smoothed.

### Code

library(ggplot2)
library(KernSmooth)
library(plyr)
library(scales) # to access breaks/formatting functions

# Clean up the Eurostat labels in `r1` and derive the plotting columns.
# NOTE(review): `r1` is never loaded in this listing; it must already hold the
# une_rt_m table (columns GEO, AGE, TIME, Value are the ones used below).

# `levels<-` only renames categories on a factor. read.csv() returns character
# columns by default since R 4.0, so coerce first (a no-op for factors).
r1$GEO <- as.factor(r1$GEO)

# Shorten the long official names so they fit on facet strips.
levels(r1$GEO) <- sub(" countries)", ")", levels(r1$GEO), fixed = TRUE)
levels(r1$GEO) <- sub("European Union", "EU", levels(r1$GEO))
levels(r1$GEO)[levels(r1$GEO) == "Euro area (EA11-2000, EA12-2006, EA13-2007, EA15-2008, EA16-2010, EA17-2013, EA18)"] <- "EAll"
levels(r1$GEO)[levels(r1$GEO) == "United Kingdom"] <- "UK"
levels(r1$GEO)[levels(r1$GEO) == "United States"] <- "US"
levels(r1$GEO)[levels(r1$GEO) == "Germany (until 1990 former territory of the FRG)"] <- "Germany"
levels(r1$GEO)

# Labels containing one of these member counts are removed from the data.
# This runs after the renaming above, so "EAll" itself is kept.
old_aggregates <- grep("12|13|15|16|17|25|27", x = levels(r1$GEO), value = TRUE)
old_aggregates
r1 <- r1[!(r1$GEO %in% old_aggregates), ]
r1$GEO <- factor(r1$GEO) # drop the levels of the rows just removed

# as.factor() keeps an existing factor's levels and also works when AGE was
# read as character (factor(x, levels = levels(x)) would give all NA then).
r1$Age <- as.factor(r1$AGE)

# TIME looks like "2013M01"; turn it into the first day of that month.
r1$Date <- as.Date(paste0(gsub("M", "-", as.character(r1$TIME)), "-01"))

# Split the countries into three groups (low / middle / high) by the maximum
# unemployment value each one ever reached, so every plot shows countries of a
# comparable scale.
maxi <- aggregate(r1$Value, by = list(GEO = r1$GEO), FUN = max, na.rm = TRUE)

# `parts` is a data frame with one column of country names per group.
# Every country lands in exactly one group: a maximum exactly on a cut point
# goes to the lower group instead of being dropped. Shorter groups are padded
# with NA so the columns have equal length; NA never matches a real GEO value
# in `%in%`, so `parts[, i]` can still be used directly as a filter.
parts <- local({
  cuts <- quantile(maxi$x, c(1, 2) / 3)
  tercile <- 1 + (maxi$x > cuts[1]) + (maxi$x > cuts[2])
  groups <- lapply(1:3, function(k) as.character(maxi$GEO[tercile == k]))
  names(groups) <- c("low", "middle", "high")
  n_max <- max(vapply(groups, length, integer(1)))
  padded <- lapply(groups, function(g) {
    c(g, rep(NA_character_, n_max - length(g)))
  })
  data.frame(padded, stringsAsFactors = FALSE)
})

# Exploratory plot of the raw (unsmoothed) series for one group, kept for
# reference only:
# ggplot(r1[r1$GEO %in% parts$low, ], aes(x = Date, y = Value, colour = Age)) +
#   facet_wrap(~ GEO, drop = TRUE) +
#   geom_line() +
#   theme(legend.position = "bottom") +
#   ylab("% Unemployment") + xlab("Year")

# One series per country x age-group combination.
r1$class <- interaction(r1$GEO, r1$Age)

# Keep only rows without any missing value, then drop the combinations that
# no longer occur so ddply() does not visit empty groups.
r3 <- r1[complete.cases(r1), ]
r3$class <- factor(r3$class)

# Smoothed unemployment level per series: local polynomial fit (drv = 0).
# Dates are passed as numbers of days, so the bandwidth of 90 is in days.
# Result columns: class, Date, sPerc (smoothed value), Age, GEO.
Perc <- ddply(
  .data = r3,
  .variables = .(class),
  .fun = function(piece, ...) {
    lp <- locpoly(
      x = as.numeric(piece$Date), y = piece$Value,
      drv = 0, bandwidth = 90
    )
    data.frame(
      Date = as.Date(lp$x, origin = "1970-01-01"),
      sPerc = lp$y,
      Age = piece$Age[1],
      GEO = piece$GEO[1]
    )
  },
  .inform = FALSE
)
# Write one PNG per group (low.png, middle.png, high.png) with the smoothed
# unemployment series, one facet per country and one line per age group.
for (i in c("low", "middle", "high")) {
  png(paste0(i, ".png"))
  # dev.off() sits in `finally` so a failing plot does not leave the PNG
  # device open for everything that runs afterwards.
  tryCatch(
    print(
      ggplot(
        Perc[Perc$GEO %in% parts[, i], ],
        aes(x = Date, y = sPerc, colour = Age)
      ) +
        facet_wrap(~ GEO, drop = TRUE) +
        geom_line() +
        theme(legend.position = "bottom") +
        ylab("% Unemployment") + xlab("Year") +
        scale_x_date(
          breaks = date_breaks("5 years"),
          labels = date_format("%y")
        )
    ),
    finally = dev.off()
  )
}

# Smoothed first derivative of unemployment per series (drv = 1).
# Dates are passed as numbers of days, so the bandwidth is half a year in days
# and the derivative is expressed per day.
# Result columns: class, Date, dPerc (smoothed derivative), Age, GEO.
dPerc <- ddply(
  .data = r3,
  .variables = .(class),
  .fun = function(piece, ...) {
    lp <- locpoly(
      x = as.numeric(piece$Date), y = piece$Value,
      drv = 1, bandwidth = 365 / 2
    )
    data.frame(
      Date = as.Date(lp$x, origin = "1970-01-01"),
      dPerc = lp$y,
      Age = piece$Age[1],
      GEO = piece$GEO[1]
    )
  },
  .inform = FALSE
)

# Write one PNG per group (dlow.png, dmiddle.png, dhigh.png) with the smoothed
# change in unemployment, one facet per country and one line per age group.
for (i in c("low", "middle", "high")) {
  png(paste0("d", i, ".png"))
  # dev.off() sits in `finally` so a failing plot does not leave the PNG
  # device open for everything that runs afterwards.
  tryCatch(
    print(
      ggplot(
        dPerc[dPerc$GEO %in% parts[, i], ],
        aes(x = Date, y = dPerc, colour = Age)
      ) +
        facet_wrap(~ GEO, drop = TRUE) +
        geom_line() +
        theme(legend.position = "bottom") +
        ylab("Change in % Unemployment") + xlab("Year") +
        scale_x_date(
          breaks = date_breaks("5 years"),
          labels = date_format("%y")
        )
    ),
    finally = dev.off()
  )
}