# “I’m a Republican because…”, visualized with R

library(XML)
library(plyr)
library(ggplot2)
library(tm)
 
# Build `df`: one row per term found in the scraped tooltip titles, with its
# document-term-matrix frequency (`freq`) and plot coordinates (`x`, `y`).

# fetch & parse the HTML
doc <- htmlParse(
  "http://gop.com/index.php/learn/republican_faces/",
  isURL = TRUE
)
# pull the matching A elements of CSS class tipz
nodes <- getNodeSet(doc, "//a[@class='tipz']")
# extract the 'title' attribute; vapply() keeps the result a character vector
# even when no node matches, and a node without a title yields NA rather than
# a "subscript out of bounds" error
titles <- vapply(
  nodes,
  function(x) xmlGetAttr(x, "title", NA_character_),
  character(1)
)
titles <- titles[!is.na(titles)]
# clean up the title attribute (drop everything up to the first "::")
titles <- sub("^[^:]+::", "", titles)
# create the corpus and doc term matrix; control options must be named, so
# spell out `tolower = TRUE` instead of passing a bare "tolower" string
co <- Corpus(VectorSource(titles))
tdm <- DocumentTermMatrix(
  co,
  control = list(tolower = TRUE, removeNumbers = TRUE, stopwords = TRUE)
)
# extract the tags at each level; only terms whose frequency is exactly
# 1, 2, 3 or 4 are kept, so more frequent terms are not plotted
levels <- c(1, 2, 3, 4)
df <- do.call(rbind, lapply(levels, function(x) {
  terms <- findFreqTerms(tdm, x, x)
  # rep() keeps both columns the same length, so a level with no matching
  # terms contributes zero rows instead of making data.frame() fail
  data.frame(freq = rep(x, length(terms)), term = terms)
}))
# assign random non-repeating coordinates to the terms
df$x <- sample.int(nrow(df))
# jitter the radial position around the term's frequency
df$y <- df$freq + rnorm(nrow(df))
 
# clear standard graph options (thanks mike lawrence on r-help)
# `opts()` / `theme_blank()` were removed from ggplot2; `theme()` and
# `element_blank()` are their replacements and take the same element names.
clear <- theme(
  legend.position = "none",
  panel.grid.minor = element_blank(),
  panel.grid.major = element_blank(),
  panel.background = element_blank(),
  axis.line = element_blank(),
  axis.text.x = element_blank(),
  axis.text.y = element_blank(),
  axis.ticks = element_blank(),
  axis.title.x = element_blank(),
  axis.title.y = element_blank()
)
 
# Draw every term as a text label, coloured and sized by its frequency, on
# polar coordinates with all standard chart decoration stripped by `clear`.
term_labels <- ggplot(
  df,
  aes(x = x, y = y, colour = freq, label = term, size = freq)
) +
  geom_text()
p <- term_labels + coord_polar() + clear

# Write a screen-resolution PNG and a PDF with default settings.
ggsave("because.png", plot = p, dpi = 72, scale = 1.3)
ggsave("because.pdf", plot = p)