# Data Mining in A Nutshell

[This article was first published on **Econometric Sense**, and kindly contributed to R-bloggers]. (You can report issues about the content on this page here)

Want to share your content on R-bloggers? click here if you have a blog, or here if you don't.

library(rpart)

library(MASS)

library(class)

library(e1071)

#' Draw samples from a multivariate normal distribution.
#'
#' @param n Number of observations to draw.
#' @param mu Mean vector; expected to have one entry per row of `sigma`.
#' @param sigma Covariance matrix; it is passed to `chol()`.
#' @return A matrix with `n` rows and `nrow(sigma)` columns, one draw per row.
rmulnorm <- function(n, mu, sigma) {
  # Lower-triangular factor: chol() returns the upper factor, so transpose it.
  lower <- t(chol(sigma))
  dims <- nrow(sigma)

  # One column of standard-normal noise per requested observation.
  noise <- matrix(rnorm(dims * n), nrow = dims, ncol = n)

  # Adding mu recycles it down each column, shifting row i by mu[i].
  shifted <- lower %*% noise + mu
  t(shifted)
}

#' Build a confusion matrix whose rows are aligned to the actual classes.
#'
#' Cross-tabulates `predicted` against `actual`, then reorders the rows so
#' that row i is the predicted label most often seen for actual class i.
#'
#' @param actual Vector of true class labels.
#' @param predicted Vector of predicted labels, same length as `actual`.
#' @return The row-reordered contingency table of predicted vs. actual.
cm <- function(actual, predicted) {
  counts <- table(predicted, actual)

  # For every actual class (column), find the row holding the largest count.
  dominant_row <- apply(counts, 2, function(col_counts) order(-col_counts)[1])

  counts[dominant_row, ]
}

# Simulation setup: N observations split evenly over three classes, with 70%
# of them reserved for training.
N <- 3 * 1000
Ntrain <- 0.7 * N

# One bivariate Gaussian cluster per class (mean vector, 2x2 covariance).
A <- rmulnorm(N / 3, c(1, 1),
              matrix(c(4, -6, -6, 18), nrow = 2, ncol = 2))
B <- rmulnorm(N / 3, c(8, 1),
              matrix(c(1, 0, 0, 1), nrow = 2, ncol = 2))
C <- rmulnorm(N / 3, c(3, 8),
              matrix(c(4, 0.5, 0.5, 2), nrow = 2, ncol = 2))

# Stack the clusters into one data frame and label every row with its class.
data <- data.frame(rbind(A, B, C))
names(data) <- c("x", "y")
data$class <- rep(c("A", "B", "C"), each = N / 3)

#' Plot the three simulated clusters, drawing each point as its class letter.
#'
#' Takes no arguments: it reads the global objects `data`, `A`, `B` and `C`.
#' Called for its side effect of drawing on the active graphics device.
plot_it <- function() {
  # Empty canvas (type = "n") whose axes are sized from the first two columns.
  plot(data[, 1:2], type = "n")

  points(A, pch = "A", col = "red")
  points(B, pch = "B", col = "blue")
  points(C, pch = "C", col = "orange")
}

# Draw the clusters before the rows are shuffled.
plot_it()

# Shuffle the rows, then use the first Ntrain rows for training and the
# remaining rows for testing.
data <- data[sample(N), ]
train <- data[seq_len(Ntrain), ]
test <- data[seq(Ntrain + 1, N), ]

# Support vector machine: classify the points from their x/y coordinates.
# The kernel parameter is spelled `gamma`; the earlier `gama` spelling is not
# an svm() argument, so the requested value would not have been applied.
s <- svm(I(factor(class)) ~ x + y, data = train, cost = 100, gamma = 1)
s # print model results

# Confusion matrix and misclassification rate on the training data.
(m <- cm(train$class, predict(s)))
1 - sum(diag(m)) / sum(m)

# Confusion matrix and misclassification rate on the held-out test data.
(m <- cm(test$class, predict(s, test[, 1:2])))
1 - sum(diag(m)) / sum(m)

# Show the column names of both splits.
names(train)
names(test)

# Decision tree: fit on the training data, then inspect the tree, its
# complexity-parameter table and the associated diagnostic plots.
(r <- rpart(class ~ x + y, data = train))
plot(r)
text(r)
summary(r)
plotcp(r)
printcp(r)
rsq.rpart(r)

# Predicted vs. actual class counts on the test data.
cat("\nTEST DATA Error Matrix – Counts\n\n")
print(table(predict(r, test, type = "class"), test$class,
            dnn = c("Predicted", "Actual")))

# predict(r) yields one row of per-class scores for each training row; keep
# the column index of the highest score as the predicted class.
predicted <- as.numeric(apply(predict(r), 1, function(p) order(-p)[1]))

# Confusion matrix and misclassification rate on the training data.
(m <- cm(train$class, predicted))
1 - sum(diag(m)) / sum(m)

# Neural Network

# library() fails loudly if nnet is missing, whereas require() only returns
# FALSE and lets the script continue to a confusing error later.
library(nnet, quietly = TRUE)

# Fit a network with 10 hidden units and skip-layer connections on the
# training data, using every other column of `train` as a predictor.
crs_nnet <- nnet(as.factor(class) ~ ., data = train, size = 10, skip = TRUE,
                 trace = FALSE, maxit = 1000)

print(crs_nnet)
print("\n\nNetwork Weights:\n\n")
print(summary(crs_nnet))

# Predicted classes for the training rows, cross-tabulated against the truth.
crs_pr <- predict(crs_nnet, train, type = "class")
crs_tab <- table(crs_pr, train$class, dnn = c("Predicted", "Actual"))

# Counts, then the same table as rounded percentages of all training rows.
crs_tab
round(100 * crs_tab / length(crs_pr))

# Calculate the overall error rate: the fraction of training rows whose
# predicted class differs from the actual class. The previous formula only
# handled two classes correctly; with three or more it printed a vector
# derived from the first table row instead of a single rate.
print("Overall Error Rate")
cat(mean(crs_pr != train$class), "\n")

To **leave a comment** for the author, please follow the link and comment on their blog:

**Econometric Sense**.

R-bloggers.com offers

**daily e-mail updates** about R news and tutorials about learning R and many other topics. Click here if you're looking to post or find an R/data-science job.

Want to share your content on R-bloggers? click here if you have a blog, or here if you don't.