### more on trees - the tree library

# Working copy of Cars93 with the columns Manufacturer, Model,
# Rear.seat.room, Luggage.room and Make removed, so that formulas of the
# form `Type ~ .` further down only pick up the remaining columns.
cars93 <- Cars93[, !(names(Cars93) %in% c("Manufacturer", "Model",
  "Rear.seat.room", "Luggage.room", "Make")), drop = FALSE]

# List the retained column names without surrounding quotation marks.
print(noquote(names(cars93)))


###

library(tree)

# Classification tree for Type on every other column of cars93.
# minsize = 5 is handed on to the tree-growing controls.
cars93.t1 <- tree(Type ~ ., cars93, minsize = 5)

# dev.new() opens the platform's default graphics device with the requested
# size; x11() is not available on every system.
dev.new(width = 8, height = 6)
plot(cars93.t1)
text(cars93.t1, cex = 0.75)

# Confusion matrix on the training data: rows are the observed Type,
# columns the class predicted by the fitted tree.
with(cars93, table(Type, predict(cars93.t1, type = "class")))

# Four independent cross-validation runs on a 2 x 2 grid, pruning with
# prune.tree.
par(mfrow = c(2, 2))
for (j in seq_len(4)) {
  plot(cv.tree(cars93.t1, FUN = prune.tree))
}

# an alternative criterion: prune by misclassification instead
for (j in seq_len(4)) {
  plot(cv.tree(cars93.t1, FUN = prune.misclass))
}

###

# Two panels side by side, one per pruning criterion, both cut back with
# best = 6.
par(mfrow = c(1, 2))

# Left panel: pruned with prune.misclass.
cars93.t2 <- prune.misclass(cars93.t1, best = 6)
plot(cars93.t2, type = "u")
text(cars93.t2)

# Right panel: pruned with prune.tree.
cars93.t3 <- prune.tree(cars93.t1, best = 6)
plot(cars93.t3, type = "u")
text(cars93.t3)

### prediction

# Predicted class for every car from the prune.tree-pruned tree. The
# predictions are made on cars93, the data frame the tree was grown from and
# the one the table below is evaluated in.
pred <- predict(cars93.t3, cars93, type = "class")

# Rows = observed Type, columns = predicted class, the same orientation as
# the other confusion matrices in this script, so they compare directly.
with(cars93, table(Type, pred))

## comparison with multinomial models

library(nnet)
# Multinomial model for Type on four predictors, allowing the optimiser up
# to 1000 iterations.
m <- multinom(
  Type ~ Width + EngineSize + Passengers + Origin,
  data = cars93,
  maxit = 1000
)

# Fitted classes on the training data, cross-tabulated against the
# observed Type (rows = observed, columns = fitted).
pfm <- predict(m, type = "class")
with(cars93, table(Type, pfm))



### the case of two predictors only

# Tree for Type using only Width and EngineSize, so that the fitted
# partition can be drawn in the plane.
cars.2t <- tree(Type ~ Width + EngineSize, data = Cars93)
par(mfrow = c(1, 1))
plot(cars.2t)
text(cars.2t)

# Four cross-validation runs (misclassification pruning) on a 2 x 2 grid.
par(mfrow = c(2, 2))
for (j in seq_len(4)) {
  plot(cv.tree(cars.2t, FUN = prune.misclass))
}

# Prune with best = 6 and redraw on a single panel.
par(mfrow = c(1, 1))
cars.2t1 <- prune.misclass(cars.2t, best = 6)
plot(cars.2t1)
text(cars.2t1)

# Draw the partition of the (Width, EngineSize) plane, then overlay the
# cars, coloured by the integer code of their Type, with a matching legend.
partition.tree(cars.2t1)
points(cars93$Width, cars93$EngineSize, pch = 8,
  col = as.numeric(cars93$Type), cex = 0.5)
legend("topleft", levels(cars93$Type), pch = 8,
  col = seq_along(levels(cars93$Type)), bty = "n")

### one-dimensional trees

# Tree with a single predictor: Hardness on Density.
janka.t1 <- tree(Hardness ~ Density, data = janka)

# Plot the fitted partition and add the observations in red.
partition.tree(janka.t1)
points(janka$Density, janka$Hardness, col = "red")

### the credit card data

# CC has 1620 rows (nrow(CC) returned 1620), so the 810 rows sampled below
# form a 50/50 training/test split.

# Fix the RNG seed so that the split below can be reproduced.
set.seed(32867700) # my phone number at home
# 810 distinct row indices (sample() draws without replacement by default).
ind <- sample(nrow(CC), 810)
CCTrain <- CC[ind, ]
# Negative indexing keeps exactly the rows that were not sampled.
CCTest <- CC[-ind, ]
# NOTE(review): Store() is not defined in this file; presumably it comes
# from the SOAR package and caches the two objects -- confirm.
Store(CCTrain, CCTest)

# Tree for credit.card.owner on all other columns of the training half.
CC.t1 <- tree(credit.card.owner ~ ., CCTrain)
# 2 x 2 grid: two cross-validation plots per pruning criterion, each from a
# fresh call to cv.tree.
par(mfrow = c(2,2))
for(j in 1:2)
  plot(cv.tree(CC.t1, FUN = prune.misclass))
for(j in 1:2)
  plot(cv.tree(CC.t1, FUN = prune.tree))

# Back to a single panel for the full tree.
par(mfrow=c(1,1))
plot(CC.t1, type = "u"); text(CC.t1)

# Pruned alternative requested with best = 6, using misclassification as
# the pruning criterion.
CC.t2 <- prune.misclass(CC.t1, best = 6)

# Percentage misclassification rate of a fitted classifier on a data set.
#
# fit:  a fitted model that has a formula() method and whose predict()
#       method accepts (fit, data, type = "class").
# data: data frame holding the predictors and the response; defaults to the
#       global hold-out set CCTest.
#
# Returns 100 * (cases whose predicted class differs from the observed
# class) / (cases where both are available). Cases with a missing observed
# or predicted class are left out, as they would be by table().
testPred2 <- function(fit, data = CCTest) {
  pred <- predict(fit, data, type = "class")

  # The response is the left-hand side of the model formula. Evaluate it
  # explicitly in `data` so that columns called `Y` or `pred` cannot shadow
  # the local variables.
  truth <- eval(formula(fit)[[2]], data)
  stopifnot(length(pred) == length(truth))

  # Compare labels case by case. Taking the diagonal of a cross-table is
  # only valid when rows and columns list the same classes in the same
  # order, which fails for character predictions or for classes that are
  # never predicted.
  pred <- as.character(pred)
  truth <- as.character(truth)
  scored <- !is.na(pred) & !is.na(truth)
  100 * sum(pred[scored] != truth[scored]) / sum(scored)
}

# Percentage error on the default data set (CCTest) for the full tree and
# for the pruned tree CC.t2.
testPred2(CC.t1)
testPred2(CC.t2)


### simple bagging

# Build a bagged ensemble by refitting a model on bootstrap resamples.
#
# object: a fitted model that supports update() and formula().
# data:   the data frame to resample; by default the `data` argument
#         recorded in object's call is re-evaluated, so that object must
#         still be visible from here.
# nBags:  number of bootstrap refits (0 gives an empty ensemble).
# ...:    accepted for compatibility; currently unused.
#
# Returns a list of nBags refitted models with class "bagTree" and the
# original model formula stored in the "formula" attribute.
baggedTree <- local({
  # Bootstrap resample: nrow(data) rows drawn with replacement. drop = FALSE
  # keeps a data frame even when `data` has a single column.
  bsample <- function(data) {
    data[sample(nrow(data), replace = TRUE), , drop = FALSE]
  }

  function(object, data = eval(object$call$data),
           nBags = 200, ...) {
    # Preallocate instead of growing the list one element at a time.
    bagsFull <- vector("list", nBags)
    for (j in seq_len(nBags)) {
      # update() re-evaluates the fitting call in this frame, where both
      # `data` and the enclosed helper bsample() are visible.
      bagsFull[[j]] <- update(object, data = bsample(data))
    }
    attr(bagsFull, "formula") <- formula(object)
    class(bagsFull) <- "bagTree"
    bagsFull
  }
})

# formula() method for "bagTree" objects: returns whatever is stored in the
# object's "formula" attribute (NULL if there is none).
formula.bagTree <- function(x, ...) {
  attr(x, which = "formula")
}

# predict() method for "bagTree" ensembles: majority vote over the trees.
#
# object:  a "bagTree" list of fitted models, each with a predict() method
#          that accepts type = "class".
# newdata: data frame of cases to classify.
# ...:     ignored (lets callers pass type = "class" as for a single tree).
#
# Returns a character vector with one predicted class label per row of
# newdata. Ties go to the label that sorts first.
predict.bagTree <- function(object, newdata, ...) {
  nCases <- nrow(newdata)
  if (length(object) == 0L) {
    stop("'object' contains no fitted models", call. = FALSE)
  }
  if (nCases == 0L) {
    return(character(0))
  }

  # One column of class labels per fitted model.
  vals <- vapply(
    object,
    function(fit) as.character(predict(fit, newdata, type = "class")),
    character(nCases)
  )
  # vapply() returns a plain vector when nCases == 1; force cases x models.
  dim(vals) <- c(nCases, length(object))

  # Distinct labels over all votes. as.vector() matters: unique() on a
  # matrix removes duplicate rows rather than duplicate values, which would
  # leave repeated labels here and make factor() below fail.
  svals <- sort(unique(as.vector(vals)))

  mVote <- apply(vals, 1,
    function(x) which.max(table(factor(x, levels = svals))))
  svals[mVote]
}

# Bagged ensemble built from CC.t1 with the baggedTree() defaults
# (200 bootstrap refits on the data named in CC.t1's call).
CC.bag <- baggedTree(CC.t1)

# Percentage error of the ensemble's majority vote on the default data set.
testPred2(CC.bag)

### random forests

library(randomForest)

# Random forest for the same response on the training half, with the
# package's default settings.
CC.rf <- randomForest(credit.card.owner ~ ., CCTrain)

# Percentage error of the random forest on the default data set.
testPred2(CC.rf)
