###################################################
## Classification and regression trees ############
###################################################

## cleaning up ##########################
# NOTE: the former `rm()` call had no arguments and therefore removed
# nothing; it has been dropped. The workspace is left untouched so that
# sourcing this script does not wipe objects of the calling session.
#dev.off()

## require package ######################
# library() stops with an error if rpart is not installed; require() would
# only warn and return FALSE, postponing the failure to the first rpart() call.
library(rpart)

## data input ###########################
# training data
# load() restores the objects saved in the file into the workspace; the
# remainder of the script expects one of them to be named `longvow`.
load("longvow.RData")
if (!exists("longvow")) {
  stop("'longvow.RData' did not provide an object named 'longvow'",
       call. = FALSE)
}

# test data
# The test set is simply a copy of the training set here.
vowtest <- longvow

#### classification tree #################
## training
# Grow a tree that predicts the vowel label from the predictors dur and f2.
myclasstree <- rpart(formula = vowel ~ dur + f2, data = longvow)

# plot
# Open a fresh graphics device. xpd = NA switches clipping off; otherwise
# the node labels get cut on some devices.
dev.new()
par(mfrow = c(1, 2), xpd = NA)
plot(myclasstree)
text(myclasstree, use.n = TRUE)

## application: classify new objects
# The result holds, for every test object, the assignment probability
# for each class.
class_pred <- predict(myclasstree, newdata = vowtest)

## evaluation: percentage of wrong predictions

# class_pred: each row contains the assignment probabilities
#    for each class; the columns are named after the classes.

# vector of vowel class types, taken from the columns of class_pred so that
# reference and predicted indices refer to the same class ordering even if
# the test data lack a class or carry unused factor levels
lev_ref <- colnames(class_pred)

# reference class index vector: position of each observed vowel in lev_ref
# (NA if the observed vowel is not among the predictable classes)
i_ref <- match(as.character(vowtest$vowel), lev_ref)

# prediction: class with highest probability
# i_pred: column (= class) indices of predictions
# apply(*, 1, fun): row-wise application of fun
# which.max: index of the (first) max value
# class_pred is passed as is: class_pred[,] would drop a one-row matrix
# to a plain vector, on which apply() fails
i_pred <- apply(class_pred, 1, which.max)

# classification error
# percentage of misclassified test objects; an observed vowel that the
# tree cannot predict at all (NA in i_ref) counts as misclassified
# Averaging over the objects divides by the number of rows; the former
# length(class_pred) was rows * classes and made the error rate too small.
e_class <- mean(is.na(i_ref) | i_pred != i_ref) * 100


#### regression tree ######################
# training
# Grow a tree that predicts dur from the single predictor f2.
myregtree <- rpart(formula = dur ~ f2, data = longvow)

# plot
# Open a fresh graphics device. xpd = NA switches clipping off; otherwise
# the node labels get cut on some devices.
dev.new()
par(mfrow = c(1, 2), xpd = NA)
plot(myregtree)
text(myregtree, use.n = TRUE)

# application: predict duration
reg_pred <- predict(myregtree, newdata = vowtest)

# evaluation:
# root mean squared error: square the differences between observed and
# predicted duration, average them, take the square root
e_reg <- sqrt(mean((vowtest$dur - reg_pred)^2))