###################################################
## Classification and regression trees ############
###################################################
#
# Fits an rpart classification tree (vowel ~ dur + f2) and an rpart
# regression tree (dur ~ f2) on the `longvow` data, plots both trees and
# evaluates them on `vowtest`. Results left in the workspace:
#   e_class  percentage of misclassified test observations
#   e_reg    root mean squared error of the predicted duration

## cleaning up ##########################
# rm() without arguments removes nothing, so no call is made here.
# Uncomment to close an already open graphics device:
# dev.off()

## require package ######################
# library() stops with an error if rpart is not installed, whereas
# require() would only return FALSE and let the script fail later on.
library(rpart)

## data input ###########################
# training data; the file is expected to provide the object `longvow`
load("longvow.RData")
# test data (here identical to the training data)
vowtest <- longvow

#### classification tree #################
## training
myclasstree <- rpart(vowel ~ dur + f2, data = longvow)

# plot
dev.new()
par(mfrow = c(1, 2), xpd = NA) # otherwise on some devices the text is clipped
plot(myclasstree)
text(myclasstree, use.n = TRUE)

## application: classify new objects
# returns the assignment probability for each class:
# one row per observation, one column per class
class_pred <- predict(myclasstree, vowtest)

## evaluation: proportion of wrong predictions
# vector of vowel class types, taken from the columns of the prediction
# matrix so that reference and predicted indices share one numbering
# (assumes predict() names the columns after the class levels)
lev_ref <- colnames(class_pred)

# reference class index vector: position of each observed vowel in lev_ref
i_ref <- match(as.character(vowtest$vowel), lev_ref)

# prediction: class with the highest probability
# i_pred: column (class) indices of the predictions
# apply(*, 1, fun): row-wise application of fun
# which.max: index of the max value
i_pred <- apply(class_pred, 1, which.max)

# classification error: percentage of misclassified observations.
# The denominator is the number of observations; length(class_pred) would
# count every cell of the probability matrix (observations x classes).
e_class <- mean(i_pred != i_ref) * 100

#### regression tree ######################
# training
myregtree <- rpart(dur ~ f2, data = longvow)

# plot
dev.new()
par(mfrow = c(1, 2), xpd = NA) # otherwise on some devices the text is clipped
plot(myregtree)
text(myregtree, use.n = TRUE)

# application: predict duration
reg_pred <- predict(myregtree, vowtest)

# evaluation:
# root mean squared error between predicted and observed duration
e_reg <- sqrt(mean((reg_pred - vowtest$dur)^2))