# Lab: Decision Trees


## Fitting Classification Trees

###
library(tree)
###
library(ISLR2)
attach(Carseats)
High <- factor(ifelse(Sales <= 8, "No", "Yes"))
###
Carseats <- data.frame(Carseats, High)
###
tree.carseats <- tree(High ~ . - Sales, Carseats)
?tree
## the default is split = "deviance", which uses entropy as the impurity
## measure in the classification case
###
summary(tree.carseats)
### in our notation the deviance is exactly n*C(T) (see (7.12))
plot(tree.carseats)
text(tree.carseats, pretty = 0, cex = 0.7)
### the first split separates ShelveLoc = Bad or Medium from ShelveLoc = Good
tree.carseats
###
set.seed(2)
train <- sample(1:nrow(Carseats), 200)
Carseats.test <- Carseats[-train, ]
High.test <- High[-train]
tree.carseats <- tree(High ~ . - Sales, Carseats, subset = train)
tree.pred <- predict(tree.carseats, Carseats.test, type = "class")
table(tree.pred, High.test)
(104 + 50) / 200
### this is 1 - test error, i.e. the test accuracy
set.seed(7)
cv.carseats <- cv.tree(tree.carseats, FUN = prune.misclass)
# for pruning we use the 0-1 (misclassification) impurity measure
cv.carseats
### dev is now the total number of observations misclassified in the CV procedure
plot(cv.carseats, type = "b")
###
prune.carseats <- prune.misclass(tree.carseats, best = 9)
plot(prune.carseats)
text(prune.carseats, pretty = 0)
###
tree.pred <- predict(prune.carseats, Carseats.test, type = "class")
table(tree.pred, High.test)
(97 + 58) / 200
### not a big gain in accuracy, but we obtained a smaller (hence more
### interpretable) tree
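###
# A minimal sketch (our addition, not part of the original lab) of the
# entropy impurity that split = "deviance" is based on: for class
# proportions p_k at a node, the impurity is -sum_k p_k * log(p_k).
# The helper name `node_entropy` is our own choice for illustration.
node_entropy <- function(y) {
  p <- prop.table(table(y))
  p <- p[p > 0]      # drop empty classes to avoid 0 * log(0)
  -sum(p * log(p))
}
node_entropy(High)   # impurity of the root node, before any split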
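###
# Sanity check (our addition) for the comment that the deviance equals
# n*C(T): deviance() on a tree object returns the total deviance, and
# dividing by n minus the number of terminal nodes should reproduce the
# "Residual mean deviance" that summary(tree.carseats) reports.
deviance(tree.carseats)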
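###
# Instead of hard-coding counts from the confusion matrix, the test
# accuracy can be computed directly; a small sketch (our addition),
# here applied to the pruned tree's predictions:
mean(tree.pred == High.test)      # fraction of correctly classified test cases
1 - mean(tree.pred == High.test)  # the test error itself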
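###
# A hedged sketch (our addition) for picking the pruning size
# programmatically instead of reading best = 9 off the CV plot:
# take the size with minimal CV dev (which.min picks the first minimum
# in case of ties).
best.size <- cv.carseats$size[which.min(cv.carseats$dev)]
best.size
prune.auto <- prune.misclass(tree.carseats, best = best.size)
summary(prune.auto)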
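###
# Side note (our addition): omitting type = "class" in predict() returns
# a matrix of per-class probabilities for each test observation rather
# than hard labels.
head(predict(prune.carseats, Carseats.test))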