# k-nearest-neighbour (k = 1) classification of the `fgl` data from MASS.
#
# Columns 1:9 of `fgl` are used as predictors and column 10 as the class
# label passed to `knn()` as `cl`. The script compares several accuracy
# estimates (resubstitution, leave-one-out, 10-fold, repeated 10-fold),
# plots the first two principal components, and prints a confusion table.

library(MASS)
library(class)
library(lattice)

data(fgl)
head(fgl)
table(fgl$type)

# Resubstitution accuracy: the training set is also the test set, so this
# estimate is optimistic.
predictions <- knn(
  train = fgl[, 1:9], test = fgl[, 1:9], cl = fgl[, 10],
  k = 1, l = 0, prob = FALSE, use.all = TRUE
)
mean(predictions == fgl[, 10])

# Leave-one-out cross-validation via knn.cv().
predictions <- knn.cv(
  train = fgl[, 1:9], cl = fgl[, 10],
  k = 1, l = 0, prob = FALSE, use.all = TRUE
)
mean(predictions == fgl[, 10]) # n-fold cross-validation estimate

# Principal components from the eigen-decomposition of the correlation
# matrix: project the standardised predictors onto the first two eigenvectors.
X <- as.matrix(fgl[, 1:9])
R <- cor(X)
e <- eigen(R)
PC <- scale(X) %*% e$vectors[, 1:2]
xyplot(PC[, 2] ~ PC[, 1], groups = fgl[, 10], col = 1:10, pch = 16)

# Single 10-fold cross-validation. Fold labels are drawn with replacement,
# so fold sizes are random rather than balanced.
n <- dim(fgl)[1]
set.seed(123)
r <- sample(10, n, replace = TRUE)
predictions <- fgl[, 10] # placeholder with the right levels; overwritten below
u <- sort(unique(r))
# Iterate over the fold labels that actually occur, so the held-out mask
# `r == i` always matches a real fold even if some label was never drawn.
for (i in u) {
  predictions[r == i] <- knn(
    train = fgl[r != i, 1:9], test = fgl[r == i, 1:9], cl = fgl[r != i, 10],
    k = 1, l = 0, prob = FALSE, use.all = TRUE
  )
  cat("fold", i, "acc=", mean(predictions[r == i] == fgl[r == i, 10]), "\n",
      sep = " ")
}
mean(predictions == fgl[, 10]) # 10-fold cross-validation estimate

# Repeated 10-fold cross-validation: one random partition per seed, with the
# overall accuracy of each repetition stored in Acc.cv.
reps <- 100
Acc.cv <- numeric(reps)
for (j in seq_len(reps)) {
  set.seed(j)
  r <- sample(10, n, replace = TRUE)
  u <- sort(unique(r))
  for (i in u) {
    predictions[r == i] <- knn(
      train = fgl[r != i, 1:9], test = fgl[r == i, 1:9], cl = fgl[r != i, 10],
      k = 1, l = 0, prob = FALSE, use.all = TRUE
    )
  }
  Acc.cv[j] <- mean(predictions == fgl[, 10])
}
mean(Acc.cv) # better 10-fold cross-validation estimate

# Confusion table. `predictions` here holds the out-of-fold predictions from
# the last repetition (j == reps) only, not an aggregate over repetitions.
y <- fgl[, 10]
tabl <- table(fgl[, 10], predictions)
options(digits = 3)
100 * diag(tabl / rowSums(tabl)) # per-class accuracy (%)
rowSums(tabl)                    # number of observations per actual class
100 * sum(diag(tabl)) / sum(tabl) # overall accuracy (%)

# Append row and column totals to the confusion table.
tabl2 <- rbind(cbind(tabl, rowSums(tabl)), c(colSums(tabl), sum(tabl)))
rownames(tabl2) <- c(rownames(tabl), "Total")
colnames(tabl2) <- c(colnames(tabl), "Total")
"Rows = actual columns = predicted"
tabl2