# k-nearest-neighbour classification of the breast cancer data.
#
# Compares the 10-fold cross-validation (CV) accuracy estimate with the
# apparent (resubstitution) accuracy for k = 1, ..., K: first on a single
# random fold partition, then averaged over several independent partitions,
# and finally with e1071::tune.knn().

library(class)  # knn()
library(e1071)  # tune.knn(), tune.control()

# Data ----

data.file <- "C:/Documents and Settings/steeleb/My Documents/Math 542/BreastCancer.txt"
Data <- na.omit(read.table(data.file, header = FALSE))
names(Data) <- c(
  "Obsvn", "ID", "thick", "uniformSize", "uniformShape", "adhesion",
  "cellSize", "bareNuclei", "Chromatin", "normalNuclei", "Mitoses", "Class"
)
dim(Data)
p <- dim(Data)[2] - 3  # predictors: every column except Obsvn, ID and Class
head(Data)

y <- 1 + as.integer(Data$Class == "malignant")  # 2 = malignant, 1 = otherwise
X <- Data[, 3:(2 + p)]
n <- dim(Data)[1]

# Helpers ----

# Classify the rows of `test` by k-NN and return the predicted labels as
# numbers. knn() returns a factor; converting through the level names (not the
# integer codes) recovers the original labels even when a class is missing
# from `cl`.
knn_predict_labels <- function(train, test, cl, k) {
  fit <- knn(
    train = train, test = test, cl = cl, k = k,
    l = 0, prob = FALSE, use.all = TRUE
  )
  as.numeric(as.character(fit))
}

# Cross-validated and apparent k-NN predictions and accuracies, k = 1, ..., K.
#
# X      predictors, one row per observation.
# y      numeric class labels, one per row of X.
# folds  fold label of each observation; each distinct label is held out once.
# K      largest neighbourhood size to evaluate.
#
# Returns a list with the n x K prediction matrices `predictions.cv` and
# `predictions.app` and the length-K accuracy vectors `acc.cv` and `acc.app`.
knn_accuracy_by_k <- function(X, y, folds, K) {
  fold.ids <- sort(unique(folds))
  pred.cv <- matrix(0, length(y), K)
  pred.app <- matrix(0, length(y), K)
  for (k in seq_len(K)) {
    # Iterate over the fold labels actually present, not over 1..#labels.
    for (fold in fold.ids) {
      held.out <- folds == fold
      pred.cv[held.out, k] <- knn_predict_labels(
        X[!held.out, , drop = FALSE],
        X[held.out, , drop = FALSE],
        y[!held.out],
        k
      )
    }
    # The apparent fit uses all the data, so it is done once per k rather
    # than once per fold.
    pred.app[, k] <- knn_predict_labels(X, X, y, k)
  }
  list(
    predictions.cv = pred.cv,
    predictions.app = pred.app,
    acc.cv = colMeans(pred.cv == y),
    acc.app = colMeans(pred.app == y)
  )
}

# Single fold partition ----

set.seed(123)
n.folds <- 10
r <- sample(n.folds, n, replace = TRUE)  # random fold label per observation
K <- 20
u <- sort(unique(r))

single <- knn_accuracy_by_k(X, y, r, K)
predictions.cv <- single$predictions.cv
predictions.app <- single$predictions.app
acc.cv <- single$acc.cv
acc.app <- single$acc.app
acc.cv

plot(c(1, K), c(0.95, 1), type = "n", xlab = "k", ylab = "Accuracy estimate")
lines(1:K, acc.cv, col = 1)
lines(1:K, acc.app, col = 2)
legend("topright",
  legend = c("CV estimate", "Apparent accuracy estimate"),
  col = 1:2, lty = 1
)

############################## 10 repetitions

n.reps <- 10
acc.cv <- matrix(0, n.reps, K)
acc.app <- matrix(0, n.reps, K)
for (rept in seq_len(n.reps)) {
  # Draw a fresh partition for every repetition; reusing `r` would only
  # re-evaluate the same folds. `r` itself is left as drawn above.
  r.rept <- sample(n.folds, n, replace = TRUE)
  res <- knn_accuracy_by_k(X, y, r.rept, K)
  predictions.cv <- res$predictions.cv
  predictions.app <- res$predictions.app
  acc.cv[rept, ] <- res$acc.cv
  acc.app[rept, ] <- res$acc.app
}
acc.cv.10 <- colMeans(acc.cv)
acc.app.10 <- colMeans(acc.app)

plot(c(1, K), c(0.95, 1), type = "n", xlab = "k", ylab = "Accuracy estimate")
lines(1:K, acc.cv.10, col = 1)
lines(1:K, acc.app.10, col = 2)
# The line break inside the second label is kept from the original script.
legend("topright",
  legend = c("CV estimate", "Apparent \naccuracy estimate"),
  col = 1:2, lty = 1
)
# Curves of the first repetition alone, for comparison with the averages.
lines(1:K, acc.cv[1, ], col = 3)
lines(1:K, acc.app[1, ], col = 4)

# Tuning k with e1071 ----

tune.obj <- tune.knn(
  x = X, y = as.factor(y), k = 1:25,
  tunecontrol = tune.control(sampling = "cross", cross = 10)
)
summary(tune.obj)