# BreastCancer.R
#
# Exploratory analysis of the breast cancer data:
#   1. correlation between two predictors, computed by hand from centered,
#      unit-length vectors;
#   2. a bubble plot of the joint frequency table of those two predictors;
#   3. projection of the standardized predictors onto the first two
#      eigenvectors of the correlation matrix (principal components);
#   4. a check that the scaled, centered data matrix Z satisfies Z'Z = R.

library(lattice)
trellis.par.set(canonical.theme(color = FALSE))

# ---- Read the data ----------------------------------------------------------

# change this path for your computer:
# NOTE(review): na.omit() only drops rows holding NA; this assumes missing
# values are coded as NA in the file -- confirm against the data file.
Data <- na.omit(read.table("C:/Documents and Settings/steeleb/My Documents//Math 542/BreastCancer.txt", header = FALSE))
names(Data) <- c("Obsvn", "ID", "thick", "uniformSize", "uniformShape",
                 "adhesion", "cellSize", "bareNuclei", "Chromatin",
                 "normalNuclei", "Mitoses", "Class")
dim(Data)

# Number of observations kept after na.omit(). The original script used `n`
# without ever defining it.
n <- nrow(Data)

# Columns 3:p are the predictors; columns 1-2 are identifiers and the last
# column (p + 1) is Class.
p <- dim(Data)[2] - 1
head(Data)
cor(Data[, 3:p])

# Pull out the columns used below as plain vectors instead of attach()-ing the
# data frame. Keeping the same names leaves the default plot axis labels
# unchanged.
uniformSize <- Data$uniformSize
uniformShape <- Data$uniformShape
thick <- Data$thick
Class <- Data$Class

# ---- Correlation as an inner product ----------------------------------------

plot(uniformSize, uniformShape)
cor(uniformSize, uniformShape)

# Center x and scale it to unit length, so mean(x) is 0 and x'x is 1.
x <- (uniformSize - mean(uniformSize))
x <- x / sqrt(sum(x^2))
mean(x)
t(x) %*% x

# Same treatment for y.
y <- (uniformShape - mean(uniformShape))
y <- y / sqrt(sum(y^2))
mean(y)
t(y) %*% y

# The inner product of the two unit vectors is compared with cor() above.
t(x) %*% y

# ---- Bubble plot of the joint frequency table -------------------------------

D <- table(uniformSize, uniformShape)
D

# as.vector() unrolls the table column by column, so the row index
# (uniformSize) varies fastest: u cycles 1..10 while v changes every 10 cells.
# NOTE(review): this assumes both variables take all ten values 1..10, so that
# D is 10 x 10 -- confirm against the data.
z <- as.vector(D)
u <- rep(1:10, times = 10)
v <- rep(1:10, each = 10)
cbind(u, v, z)
symbols(u, v, circles = z)

# ---- Two predictors, colored by class ---------------------------------------

plot(uniformShape, thick)
I <- Class == "benign"
sum(I) / n  # proportion of benign observations
points(uniformShape[I], thick[I], col = 2, pch = 16)
points(uniformShape[!I], thick[!I], col = 3, pch = 16)

# ---- Projection onto the first two principal components ---------------------

R <- cor(Data[, 3:p])
eigen.obj <- eigen(R)
P <- eigen.obj$vectors[, 1:2]
P  # the projection matrix
t(P) %*% P

# Predictors as a plain n x (p - 2) numeric matrix without dimnames.
X <- matrix(unlist(Data[, 3:p]), n, p - 2)
cor(X)

# project X onto 2 new basis vectors P[,1] and P[,2]
Y <- scale(X) %*% P
plot(Y)
I <- Class == "benign"
sum(I) / n
points(Y[I, 1], Y[I, 2], col = 2, pch = 16)
points(Y[!I, 1], Y[!I, 2], col = 3, pch = 16)

# ---- Check: Z'Z reproduces the correlation matrix ---------------------------

# Center the columns of X by subtracting j %*% (row vector of column means).
j <- rep(1, n)
X.centered <- X - j %*% t(colMeans(X))
colMeans(X.centered)

# Divide each column by its own standard deviation and by sqrt(n - 1).
# sd() applied to a whole matrix does not return per-column values in current
# R, so the column standard deviations are computed with apply().
Z <- X.centered %*% diag(1 / apply(X, 2, sd)) / sqrt(n - 1)
t(Z) %*% Z