################### PCA2.R ######################## data <- read.table("C:/Documents and Settings/steeleb/My Documents/Math 543/MagneticForce.txt",header=TRUE) # measurements on magnetic force surrounding a metal rod in an electronic printer. Factors: current, configuration # material = type of metal (4 types) head(data) p <- 11 n <- dim(data)[1] y <- as.vector(unlist(data[,1:p])) x <- rep(1:p,n) cur <- 1+data$CURRENT/250 config <- data$CONFIGUR+1 material <- data$MATERIAL plot(x,y,type="n",xlab="Location",ylab="Force") x <- rep(1:p,1) for (i in 1:n){ y <- unlist(data[i,1:p]) lines(x,y,col=material[i])} S <- var(data[,1:p]) e.obj <- eigen(S) e.ve <- e.obj$vectors e.va <- e.obj$values S.approx <- e.ve[,1]%*%t(e.ve[,1])*e.va[1] sum(abs(S)) sum(abs(S-S.approx))/sum(abs(S)) S.approx <- S.approx + e.ve[,2]%*%t(e.ve[,2])*e.va[2] sum(abs(S)) sum(abs(S-S.approx))/sum(abs(S)) A <- e.ve[,1:2] par(mfrow=c(2,2)) plot(c(-1,1),c(-1,1),type="n",xlab="First eigenvector coefficients",ylab="second eigenvector coefficients") points(A,pch=16) plot(c(1,11),c(-1,1),type="n",xlab="Position",ylab="Eigenvector coefficients") points(1:11,A[,1],pch=16,col=1) points(1:11,A[,2],pch=16,col=2) abline(h=0) X <- matrix(unlist(data[,1:p]),ncol=p) P <- X%*%A par(mfrow=c(2,2)) plot(P[,1],P[,2],col=cur,pch=16,xlab="First principal component", ylab="Second principal component",main="Color = current") plot(P[,1],P[,2],col=material,pch=16,xlab="First principal component", ylab="Second principal component",main="Color = material") plot(P[,1],P[,2],col=config,pch=16,xlab="First principal component", ylab="Second principal component",main="Color = configuragtion") anova(lm(P[,1]~data$CURRENT+data$CONFIG+data$MATERIAL)) anova(lm(P[,2]~data$CURRENT+data$CONFIG+data$MATERIAL)) ############################################# new data set ####################### par(mfrow=c(1,1)) library(mlbench) data(Ozone) data <- na.omit(Ozone) head(data) names(data) <- c("month","daym","dayw","ozone","pressure","windspeed", "humidity","tempS","tempEM","inversion","grad","inverstemp","visibility") attach(data) y <- ozone y <-tempEM x <- tempS plot(x,y,pch=16,xlab="TempS ",ylab="TempEM") lm.obj <- lm(y~x) lm.coeff <- lm.obj$coef lm.coeff abline(lm.coeff,col=2) S <- var(cbind(x,y)) S ve <- eigen(S)$vectors ve ########### Note that ve[,1] is pointed away from (mean(x),mean(y)) ve[,1] <- -ve[,1] slope <- ve[2,1]/ve[1,1] intercept <- mean(y)-mean(x)*slope abline(a=intercept,b=slope,col=1) lm.obj <- lm(x~y) lines(lm.obj$fitted,y,col=3) legend("topleft",legend=c("PC axis 1","Regression of y on x", "Regression of x on y"),lty=1,col=1:3) ############### show the projections of the data onto axis 1 ############### par(mfrow=c(1,1)) plot(c(min(x),max(x)),c(min(y),max(y)),xlab="TempS ",ylab="TempEM") points(x,y,pch=16) abline(a=intercept,b=slope,col=1) n <- length(y) theta <- atan(slope) costheta <- cos(theta) sintheta <- sin(theta) pts <- matrix(0,n,2) for (i in 1:n) { p <- ve[1,1]*(x[i]-mean(x)) + ve[2,1]*(y[i]-mean(y)) proj.uncentered.x <- mean(x) + p*costheta proj.uncentered.y <- mean(y) + p*sintheta pts[i,1] <- proj.uncentered.x pts[i,2] <- proj.uncentered.y segments(x[i],y[i],proj.uncentered.x,proj.uncentered.y) } ################## least squares (vertical) projections onto least squares line plot(c(min(x),max(x)),c(min(y),max(y)),xlab="TempS ",ylab="TempEM") points(x,y,pch=16) lm.obj <- lm(y~x) abline(lm.obj) fitted <- lm.obj$fitted for (i in 1:n) { segments(x[i],y[i],x[i],fitted[i]) } ################## sums-of squares comparisons = vertical distances ############ mean((y-fitted)^2) # least squares proj.vert <- intercept+slope*x # PC axis 1 mean((y-proj.vert)^2) ################## projection to point distances ########## mean((y-fitted)^2) # least squares mean(rowSums((cbind(x,y)-pts)^2)) # PC axis 1 ##################################### food.names <- c("Beef, braised","Hamburger","Beef roast","Beef, steak","Beef, canned", "Chicken, broiled","Chicken, canned","Beef, heart","Lamb leg, roast","Lamb shoulder, roast", "Smoked Ham","Pork roast","Pork simmered","Beef tongue","Veal cutlet","Bluefish, baked", "Clams, raw","Clams, canned","Crabmeat, canned","Haddock, fried","Mackerel, broiled","Mackerel, canned", "Perch, fried", "Salmon, canned","Sardines, canned","Tuna, canned","Shrimp, canned") abrevfood.names <- c("BB","H","BR","BS","BC","CB","CC","BH","LL","LS","HS","PR","PS","PT","VC","FB","AR","AC", "TC","HF","MB","MC","PF","SC","DC","UC","RC") Energy <- c(340,245,420,375,180,115,170,160,265,300,340,340,355,205,185,136,70,45,90,135,100,155,195,120,180,170,110) Protein <- c(20,21,15,19,22,20,25,26,20,18,20,19,19,18,23,22,11,7,14,16,19,16,16,17,22,25,23) Fat <- c(29,17,39,32,10,3,7,5,20,25,28,29,30,14,9,4,1,1,2,5,13,9,11,5,9,7,1) Calcium <- c(9,9,7,9,17,8,12,14,9,9,9,9,9,7,9,25,82,74,38,15,5,157,14,159,367,7,98) Iron <- c(2.5,2.7,2.0,2.5,3.7,1.4,1.5,5.9,2.6,2.3,2.5,2.5,2.4,2.5,2.7,0.6,6.0,5.4,0.8,0.5,1.0,1.8,1.3,0.7,2.5,1.2,2.6) cbind(abrevfood.names,food.names,Energy,Protein,Fat,Calcium,Iron) X <- cbind(Energy,Protein,Fat,Calcium,Iron) colnames(X) <- c("Energy","Protein","Fat","Calcium","Iron") rownames(X) <- abrevfood.names cor(X) e <- eigen(cor(X)) m <- cbind(1:5,e$values,100*e$values/sum(e$values),100*cumsum(e$values)/sum(e$values)) colnames(m) <- c("e-pair","eigenvalue","%acc't by","% cumulative acc't by") options(digits = 3) vectrs <- e$vectors rownames(vectrs) <-colnames(X) vectrs PC <- scale(X)%*%vectrs plot(PC[,1],PC[,2],xlab="PC 1",ylab="PC 2") identify(PC[,1],PC[,2],labels=abrevfood.names)