# Hierarchical clustering and principal components on two data sets:
#   1. a hand-entered table of 27 foodstuffs with five numeric measurements;
#   2. the built-in USArrests data set.
# Bare top-level expressions (cbind(...), m, e$vectors, head(...), ...) are
# kept on purpose: they auto-print when the script is run line by line or
# with source(echo = TRUE).

library(MASS)     # not used by the code below; kept from the original script
library(cluster)  # diana(), pltree()
library(lattice)  # xyplot()

# Foodstuffs data set ----------------------------------------------------

food.names <- c(
  "Beef, braised", "Hamburger", "Beef roast", "Beef, steak", "Beef, canned",
  "Chicken, broiled", "Chicken, canned", "Beef, heart", "Lamb leg, roast",
  "Lamb shoulder, roast", "Smoked Ham", "Pork roast", "Pork simmered",
  "Beef tongue", "Veal cutlet", "Bluefish, baked", "Clams, raw",
  "Clams, canned", "Crabmeat, canned", "Haddock, fried", "Mackerel, broiled",
  "Mackerel, canned", "Perch, fried", "Salmon, canned", "Sardines, canned",
  "Tuna, canned", "Shrimp, canned"
)

# Two-letter codes, one per entry of food.names (same order).
abrevfood.names <- c(
  "BB", "HR", "BR", "BS", "BC", "CB", "CC", "BH", "LL", "LS", "HS", "PR",
  "PS", "PT", "VC", "FB", "AR", "AC", "TC", "HF", "MB", "MC", "PF", "SC",
  "DC", "UC", "RC"
)

Energy <- c(
  340, 245, 420, 375, 180, 115, 170, 160, 265, 300, 340, 340, 355, 205,
  185, 136, 70, 45, 90, 135, 100, 155, 195, 120, 180, 170, 110
)
Protein <- c(
  20, 21, 15, 19, 22, 20, 25, 26, 20, 18, 20, 19, 19, 18, 23, 22, 11, 7,
  14, 16, 19, 16, 16, 17, 22, 25, 23
)
Fat <- c(
  29, 17, 39, 32, 10, 3, 7, 5, 20, 25, 28, 29, 30, 14, 9, 4, 1, 1, 2, 5,
  13, 9, 11, 5, 9, 7, 1
)
Calcium <- c(
  9, 9, 7, 9, 17, 8, 12, 14, 9, 9, 9, 9, 9, 7, 9, 25, 82, 74, 38, 15, 5,
  157, 14, 159, 367, 7, 98
)
Iron <- c(
  2.5, 2.7, 2.0, 2.5, 3.7, 1.4, 1.5, 5.9, 2.6, 2.3, 2.5, 2.5, 2.4, 2.5,
  2.7, 0.6, 6.0, 5.4, 0.8, 0.5, 1.0, 1.8, 1.3, 0.7, 2.5, 1.2, 2.6
)

# Show the raw table (a character matrix, because names are bound in).
cbind(abrevfood.names, food.names, Energy, Protein, Fat, Calcium, Iron)

# Centre and scale every measurement so that no single variable dominates
# the Euclidean distances used for clustering.
X <- scale(cbind(Energy, Protein, Fat, Calcium, Iron))
colnames(X) <- c("Energy", "Protein", "Fat", "Calcium", "Iron")
# rownames(X) <- abrevfood.names
rownames(X) <- food.names

par(mfrow = c(1, 1))

# The distance matrix is the same for every linkage, so compute it once.
food.dist <- dist(X)

# One dendrogram per linkage. plclust() is defunct, so plot() on the hclust
# object is used instead; main = "" reproduces plclust's empty title.
# "ward.D" is the current name of the method formerly called "ward".
linkage.labels <- c(
  single   = "Single linkage",
  complete = "Complete linkage",
  average  = "Average linkage",
  ward.D   = "Ward linkage"
)
for (linkage in names(linkage.labels)) {
  plot(
    hclust(food.dist, method = linkage),
    labels = food.names,
    main = "",
    ylab = "Distance",
    xlab = linkage.labels[[linkage]]
  )
}

# Cut the average-linkage tree at height 1 and report every cluster.
cluster.obj <- cutree(hclust(food.dist, method = "average"), h = 1)
print(cluster.obj)

for (i in sort(unique(cluster.obj))) {
  members <- cluster.obj == i
  if (sum(members) == 1) {
    # Singleton cluster: print the food's name and its scaled row.
    # Explicit row indexing (X[members, ]) keeps the column names and does
    # not depend on recycling a 27-long mask over the whole matrix.
    print(food.names[members])
    print(X[members, ])
  } else {
    # Larger cluster: print the member rows and the cluster centroid.
    print(X[members, ])
    print(colMeans(X[members, ]))
  }
}

# USArrests data set -----------------------------------------------------

head(USArrests)
state.names <- rownames(USArrests)

# Principal components from the correlation matrix of columns 1, 2 and 4.
data <- USArrests[, c(1, 2, 4)]
e <- eigen(cor(data))
p <- dim(data)[2]

# Eigenvalue table: index, eigenvalue, % of variance, cumulative %.
m <- cbind(
  1:p,
  e$values,
  100 * e$values / sum(e$values),
  100 * cumsum(e$values) / sum(e$values)
)
colnames(m) <- c("e-pair", "eigenvalue", "%acc't by", "% cumulative acc't by")
options(digits = 3)
m

rownames(e$vectors) <- colnames(data)
e$vectors

# Component scores: standardized data times the eigenvectors.
# as.matrix() avoids hard-coding the number of rows.
PC <- scale(as.matrix(data)) %*% e$vectors
plot(PC[, 1], PC[, 2], pch = 16)
# identify() waits for mouse clicks, so only call it in an interactive session.
if (interactive()) {
  identify(PC[, 1], PC[, 2], state.names)
}

# NOTE(review): the clustering below uses all four USArrests columns,
# unscaled, whereas the PCA above uses three standardized columns.
# Confirm that this difference is intended.
arrests.dist <- dist(USArrests)
plot(
  hclust(arrests.dist, method = "average"),
  labels = state.names,
  main = "",
  ylab = "Distance",
  xlab = "Average linkage"
)
plot(
  hclust(arrests.dist, method = "ward.D"),
  labels = state.names,
  main = "",
  ylab = "Distance",
  xlab = "Ward linkage"
)

# Divisive clustering; fit once and reuse for the plot and both cuts.
arrests.diana <- diana(USArrests)
pltree(arrests.diana, main = "")
sort(cutree(as.hclust(arrests.diana), h = 75))
clusters <- cutree(as.hclust(arrests.diana), h = 100)

# Plot the first two principal components, grouped by the divisive clusters.
xyplot(
  PC[, 2] ~ PC[, 1],
  groups = clusters,
  col = 1:10,
  pch = 16,
  xlab = "First axis",
  ylab = "Second axis"
)