# number of TB cases per 100,000 people Data <- na.omit(read.table(file="C:/Documents and Settings/steeleb/My Documents/Math 543/GlobalTB.txt",header=T)) name <- names(Data) head(Data,10) str(Data) n <- dim(Data)[1] Country <- Data$Country Region <- Data$Region regions <- levels(Region) Year <- 1982:2005 X <- matrix(unlist(Data[,3:dim(Data)[2]]),nrow=n) #X <- X - t(matrix(colMeans(X),nrow=24,ncol=n)) colnames(X) <- as.character(Year) p <- dim(X)[2] ################################### left e-vectors e <- eigen(t(X)%*%X) m <- cbind(1:p,e$values,100*e$values/sum(e$values),100*cumsum(e$values)/sum(e$values)) colnames(m) <- c("e-pair","eigenvalue","%acc't by","% cumulative acc't by") options(digits = 3) m vectrs <- e$vectors rownames(vectrs) <-colnames(X) vectrs[,1:3] plot(Year[c(1,24)],c(min(-vectrs),max(-vectrs)),type="n",pch=16,xlab="Year",ylab="Eigen-coefficient") lines(Year,vectrs[,1],pch=16,col=1) lines(Year,-vectrs[,2],pch=16,col=2) lines(Year,vectrs[,3],pch=16,col=3) legend(x="topright",legend=c("E1","E2","E3"),col=1:3,lty=1) PC <- -X%*%vectrs[,1:3]%*%diag(1/sqrt(e$values[1:3])) PC <- -X%*%vectrs[,1:3] plot(PC[,1],PC[,2]) identify(PC[,1],PC[,2],labels=Country) library(lattice) xyplot(PC[,2]~PC[,1],groups=Region,col=1:9,pch=16) legend(x="bottomleft",legend=regions,col=1:9,pch=16) order.1 <- order(-PC[,1]) Country[order.1] order.2 <- order(-PC[,2]) Country[order.2] order.3 <- order(-PC[,3]) Country[order.3] plot(c(1982,2005),c(0,max(X)),type="n",ylab="Rate",xlab="Year") I <- Country=="Zimbabwe" lines(Year,X[I,],col=1) I <- Country=="Botswana" lines(Year,X[I,],col=2) I <- Country=="SouthAfrica" lines(Year,X[I,],col=3) I <- Country=="Philippines" lines(Year,X[I,],col=4) I <- Country=="India" lines(Year,X[I,],col=5) I <- Country=="SKorea" lines(Year,X[I,],col=6) lines(Year,X[order.3[1],],col=7) I <- Country=="LaoS" lines(Year,X[I,],col=8) legend(x="topleft",legend=c("Zimbabwe","Botswana","SouthAfrica","Philippines", "India","SKorea","Congo","Laos"),col=1:8,lty=1) ############################################# right e-vectors e <- eigen(X%*%t(X)) m <- cbind(1:p,e$values,100*e$values/sum(e$values),100*cumsum(e$values)/sum(e$values)) colnames(m) <- c("e-pair","eigenvalue","%acc't by","% cumulative acc't by") options(digits = 3) m vectrs <- e$vectors rownames(vectrs) <-Country vectrs[,1:3] PC <- t(X)%*%vectrs[,1:3]%*%diag(1/sqrt(e$values[1:3])) PC <- t(X)%*%vectrs[,1:3] plot(c(Year[1],Year[24]),c(min(PC),max(PC)),pch=16,type="n",xlab="Year",ylab="Eigenvector coefficient") lines(Year,PC[,1],col=1) lines(Year,PC[,2],col=2) lines(Year,PC[,3],col=3) ############################################### multi-dimensional scaling approach D <- dist(X,method = "euclidean") full <- matrix(0, n, n) full[lower.tri(full)] <- D D <- full + t(full) colnames(D) <- Country rownames(D) <- Country D[,1:10] A <- -0.5*D J <- matrix(1,n,n) M <- diag(n) - J/n B <- M%*%A%*%M E <- eigen(B) Z <- E$vectors[,1:3]%*%diag(sqrt(E$values[1:3])) x <- c(Z[,1],Z[,1],Z[,2]) y <- c(Z[,2],Z[,3],Z[,3]) class <- rep(Region,3) panl <- rep(c("1 vs 2","1 vs 3","2 vs 3"),each=n) library(lattice) xyplot(y~x|panl,groups=class,col=1:9,pch=16) legend(x="topright",legend=regions,pch=16,col=1:9) plot(Z[,1],Z[,2]) identify(Z[,1],Z[,2],labels=Country) ########################## trend analysis # e <- eigen(X%*%t(X)) m <- cbind(1:p,e$values,100*e$values/sum(e$values),100*cumsum(e$values)/sum(e$values)) colnames(m) <- c("e-pair","eigenvalue","%acc't by","% cumulative acc't by") options(digits = 3) m vectrs <- e$vectors rownames(vectrs) <-Country vectrs[,1:3] PC <- t(X)%*%vectrs[,1:3]%*%diag(1/sqrt(E$values[1:3])) order.1 <- order(PC[,1]) y <- PC[,1] x <- Year - mean(Year) x2 <- x^2 lm.obj <- lm(y~x+x2) anova(lm.obj) plot(c(Year[1],Year[24]),c(min(PC),max(PC)),type="n",ylab="PC",xlab="Year") points(Year,y,col=1,pch=16) lines(lowess(Year, y, f = 2/3)) anova(lm.obj) y <- PC[,2] x <- Year - mean(Year) x2 <- x^2 lm.obj <- lm(y~x+x2) anova(lm.obj) points(Year,y,col=2,pch=16) lines(lowess(Year, y, f = 1/4),col=2) anova(lm.obj) y <- PC[,3] x <- Year - mean(Year) x2 <- x^2 lm.obj <- lm(y~x+x2) anova(lm.obj) points(Year,y,col=3,pch=16) lines(lowess(Year, y, f = 1/4),col=3) anova(lm.obj) yr.c <- as.numeric(Year - 1994) B <- matrix(0,n,2) for (i in 1:n){ y <- X[i,] lm.obj <- lm(y~yr.c) B[i,] <- lm.obj$coef } B k.obj <- kmeans(B[,2],4) xyplot(B[,2]~B[,1],groups=k.obj$cluster,col=1:4,pch=16) library(lattice) i <- 2 I <- k.obj$cluster==i Y <- as.vector(t(X[I,])) x <- rep(Year,times=sum(I)) r.v <- rep(Country[I],each=24) print(xyplot(Y~x|r.v,as.table=T,type="n", panel=function(x,y){ panel.loess(x,y,type="l",col=1) panel.xyplot(x,y,pch=3,col.line=c.v,type="p")}))