################ Example 1: Data are attributes on mammalian morphology and reproduction #########################

# Read the data from a file into a data frame (enhanced matrix).
Data <- read.table(
  file = "C:/Documents and Settings/steeleb/My Documents/Stat 341/BrainWeight.txt",
  sep = ",",
  header = TRUE
)

# Print the first 6 rows of the data frame.
head(Data)
colnames(Data)
print(Data$species)
print(Data[, 1])  # print column 1 of the data frame
Data[, 1]
Data[1, ]         # print row 1 of the data frame

# Evaluate expressions against the columns of Data with with() instead of
# attach()/detach(): the columns are visible by name inside each call, but
# nothing is left behind on the search path afterwards.
print(Data$species)  # print the species names

with(Data, plot(body, gestation))
# Transform body weight to the natural logarithm scale before plotting.
with(Data, plot(log(body), gestation))
# Note: hit the Esc key to stop labelling points.
with(Data, identify(log(body), gestation, species))

with(Data, plot(log(body), gestation, pch = 16, col = "red"))
palette()
with(Data, plot(body, brain, pch = 16, col = "green"))
with(Data, plot(log(body), log(brain), pch = 16, col = "black"))

############################# construct new data.frame

# row.names = 1 turns the first supplied column (species) into row names.
new.data <- with(Data, data.frame(species, body, brain, row.names = 1))
head(new.data)

x <- seq(from = 1, to = 10, by = 1)
y <- rep(1:5, 2)
data.frame(x, y)

# NOTE: `names` shadows the base function name as a variable; calls such as
# names(Data) below still resolve to the function.
names <- c("a", "b", "c", "d", "e", "f", "g", "h", "i", "j")
length(names)
new.data <- data.frame(names, x, y, row.names = 1)
new.data
colnames(new.data)

######################### use data from a library ##############

library(DAAG)
data <- Manitoba.lakes
head(data)
log(data$area)
row.names(data)
plot(data$elevation, log(data$area))

#################### Example 2: Data are number of TB cases per 100,000 people #########################

# Read the data from a file.
Data <- read.table(
  file = "C:/Documents and Settings/steeleb/My Documents/Stat 341/GlobalTB.txt",
  header = TRUE
)

# Remove rows that contain missing data (represented by NA).
Data <- na.omit(Data)

# Extract the names of the columns.
column.names <- names(Data)
# Print the column names.
column.names

# Print the first 10 rows of the data frame.
head(Data, 10)

# Print the dimension of the data frame.
dim(Data)

# Store the number of rows (countries) and columns.
n <- dim(Data)[1]
p <- dim(Data)[2]

# Store the TB rates, leaving off the country and region names.
X <- Data[, 3:p]

# Store the country names.
Country <- Data$Country

# Store the region names.
Region <- Data$Region

# Construct a list of the regions. as.factor() is needed because
# read.table() returns character columns by default since R 4.0.0, and
# levels() of a character vector is NULL. An existing factor is passed
# through unchanged, so its levels are reported exactly as before.
regions <- levels(as.factor(Region))

# Construct a sequence corresponding to the years of observation.
Year <- 1982:2005
Year
# The plots below pair each year with one rate column; fail early with a
# clear message if the file does not have one column per year.
stopifnot(length(Year) == ncol(X))

# Get the data for Algeria (first country in the data.frame).
TB.cases.Algeria <- X[1, ]
plot(Year, TB.cases.Algeria)
plot(Year, TB.cases.Algeria, type = "b", pch = 16)

## Use a high-level graphics library - tells R to open the package.
library(lattice)

# Create an empty plotting field spanning all years and all observed rates.
plot(range(Year), c(0, max(X)), type = "n",
     ylab = "Rate per 100,000", xlab = "Year")

# Countries to overlay; the same vector drives the legend, so line colours
# and legend entries cannot drift apart.
highlight <- c("Zimbabwe", "Botswana", "SouthAfrica", "Philippines",
               "India", "SKorea")

# Show what a row-selection mask looks like for one country.
I <- Country == highlight[1]
I

# Add one line per country; colour k belongs to the k-th highlighted country.
for (k in seq_along(highlight)) {
  I <- Country == highlight[k]
  if (!any(I)) {
    # The country may be absent (e.g. dropped by na.omit() above).
    warning("Country not found in data: ", highlight[k], call. = FALSE)
    next
  }
  lines(Year, X[I, ], col = k)
}

legend(x = "topleft", legend = highlight, col = seq_along(highlight), lty = 1)