library(mlbench) data(PimaIndiansDiabetes) help(PimaIndiansDiabetes) head(PimaIndiansDiabetes) attach(PimaIndiansDiabetes) SF <- cbind(as.integer(diabetes=="pos"),as.integer(diabetes=="neg")) SF glm.obj <- glm(SF~pregnant+glucose+pressure+triceps+insulin+mass+pedigree+age, family=binomial) summary(glm.obj) anova(glm.obj) #################### prediction accuracy version 1 ################### pred.diabetes <- as.integer(glm.obj$fitted > 0.5) tabl <- table(SF[,1],pred.diabetes) rownames(tabl) <- c("Observed/No","Observed/Diabetic") colnames(tabl) <- c("Predicted/No","Predicted/Diabetic") tabl correct <- diag(tabl)/rowSums(tabl) sensitivity <- correct[2] specificity <- correct[1] overall <- sum(diag(tabl))/sum(rowSums(tabl)) c(overall,sensitivity,specificity) ################################################################### dropterm(glm.obj,test="Chisq") glm.obj <- glm(SF~pregnant+glucose+pressure+mass+pedigree+age, family=binomial) dropterm(glm.obj,test="Chisq") #################### prediction accuracy version 1 ################### pred.diabetes <- as.integer(glm.obj$fitted > 0.5) tabl <- table(SF[,1],pred.diabetes) rownames(tabl) <- c("Observed/No","Observed/Diabetic") colnames(tabl) <- c("Predicted/No","Predicted/Diabetic") tabl correct <- diag(tabl)/rowSums(tabl) sensitivity <- correct[2] specificity <- correct[1] overall <- sum(diag(tabl))/sum(rowSums(tabl)) c(overall,sensitivity,specificity) ################################################################### dropterm(glm.obj,test="Chisq") glm.obj <- glm(SF~pregnant+glucose+pressure+mass+pedigree, family=binomial) #################### prediction accuracy version 1 ################### pred.diabetes <- as.integer(glm.obj$fitted > 0.5) tabl <- table(SF[,1],pred.diabetes) rownames(tabl) <- c("Observed/No","Observed/Diabetic") colnames(tabl) <- c("Predicted/No","Predicted/Diabetic") tabl correct <- diag(tabl)/rowSums(tabl) sensitivity <- correct[2] specificity <- correct[1] overall <- sum(diag(tabl))/sum(rowSums(tabl)) c(overall,sensitivity,specificity) ################################################################### glm.obj <- glm(SF~pregnant+pressure+mass+pedigree, family=binomial) dropterm(glm.obj,test="Chisq") glm.obj <- glm(SF~pregnant+glucose+pressure+mass+pedigree, family=binomial) #################### prediction accuracy version 1 ################### pred.diabetes <- as.integer(glm.obj$fitted > 0.5) tabl <- table(SF[,1],pred.diabetes) rownames(tabl) <- c("Observed/No","Observed/Diabetic") colnames(tabl) <- c("Predicted/No","Predicted/Diabetic") tabl correct <- diag(tabl)/rowSums(tabl) sensitivity <- correct[2] specificity <- correct[1] overall <- sum(diag(tabl))/sum(rowSums(tabl)) c(overall,sensitivity,specificity) ######################## prediction accuracy version 2 - cross-validation ########################################### r <- 10 n <- dim(SF)[1] nt <- floor(0.9*n) for (i in 1:r){ s <- sample(1:n,size=nt) u <- rep(0,nt) ct <- 1 for (j in 1:n){ if (sum(s==j)==0) { u[ct] <- j ct <- ct + 1} } glm.obj <- glm(SF~pregnant+glucose+pressure+mass+pedigree, family=binomial,subset=s) pred.diabetes <- as.integer(glm.obj$fitted > 0.5) if (i==1) save.m <- cbind(SF[s,1],pred.diabetes) if (i!=1) save.m <- rbind(save.m,cbind(SF[s,1],pred.diabetes)) print(c("CV repetition ",i)) } y <- save.m[,1] predictions <- save.m[,2] tabl <- table(y,predictions) options(digits=3) rownames(tabl) <- c("Observed/No","Observed/Diabetic") colnames(tabl) <- c("Predicted/No","Predicted/Diabetic") tabl correct <- diag(tabl)/rowSums(tabl) sensitivity <- correct[2] specificity <- correct[1] overall <- sum(diag(tabl))/sum(rowSums(tabl)) c(overall,sensitivity,specificity)