Created
April 24, 2014 00:52
-
-
Save bhoung/11237681 to your computer and use it in GitHub Desktop.
Revisions
-
bhoung created this gist
Apr 24, 2014. There are no files selected for viewing.
# original example from Digg Data website (Takashi J. OZAKI, Ph. D.)
# http://diggdata.in/post/58333540883/k-fold-cross-validation-in-r
#
# k-fold cross validation example: the iris data set is used to predict
# Sepal.Length from the other variables with a random forest model. Every
# observation is predicted exactly once, by a model that did not see it.

library(plyr)
library(randomForest)

cv_data <- iris

# Fix the RNG seed so the fold assignment and the forests are reproducible.
set.seed(2014)

k <- 5 # number of folds
stopifnot(k >= 2, k <= nrow(cv_data))

# Assign each observation to one of k near-equal-sized folds by shuffling a
# repeated 1..k pattern. Sampling fold ids with replacement would give
# unbalanced (possibly empty) folds.
# The ids live in their own vector rather than in a column of the data: a
# column would be swept up by `~ .` and used as a noise predictor.
fold_id <- sample(rep_len(seq_len(k), nrow(cv_data)))

# One slot per fold, filled in the loop and bound together afterwards; this
# avoids growing a data frame with rbind() on every iteration.
fold_results <- vector("list", k)

# Creating a progress bar to know the status of CV
progress.bar <- create_progress_bar("text")
progress.bar$init(k)

for (i in seq_len(k)) {
  # rows in fold i form the test set, all remaining rows the training set
  in_test <- fold_id == i
  trainingset <- cv_data[!in_test, , drop = FALSE]
  testset <- cv_data[in_test, , drop = FALSE]

  # run a random forest model; the response is named by column so that `.`
  # expands to the remaining columns of `trainingset`
  mymodel <- randomForest(Sepal.Length ~ ., data = trainingset, ntree = 100)

  # drop the response by name (not by position) before predicting
  predictors <- testset[, setdiff(names(testset), "Sepal.Length"),
                        drop = FALSE]

  # keep this fold's predictions next to the matching actual values
  fold_results[[i]] <- data.frame(
    Predicted = as.numeric(predict(mymodel, predictors)),
    Actual = testset$Sepal.Length
  )

  progress.bar$step()
}

# combine predictions and actual Sepal.Length values from all folds
result <- do.call(rbind, fold_results)
result$Difference <- abs(result$Actual - result$Predicted)

# As an example use Mean Absolute Error as the evaluation metric: the "Mean"
# entry of this summary is the MAE. print() makes the summary visible when
# the script is run via source() as well.
print(summary(result$Difference))