bhoung · April 24, 2014 00:52 · Apr 24, 2014
diff --git a/k-fold CV.r b/k-fold CV.r
@@ -0,0 +1,60 @@
+# original example from Digg Data website (Takashi J. OZAKI, Ph. D.) 
+# http://diggdata.in/post/58333540883/k-fold-cross-validation-in-r
+
+
+library(plyr)
+library(randomForest)
+
+data <- iris
+
+# in this cross validation example, we use the iris data set to 
+# predict the Sepal Length from the other variables in the dataset 
+# with the random forest model 
+
+k = 5 #Folds
+
+# sample from 1 to k, nrow times (the number of observations in the data)
+data$id <- sample(1:k, nrow(data), replace = TRUE)
+list <- 1:k
+
+# prediction and testset data frames that we add to with each iteration over
+# the folds
+
+prediction <- data.frame()
+testsetCopy <- data.frame()
+
+#Creating a progress bar to know the status of CV
+progress.bar <- create_progress_bar("text")
+progress.bar$init(k)
+
+for (i in 1:k){
+  # remove rows with id i from dataframe to create training set
+  # select rows with id i to create test set
+  trainingset <- subset(data, id %in% list[-i])
+  testset <- subset(data, id %in% c(i))
+
+  # run a random forest model
+  mymodel <- randomForest(trainingset$Sepal.Length ~ ., data = trainingset, ntree = 100)
+
+  # remove response column 1, Sepal.Length
+  temp <- as.data.frame(predict(mymodel, testset[,-1]))
+  # append this iteration's predictions to the end of the prediction data frame
+  prediction <- rbind(prediction, temp)
+
+  # append this iteration's test set to the test set copy data frame
+  # keep only the Sepal Length Column
+  testsetCopy <- rbind(testsetCopy, as.data.frame(testset[,1]))
+
+  progress.bar$step()
+}
+
+# add predictions and actual Sepal Length values
+result <- cbind(prediction, testsetCopy[, 1])
+names(result) <- c("Predicted", "Actual")
+result$Difference <- abs(result$Actual - result$Predicted)
+
+# As an example use Mean Absolute Error as Evalution 
+summary(result$Difference)
+
+
+
No results found