tomfbush · November 22, 2018 20:30 · Nov 22, 2018
diff --git a/clean.r b/clean.r
@@ -0,0 +1,32 @@
+# function to substitute matching string and everything after it with nothing
+# note leading space if that applies to your dataset
+removeCrap <- function(crapThing) {
+    gsub(" (0|[1-9][0-9]*)\ ?(GB).*$", "", crapThing)
+}
+
+# load data from csv into a dataframe
+d <- read.csv("input.csv", sep = ",")
+
+# display a few rows of the dataframe 'd'
+# note name of column we want to change in this case is Col_1
+head(d)
+
+# apply the removeCrap() function to each row of the correct column
+# by referencing dataframe 'd', column 'Col_1' (known together as d$Col_1)
+# and output to a separate 'factor' called cleanCol
+cleanCol <- sapply(d$Col_1, FUN = removeCrap)
+
+# bind the original data and the clean column
+cleanData <- cbind(cleanCol, d)
+
+# output the data to a new csv file, not using any row numbers
+# (cos Excel has those already)
+write.csv(cleanData, "output.csv", row.names = F)
+
+# APPENDIX
+# creating fake data and writing to a file to read in above this
+# col1 <- c("some nice text 9GB blah blah", "some other text 10 GB nonsense with a space")
+# col2 <- c(12, 24)
+# testData <- data.frame(col1, col2)
+# colnames(testData) <- c("Col_1", "Col_2")
+# write.csv(testData, "input.csv", row.names = F)
No results found