Created
November 22, 2018 20:30
-
-
Save tomfbush/a2972816f4c8a849484c75a4a32f5e18 to your computer and use it in GitHub Desktop.
Revisions
-
tomfbush created this gist
Nov 22, 2018. There are no files selected for viewing
# Strip a storage-size marker and everything after it from each string.
#
# The pattern matches a leading space, a non-negative integer without leading
# zeros, an optional space, then "GB", followed by the rest of the string;
# the whole match is replaced with nothing.
# Note the leading space in the pattern; adjust it if that does not apply to
# your dataset.
#
# crapThing: character vector (other types are coerced to character by gsub()).
# Returns a character vector of the same length as crapThing.
removeCrap <- function(crapThing) {
  # A plain " ?" matches the optional space. Writing it as a backslash-escaped
  # space is not a valid escape in an R string literal and fails to parse.
  gsub(" (0|[1-9][0-9]*) ?(GB).*$", "", crapThing)
}

# load data from csv into a dataframe
# stringsAsFactors = FALSE keeps text columns as character on R < 4.0 as well
d <- read.csv("input.csv", stringsAsFactors = FALSE)

# display a few rows of the dataframe 'd'
# note name of column we want to change in this case is Col_1
head(d)

# clean the correct column by referencing dataframe 'd', column 'Col_1'
# (known together as d$Col_1) and store the result in a separate character
# vector called cleanCol; gsub() is vectorized, so no per-row apply is needed
cleanCol <- removeCrap(d$Col_1)

# bind the clean column and the original data
cleanData <- cbind(cleanCol, d)

# output the data to a new csv file, not using any row numbers
# (cos Excel has those already)
write.csv(cleanData, "output.csv", row.names = FALSE)

# APPENDIX
# creating fake data and writing to a file to read in above this
# col1 <- c("some nice text 9GB blah blah",
#           "some other text 10 GB nonsense with a space")
# col2 <- c(12, 24)
# testData <- data.frame(col1, col2)
# colnames(testData) <- c("Col_1", "Col_2")
# write.csv(testData, "input.csv", row.names = FALSE)