Last active
December 8, 2021 08:02
-
-
Save badele/f2d4e74b9135c25a69834cacc3f89b87 to your computer and use it in GitHub Desktop.
Revisions
-
badele revised this gist
Dec 8, 2021 . 1 changed file with 18 additions and 7 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -26,13 +26,15 @@ write_fwf <- function(df, filename,rowname = FALSE,nbspaces = 3, replace_na = "N df <- tibble::rownames_to_column(df, rowname) } # Convert all columns to character tmpdf = data.frame(df) tmpdf[] <- lapply(df, as.character) # Compute column size nasize=nchar(replace_na) maxwidthname <- nchar(colnames(tmpdf)) maxwidthvalue <- sapply(tmpdf, function(x) max(nchar(x))) maxcols <- pmax(maxwidthname,maxwidthvalue,nasize) delta <- maxwidthvalue - maxwidthname # Compute header @@ -56,7 +58,16 @@ write_fwf <- function(df, filename,rowname = FALSE,nbspaces = 3, replace_na = "N close(file) # Export data write.fwf( df, file=filename, append=TRUE, width=maxcols, colnames=FALSE, na=replace_na, sep=strrep(" ",nbspaces), justify="left" ) } #' Read automatically .fwf file (fixed width file) in R @@ -79,5 +90,5 @@ read_fwf <- function(filename,maxsearchlines=100) { colwidths <- str_split(colwidths, ",") colwidths <- strtoi(unlist(colwidths)) return(read.fwf(file=filename, skip=idxcols+1, col.names = colnames, widths=colwidths,strip.white=TRUE)) } -
badele revised this gist
Nov 23, 2021 . 1 changed file with 6 additions and 2 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -21,11 +21,15 @@ suppressPackageStartupMessages({ #' Datsun 710 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1 #' Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1 write_fwf <- function(df, filename,rowname = FALSE,nbspaces = 3, replace_na = "NA") { # Convert rownames to column if (rowname) { df <- tibble::rownames_to_column(df, rowname) } # Replace NA df[is.na(df)] <- replace_na # Compute column size maxwidthname <- nchar(colnames(df)) maxwidthvalue <- sapply(df, function(x) max(nchar(x))) maxcols <- pmax(maxwidthname,maxwidthvalue) -
badele revised this gist
Nov 20, 2021 . 1 changed file with 4 additions and 0 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -6,6 +6,10 @@ suppressPackageStartupMessages({ #' Generate automatically .fwf file (fixed width file) in R #' @description This function creates automatically fixed width file #' It align columns headers with datas #' @param df dataframe #' @param filename filename #' @param nbspaces nb spaces for columns separator #' @param replace_na Empty/NA chain replacement #' @param rowname If it's defined, it convert rownames column to named column #' @examples write_fwf(mtcars, "carname", "/tmp/mtcars.fwf") #' -
badele created this gist
Nov 20, 2021 .There are no files selected for viewing
# This file contains hidden or bidirectional Unicode text that may be
# interpreted or compiled differently than what appears below. To review, open
# the file in an editor that reveals hidden Unicode characters. Learn more
# about bidirectional Unicode characters
# Original file line number / Diff line number / Diff line change
# @@ -0,0 +1,75 @@
suppressPackageStartupMessages({
  library(gdata)
  library(stringr)
})

# Width, in characters, needed to print every value of one column.
# Both the plain text of each value (with NA shown as `na_text`) and the
# column-wise `format()` rendering are measured, because formatting a numeric
# column as a whole can be wider than its individual values (0.5 and 100 are
# rendered as "  0.5" and "100.0").
.fwf_value_width <- function(col, na_text) {
  if (length(col) == 0L) {
    return(0L)
  }
  as_text <- as.character(col)
  as_text[is.na(as_text)] <- na_text
  as.integer(max(nchar(as_text), nchar(format(col))))
}

#' Write a data frame as a self-describing fixed width file (.fwf)
#'
#' @description Writes two metadata lines (`# colnames:` and `# cols:`), one
#'   header line aligned with the data, then the data rows through
#'   `gdata::write.fwf()`. `read_fwf()` relies on the two metadata lines to
#'   read the file back.
#' @param df Data frame to export.
#' @param filename Path of the file to write. It is opened in write mode, so
#'   existing content is replaced.
#' @param rowname `FALSE` (default) to ignore the row names, or a single
#'   string: the row names are then moved into a first column of that name.
#'   `TRUE` is accepted as a shortcut for the column name `"rowname"`.
#' @param nbspaces Number of spaces used as column separator.
#' @param replace_na Text written in place of missing values.
#' @return The value returned by `gdata::write.fwf()`.
#' @examples
#' write_fwf(mtcars, "/tmp/mtcars.fwf", rowname = "carname")
#'
#' # First lines of the resulting file:
#' #
#' # # colnames: carname,mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb
#' # # cols: 22,7,6,8,6,7,8,8,5,5,7,7
#' # carname                mpg   cyl    disp    hp   drat      wt    qsec   vs   am   gear   carb
#' # Mazda RX4             21.0     6   160.0   110   3.90   2.620   16.46    0    1      4      4
#' # Mazda RX4 Wag         21.0     6   160.0   110   3.90   2.875   17.02    0    1      4      4
#' # Datsun 710            22.8     4   108.0    93   3.85   2.320   18.61    1    1      4      1
#' # Hornet 4 Drive        21.4     6   258.0   110   3.08   3.215   19.44    1    0      3      1
write_fwf <- function(df, filename, rowname = FALSE, nbspaces = 3,
                      replace_na = "NA") {
  # Optionally move the row names into a leading, named column. `rowname` is
  # either FALSE or a column name, so it cannot be used directly as an `if`
  # condition.
  if (isTRUE(rowname)) {
    rowname <- "rowname"
  }
  if (!isFALSE(rowname)) {
    stopifnot(is.character(rowname), length(rowname) == 1L, nzchar(rowname))
    df <- tibble::rownames_to_column(df, rowname)
  }

  col_names <- colnames(df)
  separator <- strrep(" ", nbspaces)

  # Each column is as wide as the widest of its name and its values.
  maxwidthname <- nchar(col_names)
  maxwidthvalue <- vapply(
    df, .fwf_value_width, integer(1),
    na_text = replace_na, USE.NAMES = FALSE
  )
  maxcols <- pmax(maxwidthname, maxwidthvalue)

  # Pad the column names so the header lines up with the data: text columns
  # are left-aligned, every other column is right-aligned.
  padding <- strrep(" ", maxcols - maxwidthname)
  left_aligned <- vapply(
    df, function(col) is.character(col) || is.factor(col), logical(1),
    USE.NAMES = FALSE
  )
  header <- ifelse(
    left_aligned,
    paste0(col_names, padding),
    paste0(padding, col_names)
  )

  # Metadata and header. Passing the path lets writeLines() open and close the
  # file itself, so no connection stays open if a write fails. The widths
  # stored in "# cols:" include the separator that follows each column.
  writeLines(
    c(
      paste("# colnames:", paste(col_names, collapse = ",")),
      paste("# cols:", paste(maxcols + nbspaces, collapse = ",")),
      paste0(header, separator, collapse = "")
    ),
    con = filename
  )

  # Append the data rows below the header.
  write.fwf(
    df,
    file = filename,
    append = TRUE,
    width = maxcols,
    colnames = FALSE,
    na = replace_na,
    sep = separator
  )
}

#' Read a fixed width file (.fwf) written by `write_fwf()`
#'
#' @description Looks for the `# colnames: ` and `# cols: ` metadata lines in
#'   the first lines of the file, then reads the data rows with
#'   `utils::read.fwf()` using the column names and widths found there.
#' @param filename Path of the file to read.
#' @param maxsearchlines Number of leading lines searched for the metadata.
#' @return The data frame returned by `utils::read.fwf()`.
#' @examples read_fwf("/tmp/mtcars.fwf")
read_fwf <- function(filename, maxsearchlines = 100) {
  # Search the columns metadata at the top of the file.
  top_lines <- readLines(filename, n = maxsearchlines)
  idxname <- str_which(top_lines, "# colnames: ")
  idxcols <- str_which(top_lines, "# cols: ")
  if (length(idxname) == 0L || length(idxcols) == 0L) {
    stop(
      "No '# colnames: ' / '# cols: ' metadata found in the first ",
      maxsearchlines, " lines of ", filename,
      call. = FALSE
    )
  }
  # Keep the first occurrence only: later matches would be data lines.
  idxname <- idxname[1L]
  idxcols <- idxcols[1L]

  col_names <- str_replace(top_lines[idxname], "# colnames: ", "")
  col_names <- unlist(str_split(col_names, ","))

  col_widths <- str_replace(top_lines[idxcols], "# cols: ", "")
  col_widths <- strtoi(unlist(str_split(col_widths, ",")), base = 10L)

  # Skip everything up to the "# cols:" line plus the single header line that
  # follows it; skipping one more line would drop the first data row.
  read.fwf(
    file = filename,
    skip = idxcols + 1,
    col.names = col_names,
    widths = col_widths,
    strip.white = TRUE
  )
}