Last active
December 31, 2018 23:24
-
-
Save pkoppstein/bbbbdf7489c8c515680beb1c75fa59f2 to your computer and use it in GitHub Desktop.
Revisions
-
pkoppstein revised this gist
Dec 31, 2018 . 1 changed file with 1 addition and 1 deletion.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,5 +1,5 @@ # Copyright (C) 2018 [email protected] # License: Creative Commons Attribution-NonCommercial-ShareAlike 3.0 Unported (CC BY-NC-SA 3.0) # See http://creativecommons.org/licenses/by-nc/3.0/ # Attribution shall include the copyright notice above. -
pkoppstein revised this gist
Dec 31, 2018 . 1 changed file with 20 additions and 15 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -3,21 +3,22 @@ # See http://creativecommons.org/licenses/by-nc/3.0/ # Attribution shall include the copyright notice above. # fromcsv.jq version: 0.4 of 2018-12-30 # Requires: jq with `inputs` # Source: https://gist.github.com/pkoppstein/bbbbdf7489c8c515680beb1c75fa59f2 # A PEG-inspired parser for reading CSV files without the need to "slurp" them. # The parser is intended to handle a wide variety of "edge cases". # Note that both \r\n and \r\r\n are interpreted as end-of-record. # Error-reporting showing the relevant record number and troublesome string fragment is provided on STDERR. # Main jq filters: # fromcsv/0 # JSON string input # fromcsvfile/0 # read from STDIN # Example usage: jq -nRc 'include "fromcsvfile"; fromcsvfile' MYFILE.csv ######### PEG machinery @@ -40,7 +41,7 @@ def at_eof: .remainder | (.=="" or . == "\n" or . == "\r\n"); # end-of-record if end-of-record characters or else at eof def EOR: consume("\n|\r\n|\r\r\n") // (if .remainder=="" then . else empty end) ; # Internal double-quotes must be doubled; @@ -57,7 +58,9 @@ def unquoted_field: ; def quoted_field_continue: def trim: sub("(\r\r|\r)$";""); .remainder += ("\n" + input | trim) # | (.record|debug) as $debug | (field_content_quoted | consume("\" *")) // quoted_field_continue ; @@ -83,18 +86,20 @@ def fields: # i.e. field fields def record: field | (fields // .) ; def fromcsv: ## Loop for processing all the records def _fromcsv: if at_eof then empty else (EOR // .) | record | select(.result) | .result, (.result = null | .record+=1 | _fromcsv) end ; {record:0, remainder: .} | _fromcsv ; def fromcsvfile: def trim: sub("(\r\r|\r)$";""); inputs | trim | fromcsv; -
pkoppstein revised this gist
Dec 28, 2018 . 1 changed file with 1 addition and 1 deletion.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -3,7 +3,7 @@ # See http://creativecommons.org/licenses/by-nc/3.0/ # Attribution shall include the copyright notice above. # fromcsvfile.jq version: 0.3 of 2018-12-27 # Requires: jq with `inputs` # Source: https://gist.github.com/pkoppstein/bbbbdf7489c8c515680beb1c75fa59f2 -
pkoppstein revised this gist
Dec 28, 2018 . 1 changed file with 18 additions and 18 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,6 +1,11 @@ # Copyright (C) 2018 [email protected] # License: Creative Commons Attribution-NonCommerical-ShareAlike 3.0 Unported (CC BY-NC-SA 3.0) # See http://creativecommons.org/licenses/by-nc/3.0/ # Attribution shall include the copyright notice above. # fromcsv.jq version: 0.3 of 2018-12-27 # Requires: jq with `inputs` # Source: https://gist.github.com/pkoppstein/bbbbdf7489c8c515680beb1c75fa59f2 # A PEG-inspired parser for reading CSV files without the need to "slurp" them. @@ -12,7 +17,7 @@ # fromcsv/0 # JSON string input # fromcsvfile/0 # read from STDIN # Example usage: jq -nRc 'include "fromcsvfile"; fromcsfile' MYFILE.csv ######### PEG machinery @@ -51,11 +56,17 @@ def unquoted_field: // parse("[^\",\n]*") # possibly empty ; def quoted_field_continue: .remainder += ("\n" + input) | (field_content_quoted | consume("\" *")) // quoted_field_continue ; # Ignore blanks before and after the enclosing quotation marks # EXTENSION def quoted_field: consume(" *\"") | ( (field_content_quoted | consume("\" *")) // quoted_field_continue ) ; def field: quoted_field @@ -86,15 +97,4 @@ def fromcsv: {record:0, remainder: .} | _fromcsv ; def fromcsvfile: inputs | fromcsv; -
pkoppstein revised this gist
Dec 26, 2018 . 1 changed file with 3 additions and 3 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,5 +1,5 @@ # Author: [email protected] # Version: 0.2 of 2018-12-26 # Requires: jq with `inputs` # A PEG-inspired parser for reading CSV files without the need to "slurp" them. @@ -26,7 +26,7 @@ def parse($re): # on failure, match yields empty (.remainder | match("^" + $re)) as $match | .remainder |= .[$match.length :] |.result += [$match.string | gsub("\"\"";"\"")] ; # Utility function as there is no EOF marker def at_eof: .remainder | (.=="" or . == "\n" or . == "\r\n"); @@ -88,7 +88,7 @@ def fromcsv: def fromcsvfile: def finish: def checkline: test( "^((, *\"(\"\"|[^\"]*)\" *)|,[^\",\n]*|( *\"(\"\"|[^\"]*)\" *)|[^\",\n]+)*$" ); if checkline then . else (input // "") as $in | if $in != "" then (. + "\n" + $in | finish) -
pkoppstein created this gist
Dec 24, 2018 .There are no files selected for viewing
# This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,100 @@

# Author: [email protected]
# Version: 0.1 of 2018-12-23
# Requires: jq with `inputs`

# A PEG-inspired parser for reading CSV files without the need to "slurp" them.
# The parser is intended to handle a wide variety of "edge cases".
# Error-reporting showing the relevant record number and troublesome string
# fragment is provided on STDERR.

# Main jq filters:
#   fromcsv/0      # JSON string input
#   fromcsvfile/0  # read from STDIN

# Example usage: jq -nR 'include "fromcsvfile"; fromcsvfile' MYFILE.csv

######### PEG machinery

# Parser state is a JSON object:
#   .record    - zero-based index of the record being parsed
#   .remainder - the text that has not been consumed yet
#   .result    - array of fields collected for the current record (or null)

# Consume a regular expression rooted at the start of .remainder.
# Emits the updated state, or nothing at all if $re does not match there.
def consume($re):
  # The non-capturing group makes "^" apply to every alternative of $re;
  # with a bare "^" + $re, a pattern such as "\n|\r\n" could match "\r\n"
  # in the middle of .remainder and chop off the wrong prefix.
  # On failure, match yields empty.
  (.remainder | match("^(?:" + $re + ")")) as $match
  | .remainder |= .[$match.length :]
  ;

# Like consume/1, but also append the matched text to .result as a new field.
# A doubled double-quote in the matched text is collapsed to a single one;
# unquoted fields cannot contain a double-quote, so this only affects
# quoted field content.
def parse($re):
  # On failure, match yields empty.
  (.remainder | match("^(?:" + $re + ")")) as $match
  | .remainder |= .[$match.length :]
  | .result += [$match.string | gsub("\"\""; "\"")]
  ;

# Utility function as there is no EOF marker:
# true if nothing but an optional final line terminator is left.
def at_eof:
  .remainder | (. == "" or . == "\n" or . == "\r\n");

############ Grammar for CSV

# end-of-record if end-of-record characters or else at eof
def EOR:
  consume("\n|\r\n")
  // (if .remainder == "" then . else empty end)
  ;

# Internal double-quotes must be doubled;
# CRs and LFs are allowed, as are empty quoted fields.
def field_content_quoted:
  parse("((\"\")|([^\"]))*")
  ;

# EXTENSION: When reading an unquoted field, we ought to recognize CRLF as
# end-of-record, i.e. only accept LF if it is NOT preceded by CR.
# Reject unescaped double-quote.
def unquoted_field:
  # First try: a non-empty field immediately followed by CRLF; the CR is
  # consumed here so that EOR sees a plain LF.
  ( parse("[^\",\r\n]+")
    | (if .remainder | test("^\r\n") then consume("\r") else empty end) )
  // parse("[^\",\n]*")   # possibly empty
  ;

# Ignore blanks before and after the enclosing quotation marks
# EXTENSION
def quoted_field:
  consume(" *\"")
  | field_content_quoted
  | consume("\" *")
  ;

# One field, quoted or not.
# NOTE(review): unquoted_field can match the empty string, so the final
# alternative looks unreachable; lack of progress is reported by _fromcsv.
def field:
  quoted_field
  // unquoted_field
  // if at_eof then empty else stderr end
  ;

# ("," field)+
def fields:
  consume(",")
  | field
  | (fields // .)
  ;

# field ("," field)*
# i.e. field fields
def record:
  field
  | (fields // .)
  ;

## Loop for processing all the records.
## Emits one array of strings per record.
def _fromcsv:
  if at_eof then empty
  else .remainder as $before
  | (EOR // .)
  | record
  | select(.result)
  | if .remainder == $before
    then
      # Nothing was consumed (e.g. an unterminated quoted field), so another
      # iteration would repeat forever: report the state (record number and
      # offending fragment) on STDERR and stop.
      (stderr | empty)
    else
      .result,
      (.result = null | .record += 1 | _fromcsv)
    end
  end
  ;

# Input: a JSON string holding CSV text.
# Output: a stream of arrays of strings, one array per record.
def fromcsv:
  {record: 0, remainder: .} | _fromcsv
  ;

# Read raw lines (jq -nR) with `inputs`, glue together the lines that belong
# to one record because a quoted field spans several lines, and parse each
# record with fromcsv.
def fromcsvfile:
  # Drop the CR (or CR CR) left at the end of a line of a CRLF file.
  def trim: sub("(\r\r|\r)$"; "");

  # Next raw line, or null once the input is exhausted.  Using null as the
  # end marker lets an empty line inside a quoted field be told apart from
  # end of input.
  def nextline:
    (try input catch null)
    | if type == "string" then trim else null end;

  # Keep appending lines until the text looks like a complete record.
  def finish:
    def checkline:
      test("^(( *\"(\"\"|[^\"]*)\" *)|[^\",\n]*|[ ,]+)*$");
    if checkline then .
    else nextline as $in
    | if $in != null then (. + "\n" + $in | finish) else . end
    end;

  inputs | trim | finish | fromcsv
  ;