Last active
November 28, 2024 12:12
-
-
Save PhiLhoSoft/ff60eefcb8ed43326cd7 to your computer and use it in GitHub Desktop.
Revisions
-
PhiLhoSoft revised this gist
Feb 23, 2016 . 2 changed files with 17 additions and 7 deletions. There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -9,7 +9,7 @@ var fs = require('fs'); // http://blog.james-carr.org/2010/07/07/parsing-csv-files-with-nodejs/ // https://gist.github.com/PhiLhoSoft/ff60eefcb8ed43326cd7 function parseCsvFile(fileName, options, onNext, onComplete) { var lineNb = 0, header = [], buffer = ''; var pattern = /(?:^|,)("(?:[^"]*)"|[^,]*)/g; @@ -32,6 +32,10 @@ function parseCsvFile(fileName, options, callback) { // Process the last, complete line of the file (skipped if empty) processLine(buffer, 1); if (onComplete) { onComplete(); } }); function processLine(line, idx) @@ -47,12 +51,12 @@ function parseCsvFile(fileName, options, callback) } else { onNext(buildRecord(line)); } } else { onNext(extractFields(line)); } } This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -21,7 +21,13 @@ inputCsv = inputFileName + '.csv'; var options = { readOptions: { encoding: encoding } }; //~ options.hasHeader = true; //~ options.fieldIndexes = [ 1, 3, 4 ]; parseCsvFile(inputCsv, options, function onNext(record) { console.log(record); }, function onComplete() { console.log('Done'); } ); -
PhiLhoSoft revised this gist
Feb 23, 2016 . 2 changed files with 31 additions and 21 deletions. There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -7,32 +7,13 @@ var fs = require('fs'); // http://blog.james-carr.org/2010/07/07/parsing-csv-files-with-nodejs/ // https://gist.github.com/PhiLhoSoft/ff60eefcb8ed43326cd7 function parseCsvFile(fileName, options, callback) { var lineNb = 0, header = [], buffer = ''; var pattern = /(?:^|,)("(?:[^"]*)"|[^,]*)/g; var stream = fs.createReadStream(fileName, options.readOptions); stream.on('data', function (data) // data is a Buffer { // Add chunk of data to remainder of previous chunk => the CSV file MUST have a blank line at the end! @@ -114,3 +95,5 @@ function parseCsvFile(fileName, options, callback) return fields; } } module.exports = parseCsvFile; This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,27 @@ // Parses a CSV file. // // Made to run on the Node.js platform. // Should accept more encodings, using a conversion library. 
const fs = require('fs');
const parseCsvFile = require('./ParseCsvFile');

main(process.argv);

/**
 * Command-line entry point: streams `<filePath>.csv` through parseCsvFile
 * and prints every record it reports.
 *
 * @param {string[]} argv - Process arguments; argv[0] is the node binary,
 *   argv[1] the script path, argv[2] the CSV path without its ".csv"
 *   extension and argv[3] an optional encoding ('ascii' or 'utf8').
 */
function main(argv) {
  const inputFileName = argv[2];
  if (inputFileName === undefined) {
    // argv[0] = "node", argv[1] = script file name
    console.log('Usage: ' + argv[0] + ' ' + argv[1] + ' filePath [encoding]');
    return;
  }

  // The encoding is the second user-supplied argument, hence index 3
  // (index 4 would silently ignore what the usage line advertises).
  const encoding = argv[3] || 'utf8'; // 'ascii' or 'utf8'
  const inputCsv = inputFileName + '.csv';

  // Optional switches understood by the parser:
  //   options.hasHeader = true;           -> records are objects keyed by header name
  //   options.fieldIndexes = [ 1, 3, 4 ]; -> records are arrays holding only these columns
  const options = { readOptions: { encoding: encoding } };

  parseCsvFile(inputCsv, options, function (record) {
    console.log(record);
  });
}
PhiLhoSoft revised this gist
Feb 23, 2016 . 1 changed file with 15 additions and 5 deletions. There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -20,7 +20,8 @@ encoding = arguments[4] || 'utf8'; // 'ascii' or 'utf8' inputCsv = inputFileName + '.csv'; //~ var options = { hasHeader: true }; //~ var options = { fieldIndexes: [ 1, 3, 4 ] }; var options = {}; parseCsvFile(inputCsv, options, function (record) { console.log(record); @@ -31,7 +32,7 @@ function parseCsvFile(fileName, options, callback) { var lineNb = 0, header = [], buffer = ''; var pattern = /(?:^|,)("(?:[^"]*)"|[^,]*)/g; var stream = fs.createReadStream(fileName, { encoding: encoding }); stream.on('data', function (data) // data is a Buffer { // Add chunk of data to remainder of previous chunk => the CSV file MUST have a blank line at the end! @@ -94,10 +95,19 @@ function parseCsvFile(fileName, options, callback) { if (index % 2 === 0) return; // Skip, that's the separator index = Math.floor(index / 2); var idx; if (options.fieldIndexes !== undefined) { idx = options.fieldIndexes.findIndex(function (v) { return v === index; }); if (idx === -1) return; } else { idx = index; } fields[idx] = value.replace(/"/g, ''); }) -
PhiLhoSoft created this gist
Feb 23, 2016. There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,4 @@ Area Type,Town name,number value,region,sub-region "Unknown Area Type","Hlegu",1511.19895194,"Yangon","Yangon, (North)" "Unknown Area Type","Cocokyun",33.8113207395,"Yangon","Yangon, (South)" "Unknown Area Type","Mese",1818.94431751,"Kayah","Bawlake" This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,106 @@ // Parses a CSV file. // If told it has a header, the callback is called with an object (per line) where the keys are the header names. // If asked to return field indexes, the callback is called with an array (per line) with the extracted values. // // Made to run on the Node.js platform. // Should accept more encodings, using a conversion library. 
const fs = require('fs');

main(process.argv);

/**
 * Command-line entry point: parses `<filePath>.csv` and prints each record.
 *
 * @param {string[]} argv - Process arguments; argv[0] is the node binary,
 *   argv[1] the script path, argv[2] the CSV path without its ".csv"
 *   extension and argv[3] an optional encoding ('ascii' or 'utf8').
 */
function main(argv) {
  const inputFileName = argv[2];
  if (inputFileName === undefined) {
    // argv[0] = "node", argv[1] = script file name
    console.log('Usage: ' + argv[0] + ' ' + argv[1] + ' filePath [encoding]');
    return;
  }

  // The encoding is the second user-supplied argument, hence index 3.
  const encoding = argv[3] || 'utf8'; // 'ascii' or 'utf8'
  const inputCsv = inputFileName + '.csv';

  // Use { hasHeader: true } instead of fieldIndexes to get one object per
  // line, keyed by the names found on the first line of the file.
  const options = {
    fieldIndexes: [1, 3, 4],
    readOptions: { encoding: encoding },
  };

  parseCsvFile(inputCsv, options, function (record) {
    console.log(record);
  });
}

/**
 * Splits one CSV line into its field values.
 *
 * Fields are separated by commas. A field may be wrapped in double quotes,
 * in which case it can contain commas, and a doubled quote ("") stands for
 * a literal quote. The surrounding quotes are not part of the value.
 *
 * @param {string} line - A single line, without its line terminator.
 * @returns {string[]} The field values, in order (empty fields are '').
 */
function splitCsvLine(line) {
  const fields = [];
  let field = '';
  let inQuotes = false;

  for (let i = 0; i < line.length; i++) {
    const ch = line[i];
    if (inQuotes) {
      if (ch !== '"') {
        field += ch;
      } else if (line[i + 1] === '"') {
        // Doubled quote inside a quoted field: keep one, skip the other.
        field += '"';
        i++;
      } else {
        inQuotes = false;
      }
    } else if (ch === '"') {
      inQuotes = true;
    } else if (ch === ',') {
      fields.push(field);
      field = '';
    } else {
      field += ch;
    }
  }
  fields.push(field);

  return fields;
}

/**
 * Streams a CSV file and reports its content one line at a time.
 *
 * Empty lines are skipped. Parsing is line based, so a quoted field cannot
 * contain a line break.
 *
 * See http://blog.james-carr.org/2010/07/07/parsing-csv-files-with-nodejs/
 *
 * @param {string} fileName - Path of the file to read.
 * @param {object} [options]
 * @param {boolean} [options.hasHeader] - When true, the first non-empty line
 *   supplies the field names and each following line is reported as an
 *   object keyed by those names (columns with an empty name are dropped).
 *   Takes precedence over fieldIndexes.
 * @param {number[]} [options.fieldIndexes] - Zero-based indexes of the
 *   columns to report, in this order. When omitted, all columns are reported.
 * @param {object} [options.readOptions] - Options handed to
 *   fs.createReadStream; the encoding defaults to 'utf8'.
 * @param {function(object|string[]): void} callback - Called once per line.
 * @param {function(?Error): void} [onComplete] - Called with null once the
 *   whole file has been processed, or with the error if reading failed.
 *   Without it, a read error is left unhandled, as before.
 */
function parseCsvFile(fileName, options, callback, onComplete) {
  const settings = options || {};
  // Letting the stream decode the text keeps multi-byte characters intact
  // even when they straddle two chunks.
  const readOptions = Object.assign({ encoding: 'utf8' }, settings.readOptions);
  const hasFieldIndexes = Array.isArray(settings.fieldIndexes);
  const notifyCompletion = typeof onComplete === 'function';

  let header = null; // Field names, once the header line has been seen
  let buffer = ''; // Trailing, possibly partial, line of the last chunk

  const stream = fs.createReadStream(fileName, readOptions);

  stream.on('data', function (chunk) {
    buffer += chunk;
    const lines = buffer.split(/[\r\n]+/);
    // The last piece may be incomplete: keep it for the next chunk.
    buffer = lines.pop();
    lines.forEach(processLine);
  });

  stream.on('end', function () {
    // Whatever remains is the last line of a file without final newline.
    processLine(buffer);
    buffer = '';
    if (notifyCompletion) {
      onComplete(null);
    }
  });

  if (notifyCompletion) {
    stream.on('error', onComplete);
  }

  function processLine(line) {
    if (line === '') {
      return; // Skip empty lines
    }
    const fields = splitCsvLine(line);
    if (!settings.hasHeader) {
      callback(extractFields(fields));
    } else if (header === null) {
      header = fields;
    } else {
      callback(buildRecord(fields));
    }
  }

  function buildRecord(fields) {
    const record = {};
    fields.forEach(function (value, index) {
      const name = header[index];
      // Ignore values that have no (or an empty) column name.
      if (name !== undefined && name !== '') {
        record[name] = value;
      }
    });
    return record;
  }

  function extractFields(fields) {
    if (!hasFieldIndexes) {
      return fields;
    }
    return settings.fieldIndexes.map(function (fieldIndex) {
      return fields[fieldIndex];
    });
  }
}