Skip to content

Instantly share code, notes, and snippets.

@PhiLhoSoft
Last active November 28, 2024 12:12
Show Gist options
  • Save PhiLhoSoft/ff60eefcb8ed43326cd7 to your computer and use it in GitHub Desktop.
Save PhiLhoSoft/ff60eefcb8ed43326cd7 to your computer and use it in GitHub Desktop.

Revisions

  1. PhiLhoSoft revised this gist Feb 23, 2016. 2 changed files with 17 additions and 7 deletions.
    10 changes: 7 additions & 3 deletions ParseCsvFile.js
    Original file line number Diff line number Diff line change
    @@ -9,7 +9,7 @@ var fs = require('fs');

    // http://blog.james-carr.org/2010/07/07/parsing-csv-files-with-nodejs/
    // https://gist.github.com/PhiLhoSoft/ff60eefcb8ed43326cd7
    function parseCsvFile(fileName, options, callback)
    function parseCsvFile(fileName, options, onNext, onComplete)
    {
    var lineNb = 0, header = [], buffer = '';
    var pattern = /(?:^|,)("(?:[^"]*)"|[^,]*)/g;
    @@ -32,6 +32,10 @@ function parseCsvFile(fileName, options, callback)
    {
    // Process the last, complete line of the file (skipped if empty)
    processLine(buffer, 1);
    if (onComplete)
    {
    onComplete();
    }
    });

    function processLine(line, idx)
    @@ -47,12 +51,12 @@ function parseCsvFile(fileName, options, callback)
    }
    else
    {
    callback(buildRecord(line));
    onNext(buildRecord(line));
    }
    }
    else
    {
    callback(extractFields(line));
    onNext(extractFields(line));
    }
    }

    14 changes: 10 additions & 4 deletions TestParseCsvFile.js
    Original file line number Diff line number Diff line change
    @@ -21,7 +21,13 @@ inputCsv = inputFileName + '.csv';
    var options = { readOptions: { encoding: encoding } };
    //~ options.hasHeader = true;
    //~ options.fieldIndexes = [ 1, 3, 4 ];
    parseCsvFile(inputCsv, options, function (record)
    {
    console.log(record);
    });
    parseCsvFile(inputCsv, options,
    function onNext(record)
    {
    console.log(record);
    },
    function onComplete()
    {
    console.log('Done');
    }
    );
  2. PhiLhoSoft revised this gist Feb 23, 2016. 2 changed files with 31 additions and 21 deletions.
    25 changes: 4 additions & 21 deletions ParseCsvFile.js
    Original file line number Diff line number Diff line change
    @@ -7,32 +7,13 @@

    var fs = require('fs');

    var arguments = process.argv;

    var inputFileName = arguments[2], encoding;
    if (inputFileName === undefined)
    {
    // arguments[0] = "node", arguments[1] = file name
    console.log('Usage: ' + arguments[0] + ' ' + arguments[1] + ' filePath [encoding]');
    return;
    }
    encoding = arguments[4] || 'utf8'; // 'ascii' or 'utf8'
    inputCsv = inputFileName + '.csv';

    //~ var options = { hasHeader: true };
    //~ var options = { fieldIndexes: [ 1, 3, 4 ] };
    var options = {};
    parseCsvFile(inputCsv, options, function (record)
    {
    console.log(record);
    });

    // http://blog.james-carr.org/2010/07/07/parsing-csv-files-with-nodejs/
    // https://gist.github.com/PhiLhoSoft/ff60eefcb8ed43326cd7
    function parseCsvFile(fileName, options, callback)
    {
    var lineNb = 0, header = [], buffer = '';
    var pattern = /(?:^|,)("(?:[^"]*)"|[^,]*)/g;
    var stream = fs.createReadStream(fileName, { encoding: encoding });
    var stream = fs.createReadStream(fileName, options.readOptions);
    stream.on('data', function (data) // data is a Buffer
    {
    // Add chunk of data to remainder of previous chunk => the CSV file MUST have a blank line at the end!
    @@ -114,3 +95,5 @@ function parseCsvFile(fileName, options, callback)
    return fields;
    }
    }

    module.exports = parseCsvFile;
    27 changes: 27 additions & 0 deletions TestParseCsvFile.js
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,27 @@
    // Parses a CSV file.
    //
    // Made to run on the Node.js platform.
    // Should accept more encodings, using a conversion library.

    var fs = require('fs');
    var parseCsvFile = require('./ParseCsvFile');

    // Command-line driver: node <script> filePath [encoding]
    // process.argv[0] is the node executable and process.argv[1] the script path,
    // so the user-supplied values start at index 2.
    // (The array is not named `arguments`, to avoid shadowing the module wrapper's own `arguments` object.)
    const cliArgs = process.argv;
    const inputFileName = cliArgs[2];

    if (inputFileName === undefined)
    {
        console.log('Usage: ' + cliArgs[0] + ' ' + cliArgs[1] + ' filePath [encoding]');
    }
    else
    {
        // The optional encoding immediately follows the file path on the command line
        const encoding = cliArgs[3] || 'utf8'; // 'ascii' or 'utf8'
        // The path is given without its extension: '.csv' is always appended
        const inputCsv = inputFileName + '.csv';

        const options = { readOptions: { encoding: encoding } };
        //~ options.hasHeader = true;
        //~ options.fieldIndexes = [ 1, 3, 4 ];
        parseCsvFile(inputCsv, options, function (record)
        {
            console.log(record);
        });
    }
  3. PhiLhoSoft revised this gist Feb 23, 2016. 1 changed file with 15 additions and 5 deletions.
    20 changes: 15 additions & 5 deletions ParseCsvFile.js
    Original file line number Diff line number Diff line change
    @@ -20,7 +20,8 @@ encoding = arguments[4] || 'utf8'; // 'ascii' or 'utf8'
    inputCsv = inputFileName + '.csv';

    //~ var options = { hasHeader: true };
    var options = { fieldIndexes: [ 1, 3, 4 ] };
    //~ var options = { fieldIndexes: [ 1, 3, 4 ] };
    var options = {};
    parseCsvFile(inputCsv, options, function (record)
    {
    console.log(record);
    @@ -31,7 +32,7 @@ function parseCsvFile(fileName, options, callback)
    {
    var lineNb = 0, header = [], buffer = '';
    var pattern = /(?:^|,)("(?:[^"]*)"|[^,]*)/g;
    var stream = fs.createReadStream(fileName);
    var stream = fs.createReadStream(fileName, { encoding: encoding });
    stream.on('data', function (data) // data is a Buffer
    {
    // Add chunk of data to remainder of previous chunk => the CSV file MUST have a blank line at the end!
    @@ -94,10 +95,19 @@ function parseCsvFile(fileName, options, callback)
    {
    if (index % 2 === 0)
    return; // Skip, that's the separator
    index = Math.floor(index / 2);

    var idx = options.fieldIndexes.findIndex(function (v) { return v === Math.floor(index / 2); });
    if (idx === -1)
    return;
    var idx;
    if (options.fieldIndexes !== undefined)
    {
    idx = options.fieldIndexes.findIndex(function (v) { return v === index; });
    if (idx === -1)
    return;
    }
    else
    {
    idx = index;
    }

    fields[idx] = value.replace(/"/g, '');
    })
  4. PhiLhoSoft created this gist Feb 23, 2016.
    4 changes: 4 additions & 0 deletions Example.csv
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,4 @@
    Area Type,Town name,number value,region,sub-region
    "Unknown Area Type","Hlegu",1511.19895194,"Yangon","Yangon, (North)"
    "Unknown Area Type","Cocokyun",33.8113207395,"Yangon","Yangon, (South)"
    "Unknown Area Type","Mese",1818.94431751,"Kayah","Bawlake"
    106 changes: 106 additions & 0 deletions ParseCsvFile.js
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,106 @@
    // Parses a CSV file.
    // If told it has a header, the callback is called with an object (per line) where the keys are the header names.
    // If asked to return field indexes, the callback is called with an array (per line) with the extracted values.
    //
    // Made to run on the Node.js platform.
    // Should accept more encodings, using a conversion library.

    var fs = require('fs');

    // Command-line driver: node <script> filePath [encoding]
    // process.argv[0] is the node executable and process.argv[1] the script path,
    // so the user-supplied values start at index 2.
    // (The array is not named `arguments`, to avoid shadowing the module wrapper's own `arguments` object.)
    const cliArgs = process.argv;
    const inputFileName = cliArgs[2];

    if (inputFileName === undefined)
    {
        console.log('Usage: ' + cliArgs[0] + ' ' + cliArgs[1] + ' filePath [encoding]');
    }
    else
    {
        // The optional encoding immediately follows the file path on the command line
        const encoding = cliArgs[3] || 'utf8'; // 'ascii' or 'utf8'
        // The path is given without its extension: '.csv' is always appended
        const inputCsv = inputFileName + '.csv';

        //~ const options = { hasHeader: true };
        // The encoding is forwarded as readOptions; a parser that does not read
        // options.readOptions simply ignores the extra key.
        const options = { fieldIndexes: [ 1, 3, 4 ], readOptions: { encoding: encoding } };
        parseCsvFile(inputCsv, options, function (record)
        {
            console.log(record);
        });
    }

    // http://blog.james-carr.org/2010/07/07/parsing-csv-files-with-nodejs/
    /**
     * Streams a CSV file and reports every non-empty line to `callback`.
     *
     * - With `options.hasHeader`, the first non-empty line provides the column names and
     *   each following line is reported as an object keyed by those names.
     * - Otherwise each line is reported as an array of values; when `options.fieldIndexes`
     *   is an array, only those (0-based) columns are kept, in the order they are listed.
     *
     * Fields are separated by commas; a field wrapped in double quotes may contain commas.
     * All double quote characters are removed from the reported values and header names.
     *
     * @param {string} fileName - Path of the CSV file to read.
     * @param {Object} [options] - Parsing options.
     * @param {boolean} [options.hasHeader] - Treat the first non-empty line as column names.
     * @param {number[]} [options.fieldIndexes] - Columns to keep (ignored when hasHeader is set).
     * @param {Object} [options.readOptions] - Passed to fs.createReadStream; encoding defaults to 'utf8'.
     * @param {function(Object|Array)} callback - Called once per data line.
     * @param {function(Error=)} [onComplete] - Called without argument once the whole file has been
     *   processed, or with the error if reading fails. Without it, a read error is thrown.
     */
    function parseCsvFile(fileName, options, callback, onComplete)
    {
        const settings = options || {};
        // Let the stream do the decoding: converting each raw chunk separately would corrupt
        // a multi-byte character that happens to be split across two chunks.
        const readOptions = Object.assign({ encoding: 'utf8' }, settings.readOptions);
        let header = null; // Column names, once the header line has been seen
        let remainder = ''; // Trailing, possibly partial, line of the previous chunk

        const stream = fs.createReadStream(fileName, readOptions);
        stream.on('data', function (chunk)
        {
            // Prepend what was left from the previous chunk, then cut in lines
            const lines = (remainder + chunk).split(/[\r\n]+/);
            // The last piece may be incomplete: keep it for the next chunk (or for 'end')
            remainder = lines.pop();
            lines.forEach(function (line)
            {
                processLine(line);
            });
        });
        stream.on('error', function (error)
        {
            if (typeof onComplete === 'function')
            {
                onComplete(error);
                return;
            }
            // Nobody to report to: fail loudly, as an unhandled stream error would
            throw error;
        });
        stream.on('end', function ()
        {
            // Process the last line of the file, which has no line terminator (skipped if empty)
            processLine(remainder);
            remainder = '';
            if (typeof onComplete === 'function')
            {
                onComplete();
            }
        });

        // Dispatches one line: header capture, record building or field extraction.
        function processLine(line)
        {
            if (line === '')
            {
                return; // Skip empty lines
            }

            const values = splitFields(line);
            if (!settings.hasHeader)
            {
                callback(extractFields(values));
                return;
            }
            if (header === null)
            {
                // The first non-empty line is the header, wherever it falls within a chunk
                header = values;
                return;
            }
            callback(buildRecord(values));
        }

        // Splits a line into its field values, with the double quotes removed.
        function splitFields(line)
        {
            // A field (quoted, or free of commas) followed by its delimiter: a comma or the end of the line
            const fieldPattern = /("[^"]*"|[^,]*)(,|$)/g;
            const values = [];
            let match;
            do
            {
                match = fieldPattern.exec(line);
                if (match === null)
                {
                    break;
                }
                values.push(match[1].replace(/"/g, ''));
            }
            while (match[2] === ','); // A comma means another field follows, even an empty one
            return values;
        }

        // Maps the values of a line to the header names; columns without a name are dropped.
        function buildRecord(values)
        {
            const record = {};
            values.forEach(function (value, column)
            {
                const name = header[column];
                if (name !== undefined && name !== '')
                {
                    record[name] = value;
                }
            });
            return record;
        }

        // Keeps only the requested columns, or all of them if no selection was given.
        function extractFields(values)
        {
            const wanted = settings.fieldIndexes;
            if (!Array.isArray(wanted))
            {
                return values;
            }

            const fields = [];
            values.forEach(function (value, column)
            {
                // The position in fieldIndexes decides the position in the result
                const slot = wanted.indexOf(column);
                if (slot !== -1)
                {
                    fields[slot] = value;
                }
            });
            return fields;
        }
    }