PhiLhoSoft · November 28, 2024 12:12 · Feb 23, 2016 · Feb 23, 2016 · Feb 23, 2016 · Feb 23, 2016
diff --git a/ParseCsvFile.js b/ParseCsvFile.js
@@ -9,7 +9,7 @@ var fs = require('fs');
 
 // http://blog.james-carr.org/2010/07/07/parsing-csv-files-with-nodejs/
 // https://gist.github.com/PhiLhoSoft/ff60eefcb8ed43326cd7
-function parseCsvFile(fileName, options, callback)
+function parseCsvFile(fileName, options, onNext, onComplete)
 {
 	var lineNb = 0, header = [], buffer = '';
 	var pattern = /(?:^|,)("(?:[^"]*)"|[^,]*)/g;
@@ -32,6 +32,10 @@ function parseCsvFile(fileName, options, callback)
 	{
 		// Process the last, complete line of the file (skipped if empty)
 		processLine(buffer, 1);
+		if (onComplete)
+		{
+			onComplete();
+		}
 	});
 
 	function processLine(line, idx)
@@ -47,12 +51,12 @@ function parseCsvFile(fileName, options, callback)
 			}
 			else
 			{
-				callback(buildRecord(line));
+				onNext(buildRecord(line));
 			}
 		}
 		else
 		{
-			callback(extractFields(line));
+			onNext(extractFields(line));
 		}
 	}
 

diff --git a/TestParseCsvFile.js b/TestParseCsvFile.js
@@ -21,7 +21,13 @@ inputCsv = inputFileName + '.csv';
 var options = { readOptions: { encoding: encoding } };
 //~ options.hasHeader = true;
 //~ options.fieldIndexes = [ 1, 3, 4 ];
-parseCsvFile(inputCsv, options, function (record)
-{
-	console.log(record);
-});
+parseCsvFile(inputCsv, options,
+	function onNext(record)
+	{
+		console.log(record);
+	},
+	function onComplete()
+	{
+		console.log('Done');
+	}
+);
diff --git a/ParseCsvFile.js b/ParseCsvFile.js
@@ -7,32 +7,13 @@
 
 var fs = require('fs');
 
-var arguments = process.argv;
-
-var inputFileName = arguments[2], encoding;
-if (inputFileName === undefined)
-{
-	// arguments[0] = "node", arguments[1] = file name
-	console.log('Usage: ' + arguments[0] + ' ' + arguments[1] + ' filePath [encoding]');
-	return;
-}
-encoding = arguments[4] || 'utf8'; // 'ascii' or 'utf8'
-inputCsv = inputFileName + '.csv';
-
-//~ var options = { hasHeader: true };
-//~ var options = { fieldIndexes: [ 1, 3, 4 ] };
-var options = {};
-parseCsvFile(inputCsv, options, function (record)
-{
-	console.log(record);
-});
-
 // http://blog.james-carr.org/2010/07/07/parsing-csv-files-with-nodejs/
+// https://gist.github.com/PhiLhoSoft/ff60eefcb8ed43326cd7
 function parseCsvFile(fileName, options, callback)
 {
 	var lineNb = 0, header = [], buffer = '';
 	var pattern = /(?:^|,)("(?:[^"]*)"|[^,]*)/g;
-	var stream = fs.createReadStream(fileName, { encoding: encoding });
+	var stream = fs.createReadStream(fileName, options.readOptions);
 	stream.on('data', function (data) // data is a Buffer
 	{
 		// Add chunk of data to remainder of previous chunk => the CSV file MUST have a blank line at the end!
@@ -114,3 +95,5 @@ function parseCsvFile(fileName, options, callback)
 		return fields;
 	}
 }
+
+module.exports = parseCsvFile;
diff --git a/TestParseCsvFile.js b/TestParseCsvFile.js
@@ -0,0 +1,27 @@
+// Parses a CSV file.
+//
+// Made to run on the Node.js platform.
+// Should accept more encodings, using a conversion library.
+
+var fs = require('fs');
+var parseCsvFile = require('./ParseCsvFile');
+
+var arguments = process.argv;
+
+var inputFileName = arguments[2], encoding;
+if (inputFileName === undefined)
+{
+	// arguments[0] = "node", arguments[1] = file name
+	console.log('Usage: ' + arguments[0] + ' ' + arguments[1] + ' filePath [encoding]');
+	return;
+}
+encoding = arguments[4] || 'utf8'; // 'ascii' or 'utf8'
+inputCsv = inputFileName + '.csv';
+
+var options = { readOptions: { encoding: encoding } };
+//~ options.hasHeader = true;
+//~ options.fieldIndexes = [ 1, 3, 4 ];
+parseCsvFile(inputCsv, options, function (record)
+{
+	console.log(record);
+});
diff --git a/ParseCsvFile.js b/ParseCsvFile.js
@@ -20,7 +20,8 @@ encoding = arguments[4] || 'utf8'; // 'ascii' or 'utf8'
 inputCsv = inputFileName + '.csv';
 
 //~ var options = { hasHeader: true };
-var options = { fieldIndexes: [ 1, 3, 4 ] };
+//~ var options = { fieldIndexes: [ 1, 3, 4 ] };
+var options = {};
 parseCsvFile(inputCsv, options, function (record)
 {
 	console.log(record);
@@ -31,7 +32,7 @@ function parseCsvFile(fileName, options, callback)
 {
 	var lineNb = 0, header = [], buffer = '';
 	var pattern = /(?:^|,)("(?:[^"]*)"|[^,]*)/g;
-	var stream = fs.createReadStream(fileName);
+	var stream = fs.createReadStream(fileName, { encoding: encoding });
 	stream.on('data', function (data) // data is a Buffer
 	{
 		// Add chunk of data to remainder of previous chunk => the CSV file MUST have a blank line at the end!
@@ -94,10 +95,19 @@ function parseCsvFile(fileName, options, callback)
 		{
 			if (index % 2 === 0)
 				return; // Skip, that's the separator
+			index = Math.floor(index / 2);
 
-			var idx = options.fieldIndexes.findIndex(function (v) { return v === Math.floor(index / 2); });
-			if (idx === -1)
-				return;
+			var idx;
+			if (options.fieldIndexes !== undefined)
+			{
+				idx = options.fieldIndexes.findIndex(function (v) { return v === index; });
+				if (idx === -1)
+					return;
+			}
+			else
+			{
+				idx = index;
+			}
 
 			fields[idx] = value.replace(/"/g, '');
 		})

diff --git a/Example.csv b/Example.csv
@@ -0,0 +1,4 @@
+Area Type,Town name,number value,region,sub-region
+"Unknown Area Type","Hlegu",1511.19895194,"Yangon","Yangon, (North)"
+"Unknown Area Type","Cocokyun",33.8113207395,"Yangon","Yangon, (South)"
+"Unknown Area Type","Mese",1818.94431751,"Kayah","Bawlake"
diff --git a/ParseCsvFile.js b/ParseCsvFile.js
@@ -0,0 +1,106 @@
+// Parses a CSV file and hands each non-empty line to a callback.
+// If told it has a header (options.hasHeader), the callback is called with an object (per line) where the keys are the header names.
+// Otherwise the callback is called with an array (per line) holding the fields listed in options.fieldIndexes.
+// NOTE(review): the usage text shows [encoding] right after filePath, yet the script reads arguments[4]; the value is not passed to the read stream in this file.
+// Made to run on the Node.js platform.
+// Should accept more encodings, using a conversion library.
+
+var fs = require('fs');
+
+var arguments = process.argv;
+
+var inputFileName = arguments[2], encoding;
+if (inputFileName === undefined)
+{
+	// arguments[0] = "node", arguments[1] = file name
+	console.log('Usage: ' + arguments[0] + ' ' + arguments[1] + ' filePath [encoding]');
+	return;
+}
+encoding = arguments[4] || 'utf8'; // 'ascii' or 'utf8'
+inputCsv = inputFileName + '.csv';
+
+//~ var options = { hasHeader: true };
+var options = { fieldIndexes: [ 1, 3, 4 ] };
+parseCsvFile(inputCsv, options, function (record)
+{
+	console.log(record);
+});
+
+// http://blog.james-carr.org/2010/07/07/parsing-csv-files-with-nodejs/
+function parseCsvFile(fileName, options, callback)
+{
+	var lineNb = 0, header = [], buffer = '';
+	var pattern = /(?:^|,)("(?:[^"]*)"|[^,]*)/g;
+	var stream = fs.createReadStream(fileName);
+	stream.on('data', function (data) // data is a Buffer
+	{
+		// Add chunk of data to remainder of previous chunk => the CSV file MUST have a blank line at the end!
+		buffer += data.toString(); // NOTE(review): each chunk is decoded on its own; a multi-byte character cut at a chunk boundary may be garbled - confirm
+		// Cut the content of the buffer into lines; a run of CR / LF characters counts as one separator
+		var lines = buffer.split(/[\r\n]+/);
+		lines.forEach(function(line, idx) // idx is the position of the line in this chunk, not in the file
+		{
+			// Don't process the last, partial line of this chunk
+			if (idx === lines.length - 1) return;
+			processLine(line, idx);
+		});
+		buffer = lines[lines.length - 1]; // Keep the partial line: the next chunk (or the 'end' event) completes it
+	});
+	stream.on('end', function ()
+	{
+		// Process the last, complete line of the file (skipped if empty)
+		processLine(buffer, 1);
+	});
+
+	function processLine(line, idx)
+	{ // Dispatches one line: stores it as header, or reports it to callback as an object (hasHeader) or an array
+		if (line === '')
+			return; // Skip empty lines
+
+		if (options.hasHeader)
+		{
+			if (lineNb++ === 0 && idx === 0) // First non-empty line, and first of its chunk; the 'end' handler passes idx 1, so it never sets the header
+			{
+				header = line.split(pattern); // Kept in split() layout (separator slots included); quotes are not stripped from the names
+			}
+			else
+			{
+				callback(buildRecord(line));
+			}
+		}
+		else
+		{
+			callback(extractFields(line));
+		}
+	}
+
+	function buildRecord(line) // Returns an object associating the header names to the values of this line
+	{
+		var record = {};
+		line.split(pattern).forEach(function (value, index) // Same layout as header, so slots can be matched by index
+		{
+			if (header[index] !== '') // Skips the separator slots, and any column whose header name is empty
+			{
+				record[header[index]] = value.replace(/"/g, ''); // Removes every double quote found in the value
+			}
+		})
+		return record;
+	}
+
+	function extractFields(line) // Returns an array of the values of the columns listed in options.fieldIndexes
+	{
+		var fields = []; // fields[i] receives the value of the column numbered options.fieldIndexes[i]
+		line.split(pattern).forEach(function(value, index)
+		{
+			if (index % 2 === 0)
+				return; // Skip, that's the separator
+
+			var idx = options.fieldIndexes.findIndex(function (v) { return v === Math.floor(index / 2); });
+			if (idx === -1)
+				return;
+
+			fields[idx] = value.replace(/"/g, '');
+		})
+		return fields;
+	}
+}