
@bennadel
Created March 3, 2017 12:49

data.ndjson
    {"id":1,"name":"O Brother, Where Art Thou?"}
    {"id":2,"name":"Home for the Holidays"}
    {"id":3,"name":"The Firm"}
    {"id":4,"name":"Broadcast News"}
    {"id":5,"name":"Raising Arizona"}
test.js
// Require the core node modules.
var chalk = require( "chalk" );
var fileSystem = require( "fs" );
var ndjson = require( "ndjson" );


// ----------------------------------------------------------------------------------- //
// ----------------------------------------------------------------------------------- //


// Imagine that we are performing some sort of data migration and we have to move data
// from one database to flat files; then transport those flat files elsewhere; then,
// import those flat files into a different database.
var records = [
    { id: 1, name: "O Brother, Where Art Thou?" },
    { id: 2, name: "Home for the Holidays" },
    { id: 3, name: "The Firm" },
    { id: 4, name: "Broadcast News" },
    { id: 5, name: "Raising Arizona" }
    // .... hundreds of thousands of records ....
];

// Traditionally, we might store ONE JSON document PER FILE. However, this has some
// serious implications once we move out of the local development environment and into
// production. As the JSON documents grow in size, we run the risk of running out of
// memory (during the serialization and parsing process). To get around this, we can
// use a slightly different storage format in which our data file is not ONE JSON
// document PER FILE, but rather ONE JSON document PER LINE. This is known as "ndjson"
// or "Newline-Delimited JSON". To use this format, we're going to create an ndjson
// Transform stream (aka "through" stream) that takes each JavaScript object and
// writes it as a newline-delimited String to the output stream (which will be a
// file-output stream in our case).
// --
// NOTE: We're using .ndjson - NOT .json - for this storage format.
var transformStream = ndjson.stringify();

// Pipe the ndjson serialized output to the file-system.
var outputStream = transformStream.pipe( fileSystem.createWriteStream( __dirname + "/data.ndjson" ) );

// Iterate over the records and write EACH ONE to the TRANSFORM stream individually.
// Each one of these records will become a line in the output file.
records.forEach(
    function iterator( record ) {

        transformStream.write( record );

    }
);

// Once we've written each record in the record-set, we have to end the stream so that
// the TRANSFORM stream knows to flush and close the file output stream.
transformStream.end();

// Once ndjson has flushed all data to the output stream, let's indicate done.
outputStream.on(
    "finish",
    function handleFinish() {

        console.log( chalk.green( "ndjson serialization complete!" ) );
        console.log( "- - - - - - - - - - - - - - - - - - - - - - -" );

    }
);


// ----------------------------------------------------------------------------------- //
// ----------------------------------------------------------------------------------- //


// Since the stream actions are event-driven (and asynchronous), we have to wait until
// our output stream has been closed before we can try reading it back in.
outputStream.on(
    "finish",
    function handleFinish() {

        // When we read the file back into memory, ndjson will stream, buffer, and split
        // the content based on the newline character. It will then parse each newline-
        // delimited value as a JSON object and emit it from the TRANSFORM stream.
        var inputStream = fileSystem.createReadStream( __dirname + "/data.ndjson" );
        var transformStream = inputStream.pipe( ndjson.parse() );

        transformStream
            // Each "data" event will emit one item from our original record-set.
            .on(
                "data",
                function handleRecord( data ) {

                    console.log( chalk.red( "Record (event):" ), data );

                }
            )

            // Once ndjson has parsed all the input, let's indicate done.
            .on(
                "end",
                function handleEnd() {

                    console.log( "- - - - - - - - - - - - - - - - - - - - - - -" );
                    console.log( chalk.green( "ndjson parsing complete!" ) );

                }
            );

    }
);
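
To run the demo, install the two npm dependencies and execute the script; it writes data.ndjson next to the script, then reads it back, logging the serialization message followed by each of the five records:

npm install chalk ndjson
node test.js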
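
One caveat for the "hundreds of thousands of records" scenario mentioned in the comments: the forEach() loop ignores the boolean returned by transformStream.write(), so the entire record-set gets buffered in memory before the file-system can drain it. A sketch of a backpressure-aware alternative - the writeAll() helper here is hypothetical, not part of the ndjson module:

// Write records to the stream, pausing whenever the internal buffer fills up
// and resuming on the "drain" event. NOTE: writeAll() is a hypothetical helper.
function writeAll( stream, records ) {

    var index = 0;

    function writeNext() {

        while ( index < records.length ) {

            // .write() returns false once the stream's high-water mark is reached.
            if ( ! stream.write( records[ index++ ] ) ) {

                stream.once( "drain", writeNext );
                return;

            }

        }

        // All records written - end the stream so it flushes and closes.
        stream.end();

    }

    writeNext();

}

// Usage: writeAll( transformStream, records );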
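
Also worth knowing: .pipe() does not forward errors between streams, so a failure in the read stream never reaches a handler attached to the transform stream. In newer versions of Node (10+), the read-side plumbing is often written with stream.pipeline(), which routes an error from any stage into a single callback. A rough sketch of that variation, reusing the fileSystem and ndjson requires from test.js:

var stream = require( "stream" );

// An object-mode sink for the parsed records (stands in for real import logic).
var recordSink = new stream.Writable({
    objectMode: true,
    write: function( record, encoding, callback ) {

        console.log( "Record (pipeline):", record );
        callback();

    }
});

stream.pipeline(
    fileSystem.createReadStream( __dirname + "/data.ndjson" ),
    ndjson.parse(),
    recordSink,
    function handleDone( error ) {

        if ( error ) {
            console.error( "Pipeline failed:", error );
        } else {
            console.log( "ndjson parsing complete!" );
        }

    }
);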