Skip to content

Instantly share code, notes, and snippets.

@firebuggirl
Forked from ronalstal/Example_Output.txt
Created December 6, 2017 20:57
Show Gist options
  • Save firebuggirl/038614c109b7cf82b1a3529b7918c17c to your computer and use it in GitHub Desktop.
Save firebuggirl/038614c109b7cf82b1a3529b7918c17c to your computer and use it in GitHub Desktop.

Revisions

  1. @ronalstal ronalstal revised this gist Dec 5, 2017. 1 changed file with 42 additions and 0 deletions.
    42 changes: 42 additions & 0 deletions Example_Output.txt
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,42 @@

    MongoDB shell version v3.4.10
    connecting to: mongodb://127.0.0.1:27017/m121
    MongoDB server version: 3.4.10

    Chapter 3 Lab Lookup Pipelines Performance Comparison


    ## start on air_alliances - using $filter


    {
    "_id" : {
    "keysExamined" : 0,
    "docsExamined" : 3,
    "nreturned" : 3,
    "planSummary" : "COLLSCAN"
    },
    "nbRuns" : 10,
    "minMillis" : 122,
    "maxMillis" : 151,
    "avgMillis" : 130.2
    }



    ## start with $match on air_routes - only one $lookUp


    {
    "_id" : {
    "keysExamined" : 0,
    "docsExamined" : 66985,
    "nreturned" : 3,
    "planSummary" : "COLLSCAN"
    },
    "nbRuns" : 10,
    "minMillis" : 36,
    "maxMillis" : 42,
    "avgMillis" : 36.8
    }

  2. @ronalstal ronalstal created this gist Dec 5, 2017.
    46 changes: 46 additions & 0 deletions README.md
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,46 @@
    ## Comparing the Performance of Different MongoDB Aggregation Pipelines
    Sometimes you have different ways to do an aggregation and you would like
    to compare the performance of the pipelines you came up with.

    The script `pipeCompare.js` allows you to do this: it will run each pipeline several times and print out some statistical information.
    #### General Setup
    * download the script `pipeCompare.js` below
    * In the Mongo Shell, enable profiling and load the script:
    ```
    use <db> // make sure you're on the correct database
    db.setProfilingLevel(2) // enable profiling
    load("[<path-to>/]pipeCompare.js")
    ```
    This loads the `config` object and the `comparePipes()` and `addPipe()` functions.

    #### The config object
    ```javascript
    config = {
    title: "", // you may set a title here
    nbRuns: 10, // number of times each pipeline will be run
    defaultColl: "", // default collection to run aggregate() on
    printPipes: false, // include the pipelines in the output
    pipes: [], // the pipelines, see below
    }
    ```
    Set `config.title`, `.nbRuns`, `.defaultColl` to suit your needs
    #### Configure the Pipelines
    ```javascript
    pipe1 = [ // unique name for each pipeline
    ... // the pipeline stages
    ]
    addPipe(pipe1, "collection", "description")
    ```
    where `collection` is the name of the collection the aggregate() will act upon (if different from `config.defaultColl`)
    The `description` argument is an optional String.

    **Repeat this for each pipeline you want to include in the comparison.**
    #### Run the comparison
    ```javascript
    comparePipes()
    ```
    If you want to save the output in a file, put all the above steps (including the `load()`) in a `<script>.js` file and run it from the command line:
    ```sh
    mongo <db> <script>.js > comparePipes.out
    ```
    (For a nicely colored visualization in your editor, use `> comparePipes.out.js`)
    72 changes: 72 additions & 0 deletions pipeCompare.js
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,72 @@
    // Comparing the Performance of Different MongoDB Aggregation Pipelines
    // for instructions see the README.md file in this gist
    // license: MIT, (c) 2017 Ronald Stalder @ronalstal.gmail.com

    // Global configuration for the pipeline comparison.
    // Declared with `var` (the original was an implicit global, which
    // throws in strict-mode contexts); under the mongo shell's load()
    // this still creates the same global `config` binding.
    // Users mutate these fields before calling comparePipes().
    var config = {
      title: "",          // optional title printed above the results
      nbRuns: 10,         // number of times each pipeline will be run
      defaultColl: "",    // default collection to run aggregate() on
      printPipes: false,  // include the pipelines themselves in the output
      pipes: [],          // the pipelines to compare, filled via addPipe()
    }

    /**
     * Register a pipeline for the comparison run.
     * @param {Array} pipeline - the aggregation pipeline stages
     * @param {string} [collection] - collection to run aggregate() on;
     *        falls back to config.defaultColl when omitted/empty
     * @param {string} [description] - free-text label printed above the
     *        pipeline's results; defaults to ""
     */
    function addPipe(pipeline, collection, description) {
      // Local binding: the original leaked `pipe` into the global scope.
      var entry = {
        pipe: pipeline,
        coll: collection || config.defaultColl,
        descr: description || "",
      }
      config.pipes.push(entry)
    }

    /**
     * Run every registered pipeline config.nbRuns times and print, per
     * pipeline, timing statistics aggregated from db.system.profile.
     * Requires profiling to be enabled first: db.setProfilingLevel(2).
     * Output goes to the shell via print()/printjson(); returns nothing.
     */
    function comparePipes() {
      var title = config.title || "Comparing Performance of Pipes"
      print("\n" + title + "\n\n")

      // run the stats for each pipeline
      config.pipes.forEach(pipe => {
        print("## " + pipe.descr + "\n\n")
        if (config.printPipes) {
          printjson(pipe.pipe)
          print("\n\n")
        }
        // Timestamp taken just before the runs, so the profile query
        // below only matches this pipeline's executions.
        var startStamp = new ISODate()
        // Resolve the collection by name instead of eval()-ing a
        // concatenated command string -- same effect, no dynamic code
        // execution, and it copes with collection names that are not
        // valid identifiers.
        for (var i = 0; i < config.nbRuns; i++) {
          db.getCollection(pipe.coll).aggregate(pipe.pipe)
        }
        // Now aggregate the profiler entries produced by the runs above.
        var profilePipe = [
          {"$match": {
            "op": {$eq: "command"},
            "ts": {$gte: startStamp},
            "command.aggregate": {$eq: pipe.coll},
          }},
          {"$project": {
            "_id": 0,
            // info: fields forming the group key, same for all runs
            "info.keysExamined": "$keysExamined",
            "info.docsExamined": "$docsExamined",
            "info.nreturned": "$nreturned",
            "info.planSummary": "$planSummary",
            "millis": 1,
          }},
          {"$group": {
            "_id": "$info",
            "nbRuns": {"$sum": 1},
            "minMillis": {"$min": "$millis"},
            "maxMillis": {"$max": "$millis"},
            "avgMillis": {"$avg": "$millis"},
          }},
        ]
        var cursor = db.system.profile.aggregate(profilePipe)
        while (cursor.hasNext()) { printjson(cursor.next()) }
        print("\n\n")
      })
    }

    // internal use
    /**
     * Internal helper: build an aggregation expression that truncates the
     * value of `$<field>` to the given number of decimal places,
     * e.g. _truncate("avgMillis", 1).
     * @param {string} field - field name, without the leading "$"
     * @param {number} decimals - number of decimal places to keep
     * @returns {Object} a {$divide: [{$trunc: {$multiply: ...}}, factor]}
     *          aggregation expression
     */
    function _truncate(field, decimals) {
      const factor = Math.pow(10, decimals)
      // Construct the expression object directly; the original built a
      // JSON string by concatenation and round-tripped it through
      // JSON.parse, which is fragile and slower for no benefit.
      return { "$divide": [{ "$trunc": { "$multiply": ["$" + field, factor] } }, factor] }
    }