Created
January 21, 2014 20:15
-
-
Save joseraya/8547524 to your computer and use it in GitHub Desktop.
Revisions
-
joseraya created this gist
Jan 21, 2014. There are no files selected for viewing
var Browser = require('zombie'),
    url = require('url'),
    fs = require('fs'),
    path = require('path'),
    // The npm package is named "q"; requiring 'Q' only resolves on
    // case-insensitive filesystems. Currently unused, kept for parity.
    $q = require('q'),
    saveDir = __dirname + '/_snapshots';

// Upper bound on how long a page is polled for data-status="ready".
var READY_POLL_INTERVAL_MS = 500,
    MAX_READY_POLLS = 60;

var scriptTagRegex = /<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>/gi;

/**
 * Removes every <script>...</script> element from an HTML string.
 *
 * @param {string} html - Markup to clean.
 * @returns {string} The markup with all script elements removed.
 */
var stripScriptTags = function(html) {
  return html.replace(scriptTagRegex, '');
};

/**
 * Creates a directory and any missing parent directories.
 *
 * @param {string} dirPath - Directory to create.
 * @param {number|string} [mode] - Permission bits handed to fs.mkdir.
 * @param {function(?Error)} [callback] - Called exactly once; receives null
 *     on success (including when the directory already exists).
 */
var mkdirParent = function(dirPath, mode, callback) {
  // Support mkdirParent(dirPath, callback).
  if (typeof mode === 'function') {
    callback = mode;
    mode = undefined;
  }

  var done = function(error) {
    if (callback) callback(error || null);
  };

  fs.mkdir(dirPath, mode, function(error) {
    if (!error || error.code === 'EEXIST') return done(null);

    var parentDir = path.dirname(dirPath);
    // errno 34 was ENOENT in very old Node releases; error.code is the
    // portable check. Stop at the filesystem root to avoid endless recursion.
    var isMissingParent = error.code === 'ENOENT' || error.errno === 34;
    if (!isMissingParent || parentDir === dirPath) return done(error);

    // Create the parents first, then retry the directory itself once.
    mkdirParent(parentDir, mode, function(parentError) {
      if (parentError) return done(parentError);
      fs.mkdir(dirPath, mode, function(retryError) {
        if (retryError && retryError.code !== 'EEXIST') return done(retryError);
        done(null);
      });
    });
  });
};

/**
 * Writes a page's HTML under saveDir, deriving the file name from the URI.
 * Hashbang URIs ("...#!/foo") use the part after "#!"; other URIs use the
 * URL pathname. "/" maps to "/index.html" and ".html" is appended when the
 * path does not already contain it.
 *
 * @param {string} uri - Address of the page that was rendered.
 * @param {string} body - HTML to store.
 * @param {function(?Error)} [callback] - Called once the file is written or
 *     an error occurred.
 */
var saveSnapshot = function(uri, body, callback) {
  var finish = function(error) {
    if (error) console.error("Failed to save ", uri, ": ", error);
    if (callback) callback(error || null);
  };

  var lastIdx = uri.lastIndexOf('#!/');
  var relPath;
  if (lastIdx < 0) {
    // If we're using html5mode
    relPath = url.parse(uri).pathname || '/';
  } else {
    // If we're using hashbang mode
    relPath = uri.substring(lastIdx + 2);
  }

  if (relPath === '/') relPath = "/index.html";
  if (relPath.indexOf('.html') === -1) relPath += ".html";

  var root = path.resolve(saveDir);
  var filename = path.join(root, relPath);
  // The path comes from links found on crawled pages; never let ".."
  // segments write outside the snapshot directory.
  if (filename.indexOf(root + path.sep) !== 0) {
    return finish(new Error("Refusing to write outside " + root + ": " + filename));
  }

  console.log("Saving ", uri, " to ", filename);

  // Only write once the directory exists; writeFile opens, writes and
  // closes the descriptor for us.
  mkdirParent(path.dirname(filename), function(mkdirError) {
    if (mkdirError) return finish(mkdirError);
    fs.writeFile(filename, body, finish);
  });
};

var browserOpts = {
  waitFor: "100ms",
  loadCSS: false,
  waitDuration: "100ms"
};

var browser = new Browser(browserOpts);

/**
 * Visits arr[idx], waits until the page's <body> carries
 * data-status="ready", rewrites its links to absolute URLs, appends unseen
 * same-host links to arr, saves a snapshot and then moves on to idx + 1.
 * A page that fails to load or never becomes ready is logged and skipped.
 *
 * @param {number} idx - Index of the URL to crawl.
 * @param {string[]} arr - Crawl queue; grows as new links are discovered.
 */
var crawlPage = function(idx, arr) {
  if (idx >= arr.length) return;

  var uri = arr[idx];
  var pageHost = url.parse(uri).host;
  var crawlNext = function() {
    crawlPage(idx + 1, arr);
  };

  console.time("voy");
  browser.visit(uri).then(function() {
    console.timeEnd("voy");

    var polls = 0;
    var intervalId = setInterval(function() {
      console.log("checking status");
      var bodyEl = browser.body;
      var status = bodyEl ? bodyEl.getAttribute('data-status') : null;
      console.log(status);

      if (status !== "ready") {
        polls += 1;
        if (polls >= MAX_READY_POLLS) {
          // Give up on this page instead of polling forever.
          clearInterval(intervalId);
          console.error("Timed out waiting for ", uri, " to become ready");
          crawlNext();
        }
        return;
      }

      clearInterval(intervalId);

      // Turn links into absolute links and queue the ones on this site
      // that we haven't already crawled.
      var links = browser.queryAll('a');
      links.forEach(function(link) {
        var href = link.getAttribute('href');
        if (!href) return; // anchors without href cannot be resolved

        var absUrl = url.resolve(uri, href);
        link.setAttribute('href', absUrl);

        // Skip other hosts and host-less schemes (mailto:, javascript:).
        var isSameSite = url.parse(absUrl).host === pageHost;
        if (isSameSite && arr.indexOf(absUrl) < 0) {
          arr.push(absUrl);
        }
      });

      // Save, then continue with the next queued page.
      saveSnapshot(uri, browser.html(), crawlNext);
    }, READY_POLL_INTERVAL_MS);
  }, function(error) {
    console.timeEnd("voy");
    console.error("Failed to load ", uri, ": ", error);
    crawlNext();
  });
};

crawlPage(0, ["http://localhost:4000/#!/"]);