Skip to content

Instantly share code, notes, and snippets.

@parkerproject
Forked from mickaelandrieu/quotes_json.js
Created February 6, 2014 21:08
Show Gist options
  • Select an option

  • Save parkerproject/8852499 to your computer and use it in GitHub Desktop.

Select an option

Save parkerproject/8852499 to your computer and use it in GitHub Desktop.

Revisions

  1. @mickaelandrieu mickaelandrieu created this gist Nov 26, 2013.
    86 changes: 86 additions & 0 deletions quotes_json.js
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,86 @@
    // --- Crawl state and configuration ---------------------------------
    // NOTE: this script runs under CasperJS/PhantomJS, so it sticks to ES5 (`var`).

    // Quote-page URLs collected from the search result page.
    var links = [];
    // Quote objects gathered during the crawl; serialised to JSON at the end.
    var quotes = [];
    // Not referenced in the visible script; kept so any external use keeps working.
    var tempUrl = [];
    // Not referenced in the visible script; kept so any external use keeps working.
    var infos = [];

    // Upper bound on the number of title pages whose quotes are fetched.
    var maxLinks = 10;
    // IMDb search listing used as the crawl entry point.
    // `title_type` must be a real IMDb title type; the previous value
    // `tv_infoss` was a mangled `tv_series`.
    var firstUrl = 'http://www.imdb.com/search/title?at=0&num_votes=5000,&sort=user_rating,desc&start=1&title_type=tv_series';

    // Not referenced in the visible script; kept so any external use keeps working.
    var newUrl;
    var x = require('casper').selectXPath;
    var fs = require('fs');
    var utils = require('utils');

    // Build the CasperJS instance that drives the whole crawl.
    var casper = require('casper').create({
        // Log only error-level messages.
        logLevel: 'error',
        verbose: true,
        pageSettings: {
            // User-agent string sent with page requests.
            userAgent: 'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.2 Safari/537.36',
            // Images and plugins are not needed to read quote text.
            loadPlugins: false,
            loadImages: false
        }
    });

    // Functions ------------------------

    /**
     * Collects the quote-page URL for every title on the current result page.
     *
     * Reads the global `document`, so it must run where a DOM is available
     * (the script hands it to `casper.evaluate`).
     *
     * @returns {string[]} One 'http://www.imdb.com<href>quotes/' URL for each
     *     anchor matching '.results td.image a', in document order.
     */
    function getLinks() {
        var anchors = document.querySelectorAll('.results td.image a');
        var urls = [];
        for (var idx = 0; idx < anchors.length; idx++) {
            var path = anchors[idx].getAttribute('href');
            urls.push('http://www.imdb.com' + path + 'quotes/');
        }
        return urls;
    }

    /**
     * Record holding the text of a single quote.
     *
     * @constructor
     * @param {*} innerText - Value stored as-is on the instance's `innerText` property.
     */
    function Quote(innerText) {
        var self = this;
        self.innerText = innerText;
    }

    /**
     * Prints a value as JSON (one-space indentation) through `this.echo`.
     *
     * @param {*} what - Value to serialise.
     * @returns {*} Whatever `this.echo` returns.
     */
    casper.renderJSON = function(what) {
        var serialized = JSON.stringify(what, null, ' ');
        return this.echo(serialized);
    };

    /**
     * Writes a value as JSON (one-space indentation) to 'json/quotes.json',
     * opening the file in 'w' mode.
     *
     * @param {*} what - Value to serialise.
     */
    casper.saveJSON = function(what) {
        var serialized = JSON.stringify(what, null, ' ');
        fs.write('json/quotes.json', serialized, 'w');
    };

    //Crawl------------------------

    // Open the search listing that seeds the crawl.
    casper.start(firstUrl);

    // Collect the quote-page URLs from the listing, then visit up to
    // `maxLinks` of them and accumulate every quote found into `quotes`.
    casper.then(function() {
        // Guard against evaluate() yielding null/undefined (e.g. the page-side
        // call failed), which would otherwise break the iteration below.
        links = this.evaluate(getLinks) || [];

        // Cap the crawl up front so exactly `maxLinks` pages (not maxLinks - 1)
        // are visited and no no-op steps are scheduled for the rest.
        var targets = links.slice(0, maxLinks);

        // Reset once per crawl, NOT once per page: resetting inside the page
        // callback would throw away the quotes of every page but the last one.
        quotes = [];

        this.eachThen(targets, function(response) {
            // `response.data` carries the current item of `targets`.
            this.thenOpen(response.data, function writeOnJson() {
                // Extract plain strings inside the page: evaluate() can only
                // transport JSON-serialisable values, so DOM nodes must not be
                // returned from it.
                var texts = this.evaluate(function() {
                    var nodes = __utils__.findAll('.list .quote p');
                    return Array.prototype.map.call(nodes, function(node) {
                        return node.innerText;
                    });
                });

                if (!texts) {
                    // Nothing could be read from this page; keep what we have.
                    return;
                }

                for (var k = 0; k < texts.length; k++) {
                    if (texts[k] != null) {
                        quotes.push(new Quote(texts[k]));
                    }
                }
            });
        });

    });

    // Run the queued steps; once they finish, persist the collected quotes,
    // report how many there are, print them as JSON and exit.
    casper.run(function onCrawlDone() {
        this.saveJSON(quotes);

        var summary = 'quotes :' + quotes.length;
        this.echo(summary);

        var rendered = this.renderJSON(quotes);
        rendered.exit();
    });