Skip to content

Instantly share code, notes, and snippets.

@ekyfauzi
Forked from juanbrujo/react-app-scrapping.js
Created October 18, 2017 07:24
Show Gist options
  • Select an option

  • Save ekyfauzi/1ca464ce0a94d79238be855c1de38f07 to your computer and use it in GitHub Desktop.

Select an option

Save ekyfauzi/1ca464ce0a94d79238be855c1de38f07 to your computer and use it in GitHub Desktop.

Revisions

  1. Jorge Epuñan created this gist Dec 9, 2016.
    66 changes: 66 additions & 0 deletions react-app-scrapping.js
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,66 @@
    var phantom = require('phantom');
    var Q = require('q');
    var cheerio = require('cheerio');
    // Module-level handles shared by the promise chain and the helper functions:
    // _ph holds the PhantomJS instance and _page the page created from it.
    // _outObj is declared but never read or written in the visible code.
    var _ph, _page, _outObj;
    // Address of the page to scrape. ABSOLUTE_URL is not defined in the visible
    // code; it is a placeholder to be replaced with a real absolute URL string.
    var url = ABSOLUTE_URL; // change here for your React app site

    // Scrape flow: start PhantomJS, open `url`, wait until the app has rendered,
    // read the rendered HTML and extract one '<link|title>' entry per result item.
    phantom.create().then(ph => {
      _ph = ph;
      return _ph.createPage();
    }).then(page => {
      _page = page;
      return _page.open(url);
    }).then(status => {
      console.log(status);
      // A load that did not succeed will never populate the app, so fail fast
      // instead of polling until the wait times out.
      if (status !== 'success') {
        throw new Error('Unable to open ' + url + ' (status: ' + status + ')');
      }
      return waitState(textPopulated, 3);
    }).then(() => {
      return _page.property('content');
    }).then(content => {
      var $ = cheerio.load(content);
      var resultados = [];

      // A regular function is required here: cheerio binds `this` to the
      // current element inside .each().
      $('.item.panel.panel-default').each(function() {
        var title = $(this).find('.title').text();
        var link = $(this).find('a').attr('href');

        resultados.push( '<' + link + '|' + title + '>' );
      });

      // Emit the scraped entries; previously the array was built and discarded.
      console.log(resultados);
    }).catch(e => console.log(e)).then(shutdownPhantom);

    /**
     * Releases the PhantomJS page and process.
     *
     * Runs after both the success and the failure path so the PhantomJS child
     * process is not left running when a step rejects. Either handle may still
     * be unset if the failure happened before it was assigned.
     */
    function shutdownPhantom() {
      if (_page) {
        _page.close();
      }
      if (_ph) {
        _ph.exit();
      }
    }

    /**
     * State check used while waiting for the app to render: reports whether the
     * #app mount node currently contains any markup.
     *
     * An empty mount node (or a missing one) yields an empty string, so callers
     * that test the result for truthiness keep waiting until content appears.
     *
     * @returns {Promise<string>} outer HTML of #app once it has non-whitespace
     *   inner markup, otherwise ''.
     */
    function textPopulated() {
      // The callback is handed to the page and refers to the page's `document`;
      // keep it self-contained (no references to variables from this file).
      return _page.evaluate(function() {
        var root = document.querySelector('#app');
        if (!root || !root.innerHTML.trim()) {
          return '';
        }
        return root.outerHTML;
      });
    }

    /**
     * Repeatedly calls an asynchronous state check until it yields a truthy
     * value or the time limit is exceeded.
     *
     * @param {function(): Promise<*>} state - named function returning a promise;
     *   its `name` is used in the log and error messages.
     * @param {number} [timeout] - limit in seconds; a missing or zero value
     *   falls back to 20 seconds.
     * @returns {Promise<undefined>} resolves once the state is reached.
     * @throws {Error} (as a rejection) 'Timeout state: <name>' when the limit
     *   has elapsed and the state is still falsy.
     */
    function waitState(state, timeout) { // timeout in seconds is optional
      console.log('Start waiting for state: ' + state.name);

      const pollIntervalMs = 50;
      const maxWaitMs = timeout * 1000 || 20000;
      const begunAt = new Date();

      // One polling round; re-schedules itself until done or out of time.
      function poll() {
        return state().then(function(outcome) {
          if (outcome) {
            console.log('Reached state: ' + state.name);
            return;
          }

          const elapsedMs = new Date() - begunAt;
          if (elapsedMs > maxWaitMs) {
            throw new Error('Timeout state: ' + state.name);
          }

          return Q.delay(pollIntervalMs).then(poll);
        });
      }

      return poll();
    }