Skip to content

Instantly share code, notes, and snippets.

@astur
Created May 22, 2016 03:30
Show Gist options
  • Save astur/2b3258a7991d2bc83d07670f27036fb0 to your computer and use it in GitHub Desktop.
Save astur/2b3258a7991d2bc83d07670f27036fb0 to your computer and use it in GitHub Desktop.

Revisions

  1. astur created this gist May 22, 2016.
    41 changes: 41 additions & 0 deletions index.js
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,41 @@
    var tress = require('tress');
    var needle = require('needle');
    var cheerio = require('cheerio');
    var resolve = require('url').resolve;
    var fs = require('fs');

    // Start page of the crawl; also used as the base when resolving pagination links.
    const URL = 'http://www.ferra.ru/ru/techlife/news/';

    // Records collected by the queue worker; serialized to disk when the queue drains.
    const results = [];

    /**
     * Crawl queue. Each job is a page URL: the worker downloads the page,
     * records it in `results` when the author text matches, and enqueues
     * every article link and "next page" link found on it.
     *
     * The second argument (10) is passed to tress as the queue concurrency.
     *
     * @param {string} url - address of the page to fetch and parse.
     * @param {function(Error=)} callback - tress job-completion callback;
     *     called with the request error on failure, with no arguments otherwise.
     */
    var q = tress(function(url, callback){
        needle.get(url, function(err, res){
            if (err) {
                // Throwing here would happen inside an async callback and kill
                // the whole process, so the drain handler would never write the
                // results gathered so far. Report the failure for this job only.
                console.error('Failed to fetch ' + url + ': ' + err.message);
                return callback(err);
            }

            var $ = cheerio.load(res.body);

            // The author name is read from the third child node of
            // `.b_infopost`; the last character of the trimmed text is dropped
            // before comparing.
            var author = $('.b_infopost').contents().eq(2).text().trim().slice(0, -1);

            if (author === 'Алексей Козлов') {
                results.push({
                    title: $('h1').text(),
                    date: $('.b_infopost>.date').text(),
                    href: url,
                    size: $('.newsbody').text().length
                });
            }

            // Article links: resolve against the current page so relative
            // hrefs become absolute URLs; skip anchors that have no href.
            $('.b_rewiev p>a').each(function() {
                var href = $(this).attr('href');
                if (href) {
                    q.push(resolve(url, href));
                }
            });

            // Pagination links: resolved against the start URL, as before.
            $('.bpr_next>a').each(function() {
                var href = $(this).attr('href');
                if (href) {
                    q.push(resolve(URL, href));
                }
            });

            callback();
        });
    }, 10);

    // Drain handler: serialize everything collected in `results` as
    // 4-space-indented JSON and write it to ./data.json synchronously.
    q.drain = function writeResults() {
        var serialized = JSON.stringify(results, null, 4);
        fs.writeFileSync('./data.json', serialized);
    };

    // Seed the queue with the start page to kick off the crawl.
    q.push(URL);
    14 changes: 14 additions & 0 deletions package.json
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,14 @@
    {
    "private": true,
    "name": "ferra-scraper",
    "version": "0.0.1",
    "description": "Web scraping example for habrahabr",
    "main": "index.js",
    "author": "astur <[email protected]> (http://kozlov.am/)",
    "license": "WTFPL",
    "dependencies": {
    "cheerio": "^0.20.0",
    "needle": "^1.0.0",
    "tress": "^1.0.0"
    }
    }