var cheerio = require('cheerio');
var request = require('request');
var fs = require('fs');
var exec = require('child_process').exec;
var execFile = require('child_process').execFile;
var twitterAPI = require('node-twitter-api');

// Session cookie value sent as PHPSESSID with every page request.
var phpsessid = '';

// Twitter credentials passed to node-twitter-api when posting a status.
var consumerKey = '';
var consumerSecret = '';
var accessToken = '';
var accessTokenSecret = '';

/**
 * Polls the placement results page, detects companies that were not present
 * in the previously stored snapshot, and announces them through a desktop
 * notification and a series of tweets.
 */
var scraper = {
  // Last known snapshot: the result count and a map of company name -> selected text.
  data: {
    count: 0,
    companies: {}
  },

  /** Entry point: loads the stored snapshot and starts polling. */
  init: function () {
    this.handleInitial();
  },

  /**
   * Deep-copies a JSON-serialisable value.
   * @param {*} a Value to copy.
   * @returns {*} A copy produced by a JSON round trip.
   */
  clone: function (a) {
    return JSON.parse(JSON.stringify(a));
  },

  /**
   * Reads the stored snapshot from data.json into `this.data` and schedules a
   * page fetch every 60000 ms. Throws if data.json is missing or is not valid
   * JSON, because the file is read and parsed synchronously.
   */
  handleInitial: function () {
    var initialData = JSON.parse(fs.readFileSync('data.json').toString());
    console.log("Initially known " + initialData.count);
    this.data = this.clone(initialData);

    var that = this;
    setInterval(function () {
      that.getPage.call(that, that.handleNewHTML);
    }, 60000);
  },

  /**
   * Fetches the company results page using the PHPSESSID cookie.
   * @param {function(string)} callback Invoked with the page HTML, with `this`
   *   bound to the scraper, only when the request succeeds with status 200.
   */
  getPage: function (callback) {
    var that = this;
    var j = request.jar();
    var cookie = request.cookie('PHPSESSID=' + phpsessid);
    var url = 'https://channeli.in/placement/results/2014/company/';
    j.setCookie(cookie, url);

    request({url: url, jar: j}, function (error, response, html) {
      if (!error && response.statusCode == 200) {
        callback.call(that, html);
        return;
      }
      // Report the failure instead of dropping it silently; the next poll retries.
      console.error('Failed to fetch', url, error || ('HTTP ' + response.statusCode));
    });
  },

  /**
   * Offline stand-in for getPage: feeds the contents of test-1.html to the callback.
   * @param {function(string)} callback Invoked with the file contents.
   */
  getPageMocked: function (callback) {
    var html = fs.readFileSync('test-1.html').toString();
    callback.call(this, html);
  },

  /**
   * Extracts the snapshot from the results page HTML.
   *
   * Every `table tr` except the first is treated as a company row: cell 1 is
   * the name, cell 2 the selected text, and the last child of cell 3 supplies
   * the link href. The count is the text of the first cell of the last row.
   *
   * @param {string} html Page markup.
   * @returns {{count: number, companies: Object, links: Object}} Parsed snapshot.
   */
  scrape: function (html) {
    var $ = cheerio.load(html);
    var count = $('table tr').last().children().first().text();
    var companies = {};
    var links = {};

    $('table tr').each(function (i, element) {
      if (i === 0) {
        return; // first row is skipped (treated as the header)
      }
      var data = $(this).children();
      var name = $(data[1]).text();
      var selected = $(data[2]).text();
      var link = 'https://channeli.in' + $(data[3]).children().last().attr('href');
      companies[name] = selected;
      links[name] = link;
    });

    return {
      'count': Number(count),
      'companies': companies,
      'links': links
    };
  },

  /**
   * Processes freshly fetched HTML: persists the new snapshot to data.json and,
   * when the count changed, notifies about every company that is not in the
   * previous snapshot before adopting the new snapshot as current.
   * @param {string} html Page markup.
   */
  handleNewHTML: function (html) {
    var newData = this.scrape(html);
    this.writeToFile('data.json', newData);

    if (newData.count !== this.data.count) {
      var known = this.data.companies;
      for (var i in newData.companies) {
        // Own-property check so names such as "constructor" are not mistaken
        // for already-known companies via Object.prototype.
        if (!Object.prototype.hasOwnProperty.call(known, i)) {
          this.notify(i, newData.companies[i], newData.links[i]);
        }
      }
      this.data = this.clone(newData);
    }
  },

  /**
   * Fetches a company's detail page and tweets the people listed on it.
   * @param {string} link URL of the company's detail page.
   * @param {string} company Company name used as the tweet prefix.
   */
  getPeopleList: function (link, company) {
    var that = this;
    var j = request.jar();
    var cookie = request.cookie('PHPSESSID=' + phpsessid);
    j.setCookie(cookie, link);

    request({url: link, jar: j}, function (error, response, html) {
      if (!error && response.statusCode == 200) {
        that.tweet.call(that, html, company);
        return;
      }
      // Report the failure instead of dropping it silently.
      console.error('Failed to fetch', link, error || ('HTTP ' + response.statusCode));
    });
  },

  /**
   * Offline stand-in for getPeopleList: tweets from the contents of test-2.html.
   * @param {string} link Unused.
   * @param {string} company Company name used as the tweet prefix.
   */
  getPeopleListMocked: function (link, company) {
    var html = fs.readFileSync('test-2.html').toString();
    this.tweet(html, company);
  },

  /**
   * Announces a newly seen company: desktop notification, a summary tweet, and
   * then one tweet per person from the company's detail page.
   * @param {string} name Company name.
   * @param {string} count Selected text scraped for the company.
   * @param {string} link URL of the company's detail page.
   */
  notify: function (name, count, link) {
    // sends a desktop notification
    console.log(name, count);
    // `name` comes from scraped HTML, so it is passed as an argument vector
    // rather than interpolated into a shell command line. `--` stops option
    // parsing so a name starting with "-" is not read as a flag.
    execFile(
      'notify-send',
      ['-u', 'critical', '--', name, count + ' selected'],
      function (err) {
        if (err) {
          console.error('Failed to send desktop notification', err);
        }
      }
    );

    var tweet = name + ' has just released their results. ' + count + ' selected. Details coming up soon.';
    this.sendTweet(tweet);
    this.getPeopleList(link, name);
  },

  /**
   * Tweets one line per table row (first row skipped) of a company's detail
   * page, formatted as "<company>: <name>/<branch>" and cut to 140 characters.
   * @param {string} html Detail page markup.
   * @param {string} company Company name; only its first two space-separated
   *   words are used in the tweet.
   */
  tweet: function (html, company) {
    var $ = cheerio.load(html);
    var that = this;

    // Shorten once up front; the result does not depend on the row.
    var words = company.split(' ');
    var shortName = words.length > 1 ? [words[0], words[1]].join(' ') : words[0];

    $('table tr').each(function (i, element) {
      if (i === 0) {
        return; // first row is skipped (treated as the header)
      }
      var data = $(this).children();
      var name = $(data[2]).text();
      var branch = $(data[3]).text();
      var tweet = shortName + ': ' + name + '/' + branch;
      if (tweet.length > 140) {
        tweet = tweet.substring(0, 140);
      }
      that.sendTweet(tweet);
    });
  },

  /**
   * Posts a status update using the module-level Twitter credentials.
   * Failures are logged, not thrown.
   * @param {string} tweet Status text.
   */
  sendTweet: function (tweet) {
    var twitter = new twitterAPI({
      consumerKey: consumerKey,
      consumerSecret: consumerSecret,
      callback: 'http://localhost'
    });

    // accessToken / accessTokenSecret are deliberately read from module scope.
    // Redeclaring them here with `var x = x` would hoist function-local
    // bindings that shadow the real values and always evaluate to undefined.
    twitter.statuses("update", {
        status: tweet
      },
      accessToken,
      accessTokenSecret,
      function (error, data, response) {
        if (error) {
          console.error('Error in tweeting', tweet, error);
        }
      }
    );
  },

  /**
   * Asynchronously writes `data` as JSON to `file`; write errors are logged.
   * @param {string} file Destination path.
   * @param {*} data JSON-serialisable value.
   */
  writeToFile: function (file, data) {
    fs.writeFile(file, JSON.stringify(data), function (err) {
      if (err) {
        console.error('Failed to write to file', err);
      }
    });
  }
};

scraper.init();