Created
June 7, 2016 07:45
-
-
Save efazati/fe35d84ee9d1f760f4b5230ef29609a8 to your computer and use it in GitHub Desktop.
Revisions
-
efazati created this gist
Jun 7, 2016. There are no files selected for viewing
# -*- coding: utf-8 -*-
"""Scrape the YJC photo page and post unseen photos to a Telegram chat.

Flow: download the listing page at ``url``, turn every element with the
``ax_faal`` class into a small dict, record each dict in the MongoDB
collection ``telepy.article`` (keyed by image URL) and, for images that
were not recorded before, send the image to ``chat_id`` through telepot.

The original formatting of this script was lost (it had been collapsed
onto a single line); block structure below was reconstructed from syntax.
"""
from contextlib import closing
from datetime import datetime
from pprint import pprint
import urllib

from lxml import html
from pymongo import MongoClient
import requests
import telepot

try:  # Python 3
    from urllib.request import urlopen
except ImportError:  # Python 2
    from urllib import urlopen

client = MongoClient('mongodb://localhost:27017/')
token = ''
chat_id = '@...'
url = "http://www.yjc.ir/fa/photo"
db = client['telepy']
article_obj = db.article
element = ''

# Seconds to wait for the listing page before giving up; without a timeout
# ``requests.get`` can block indefinitely on an unresponsive server.
REQUEST_TIMEOUT = 30


def _parse_article(article):
    """Build the item dict for one ``ax_faal`` element.

    Returns ``None`` when the element has no ``<img>`` (or the ``<img>`` has
    no attributes) or no ``.title_txt1`` node, so that a single malformed
    tile does not abort the whole scrape with an ``IndexError``.
    """
    images = article.cssselect('img')
    titles = article.cssselect('.title_txt1')
    if not images or not titles:
        return None

    img_attrs = images[0].values()
    if not img_attrs:
        return None

    item = {}
    # NOTE(review): attribute values are picked by position (last attribute
    # of <img>, second attribute of <a>), exactly as before. Presumably these
    # are ``src`` and ``href`` -- confirm against the page markup and switch
    # to ``.get('src')`` / ``.get('href')`` if so.
    item['img'] = img_attrs[-1]

    links = article.cssselect('a')
    if links:
        link_attrs = links[0].values()
        if len(link_attrs) > 1:
            item['url'] = link_attrs[1]

    item['title'] = titles[0].text
    # NOTE(review): the key is spelled 'data' (possibly meant 'date'); kept
    # as-is because it is part of the stored document schema.
    item['data'] = datetime.now()
    item['source'] = 'yjc'
    return item


def data_gathering():
    """Download the listing page and return a list of item dicts.

    Each dict has the keys ``img``, ``title``, ``data`` (scrape time) and
    ``source``, plus ``url`` when the element contains a usable anchor.

    Raises:
        requests.RequestException: on connection failure, timeout, or a
            non-2xx HTTP status (via ``raise_for_status``).
    """
    page = requests.get(url, timeout=REQUEST_TIMEOUT)
    page.raise_for_status()
    tree = html.fromstring(page.content)
    parsed = (_parse_article(node) for node in tree.find_class('ax_faal'))
    return [item for item in parsed if item is not None]


def submit_data(bot, row):
    """Send ``row``'s image to the chat if it has not been recorded before.

    Args:
        bot: a ``telepot.Bot`` instance.
        row: an item dict as produced by ``data_gathering``.

    Returns:
        Whatever ``bot.sendPhoto`` returns when the photo is sent, or
        ``None`` when the image was already in the database.
    """
    # NOTE(review): the row is recorded *before* it is sent, so a photo whose
    # download or upload fails is never retried on a later run.
    if not store_db(row):
        return None

    # ``closing`` guarantees the HTTP response is released even if the
    # upload raises; the original left it open.
    with closing(urlopen(row['img'])) as rawimg:
        print('submit img url %s' % row['img'])
        print(datetime.now())
        return bot.sendPhoto(
            chat_id,
            ('newsimage.jpg', rawimg),
            caption='%s - @axekhabar' % row['title'],
        )


def store_db(row):
    """Insert ``row`` unless a document with the same ``img`` exists.

    Returns:
        ``True`` if the row was inserted, ``False`` if it was already there.
    """
    if article_obj.find_one({"img": row['img']}) is not None:
        return False
    article_obj.insert_one(row)
    return True


def submit_alldata(data):
    """Create the bot and submit every row in ``data`` in order."""
    bot = telepot.Bot(token)
    # Result is unused; the call is kept because it was made originally
    # (presumably as an early check that the token works -- confirm).
    bot.getMe()
    for row in data:
        submit_data(bot, row)


if __name__ == '__main__':
    print('started %s' % datetime.now())
    result = data_gathering()
    submit_alldata(result)