#!/usr/bin/python # coding:utf-8 import requests import re import time sess = requests.Session() def extractData(regex, content, index=1): r = None p = re.compile(regex) m = p.search(content) if m: r = m.group(index) return r def trans_cookies_str(cookies_str): cookies = {} for line in cookies_str.split(';'): name,value = line.strip().split('=',1) cookies[name] = value return cookies def notify(text, desp): # wechat msg api http://sc.ftqq.com/3.version url = '' sess.get(url, params={ 'text': text, 'desp': desp }) class JDCrawly: def __init__(self, id, cookies): self.id = id self.title = None self.state = None self.price = 0 self.headers = { 'User-Agent' : 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.75 Safari/537.36', 'ContentType': 'text/html; charset=utf-8', 'Accept-Encoding':'gzip, deflate, sdch', 'Accept-Language':'zh-CN,zh;q=0.8', 'Connection' : 'keep-alive', } self.cookies = trans_cookies_str(cookies) self.sess = sess def update(self): url = "https://item.m.jd.com/product/%s.html" %self.id print 'request.. %s' %url r = self.sess.get(url, headers=self.headers, cookies=self.cookies) print 'ok, got it. parsing...' # pattern = re.compile(r'var jap = ({[^}]*})') # 提取 json self.title = extractData(r'goodName" value="(.*)"', r.text) self.state = extractData(r'stockState:\'(.*)\'', r.text) self.price = extractData(r'skuPrice:\'(.*)\'', r.text) if self.title == None: print r.text return r.elapsed.total_seconds() * 1000 def buyable(self): return self.state != u'无货' and self.title != None and len(self.title) > 0 if __name__ == '__main__': # good_id = 188000 good_id = 3495459 # 1000x pd = JDCrawly(id=good_id, cookies='') # 填入 chrome cookies 字符串 while (True): dur = pd.update() print pd.state, pd.price, pd.title, pd.buyable(), dur if pd.buyable(): notify(u'有货了!' + pd.price, pd.title) break time.sleep(1)