#!/usr/bin/env python
# coding=utf8
# author=evi1m0#n0tr00t
# Fri Apr 10 14:14:35 2015
"""Back up the posts of a hi.baidu.com blog into a local directory.

Usage: <script> Blog_name

The archive index of the blog is scraped for post URLs, and every post is
then downloaded concurrently into a directory named after the blog.
"""

import os
import re
import sys
import urlparse

import requests
import threadpool as tp
import wget

# Seconds to wait for any single HTTP response; without a timeout a stalled
# server would block a worker thread forever.
REQUEST_TIMEOUT = 30

# NOTE(review): the HTML markup inside the scraping patterns was lost from
# this file (the regex literals had been reduced to '' or to a bare
# '(.*?)'). The three archive patterns cannot be inferred and are left
# unset on purpose so the script fails loudly instead of scraping with
# guessed expressions. Restore them from the original script.
YEARS_PATTERN = None       # should capture the year/month index block of the archive page
MONTHS_PATTERN = None      # should capture each month URL
POST_URLS_PATTERN = None   # should capture each post URL on a month page

# NOTE(review): presumably the original pattern, judging by the surviving
# '(.*?)' capture and the `_title` variable it fed -- confirm.
TITLE_PATTERN = '<title>(.*?)</title>'


def _archives(author):
    """Return the list of post URLs found in the archive of ``author``.

    NOTE(review): the statements between extracting ``years`` and collecting
    ``urls`` were lost from this file; the per-month loop below is
    reconstructed from the surviving variable names (``months``,
    ``month_content``, ``urls``, ``archives_list``) and must be checked
    against the original script.

    :param author: blog name, as used in ``http://hi.baidu.com/<author>/``.
    :returns: list of post URLs (empty if the archive index is not found).
    :raises RuntimeError: if the scraping patterns have not been restored.
    """
    if not (YEARS_PATTERN and MONTHS_PATTERN and POST_URLS_PATTERN):
        raise RuntimeError(
            'archive scraping patterns are missing; set YEARS_PATTERN, '
            'MONTHS_PATTERN and POST_URLS_PATTERN before running')

    archives_url = 'http://hi.baidu.com/{}/archive'.format(author)
    print('[*] Target URL: {}'.format(archives_url))
    year_content = requests.get(archives_url, timeout=REQUEST_TIMEOUT).content

    # The original indexed [0] unconditionally and crashed with IndexError
    # when the page layout did not match.
    year_blocks = re.findall(YEARS_PATTERN, year_content)
    if not year_blocks:
        return []
    years = year_blocks[0]

    archives_list = []
    for month_url in re.findall(MONTHS_PATTERN, years):
        month_content = requests.get(
            month_url, timeout=REQUEST_TIMEOUT).content
        archives_list.extend(re.findall(POST_URLS_PATTERN, month_content))
    return archives_list


def main(url, out_dir=None):
    """Download the post at ``url`` into ``out_dir``, named after its title.

    Errors are reported on stdout and never raised, so one failing post does
    not disturb the other worker threads.

    :param url: URL of the post to download.
    :param out_dir: target directory; defaults to ``sys.argv[1]``, which is
        the directory the command-line driver creates.
    """
    if out_dir is None:
        out_dir = sys.argv[1]

    try:
        _page = requests.get(url, timeout=REQUEST_TIMEOUT).content
    except requests.RequestException as e:
        print('[-] Error: ' + str(e))
        return

    titles = re.findall(TITLE_PATTERN, _page)
    if not titles:
        print('[-] Error: no title found in {}'.format(url))
        return
    _title = titles[0]

    # A title containing a path separator would point into a directory that
    # does not exist, so neutralise separators before building the path.
    safe_title = _title.strip()
    for sep in ('/', os.sep, os.altsep):
        if sep:
            safe_title = safe_title.replace(sep, '_')
    _filename = '{author}/{title}'.format(author=out_dir, title=safe_title)

    print('[+] Download: {}'.format(_title))
    try:
        wget.download(url, out=_filename, bar='')
    except Exception as e:
        # Deliberately broad: best-effort download inside a worker thread.
        print('[-] Error: ' + str(e))


if __name__ == '__main__':
    if len(sys.argv) == 1:
        print('[-] Usage: {} Blog_name'.format(sys.argv[0]))
        print('[-] Example: {} evi1m0'.format(sys.argv[0]))
        sys.exit()

    author = sys.argv[1]
    if not os.path.exists(author):
        os.mkdir(author)

    archives = _archives(author)
    print('[*] Archives statistics: {}'.format(len(archives)))

    # threadpool
    pool = tp.ThreadPool(30)
    for req in tp.makeRequests(main, archives):
        pool.putRequest(req)
    pool.wait()