# coding:utf-8


import math
import os
import re
import sys
import time
from http import cookiejar
from urllib import request, error
from urllib.parse import urljoin, urlsplit


class Downloader():
    """Download the images referenced by a single HTML page.

    The page at ``url`` is fetched and scanned for ``<a href="...">`` and
    ``<img src="...">`` tags whose target ends in one of ``img_format``.
    Every such target is saved into a ``<dir_name>_<unix time>`` directory.

    Args:
        url: Address of the HTML page to scan.
        progress: Optional object with ``set_origin(total)`` and
            ``show(count)`` methods used to report progress.
    """

    # Prefix of the output directory; a Unix timestamp is appended per run.
    dir_name = './DL_images'
    user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8) ' \
                 + 'AppleWebKit/536.25 (KHTML, like Gecko) Version/6.0 Safari/536.25'
    # Seconds to wait on a stalled connection before giving up.
    timeout = 30
    # The tag patterns only match when href/src is the first attribute, and
    # the link patterns only capture double-quoted values.
    a_tag_pattern = re.compile(r'<[\s]*a[\s]*href[\s]*=.*?>')
    a_link_pattern = re.compile(r'href[\s]*="(.*?)"')
    img_tag_pattern = re.compile(r'<[\s]*img[\s]*src[\s]*=.*?>')
    img_link_pattern = re.compile(r'src[\s]*="(.*?)"')
    # Lower-case file extensions treated as images.
    img_format = ['jpg', 'jpeg', 'png', 'gif', 'bmp']

    def __init__(self, url, progress=None):
        self.url = url
        self.progress = progress

    def run_download(self):
        """Fetch the page, collect its image URLs and download each one."""
        urls = self.__parse_html(self.fetch_html())
        self.__export_file(urls)

    def download(self, url, file_name):
        """Save the resource at ``url`` to ``file_name``.

        Failures are reported on stderr instead of being raised, so one bad
        link does not abort the remaining downloads.

        Returns:
            True when the file was written, False otherwise.
        """
        try:
            # Request() itself raises ValueError for malformed URLs, so it
            # has to live inside the try block as well.
            req = request.Request(url)
            req.add_header('User-agent', self.user_agent)
            with request.build_opener().open(req, timeout=self.timeout) as conn:
                data = conn.read()
            # Open the target only after the body arrived, so a failed
            # transfer does not leave an empty file behind.
            with open(file_name, 'wb') as img_file:
                img_file.write(data)
        except (error.URLError, IOError, ValueError) as e:
            sys.stderr.write('\nfailed to download {0}: {1}\n'.format(url, e))
            return False
        return True

    def fetch_html(self):
        """Return the decoded body of ``self.url``, or '' when it cannot be read.

        The charset announced in the Content-Type header is used when
        present, UTF-8 otherwise; undecodable bytes are replaced rather than
        raising.
        """
        cj = cookiejar.CookieJar()
        opener = request.build_opener(request.HTTPCookieProcessor(cj))
        opener.addheaders = [('User-agent', self.user_agent)]
        try:
            with opener.open(self.url, timeout=self.timeout) as conn:
                raw = conn.read()
                charset = conn.headers.get_content_charset() or 'utf-8'
        except (error.URLError, IOError, ValueError) as e:
            sys.stderr.write('failed to fetch {0}: {1}\n'.format(self.url, e))
            return ''
        try:
            return raw.decode(charset, errors='replace')
        except LookupError:
            # The server announced a charset Python does not know.
            return raw.decode('utf-8', errors='replace')

    def __parse_html(self, html):
        """Return the absolute image URLs found in ``html``.

        Links from ``<a>`` tags come first, then ``<img>`` sources, each in
        document order and without duplicates.
        """
        if not html:
            return []
        pattern_pairs = (
            (self.a_tag_pattern, self.a_link_pattern),
            (self.img_tag_pattern, self.img_link_pattern),
        )
        urls = []
        seen = set()
        for tag_pattern, link_pattern in pattern_pairs:
            for tag in tag_pattern.findall(html):
                link_match = link_pattern.search(tag)
                if not link_match:
                    continue
                image_url = self.__resolve_image_url(link_match.group(1))
                if image_url and image_url not in seen:
                    seen.add(image_url)
                    urls.append(image_url)
        return urls

    def __resolve_image_url(self, link):
        """Return ``link`` as an absolute URL if it names an image, else None."""
        try:
            # Look at the path only, so "a.jpg?size=2" still counts as jpg.
            extension = urlsplit(link).path.rsplit('.', 1)[-1].lower()
            if extension not in self.img_format:
                return None
            # Relative links are resolved against the page address.
            return urljoin(self.url, link)
        except ValueError:
            # urlsplit/urljoin reject some malformed links; skip those.
            return None

    def __get_filename(self, path):
        """Return the last path segment of ``path`` without query or fragment."""
        if not path:
            return ''
        try:
            url_path = urlsplit(path).path
        except ValueError:
            url_path = path
        return url_path.split('/')[-1]

    def __unique_name(self, name, used):
        """Return ``name`` or a ``stem_N.ext`` variant not in ``used``; record it."""
        stem, ext = os.path.splitext(name)
        candidate = name
        suffix = 1
        while candidate in used:
            candidate = '{0}_{1}{2}'.format(stem, suffix, ext)
            suffix += 1
        used.add(candidate)
        return candidate

    def __export_file(self, urls):
        """Download every URL in ``urls`` into a new timestamped directory."""
        if not urls:
            return
        target_dir = '{0}_{1}'.format(self.dir_name, int(time.time()))
        # exist_ok: two runs within the same second share the directory.
        os.makedirs(target_dir, exist_ok=True)
        if self.progress:
            self.progress.set_origin(len(urls))
        used_names = set()
        for count, url in enumerate(urls, 1):
            # Different URLs may end in the same file name; keep them apart.
            name = self.__unique_name(self.__get_filename(url) or 'image',
                                      used_names)
            self.download(url, os.path.join(target_dir, name))
            if self.progress:
                self.progress.show(count)

class Progress():
    """Text progress bar drawn on stdout as a growing run of ``#`` marks."""

    # Number of '#' marks that represents a finished job.
    max_gauge = 40

    def __init__(self):
        # Total number of steps; stays 0 until set_origin() is called.
        self.origin = 0

    def set_origin(self, origin):
        """Set the total number of steps that corresponds to a full bar."""
        self.origin = origin

    def show(self, increment):
        """Redraw the bar for ``increment`` completed steps."""
        rate = self.__calc(increment)
        # '\r' moves back to the line start so the bar is redrawn in place.
        sys.stdout.write('\rprogress: {0}'.format('#' * rate))
        # No newline is written, so flush explicitly or a buffered stdout
        # may hold the bar back until the program ends.
        sys.stdout.flush()

    def __calc(self, increment):
        """Return the number of marks to draw, between 0 and ``max_gauge``."""
        if not self.origin or self.origin < 0:
            # No usable total yet: draw an empty bar instead of dividing by 0.
            return 0
        rate = round(increment / self.origin, 2)
        now_rate = math.ceil(self.max_gauge * rate)
        # Clamp so an over- or under-count never distorts the bar length.
        return max(0, min(self.max_gauge, now_rate))

if __name__ == '__main__':
    # Command-line entry point: the first argument is the page URL to scan.
    param = sys.argv
    if len(param) < 2:
        print('no args')
        # Exit non-zero so calling scripts can detect the usage error.
        sys.exit(1)
    print('download start')
    dl = Downloader(param[1], Progress())
    dl.run_download()
    # The progress bar leaves the cursor mid-line; move past it.
    print('\n')
    print('download finish')