# -*- coding: utf8 -*-
from __future__ import absolute_import, division, print_function

import socket

try:
    from urlparse import urlparse  # Python 2
except ImportError:
    from urllib.parse import urlparse  # Python 3


class DnsResolverMiddleware(object):
    """
    Downloader middleware class for address resolving.
    It resolves a domain to IPv4 / IPv6 addresses.
    Originally it was designed for IPv6-only hosts.

    Algorithm:
    1) Replace all domain names with addresses in a request.
    2) Form a new request.
    3) Perform this request (in the Scrapy engine).
    4) Get a response.
    5) Replace addresses with names in this response.
    6) Return the response.

    It works without any caching, so it may be quite slow.

    TODO:
    * Add caching [?].
    """
    dns_dict = None

    def process_request(self, request, spider):
        """
        Replaces all domain names with addresses and forms a new request.
        It also replaces addresses with domain names in the Referer header.

        :param request: the request being processed;
        :param spider: dummy parameter in this case;
        :return: None or a new request with replaced links.
        """
        meta = getattr(request, 'meta', None)
        if not meta:
            meta = dict()
        if not self.dns_dict:
            self.dns_dict = dict()
        # To resist an infinite loop.
        if meta.get('resolved_request'):
            return None

        domain_url = request.url
        # Check if the domain and address are computed already.
        domain = meta.get('domain', str())
        address = meta.get('address', str())
        if not domain or not address:
            # Compute the domain and IP-address.
            domain = self.parse_host(domain_url)
            address = self.compute_address(domain)
            # Maintain a bidirectional mapping between domains and addresses.
            self.dns_dict.setdefault(domain, [])
            self.dns_dict.setdefault(address, [])
            if address not in self.dns_dict[domain]:
                self.dns_dict[domain] += [address]
            if domain not in self.dns_dict[address]:
                self.dns_dict[address] += [domain]

        # Get a new url with an address instead of a domain.
        address_url = self.convert_to_address(domain_url)

        # To replace `Referer`.
        headers = request.headers
        if not headers:
            headers = dict()
        # Get the old `Referer` with an address instead of a domain.
        address_referer = headers.get('Referer', str())
        # Get a new `Referer` with a domain.
        domain_referer = self.convert_to_domain(address_referer)

        # Form a new headers dict.
        new_headers = dict(
            headers,
            # Store a new Host header
            # for correct resolving on the requested server.
            Host=domain,
        )
        if domain_referer:
            new_headers = dict(
                new_headers,
                # Store a new Referer header
                # for correct handling on the requested server.
                Referer=domain_referer,
            )

        # Form a new meta dict.
        new_meta = dict(
            meta,
            # Mark this request as processed to resist an infinite loop.
            resolved_request=True,
            # Store some auxiliary data.
            # It helps us to escape from unnecessary computations
            # and debug 'em all.
            dns_dict=self.dns_dict,
            domain=domain,
            address=address,
            url=dict(
                domain=domain_url,
                address=address_url,
            ),
            referer=dict(
                domain=domain_referer,
                address=address_referer,
            ),
        )

        # Form a new request.
        new_request = request.replace(
            url=address_url,
            headers=new_headers,
            meta=new_meta,
        )
        return new_request
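
    # A minimal illustration of the rewrite above (hypothetical values,
    # assuming the domain resolves to the documentation address 192.0.2.1):
    #
    #   in:  url=http://example.com/page
    #   out: url=http://192.0.2.1/page,
    #        headers include Host: example.com,
    #        meta['resolved_request'] = True
    #
    # For an IPv6-only host the address is bracketed,
    # e.g. http://[2001:db8::1]/page.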
    def process_response(self, request, response, spider):
        """
        Tries to replace addresses with domain names.
        The replacement is not as reliable as expected;
        it may be a bug in the Scrapy engine.

        Nevertheless:
        1) It gets `meta` from the request.
        2) It replaces the `address` with the `domain` in the response's url.
        3) It forms a new request with that url.
        4) It forms a new response with that url and request.

        :param request: the request that originated the response;
        :param response: the response being processed;
        :param spider: dummy parameter in this case;
        :return: the response, possibly with replaced links.
        """
        # Get meta from the request. It is important.
        meta = getattr(request, 'meta', None)
        if not meta:
            meta = dict()
        if meta.get('resolved_response'):
            # A downloader middleware must return a response (or a request),
            # never None, from process_response.
            return response

        # Form a new url with a domain instead of the IP-address.
        url = response.url
        new_url = self.convert_to_domain(url)

        # Form a new meta dict.
        new_meta = dict(
            meta,
            # Mark this response as processed.
            resolved_response=True,
        )

        # Form a new request with our new url.
        new_request = request.replace(
            url=new_url,
            meta=new_meta,
        )
        # Form a new response with our new url and request.
        # Note: `replace()` returns a new object, so its result must be kept.
        response = response.replace(
            url=new_url,
            request=new_request,
        )
        return response

    def convert_to_address(self, url):
        """
        Replaces a domain with an address in the url.
        This function is a stub for future caching.

        :param str url: original url;
        :return: a new url.
        :rtype: str
        """
        domain = self.parse_host(url)
        address_list = self.dns_dict.get(domain, [])
        for address in address_list:
            url = url.replace(domain, address)
        return url

    def convert_to_domain(self, url):
        """
        Replaces an address with a domain in the url.
        This function is a stub for future caching.

        :param str url: original url;
        :return: a new url.
        :rtype: str
        """
        address = self.parse_host(url)
        domain_list = self.dns_dict.get(address, [])
        for domain in domain_list:
            url = url.replace(address, domain)
        return url

    def parse_host(self, url):
        """
        Returns the host part of the url.

        :param str url: source uri with an IP or domain;
        :return: host of the given url.
        :rtype: str
        """
        parsed_uri = urlparse(url)
        # Note: `netloc` may also contain a port or credentials;
        # such urls are not handled here.
        host = parsed_uri.netloc
        return host

    def compute_address(self, host_name):
        """
        Returns an http-compatible IP-address of the given host.
        This function is a stub for future caching.

        :param str host_name: symbolic name of the host;
        :return: string with the IP-address.
        :rtype: str
        """
        address = self._compute_address(host_name)
        return address

    def _compute_address(self, host_name):
        """
        Returns an http-compatible IP-address of the given host.
        For IPv6 it wraps the address into square brackets.

        :param str host_name: symbolic name of the host;
        :return: string with the IP-address.
        :rtype: str
        """
        addr_list = socket.getaddrinfo(host_name, 0)
        for addr in addr_list:
            (family, _socktype, _proto, _canonname, sockaddr) = addr
            if socket.AF_INET6 == family:
                (address, _port, _flow_info, _scope_id) = sockaddr
                # Wrap an IPv6 literal into square brackets (RFC 3986)
                # so it can be substituted into a url.
                address = '[{address}]'.format(
                    address=address,
                )
                return address
            elif socket.AF_INET == family:
                (address, _port) = sockaddr
                return address
        # Fall back to the unresolved name.
        return host_name
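
# A minimal sketch of enabling this middleware in a Scrapy project's
# settings.py. The module path and the priority value (543) are
# assumptions; adjust them to your project's layout:
#
#     DOWNLOADER_MIDDLEWARES = {
#         'myproject.middlewares.DnsResolverMiddleware': 543,
#     }

if __name__ == '__main__':
    # Standalone smoke test (requires network access): print the
    # http-compatible address the middleware would substitute for a host.
    # The host name here is only an illustration.
    middleware = DnsResolverMiddleware()
    print(middleware.compute_address('example.com'))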