"""Retrieve all proxy addresses from https://free-proxy-list.net (all pages).

The resulting proxy list can be used for IP rotation to get around bot
protection.
"""
from selenium import webdriver
from bs4 import BeautifulSoup


class Proxy:
    """One proxy entry scraped from the free-proxy-list.net table."""

    def __init__(self, ip, code, type, https):
        self.ip = ip        # "host:port" string
        self.code = code    # country code column
        self.type = type    # anonymity level column
        self.https = https  # HTTPS support column ("yes"/"no")

    def __repr__(self):
        return (f"{type(self).__name__}(ip={self.ip!r}, code={self.code!r}, "
                f"type={self.type!r}, https={self.https!r})")


class ProxyScrapper:
    """Scrapes all proxy pages from free-proxy-list.net via headless Chrome."""

    def init(self):
        """Start a headless Chrome driver with images disabled for speed."""
        chrome_options = webdriver.ChromeOptions()
        prefs = {'profile.managed_default_content_settings.images': 2,
                 'disk-cache-size': 4096}
        chrome_options.add_experimental_option('prefs', prefs)
        chrome_options.add_argument("--headless")
        self.driver = webdriver.Chrome(executable_path='drivers/chromedriver',
                                       options=chrome_options)
        self.driver.implicitly_wait(10)

    def tear_down(self):
        """Shut the browser down.

        quit() (not close()) terminates the whole chromedriver process;
        close() only closes the current window and leaks the driver.
        """
        self.driver.quit()

    def _is_next_disabled(self):
        """Return True when the pager's 'Next' button is greyed out (last page)."""
        next_li = self.driver.find_element_by_css_selector('#proxylisttable_next')
        return 'disabled' in next_li.get_attribute("class")

    def _scrape_current_page(self, proxy_list):
        """Parse the currently displayed table page, appending Proxy objects."""
        soup = BeautifulSoup(self.driver.page_source, 'html.parser')
        rows = soup.findAll('tr', {'class': 'odd'}) + soup.findAll('tr', {'class': 'even'})
        for row in rows:
            data = row.findAll('td')
            # Columns: 0=IP, 1=port, 2=country code, 4=anonymity, 6=https.
            # Use .string on every cell so plain text (not bs4 Tag objects)
            # is stored — the original kept Tags for code/type/https.
            proxy_list.append(Proxy(data[0].string + ':' + data[1].string,
                                    data[2].string,
                                    data[4].string,
                                    data[6].string))

    def scrape_proxies(self):
        """Walk every page of the proxy table and return all proxies found.

        Returns:
            list[Proxy]: one entry per table row across all pages.
        """
        self.driver.get('https://free-proxy-list.net')
        proxy_list = []
        # Scrape first, THEN advance: this way the last page (where 'Next'
        # is disabled) is still collected — the original loop skipped it,
        # and collected nothing at all when only a single page existed.
        while True:
            self._scrape_current_page(proxy_list)
            if self._is_next_disabled():
                break
            self.driver.find_element_by_css_selector('#proxylisttable_next>a').click()
        return proxy_list

    def get_proxy_list(self):
        """Full lifecycle: start driver, scrape all pages, always tear down."""
        self.init()
        result = []
        try:
            result = self.scrape_proxies()
        finally:
            # Guarantee the browser process is released even if scraping fails.
            self.tear_down()
        return result


if __name__ == '__main__':
    proxy_scrapper = ProxyScrapper()
    for proxy in proxy_scrapper.get_proxy_list():
        print(proxy.ip)