Created
August 27, 2019 15:55
-
-
Save 6aditya8/c8ff33d6fc0c11de839bd9facf175cb6 to your computer and use it in GitHub Desktop.
Revisions
-
6aditya8 created this gist
Aug 27, 2019. There are no files selected for viewing.
"""Collect User-Agent strings for popular browsers from useragentstring.com.

Importing this module performs the scrape immediately and exposes the
result as ``proxy_user_agents`` plus one random pick, ``random_user_agent``.
"""
import random
from urllib.parse import quote

from bs4 import BeautifulSoup
import requests

USER_AGENT_SCRAPER_BASE_URL = 'http://www.useragentstring.com/pages/useragentstring.php?name='
POPULAR_BROWSERS = ['Chrome', 'Firefox', 'Mozilla', 'Safari', 'Opera', 'Opera Mini', 'Edge', 'Internet Explorer']

# Without a timeout ``requests`` may wait on an unresponsive server forever.
REQUEST_TIMEOUT_SECONDS = 10
# Only the first links of each listing are kept.
MAX_LINKS_PER_BROWSER = 20
# Per the original author's note, the first 3 links of the Chrome listing
# are Google header texts rather than User-Agent strings.
CHROME_HEADER_LINK_COUNT = 3


def get_user_agent_strings_for_this_browser(browser):
    """
    Get the latest User-Agent strings of the given Browser

    :param browser: string of given Browser
    :return: list of User agents of the given Browser; empty when the page
        does not contain the expected ``<div id="liste">`` container
    :raises requests.RequestException: on connection failure, timeout or a
        non-success HTTP status
    """
    # Encode the name explicitly so values such as 'Opera Mini' (or anything
    # containing '&' or '#') cannot alter the query string.
    url = USER_AGENT_SCRAPER_BASE_URL + quote(browser, safe='')
    response = requests.get(url, timeout=REQUEST_TIMEOUT_SECONDS)
    # Fail loudly on 4xx/5xx instead of parsing an error page.
    response.raise_for_status()

    soup = BeautifulSoup(response.content, 'html.parser')
    listing = soup.find('div', {'id': 'liste'})
    if listing is None:
        # Page layout changed or the browser name is unknown to the site.
        return []

    user_agent_links = listing.find_all('a')[:MAX_LINKS_PER_BROWSER]
    return [str(user_agent.text) for user_agent in user_agent_links]


def get_user_agents():
    """
    Gather a list of some active User-Agent strings from
    http://www.useragentstring.com of some of the Popular Browsers

    :return: list of User-Agent strings
    """
    user_agents = []
    for browser in POPULAR_BROWSERS:
        browser_agents = get_user_agent_strings_for_this_browser(browser)
        if browser == 'Chrome':
            # Drop the header links from Chrome's own results only, so a
            # short or empty Chrome listing never eats into another
            # browser's entries.
            browser_agents = browser_agents[CHROME_HEADER_LINK_COUNT:]
        user_agents.extend(browser_agents)
    return user_agents


proxy_user_agents = get_user_agents()

# To randomly select an User-Agent from the collected user-agent strings.
# ``random.choice`` raises IndexError on an empty list, so fall back to None
# when nothing could be scraped.
random_user_agent = random.choice(proxy_user_agents) if proxy_user_agents else None