Created
January 18, 2022 01:03
-
-
Save carlosrbta/5a4d7b9d34d7973cb5e3e18d6b69adb2 to your computer and use it in GitHub Desktop.
Revisions
-
carlosrbta created this gist
Jan 18, 2022. There are no files selected for viewing.
"""Scrape complaint titles and texts from Reclame Aqui into ``data.csv``.

The script drives Chrome through Selenium, loads the first ``PAGE_COUNT``
pages of the "americanas-marketplace" complaint list, collects the title and
text of every complaint card found, and writes the rows to ``data.csv``.
"""
from operator import le
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from json import dump
import time
import csv

# Number of listing pages to visit (pages are numbered from 1 in the URL).
PAGE_COUNT = 5
LIST_URL = (
    'https://www.reclameaqui.com.br/empresa/americanas-marketplace/'
    'lista-reclamacoes/?pagina={page}'
)
# Seconds to pause after loading a page before reading its elements.
# NOTE(review): presumably gives the page time to render - confirm.
PAGE_LOAD_DELAY = 3
# NOTE(review): 'bJdtis' looks like a generated CSS class name and may change
# when the site is redeployed - confirm it still matches the complaint cards.
CARD_CLASS = 'bJdtis'
FIELDNAMES = ['titulo', 'texto']


def _extract_complaint(card):
    """Return the title and text of one complaint card element as a dict."""
    return {
        'titulo': card.find_element(By.CSS_SELECTOR, 'a > h4').get_attribute('innerText'),
        'texto': card.find_element(By.TAG_NAME, 'p').get_attribute('innerText'),
    }


service = Service('drivers/chromedriver')

options = Options()
options.add_argument("start-maximized")

driver = webdriver.Chrome(options=options, service=service)

result = []

try:
    for page in range(1, PAGE_COUNT + 1):
        driver.get(LIST_URL.format(page=page))

        # Dismissing the cookie banner is best-effort: it may be absent or
        # not clickable. Only Selenium errors are ignored, so Ctrl-C and
        # genuine programming errors still surface.
        try:
            driver.find_element(By.ID, 'onetrust-accept-btn-handler').click()
            print("Accepted cookies")
        except WebDriverException:
            pass

        time.sleep(PAGE_LOAD_DELAY)

        cards = driver.find_elements(By.CLASS_NAME, CARD_CLASS)
        result.extend([_extract_complaint(card) for card in cards])
finally:
    # Always shut the browser down, even when scraping fails part-way,
    # so no Chrome/chromedriver process is left running.
    driver.quit()

# newline='' lets the csv module control line endings itself.
with open('data.csv', 'w', encoding='UTF8', newline='') as f:
    writer = csv.DictWriter(f, fieldnames=FIELDNAMES)
    writer.writeheader()
    writer.writerows(result)