Skip to content

Instantly share code, notes, and snippets.

@AnastasiyaByelyakova
Created August 6, 2024 10:57
Show Gist options
  • Save AnastasiyaByelyakova/6d9d3ee5ea206b7e4abb36a9c282a5db to your computer and use it in GitHub Desktop.
Save AnastasiyaByelyakova/6d9d3ee5ea206b7e4abb36a9c282a5db to your computer and use it in GitHub Desktop.
#selenium #firefox #openai
# from webdriver_manager.firefox import GeckoDriverManager
# from selenium.webdriver.chrome.service import Service
# def main(data):
# service = Service(executable_path=GeckoDriverManager().install())
# # Set path to firefox binary
# opt = webdriver.FirefoxOptions()
# opt.binary_location = "/usr/bin/firefox"
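# # NOTE(review): the Gecko `service` and Firefox `opt` built above are never passed to a driver,
# # `Service` is imported from the chrome package, and Chrome is started here — confirm which browser is intended
# driver = webdriver.Chrome()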
# # Load previously saved results so already-processed URLs can be skipped
# with open('acceptance_rates_ai.json', 'r') as jh:
# results = json.load(jh)
# processed_urls = [i['url'] for i in results ]
# for item in tqdm(data):
# try:
# url = item["Source (Manually checked) "].split(", ")[0].strip()
# except:
# continue
# if not url or 'niche' in url or 'usnews' in url:
# continue
# if url in processed_urls:
# continue
# print([url,item["Source (Manually checked) "] ])
# driver.get(url)
# html = driver.page_source
# text = clean_html(html)
# if text:
# res = get_from_openai(text, prompt.format(item["College Names "]), schema)
# results.append({
# "url": url,
# "college": item["College Names "],
# "id": item["ID"],
# "html": html,
# "text": text,
# "data": res
# })
# with open("acceptance_rates_ai.json", "w") as jh:
# json.dump(results, jh, indent=2) # Pretty-printed JSON
# return results
# main(data.to_dict(orient='records'))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment