AnastasiyaByelyakova · August 6, 2024 10:57 · Aug 6, 2024
diff --git a/selenium_firefox.py b/selenium_firefox.py
@@ -0,0 +1,51 @@
+# from webdriver_manager.firefox import GeckoDriverManager
+# from selenium.webdriver.chrome.service import Service
+
+# def main(data):
+
+
+#     service = Service(executable_path=GeckoDriverManager().install())
+#     # Set the path to the Firefox binary
+#     opt = webdriver.FirefoxOptions()
+#     opt.binary_location = "/usr/bin/firefox"
+#     driver = webdriver.Chrome()
+#     # Set webdriver path -- NOTE: neither `service` nor `opt` above is passed to webdriver.Chrome()
+
+
+#     with open('acceptance_rates_ai.json', 'r') as jh:
+#         results = json.load(jh)
+#         processed_urls = [i['url'] for i in results ]
+
+#     for item in tqdm(data):
+#         try:
+#             url = item["Source (Manually checked) "].split(", ")[0].strip()
+#         except:
+#             continue
+#         if not url or 'niche' in url or 'usnews' in url:
+#             continue
+#         if url in processed_urls:
+#             continue
+#         print([url,item["Source (Manually checked) "] ])
+#         driver.get(url)
+#         html = driver.page_source
+
+#         text = clean_html(html)
+
+#         if text:
+#             res = get_from_openai(text, prompt.format(item["College Names "]), schema)
+
+#             results.append({
+#                 "url": url,
+#                 "college": item["College Names "],
+#                 "id": item["ID"],
+#                 "html": html,
+#                 "text": text,
+#                 "data": res
+#             })
+
+#             with open("acceptance_rates_ai.json", "w") as jh:
+#                 json.dump(results, jh, indent=2)  # Pretty-printed JSON
+
+#     return results
+
+# main(data.to_dict(orient='records'))
No results found