# NOTE(review): disabled scraping script. Line breaks and indentation below were
# reconstructed from a single collapsed comment line; the nesting (in particular
# whether the final "w" write sits inside `if text:`, at loop level, or after the
# loop) is a best guess -- confirm before re-enabling.
#
# from webdriver_manager.firefox import GeckoDriverManager
# from selenium.webdriver.chrome.service import Service
#
# def main(data):
#     service = Service(executable_path=GeckoDriverManager().install())
#     # Set path to firefox binary
#     opt = webdriver.FirefoxOptions()
#     opt.binary_location = "/usr/bin/firefox"
#     driver = webdriver.Chrome()
#     # Set webdriver path
#     with open('acceptance_rates_ai.json', 'r') as jh:
#         results = json.load(jh)
#     processed_urls = [i['url'] for i in results ]
#     for item in tqdm(data):
#         try:
#             url = item["Source (Manually checked) "].split(", ")[0].strip()
#         except:
#             continue
#         if not url or 'niche' in url or 'usnews' in url:
#             continue
#         if url in processed_urls:
#             continue
#         print([url,item["Source (Manually checked) "] ])
#         driver.get(url)
#         html = driver.page_source
#         text = clean_html(html)
#         if text:
#             res = get_from_openai(text, prompt.format(item["College Names "]), schema)
#             results.append({
#                 "url": url,
#                 "college": item["College Names "],
#                 "id": item["ID"],
#                 "html": html,
#                 "text": text,
#                 "data": res
#             })
#             with open("acceptance_rates_ai.json", "w") as jh:
#                 json.dump(results, jh, indent=2) # Pretty-printed JSON
#     return results
#
# main(data.to_dict(orient='records'))