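# NOTE: the commented-out block that follows (through the `main(...)` call) is a
# disabled Selenium scraping routine; none of it executes as written.
# from webdriver_manager.firefox import GeckoDriverManager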
# from selenium.webdriver.chrome.service import Service
 
# def main(data):

        
#     service = Service(executable_path=GeckoDriverManager().install())
#     # Set path to firefox binary
#     opt = webdriver.FirefoxOptions()
#     opt.binary_location = "/usr/bin/firefox"
#     driver = webdriver.Chrome()
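#     # NOTE(review): `service` (geckodriver path) and `opt` (Firefox binary) are
#     # built above but never passed to the driver, which is created as Chrome,
#     # and `Service` is imported from the chrome module — confirm the intended
#     # browser before re-enabling this code.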

    
#     with open('acceptance_rates_ai.json', 'r') as jh:
#         results = json.load(jh)
#         processed_urls = [i['url'] for i in results ]

#     for item in tqdm(data):
#         try:
#             url = item["Source (Manually checked) "].split(", ")[0].strip()
#         except:
#             continue
#         if not url or 'niche' in url or 'usnews' in url:
#             continue
#         if url in processed_urls:
#             continue
#         print([url,item["Source (Manually checked) "] ])
#         driver.get(url)
#         html = driver.page_source

#         text = clean_html(html)

#         if text:
#             res = get_from_openai(text, prompt.format(item["College Names "]), schema)

#             results.append({
#                 "url": url,
#                 "college": item["College Names "],
#                 "id": item["ID"],
#                 "html": html,
#                 "text": text,
#                 "data": res
#             })

#             with open("acceptance_rates_ai.json", "w") as jh:
#                 json.dump(results, jh, indent=2)  # Pretty-printed JSON

#     return results

# main(data.to_dict(orient='records'))