import csv
import os

import requests

# Define input and output paths
query_file_path = "/School/Stanford_University/Programs/Leadership_Education_for_Aspiring_Physicians/Research/Literature/Scholarly/Query/scholar_queries.csv"
output_dir = "/School/Stanford_University/Programs/Leadership_Education_for_Aspiring_Physicians/Research/Literature/Scholarly/Results"
os.makedirs(output_dir, exist_ok=True)


def _citation_count(item):
    """Return an item's citation count as an int, treating missing/null as 0."""
    return int(item.get("is-referenced-by-count") or 0)


def _format_results(results, top_n):
    """Render the ``top_n`` most-cited items of ``results`` as numbered text."""
    # Sort results by citation count in descending order, then keep the top ones
    top_results = sorted(results, key=_citation_count, reverse=True)[:top_n]

    entries = []
    for i, result in enumerate(top_results, start=1):
        # ``or`` also covers a present-but-empty title list, which would
        # otherwise raise IndexError on ``[0]``.
        titles = result.get("title") or ["No title available"]
        title = titles[0]
        link = result.get("URL", "No link available")
        citation_count = result.get("is-referenced-by-count", "0")
        entries.append(
            f"{i}. {title}\n Citations: {citation_count}\n Link: {link}\n\n"
        )
    return "".join(entries)


# Fetch results using CrossRef API
def get_crossref_results(query, num_results=1000, *, top_n=50, timeout=60):
    """Query the CrossRef works endpoint and return the most-cited hits as text.

    Args:
        query: Free-text search string sent as the ``query`` parameter.
        num_results: Number of rows requested from the API (``rows`` parameter).
        top_n: How many of the returned items, ranked by
            ``is-referenced-by-count`` descending, to include in the output.
        timeout: Seconds passed to ``requests.get`` so a stalled connection
            cannot block the caller forever.

    Returns:
        A string with one numbered entry (title, citation count, link) per
        selected item; ``"No results found."`` when the response has no
        items; ``"No results found or an error occurred."`` when the request
        fails or the response body is not valid JSON.
    """
    print(f"Fetching results for query: '{query}'")
    url = "https://api.crossref.org/works"
    params = {"query": query, "rows": num_results}

    try:
        response = requests.get(url, params=params, timeout=timeout)
        response.raise_for_status()
        data = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers JSON decoding failures on requests versions whose
        # decode error is not a RequestException subclass.
        print(f"Error: {e}")
        return "No results found or an error occurred."

    # Extract items from response
    results = data.get("message", {}).get("items", [])
    if not results:
        print("No results found.")
        return "No results found."

    return _format_results(results, top_n)
# Process queries and write to file
# Each non-empty row's first column is treated as one search query; the
# formatted results for that query are written to their own text file in
# ``output_dir``.
# "utf-8-sig" decodes plain UTF-8 unchanged but drops a leading byte-order
# mark, which would otherwise end up inside the first query.
with open(query_file_path, newline='', encoding='utf-8-sig') as csvfile:
    reader = csv.reader(csvfile)
    for row in reader:
        query = row[0].strip() if row else ""
        if not query:
            # Skip blank lines and rows whose first cell is empty, so no
            # request is sent with an empty query string.
            continue

        result_text = get_crossref_results(query)

        # Clean query string for filename: keep alphanumerics, space, "_" and
        # "-"; replace every other character with "_".
        safe_query = "".join(c if c.isalnum() or c in " _-" else "_" for c in query)
        output_file_path = os.path.join(output_dir, f"scholar_{safe_query}_result.txt")

        with open(output_file_path, "w", encoding="utf-8") as output_file:
            output_file.write(result_text)

        print(f"Results for query '{query}' saved to '{output_file_path}'")