Created
November 2, 2024 02:00
-
-
Save classicvalues/eeaa8768bfdd842e48311c04400ec5a7 to your computer and use it in GitHub Desktop.
Top Cited Research Fetcher by CrossRef API Query
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import requests | |
| import os | |
| import csv | |
# Input/output locations used by the rest of the script.
# output_dir receives one text file per query; query_file_path is the CSV
# whose first column lists the search queries.
output_dir = "/School/Stanford_University/Programs/Leadership_Education_for_Aspiring_Physicians/Research/Literature/Scholarly/Results"
query_file_path = "/School/Stanford_University/Programs/Leadership_Education_for_Aspiring_Physicians/Research/Literature/Scholarly/Query/scholar_queries.csv"

# Create the results directory up front; an existing directory is not an error.
os.makedirs(output_dir, exist_ok=True)
| # Fetch results using CrossRef API | |
def get_crossref_results(query, num_results=1000, *, top_n=50, timeout=30, session=None):
    """Return the most-cited CrossRef works matching *query* as formatted text.

    Queries the CrossRef ``/works`` endpoint, orders the returned items by
    their ``is-referenced-by-count`` field (highest first) and formats the
    leading ``top_n`` entries as a numbered list.

    Args:
        query: Free-text search string sent as the ``query`` parameter.
        num_results: Value sent as the ``rows`` parameter, i.e. how many
            candidate items to request before ranking by citations.
        top_n: How many of the most-cited items to include in the output.
        timeout: Seconds passed to the HTTP call so a stalled connection
            cannot block the script indefinitely.
        session: Optional object exposing a ``requests``-compatible
            ``get(url, params=..., timeout=...)`` method (for example a
            ``requests.Session`` for connection reuse). When omitted, the
            module-level ``requests`` API is used.

    Returns:
        The formatted listing, ``"No results found."`` when the response
        contains no items, or ``"No results found or an error occurred."``
        when the request fails or the body is not valid JSON.
    """
    print(f"Fetching results for query: '{query}'")
    url = "https://api.crossref.org/works"
    params = {"query": query, "rows": num_results}
    client = requests if session is None else session

    # Only the network round-trip and JSON decoding live inside the try block;
    # ValueError covers JSON decode failures on older requests versions.
    try:
        response = client.get(url, params=params, timeout=timeout)
        response.raise_for_status()
        data = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        print(f"Error: {e}")
        return "No results found or an error occurred."

    # Extract items from the response, tolerating missing or null containers.
    results = (data.get("message") or {}).get("items") or []
    if not results:
        print("No results found.")
        return "No results found."

    # Sort by citation count, descending; a missing or null count ranks as 0.
    top_results = sorted(
        results,
        key=lambda item: int(item.get("is-referenced-by-count") or 0),
        reverse=True,
    )[:top_n]

    entries = []
    for rank, item in enumerate(top_results, start=1):
        # The title field is a list and may be absent or empty.
        titles = item.get("title") or ["No title available"]
        title = titles[0]
        link = item.get("URL", "No link available")
        citation_count = item.get("is-referenced-by-count", "0")
        entries.append(
            f"{rank}. {title}\n Citations: {citation_count}\n Link: {link}\n\n"
        )
    return "".join(entries)
# Process queries and write each query's results to its own text file.
# utf-8-sig reads plain UTF-8 unchanged but drops a leading byte-order mark
# (common in spreadsheet exports), which would otherwise end up inside the
# first query and its output filename.
with open(query_file_path, newline='', encoding='utf-8-sig') as csvfile:
    reader = csv.reader(csvfile)
    for row in reader:
        # Only the first column is used; skip blank lines and rows whose
        # first cell is empty or whitespace so no empty query hits the API.
        query = row[0].strip() if row else ""
        if not query:
            continue

        result_text = get_crossref_results(query)

        # Clean query string for filename: keep alphanumerics, space,
        # underscore and hyphen; replace everything else with "_".
        safe_query = "".join(c if c.isalnum() or c in " _-" else "_" for c in query)
        output_file_path = os.path.join(output_dir, f"scholar_{safe_query}_result.txt")

        with open(output_file_path, "w", encoding="utf-8") as output_file:
            output_file.write(result_text)
        print(f"Results for query '{query}' saved to '{output_file_path}'")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment