Skip to content

Instantly share code, notes, and snippets.

@classicvalues
Created November 2, 2024 02:00
Show Gist options
  • Save classicvalues/eeaa8768bfdd842e48311c04400ec5a7 to your computer and use it in GitHub Desktop.
Save classicvalues/eeaa8768bfdd842e48311c04400ec5a7 to your computer and use it in GitHub Desktop.
Top Cited Research Fetcher by CrossRef API Query
import requests
import os
import csv
# Define input and output paths.
# CSV file of search queries; the processing loop below uses the first column
# of each non-empty row as one query.
query_file_path = "/School/Stanford_University/Programs/Leadership_Education_for_Aspiring_Physicians/Research/Literature/Scholarly/Query/scholar_queries.csv"
# Directory that receives one "scholar_<query>_result.txt" file per query.
output_dir = "/School/Stanford_University/Programs/Leadership_Education_for_Aspiring_Physicians/Research/Literature/Scholarly/Results"
# Create the results directory (and any missing parents) before any file is
# written; exist_ok=True makes re-running the script harmless.
os.makedirs(output_dir, exist_ok=True)
# Fetch results using CrossRef API
def get_crossref_results(query, num_results=1000, top_n=50, timeout=30):
    """Return a formatted listing of the most-cited CrossRef works for *query*.

    Sends a GET request to ``https://api.crossref.org/works`` asking for
    ``num_results`` rows matching ``query``, sorts the returned items by
    their ``is-referenced-by-count`` field (highest first) and formats the
    first ``top_n`` of them as numbered plain-text entries.

    Args:
        query: Free-text search string passed as the ``query`` parameter.
        num_results: Number of rows requested from the API (``rows``).
        top_n: How many of the most-cited items to include in the output.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A string with one entry per work (title, citation count, link), or
        ``"No results found."`` when the response contains no items, or
        ``"No results found or an error occurred."`` when the request fails
        or the response body is not valid JSON.
    """
    print(f"Fetching results for query: '{query}'")
    url = "https://api.crossref.org/works"
    params = {"query": query, "rows": num_results}

    # Only the network round-trip and JSON decoding live inside the try, so
    # programming errors in the formatting code below are never mistaken for
    # request failures.
    try:
        # Without a timeout a stalled connection would block the script forever.
        response = requests.get(url, params=params, timeout=timeout)
        response.raise_for_status()
        data = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers JSON decoding failures on requests versions whose
        # decode error is not a RequestException subclass.
        print(f"Error: {e}")
        return "No results found or an error occurred."

    # Extract items from response; tolerate a missing or null "message"/"items".
    results = (data.get("message") or {}).get("items") or []
    if not results:
        print("No results found.")
        return "No results found."

    def _citations(item):
        # A missing or null count is treated as zero so sorting never fails.
        return int(item.get("is-referenced-by-count") or 0)

    # Sort by citation count in descending order and keep the top entries.
    # sorted() is stable, so equally-cited works keep the API's own ordering.
    top_results = sorted(results, key=_citations, reverse=True)[:top_n]

    # Format results
    entries = []
    for i, result in enumerate(top_results, start=1):
        # "title" is a list; it can be absent or empty, so fall back in both
        # cases instead of indexing into an empty list.
        titles = result.get("title") or ["No title available"]
        title = titles[0]
        link = result.get("URL", "No link available")
        citation_count = _citations(result)
        entries.append(
            f"{i}. {title}\n Citations: {citation_count}\n Link: {link}\n\n"
        )
    return "".join(entries)
# Process queries and write to file: every row of the query CSV contributes
# one search (its first column), and each search's formatted result text is
# saved to its own file inside output_dir.
with open(query_file_path, newline='', encoding='utf-8') as csvfile:
    reader = csv.reader(csvfile)
    for row in reader:
        # Skip blank lines as well as rows whose first cell is empty or only
        # whitespace; otherwise an empty query would be sent to the API and
        # its output written to "scholar__result.txt".
        if not row:
            continue
        query = row[0].strip()
        if not query:
            continue

        result_text = get_crossref_results(query)

        # Clean query string for filename: keep alphanumerics, spaces,
        # underscores and hyphens; replace everything else with "_".
        safe_query = "".join(
            c if c.isalnum() or c in " _-" else "_" for c in query
        )
        output_file_path = os.path.join(
            output_dir, f"scholar_{safe_query}_result.txt"
        )
        with open(output_file_path, "w", encoding="utf-8") as output_file:
            output_file.write(result_text)
        print(f"Results for query '{query}' saved to '{output_file_path}'")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment