Skip to content

Instantly share code, notes, and snippets.

@classicvalues
Created November 2, 2024 02:00
Show Gist options
  • Save classicvalues/eeaa8768bfdd842e48311c04400ec5a7 to your computer and use it in GitHub Desktop.
Save classicvalues/eeaa8768bfdd842e48311c04400ec5a7 to your computer and use it in GitHub Desktop.

Revisions

  1. classicvalues created this gist Nov 2, 2024.
    65 changes: 65 additions & 0 deletions scholarly_query.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,65 @@
    import requests
    import os
    import csv

    # Where the list of search queries is read from (CSV input)
    query_file_path = (
        "/School/Stanford_University/Programs/Leadership_Education_for_Aspiring_Physicians/Research/Literature/Scholarly/Query/scholar_queries.csv"
    )
    # Where one result file per query is written
    output_dir = (
        "/School/Stanford_University/Programs/Leadership_Education_for_Aspiring_Physicians/Research/Literature/Scholarly/Results"
    )
    # Create the results directory up front; an already-existing one is fine
    os.makedirs(name=output_dir, exist_ok=True)

    # Fetch results using CrossRef API
    def get_crossref_results(query, num_results=1000, top_n=50, timeout=30):
        """Query the CrossRef works API and format the most-cited matches.

        Args:
            query: Free-text search string sent as the ``query`` parameter.
            num_results: Number of rows requested from CrossRef in one call.
            top_n: How many of the most-cited items to keep in the output.
            timeout: Seconds to wait on the HTTP request before giving up.

        Returns:
            A numbered text listing (title, citation count, link) of the
            ``top_n`` most-cited items. Returns ``"No results found."`` when
            the response has no items, and ``"No results found or an error
            occurred."`` when the request or response decoding fails.
        """
        print(f"Fetching results for query: '{query}'")
        url = "https://api.crossref.org/works"
        params = {"query": query, "rows": num_results}

        try:
            # A timeout keeps one stalled connection from hanging the whole run.
            response = requests.get(url, params=params, timeout=timeout)
            response.raise_for_status()
            data = response.json()
        except (requests.exceptions.RequestException, ValueError) as e:
            # ValueError covers a non-JSON body on requests versions whose
            # decode error is not a RequestException subclass.
            print(f"Error: {e}")
            return "No results found or an error occurred."

        # Tolerate a missing or null "message" / "items" in the payload.
        results = (data.get("message") or {}).get("items") or []
        if not results:
            print("No results found.")
            return "No results found."

        def cited_by(item):
            # A missing or null count is treated as zero citations.
            return int(item.get("is-referenced-by-count") or 0)

        # Most-cited first, then keep only the requested number of entries.
        top_results = sorted(results, key=cited_by, reverse=True)[:top_n]

        entries = []
        for i, result in enumerate(top_results, start=1):
            # "title" is a list; guard against it being absent or empty.
            titles = result.get("title") or ["No title available"]
            title = titles[0]
            link = result.get("URL", "No link available")
            citation_count = result.get("is-referenced-by-count", "0")
            entries.append(
                f"{i}. {title}\n Citations: {citation_count}\n Link: {link}\n\n"
            )

        return "".join(entries)

    # Process queries and write to file: each non-empty first cell of the CSV
    # is one search query, and its formatted results go to their own text file.
    with open(query_file_path, newline='', encoding='utf-8') as csvfile:
        for row in csv.reader(csvfile):
            # Only the first column is used. Strip it so stray whitespace does
            # not leak into the request or the output filename.
            query = row[0].strip() if row else ""
            if not query:
                # Skip blank lines and empty cells instead of sending an empty
                # query and writing a "scholar__result.txt" file.
                continue

            result_text = get_crossref_results(query)

            # Clean query string for filename: keep alphanumerics, space, "_"
            # and "-"; replace everything else with "_".
            safe_query = "".join(c if c.isalnum() or c in " _-" else "_" for c in query)
            # Cap the length so a very long query cannot produce a filename the
            # filesystem rejects (commonly limited to about 255 bytes).
            safe_query = safe_query[:150]
            output_file_path = os.path.join(output_dir, f"scholar_{safe_query}_result.txt")

            with open(output_file_path, "w", encoding="utf-8") as output_file:
                output_file.write(result_text)

            print(f"Results for query '{query}' saved to '{output_file_path}'")