Last active
November 2, 2025 11:58
-
-
Save cboulanger/d0227e5b1f2840cd495ba5ad04d3767d to your computer and use it in GitHub Desktop.
Revisions
-
cboulanger revised this gist
Nov 2, 2025 . 1 changed file with 106 additions and 0 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode characters
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@

# Crossref to RIS Converter

This Python script converts Crossref API bibliographic data into RIS format, specifically designed to handle books with chapters. It enriches chapter records with complete book information (title, editors, ISBN) that is typically missing in the Crossref API response.

## Features

- Fetches bibliographic data from Crossref API using ISBN
- Generates complete RIS records for both the book and all chapters
- Includes full book metadata (title, editors, publisher, ISBNs) in each chapter entry
- Can work with pre-downloaded JSON files when API access is restricted
- Handles both electronic and print ISBNs

## Requirements

- Python 3.x (uses only standard library modules)
- Internet connection (for API mode)

## Usage

### Method 1: Direct ISBN lookup

```bash
python crossref_to_ris.py <ISBN> [output_file]
```

Example:

```bash
python crossref_to_ris.py 9781509982653
python crossref_to_ris.py 9781509982653 my_bibliography.ris
```

If no output file is specified, the script creates `bibliography_<ISBN>.ris`.

### Method 2: From JSON file

If API access is blocked or you have pre-downloaded JSON data:

1. Download JSON from: `https://api.crossref.org/works/?filter=isbn:<ISBN>`
2. Run the script with the JSON file:

```bash
python crossref_to_ris.py --json <json_file> [output_file]
```

Example:

```bash
python crossref_to_ris.py --json crossref_data.json bibliography.ris
```

## Output Format

The script generates RIS format with:

- **Book entry** (TY - BOOK)
  - Complete book title with subtitle
  - All editors
  - Publisher and year
  - Both electronic and print ISBNs
  - DOI and URL
- **Chapter entries** (TY - CHAP)
  - Chapter title
  - Chapter authors
  - Book title (BT field)
  - All book editors
  - Publisher and year
  - Page range (start and end pages)
  - ISBNs
  - Chapter-specific DOI and URL

## Example

```bash
$ python crossref_to_ris.py 9781509982653
Fetching data for ISBN: 9781509982653
Found 11 items
Generated RIS entries: 1 book + 10 chapters = 11 total
RIS bibliography written to: bibliography_9781509982653.ris
```

## RIS Fields Used

- `TY` - Type of reference (BOOK or CHAP)
- `TI` - Title
- `AU` - Author
- `ED` - Editor
- `BT` - Book Title (for chapters)
- `PB` - Publisher
- `PY` - Publication Year
- `SP` - Start Page
- `EP` - End Page
- `SN` - ISBN
- `DO` - DOI
- `UR` - URL
- `ER` - End of Reference

## Notes

- The script automatically removes hyphens from ISBNs
- ISBNs can be provided in any format (with or without hyphens)
- Both ISBN-10 and ISBN-13 formats are supported
- Network restrictions may prevent direct API access; use the `--json` method as a workaround

## License

Written by Claude Code. Public domain / MIT - use freely for any purpose.
-
cboulanger created this gist
Nov 2, 2025 .There are no files selected for viewing
#!/usr/bin/env python3
"""
Convert Crossref API data to RIS format for a book and its chapters.

Usage:
    python crossref_to_ris.py <ISBN> [output_file]
    python crossref_to_ris.py --json <json_file> [output_file]

Examples:
    python crossref_to_ris.py 9781509982653
    python crossref_to_ris.py 9781509982653 output.ris
    python crossref_to_ris.py --json data.json output.ris

The script will try to fetch from Crossref API, but if that fails,
you can manually download the JSON from:
    https://api.crossref.org/works/?filter=isbn:<ISBN>
(append &rows=1000 for books with more than 20 chapters)
and use the --json option.
"""

import sys
import json
import urllib.request
import urllib.error
import urllib.parse
import os

# Crossref work types that describe a book as a whole rather than a part of it.
BOOK_TYPES = frozenset({'book', 'edited-book', 'monograph', 'reference-book'})

# Crossref returns only 20 rows unless asked for more; 1000 is the documented cap.
MAX_ROWS = 1000


def _first(values, default=''):
    """Return the first element of a list field, or *default* if absent/empty/None."""
    if isinstance(values, (list, tuple)) and values:
        head = values[0]
        return default if head is None else head
    return default


def _clean(value):
    """Render *value* as text with whitespace collapsed so it fits on one RIS line."""
    if value is None:
        return ''
    return ' '.join(str(value).split())


def _add(lines, tag, value):
    """Append a ``TAG  - value`` line to *lines*, skipping empty values."""
    text = _clean(value)
    if text:
        # RIS layout: two-character tag, two spaces, hyphen, one space, value.
        lines.append(f"{tag}  - {text}")


def _format_name(person):
    """Format a Crossref contributor as ``Family, Given``.

    Falls back to whichever part exists, then to the ``name`` field, so that
    contributors without a given name (or organisations) do not raise KeyError.
    """
    family = _clean(person.get('family'))
    given = _clean(person.get('given'))
    if family and given:
        return f"{family}, {given}"
    return family or given or _clean(person.get('name'))


def _issued_year(item, default=''):
    """Return the year from ``item['issued']['date-parts']``, or *default*."""
    issued = item.get('issued') or {}
    first_date = _first(issued.get('date-parts'), [])
    year = _first(first_date, None)
    return default if year is None else year


def fetch_crossref_data(isbn):
    """Fetch bibliographic data from Crossref API for a given ISBN.

    Args:
        isbn: ISBN string used in the ``filter=isbn:`` query.

    Returns:
        The decoded JSON response.

    Exits the process with status 1 (after printing a hint about the
    ``--json`` workaround) when the request or the JSON decoding fails.
    """
    quoted_isbn = urllib.parse.quote(str(isbn), safe='')
    # Ask for the maximum page size; otherwise only the first 20 works come back.
    url = f"https://api.crossref.org/works/?filter=isbn:{quoted_isbn}&rows={MAX_ROWS}"
    print(f"Attempting to fetch from: {url}", file=sys.stderr)

    # NOTE(review): the mailto address appears redacted in this copy of the
    # script; replace it with a real contact address before relying on it.
    request = urllib.request.Request(
        url,
        headers={'User-Agent': 'Mozilla/5.0 (CrossrefToRIS/1.0; mailto:[email protected])'}
    )
    try:
        with urllib.request.urlopen(request, timeout=30) as response:
            return json.loads(response.read().decode('utf-8'))
    except (json.JSONDecodeError, UnicodeDecodeError) as e:
        print(f"Error parsing JSON response: {e}", file=sys.stderr)
        sys.exit(1)
    except OSError as e:
        # URLError/HTTPError and socket timeouts raised while reading are all
        # OSError subclasses.
        print(f"Error fetching data from Crossref API: {e}", file=sys.stderr)
        print(f"\nTo work around this, manually download the JSON from:", file=sys.stderr)
        print(f" {url}", file=sys.stderr)
        print(f"and then run: python {sys.argv[0]} --json downloaded_file.json", file=sys.stderr)
        sys.exit(1)


def load_json_file(filepath):
    """Load JSON data from a file.

    Exits the process with status 1 when the file is missing, unreadable,
    or does not contain valid JSON.
    """
    try:
        with open(filepath, 'r', encoding='utf-8') as f:
            return json.load(f)
    except FileNotFoundError:
        print(f"Error: File not found: {filepath}", file=sys.stderr)
        sys.exit(1)
    except OSError as e:
        print(f"Error reading file {filepath}: {e}", file=sys.stderr)
        sys.exit(1)
    except json.JSONDecodeError as e:
        print(f"Error parsing JSON file: {e}", file=sys.stderr)
        sys.exit(1)


def extract_book_info(items):
    """Extract book-level information from the items list.

    The first item whose type is one of BOOK_TYPES is treated as the book;
    when none exists, the first item is used instead (with a warning).

    Returns:
        A dict with keys ``book_info``, ``full_title``, ``title``,
        ``subtitle``, ``editors``, ``publisher``, ``year``,
        ``isbn_electronic`` and ``isbn_print``.
    """
    book_info = next(
        (item for item in items if item.get('type') in BOOK_TYPES), None
    )
    if not book_info:
        print("Warning: No book entry found, using first item as book info", file=sys.stderr)
        book_info = items[0] if items else {}

    book_title = _first(book_info.get('title'))
    book_subtitle = _first(book_info.get('subtitle'))
    full_book_title = f"{book_title}: {book_subtitle}" if book_subtitle else book_title

    book_isbn_electronic = None
    book_isbn_print = None
    for isbn_entry in book_info.get('isbn-type') or []:
        isbn_kind = isbn_entry.get('type')
        if isbn_kind == 'electronic':
            book_isbn_electronic = isbn_entry.get('value')
        elif isbn_kind == 'print':
            book_isbn_print = isbn_entry.get('value')

    return {
        'book_info': book_info,
        'full_title': full_book_title,
        'title': book_title,
        'subtitle': book_subtitle,
        'editors': book_info.get('editor') or [],
        'publisher': book_info.get('publisher', ''),
        'year': _issued_year(book_info),
        'isbn_electronic': book_isbn_electronic,
        'isbn_print': book_isbn_print,
    }


def _book_record(book_data):
    """Build the RIS lines (including the blank separator) for the book."""
    lines = ["TY  - BOOK"]
    _add(lines, 'TI', book_data['full_title'])
    for editor in book_data['editors']:
        _add(lines, 'ED', _format_name(editor))
    _add(lines, 'PB', book_data['publisher'])
    _add(lines, 'PY', book_data['year'])
    _add(lines, 'SN', book_data['isbn_electronic'])
    _add(lines, 'SN', book_data['isbn_print'])
    _add(lines, 'DO', book_data['book_info'].get('DOI'))
    _add(lines, 'UR', book_data['book_info'].get('URL'))
    lines.append("ER  - ")
    lines.append("")
    return lines


def _chapter_record(item, book_data):
    """Build the RIS lines for one chapter, enriched with the book metadata."""
    lines = ["TY  - CHAP"]
    _add(lines, 'TI', _first(item.get('title')))
    for author in item.get('author') or []:
        _add(lines, 'AU', _format_name(author))
    _add(lines, 'BT', book_data['full_title'])
    for editor in book_data['editors']:
        _add(lines, 'ED', _format_name(editor))
    _add(lines, 'PB', book_data['publisher'])
    # A chapter's own issue year wins; otherwise inherit the book's year.
    _add(lines, 'PY', _issued_year(item, book_data['year']))

    page_range = _clean(item.get('page'))
    if page_range:
        # Treat en/em dashes like a hyphen so "10\u201320" also splits.
        normalized = page_range.replace('\u2013', '-').replace('\u2014', '-')
        start_page, _, end_page = normalized.partition('-')
        _add(lines, 'SP', start_page)
        _add(lines, 'EP', end_page)

    _add(lines, 'SN', book_data['isbn_electronic'])
    _add(lines, 'SN', book_data['isbn_print'])
    _add(lines, 'DO', item.get('DOI'))
    _add(lines, 'UR', item.get('URL'))
    lines.append("ER  - ")
    lines.append("")
    return lines


def generate_ris(items):
    """Generate RIS format output from Crossref items.

    Emits one BOOK record followed by one CHAP record for every item whose
    type is not a book-level type.

    Args:
        items: list of Crossref work dicts (``message.items``).

    Returns:
        The RIS text; records are separated by a blank line.

    Exits the process with status 1 when *items* is empty.
    """
    if not items:
        print("Error: No items found in Crossref response", file=sys.stderr)
        sys.exit(1)

    book_data = extract_book_info(items)
    ris_output = _book_record(book_data)

    chapter_count = 0
    for item in items:
        # Book-level records are already covered by the BOOK entry.
        if item.get('type') in BOOK_TYPES:
            continue
        chapter_count += 1
        ris_output.extend(_chapter_record(item, book_data))

    print(f"Generated RIS entries: 1 book + {chapter_count} chapters = {chapter_count + 1} total",
          file=sys.stderr)
    return '\n'.join(ris_output)


def main():
    """Main function to handle command line arguments and process ISBN."""
    if len(sys.argv) < 2:
        print(__doc__)
        sys.exit(1)

    # Check if using --json option
    if sys.argv[1] == '--json':
        if len(sys.argv) < 3:
            print("Error: --json option requires a JSON file path", file=sys.stderr)
            print(__doc__)
            sys.exit(1)
        json_file = sys.argv[2]
        output_file = sys.argv[3] if len(sys.argv) > 3 else "bibliography.ris"
        print(f"Loading data from JSON file: {json_file}", file=sys.stderr)
        data = load_json_file(json_file)
    else:
        # ISBN mode
        isbn = sys.argv[1].strip()
        output_file = sys.argv[2] if len(sys.argv) > 2 else f"bibliography_{isbn}.ris"
        # Remove hyphens from ISBN if present
        isbn = isbn.replace('-', '')
        print(f"Fetching data for ISBN: {isbn}", file=sys.stderr)
        data = fetch_crossref_data(isbn)

    # Check if we got results; tolerate a missing 'message'/'items'/'total-results'.
    message = data.get('message') if isinstance(data, dict) else None
    if not isinstance(message, dict):
        message = {}
    items = message.get('items') or []
    total_results = message.get('total-results', len(items))
    if not items or total_results == 0:
        print("Error: No results found in the data", file=sys.stderr)
        sys.exit(1)

    print(f"Found {total_results} items", file=sys.stderr)
    if isinstance(total_results, int) and total_results > len(items):
        print(f"Warning: only {len(items)} of {total_results} items are present in the data; "
              "the output will be incomplete", file=sys.stderr)

    ris_content = generate_ris(items)

    # Write to file
    with open(output_file, 'w', encoding='utf-8') as f:
        f.write(ris_content)
    print(f"RIS bibliography written to: {output_file}", file=sys.stderr)


if __name__ == "__main__":
    main()