Skip to content

Instantly share code, notes, and snippets.

@cboulanger
Last active November 2, 2025 11:58
Show Gist options
  • Select an option

  • Save cboulanger/d0227e5b1f2840cd495ba5ad04d3767d to your computer and use it in GitHub Desktop.

Select an option

Save cboulanger/d0227e5b1f2840cd495ba5ad04d3767d to your computer and use it in GitHub Desktop.
crossref_to_ris.py

Crossref to RIS Converter

This Python script converts Crossref API bibliographic data into RIS format, specifically designed to handle books with chapters. It enriches chapter records with complete book information (title, editors, ISBN) that is typically missing in the Crossref API response.

Features

  • Fetches bibliographic data from Crossref API using ISBN
  • Generates complete RIS records for both the book and all chapters
  • Includes full book metadata (title, editors, publisher, ISBNs) in each chapter entry
  • Can work with pre-downloaded JSON files when API access is restricted
  • Handles both electronic and print ISBNs

Requirements

  • Python 3.x (uses only standard library modules)
  • Internet connection (for API mode)

Usage

Method 1: Direct ISBN lookup

python crossref_to_ris.py <ISBN> [output_file]

Example:

python crossref_to_ris.py 9781509982653
python crossref_to_ris.py 9781509982653 my_bibliography.ris

If no output file is specified, the script creates bibliography_<ISBN>.ris

Method 2: From JSON file

If API access is blocked or you have pre-downloaded JSON data:

  1. Download JSON from: https://api.crossref.org/works/?filter=isbn:<ISBN>
  2. Run the script with the JSON file:
python crossref_to_ris.py --json <json_file> [output_file]

Example:

python crossref_to_ris.py --json crossref_data.json bibliography.ris

Output Format

The script generates RIS format with:

  • Book entry (TY - BOOK)

    • Complete book title with subtitle
    • All editors
    • Publisher and year
    • Both electronic and print ISBNs
    • DOI and URL
  • Chapter entries (TY - CHAP)

    • Chapter title
    • Chapter authors
    • Book title (BT field)
    • All book editors
    • Publisher and year
    • Page range (start and end pages)
    • ISBNs
    • Chapter-specific DOI and URL

Example

$ python crossref_to_ris.py 9781509982653
Fetching data for ISBN: 9781509982653
Found 11 items
Generated RIS entries: 1 book + 10 chapters = 11 total
RIS bibliography written to: bibliography_9781509982653.ris

RIS Fields Used

  • TY - Type of reference (BOOK or CHAP)
  • TI - Title
  • AU - Author
  • ED - Editor
  • BT - Book Title (for chapters)
  • PB - Publisher
  • PY - Publication Year
  • SP - Start Page
  • EP - End Page
  • SN - ISBN
  • DO - DOI
  • UR - URL
  • ER - End of Reference

Notes

  • The script automatically removes hyphens from ISBNs
  • ISBNs can be provided in any format (with or without hyphens)
  • Both ISBN-10 and ISBN-13 formats are supported
  • Network restrictions may prevent direct API access; use the --json method as a workaround

License

Written by Claude Code. Public domain / MIT - use freely for any purpose.

#!/usr/bin/env python3
"""
Convert Crossref API data to RIS format for a book and its chapters.
Usage:
python crossref_to_ris.py <ISBN> [output_file]
python crossref_to_ris.py --json <json_file> [output_file]
Examples:
python crossref_to_ris.py 9781509982653
python crossref_to_ris.py 9781509982653 output.ris
python crossref_to_ris.py --json data.json output.ris
The script will try to fetch from Crossref API, but if that fails,
you can manually download the JSON from:
https://api.crossref.org/works/?filter=isbn:<ISBN>
and use the --json option.
"""
import sys
import json
import urllib.request
import urllib.error
import os
def fetch_crossref_data(isbn, rows=1000):
    """Fetch bibliographic data from the Crossref API for a given ISBN.

    Args:
        isbn: ISBN to filter on. It is percent-encoded before being placed
            in the URL, so stray characters cannot corrupt the query.
        rows: Maximum number of records to request in one call. Crossref
            returns only 20 records per request unless ``rows`` is given
            (its documented maximum is 1000), which would silently drop
            chapters of larger books.

    Returns:
        The decoded JSON response.

    On a network/HTTP failure or an undecodable response, an explanation is
    printed to stderr and the process exits with status 1.
    """
    # Local import keeps this function self-contained; the file's top-level
    # imports only name urllib.request and urllib.error.
    from urllib.parse import quote

    url = (
        "https://api.crossref.org/works/"
        f"?filter=isbn:{quote(str(isbn), safe='')}&rows={int(rows)}"
    )
    print(f"Attempting to fetch from: {url}", file=sys.stderr)

    # NOTE(review): the mailto address below looks like an obfuscation
    # placeholder rather than a real contact address -- confirm.
    request = urllib.request.Request(
        url,
        headers={'User-Agent': 'Mozilla/5.0 (CrossrefToRIS/1.0; mailto:[email protected])'}
    )

    try:
        with urllib.request.urlopen(request, timeout=30) as response:
            payload = response.read()
    except OSError as e:
        # URLError/HTTPError are OSError subclasses; catching OSError also
        # covers a socket timeout raised while reading the response body.
        print(f"Error fetching data from Crossref API: {e}", file=sys.stderr)
        print(f"\nTo work around this, manually download the JSON from:", file=sys.stderr)
        print(f" {url}", file=sys.stderr)
        print(f"and then run: python {sys.argv[0]} --json downloaded_file.json", file=sys.stderr)
        sys.exit(1)

    try:
        return json.loads(payload.decode('utf-8'))
    except (UnicodeDecodeError, json.JSONDecodeError) as e:
        print(f"Error parsing JSON response: {e}", file=sys.stderr)
        sys.exit(1)
def load_json_file(filepath):
    """Read *filepath* as UTF-8 text and return the parsed JSON value.

    If the file does not exist or does not contain valid JSON, an error
    message is printed to stderr and the process exits with status 1.
    """
    try:
        with open(filepath, 'r', encoding='utf-8') as handle:
            return json.loads(handle.read())
    except (FileNotFoundError, json.JSONDecodeError) as err:
        # Both failures end the same way; only the message differs.
        if isinstance(err, FileNotFoundError):
            message = f"Error: File not found: {filepath}"
        else:
            message = f"Error parsing JSON file: {err}"
    print(message, file=sys.stderr)
    sys.exit(1)
# Crossref work types that describe a whole book rather than a part of one.
_BOOK_TYPES = frozenset({'book', 'edited-book', 'monograph', 'reference-book'})


def _first(values):
    """Return the first element of a Crossref list field, or '' if absent/empty."""
    if isinstance(values, str):
        return values
    return (values[0] or '') if values else ''


def _clean(value):
    """Collapse all whitespace so a value always fits on a single RIS line."""
    return ' '.join(str(value).split())


def _issued_year(item):
    """Return the year from ``item['issued']``, or '' when it is missing or null."""
    date_parts = (item.get('issued') or {}).get('date-parts') or []
    first_date = date_parts[0] if date_parts else None
    if first_date and first_date[0] is not None:
        return first_date[0]
    return ''


def _person_name(person):
    """Format a contributor as 'Family, Given', degrading gracefully.

    Falls back to whichever of family/given is present, then to the
    ``name`` key, so contributors without both name parts do not crash.
    """
    family = _clean(person.get('family') or '')
    given = _clean(person.get('given') or '')
    if family and given:
        return f"{family}, {given}"
    return family or given or _clean(person.get('name') or '')


def _add(lines, tag, value):
    """Append ``TAG  - value`` to *lines*, skipping empty or missing values."""
    if value is None:
        return
    text = _clean(value)
    if text:
        # RIS tags are two characters, two spaces, a hyphen and one space.
        lines.append(f"{tag}  - {text}")


def _add_people(lines, tag, people):
    """Append one *tag* line per contributor in *people*."""
    for person in people or []:
        _add(lines, tag, _person_name(person))


def _add_isbns(lines, book_data):
    """Append the electronic and then the print ISBN, when known."""
    _add(lines, 'SN', book_data['isbn_electronic'])
    _add(lines, 'SN', book_data['isbn_print'])


def _add_pages(lines, page):
    """Append SP (and EP for a range) from a Crossref ``page`` string."""
    if not page:
        return
    # Treat an en dash like a hyphen so '1\u201320' is still split as a range.
    start, separator, end = str(page).replace('\u2013', '-').partition('-')
    _add(lines, 'SP', start)
    if separator:
        _add(lines, 'EP', end)


def extract_book_info(items):
    """Extract book-level information from the items list.

    The first item whose type is one of ``_BOOK_TYPES`` is taken as the
    book. If there is none, a warning is printed to stderr and the first
    item (or an empty record for an empty list) is used instead.

    Returns:
        A dict with the keys ``book_info`` (the raw item), ``full_title``,
        ``title``, ``subtitle``, ``editors``, ``publisher``, ``year``
        ('' when unknown), ``isbn_electronic`` and ``isbn_print`` (``None``
        when unknown).
    """
    book_info = next(
        (item for item in items if item.get('type') in _BOOK_TYPES), None
    )
    if book_info is None:
        print("Warning: No book entry found, using first item as book info", file=sys.stderr)
        book_info = items[0] if items else {}

    book_title = _first(book_info.get('title'))
    book_subtitle = _first(book_info.get('subtitle'))
    full_book_title = f"{book_title}: {book_subtitle}" if book_subtitle else book_title

    # Later entries of the same type win, matching a plain sequential scan.
    isbn_by_type = {}
    for isbn_entry in book_info.get('isbn-type') or []:
        isbn_by_type[isbn_entry.get('type')] = isbn_entry.get('value')

    return {
        'book_info': book_info,
        'full_title': full_book_title,
        'title': book_title,
        'subtitle': book_subtitle,
        'editors': book_info.get('editor') or [],
        'publisher': book_info.get('publisher') or '',
        'year': _issued_year(book_info),
        'isbn_electronic': isbn_by_type.get('electronic'),
        'isbn_print': isbn_by_type.get('print'),
    }


def generate_ris(items):
    """Generate RIS format output from Crossref items.

    Emits one BOOK record built from the book-level item, followed by one
    CHAP record for every item that is not itself a book. Each chapter
    carries the book title, editors, publisher and ISBNs, and falls back to
    the book's year when it has none of its own. Fields with no value are
    omitted.

    Args:
        items: List of Crossref work records (dicts).

    Returns:
        The RIS text, with records separated by a blank line.

    Prints an error and exits with status 1 when *items* is empty; prints a
    summary count to stderr on success.
    """
    if not items:
        print("Error: No items found in Crossref response", file=sys.stderr)
        sys.exit(1)

    book_data = extract_book_info(items)
    book_record = book_data['book_info']
    ris_output = []

    # Book entry
    ris_output.append("TY  - BOOK")
    _add(ris_output, 'TI', book_data['full_title'])
    _add_people(ris_output, 'ED', book_data['editors'])
    _add(ris_output, 'PB', book_data['publisher'])
    _add(ris_output, 'PY', book_data['year'])
    _add_isbns(ris_output, book_data)
    _add(ris_output, 'DO', book_record.get('DOI'))
    _add(ris_output, 'UR', book_record.get('URL'))
    ris_output.extend(("ER  - ", ""))

    # Chapter entries
    chapter_count = 0
    for item in items:
        # Book-level records were already covered by the BOOK entry.
        if item.get('type') in _BOOK_TYPES:
            continue
        chapter_count += 1
        ris_output.append("TY  - CHAP")
        _add(ris_output, 'TI', _first(item.get('title')))
        _add_people(ris_output, 'AU', item.get('author'))
        _add(ris_output, 'BT', book_data['full_title'])
        _add_people(ris_output, 'ED', book_data['editors'])
        _add(ris_output, 'PB', book_data['publisher'])
        # Prefer the chapter's own year; fall back to the book's.
        _add(ris_output, 'PY', _issued_year(item) or book_data['year'])
        _add_pages(ris_output, item.get('page'))
        _add_isbns(ris_output, book_data)
        _add(ris_output, 'DO', item.get('DOI'))
        _add(ris_output, 'UR', item.get('URL'))
        ris_output.extend(("ER  - ", ""))

    print(f"Generated RIS entries: 1 book + {chapter_count} chapters = {chapter_count + 1} total",
          file=sys.stderr)
    return '\n'.join(ris_output)
def main():
    """Handle command line arguments and write the RIS bibliography.

    Two modes are supported:

    * ``<ISBN> [output_file]`` -- fetch the records from Crossref. Hyphens
      are removed from the ISBN before the lookup; the default output name
      is ``bibliography_<ISBN as typed>.ris``.
    * ``--json <json_file> [output_file]`` -- read a previously downloaded
      Crossref response; the default output name is ``bibliography.ris``.

    Prints the usage text and exits with status 1 when arguments are
    missing, and exits with status 1 when the data contains no records.
    """
    if len(sys.argv) < 2:
        print(__doc__)
        sys.exit(1)

    if sys.argv[1] == '--json':
        if len(sys.argv) < 3:
            print("Error: --json option requires a JSON file path", file=sys.stderr)
            print(__doc__)
            sys.exit(1)
        json_file = sys.argv[2]
        output_file = sys.argv[3] if len(sys.argv) > 3 else "bibliography.ris"
        print(f"Loading data from JSON file: {json_file}", file=sys.stderr)
        data = load_json_file(json_file)
    else:
        # ISBN mode
        isbn = sys.argv[1].strip()
        # The default file name deliberately uses the ISBN as typed,
        # before hyphens are removed for the lookup.
        output_file = sys.argv[2] if len(sys.argv) > 2 else f"bibliography_{isbn}.ris"
        isbn = isbn.replace('-', '')
        print(f"Fetching data for ISBN: {isbn}", file=sys.stderr)
        data = fetch_crossref_data(isbn)

    # Validate the response shape instead of assuming it, so a JSON file
    # with an unexpected structure gives a clear error, not a traceback.
    message = data.get('message') if isinstance(data, dict) else None
    items = message.get('items') if isinstance(message, dict) else None
    if not items:
        print(f"Error: No results found in the data", file=sys.stderr)
        sys.exit(1)

    # Report what is actually present; the response's total can be larger
    # than the number of records it contains.
    print(f"Found {len(items)} items", file=sys.stderr)
    total_results = message.get('total-results')
    if isinstance(total_results, int) and total_results > len(items):
        print(
            f"Warning: the data contains {len(items)} of {total_results} matching "
            "records; the bibliography will be incomplete",
            file=sys.stderr,
        )

    ris_content = generate_ris(items)

    with open(output_file, 'w', encoding='utf-8') as f:
        f.write(ris_content)
    print(f"RIS bibliography written to: {output_file}", file=sys.stderr)


# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment