#!/usr/bin/env python3
"""
Convert Crossref API data to RIS format for a book and its chapters.

Usage:
    python crossref_to_ris.py <ISBN> [output_file]
    python crossref_to_ris.py --json <json_file> [output_file]

Examples:
    python crossref_to_ris.py 9781509982653
    python crossref_to_ris.py 9781509982653 output.ris
    python crossref_to_ris.py --json data.json output.ris

The script will try to fetch from the Crossref API, but if that fails,
you can manually download the JSON from:
    https://api.crossref.org/works/?filter=isbn:<ISBN>
and use the --json option.
"""
|
|
|
import json
import os
import sys
import urllib.error
import urllib.parse
import urllib.request
|
|
|
|
|
def fetch_crossref_data(isbn):
    """Fetch bibliographic data from the Crossref API for a given ISBN.

    Queries the ``/works`` endpoint filtered by ISBN and returns the decoded
    JSON document.

    Args:
        isbn: ISBN to look up. It is percent-encoded before being placed in
            the query string.

    Returns:
        The parsed JSON response (whatever ``json.loads`` produces).

    Exits:
        Prints a message to stderr and calls ``sys.exit(1)`` when the request
        fails (connection error, HTTP error, timeout) or when the response
        body is not valid UTF-8 JSON.
    """
    # Crossref returns only 20 records per page by default, which would
    # silently drop chapters of larger books; ask for the maximum page size.
    encoded_isbn = urllib.parse.quote(str(isbn), safe='')
    url = f"https://api.crossref.org/works/?filter=isbn:{encoded_isbn}&rows=1000"

    print(f"Attempting to fetch from: {url}", file=sys.stderr)

    # NOTE(review): the mailto address below looks like a redacted
    # placeholder -- confirm and replace with a real contact address.
    request = urllib.request.Request(
        url,
        headers={'User-Agent': 'Mozilla/5.0 (CrossrefToRIS/1.0; mailto:[email protected])'},
    )

    try:
        with urllib.request.urlopen(request, timeout=30) as response:
            return json.loads(response.read().decode('utf-8'))
    except (json.JSONDecodeError, UnicodeDecodeError) as e:
        print(f"Error parsing JSON response: {e}", file=sys.stderr)
        sys.exit(1)
    except OSError as e:
        # OSError covers URLError/HTTPError as well as timeouts and connection
        # resets raised while the body is being read.
        print(f"Error fetching data from Crossref API: {e}", file=sys.stderr)
        print("\nTo work around this, manually download the JSON from:", file=sys.stderr)
        print(f" {url}", file=sys.stderr)
        print(f"and then run: python {sys.argv[0]} --json downloaded_file.json", file=sys.stderr)
        sys.exit(1)
|
|
|
|
|
def load_json_file(filepath):
    """Load JSON data from a UTF-8 encoded file.

    Args:
        filepath: Path of the JSON file to read.

    Returns:
        The parsed JSON document.

    Exits:
        Prints a message to stderr and calls ``sys.exit(1)`` when the file is
        missing, cannot be read (e.g. it is a directory or access is denied),
        or does not contain valid UTF-8 JSON.
    """
    try:
        with open(filepath, 'r', encoding='utf-8') as f:
            return json.load(f)
    except FileNotFoundError:
        print(f"Error: File not found: {filepath}", file=sys.stderr)
    except OSError as e:
        # Directories, permission problems and other I/O failures.
        print(f"Error: Cannot read file {filepath}: {e}", file=sys.stderr)
    except (json.JSONDecodeError, UnicodeDecodeError) as e:
        print(f"Error parsing JSON file: {e}", file=sys.stderr)
    # Only reached when one of the handlers above ran.
    sys.exit(1)
|
|
|
|
|
# Crossref work types that describe a whole book rather than a part of one.
_BOOK_TYPES = frozenset({'book', 'edited-book', 'monograph', 'reference-book'})


def _issued_year(record, default=''):
    """Return the first 'issued' date part of *record*, or *default* if absent/null."""
    date_parts = (record.get('issued') or {}).get('date-parts') or []
    if date_parts and date_parts[0] and date_parts[0][0] is not None:
        return date_parts[0][0]
    return default


def extract_book_info(items):
    """Extract book-level information from the items list.

    The first item whose ``type`` is a book-like Crossref type is used as the
    book record. If there is none, a warning is printed to stderr and the
    first item (or an empty dict for an empty list) is used instead.

    Args:
        items: List of Crossref work records (dicts).

    Returns:
        A dict with the keys ``book_info`` (the chosen record), ``full_title``
        ("title: subtitle" when a subtitle exists), ``title``, ``subtitle``,
        ``editors``, ``publisher``, ``year`` (``''`` when unknown),
        ``isbn_electronic`` and ``isbn_print`` (``None`` when absent).
    """
    book_info = next(
        (item for item in items if item.get('type') in _BOOK_TYPES),
        None,
    )
    if not book_info:
        print("Warning: No book entry found, using first item as book info", file=sys.stderr)
        book_info = items[0] if items else {}

    # ``or ['']`` also covers a present-but-empty list, which would otherwise
    # raise IndexError on ``[0]``.
    book_title = (book_info.get('title') or [''])[0]
    book_subtitle = (book_info.get('subtitle') or [''])[0]
    full_book_title = f"{book_title}: {book_subtitle}" if book_subtitle else book_title

    book_isbn_electronic = None
    book_isbn_print = None
    for isbn_entry in book_info.get('isbn-type', []):
        isbn_kind = isbn_entry.get('type')
        if isbn_kind == 'electronic':
            book_isbn_electronic = isbn_entry.get('value')
        elif isbn_kind == 'print':
            book_isbn_print = isbn_entry.get('value')

    return {
        'book_info': book_info,
        'full_title': full_book_title,
        'title': book_title,
        'subtitle': book_subtitle,
        'editors': book_info.get('editor', []),
        'publisher': book_info.get('publisher', ''),
        'year': _issued_year(book_info),
        'isbn_electronic': book_isbn_electronic,
        'isbn_print': book_isbn_print,
    }
|
|
|
|
|
# Crossref work types that describe a whole book; these are never emitted as chapters.
_RIS_BOOK_TYPES = frozenset({'book', 'edited-book', 'monograph', 'reference-book'})


def _ris_field(tag, value):
    """Format one RIS line: two-letter tag, two spaces, hyphen, space, value."""
    return f"{tag}  - {value}"


def _ris_person(person):
    """Render a Crossref contributor as 'Family, Given', tolerating missing parts."""
    family = (person.get('family') or '').strip()
    given = (person.get('given') or '').strip()
    if family and given:
        return f"{family}, {given}"
    # Single-name people or organisations (which carry 'name' instead).
    return family or given or (person.get('name') or '').strip()


def _ris_year(record, fallback):
    """Return the first 'issued' date part of *record*, or *fallback* if absent/null."""
    date_parts = (record.get('issued') or {}).get('date-parts') or []
    if date_parts and date_parts[0] and date_parts[0][0] is not None:
        return date_parts[0][0]
    return fallback


def _ris_links(record):
    """Return the DO/UR lines for *record* (only for keys that are present)."""
    return [
        _ris_field(tag, record[key])
        for tag, key in (('DO', 'DOI'), ('UR', 'URL'))
        if key in record
    ]


def generate_ris(items):
    """Generate RIS format output from Crossref items.

    Emits one ``BOOK`` record built from ``extract_book_info(items)`` followed
    by one ``CHAP`` record for every item that is not itself a book-like
    record. Each chapter repeats the book's title, editors, publisher and
    ISBNs, and uses its own issued year when it has one.

    Args:
        items: List of Crossref work records (dicts).

    Returns:
        The RIS text, records separated by blank lines, as a single string.

    Exits:
        Prints a message to stderr and calls ``sys.exit(1)`` if *items* is
        empty.
    """
    if not items:
        print("Error: No items found in Crossref response", file=sys.stderr)
        sys.exit(1)

    book_data = extract_book_info(items)

    # Lines shared verbatim by the book record and every chapter record.
    editor_lines = [
        _ris_field('ED', name)
        for name in map(_ris_person, book_data['editors'])
        if name
    ]
    publisher_line = _ris_field('PB', book_data['publisher'])
    isbn_lines = [
        _ris_field('SN', isbn)
        for isbn in (book_data['isbn_electronic'], book_data['isbn_print'])
        if isbn
    ]
    end_of_record = [_ris_field('ER', ''), ""]

    # Book record.
    ris_output = [
        _ris_field('TY', 'BOOK'),
        _ris_field('TI', book_data['full_title']),
        *editor_lines,
        publisher_line,
        _ris_field('PY', book_data['year']),
        *isbn_lines,
        *_ris_links(book_data['book_info']),
        *end_of_record,
    ]

    # Chapter records.
    chapter_count = 0
    for item in items:
        if item.get('type') in _RIS_BOOK_TYPES:
            continue

        chapter_count += 1
        ris_output.append(_ris_field('TY', 'CHAP'))

        # ``or ['']`` also covers an empty title list.
        ris_output.append(_ris_field('TI', (item.get('title') or [''])[0]))

        for author in item.get('author') or []:
            author_name = _ris_person(author)
            if author_name:
                ris_output.append(_ris_field('AU', author_name))

        ris_output.append(_ris_field('BT', book_data['full_title']))
        ris_output.extend(editor_lines)
        ris_output.append(publisher_line)
        ris_output.append(_ris_field('PY', _ris_year(item, book_data['year'])))

        page_range = item.get('page')
        if page_range:
            start_page, _, end_page = str(page_range).partition('-')
            ris_output.append(_ris_field('SP', start_page.strip()))
            if end_page.strip():
                ris_output.append(_ris_field('EP', end_page.strip()))

        ris_output.extend(isbn_lines)
        ris_output.extend(_ris_links(item))
        ris_output.extend(end_of_record)

    print(f"Generated RIS entries: 1 book + {chapter_count} chapters = {chapter_count + 1} total",
          file=sys.stderr)

    return '\n'.join(ris_output)
|
|
|
|
|
def main():
    """Handle command line arguments and write the RIS bibliography.

    Two modes are supported: ``<ISBN> [output_file]`` fetches the data from
    Crossref, and ``--json <json_file> [output_file]`` reads a previously
    downloaded response. ``-h``/``--help`` prints the usage text.

    Exits:
        ``sys.exit(1)`` on missing arguments, when the data contains no
        items, or when the output file cannot be written; ``sys.exit(0)``
        after printing help.
    """
    args = sys.argv[1:]
    if not args:
        print(__doc__)
        sys.exit(1)

    if args[0] in ('-h', '--help'):
        print(__doc__)
        sys.exit(0)

    if args[0] == '--json':
        if len(args) < 2:
            print("Error: --json option requires a JSON file path", file=sys.stderr)
            print(__doc__)
            sys.exit(1)

        json_file = args[1]
        output_file = args[2] if len(args) > 2 else "bibliography.ris"

        print(f"Loading data from JSON file: {json_file}", file=sys.stderr)
        data = load_json_file(json_file)
    else:
        # ISBN mode. The default file name keeps the ISBN as typed; only the
        # value sent to Crossref is normalised.
        isbn = args[0].strip()
        output_file = args[1] if len(args) > 1 else f"bibliography_{isbn}.ris"
        isbn = isbn.replace('-', '').replace(' ', '')

        print(f"Fetching data for ISBN: {isbn}", file=sys.stderr)
        data = fetch_crossref_data(isbn)

    # Tolerate JSON that is not shaped like a Crossref list response instead
    # of failing with KeyError/AttributeError.
    message = data.get('message') if isinstance(data, dict) else None
    if not isinstance(message, dict):
        message = {}
    items = message.get('items') or []
    total_results = message.get('total-results') or len(items)

    if not items:
        print("Error: No results found in the data", file=sys.stderr)
        sys.exit(1)

    print(f"Found {total_results} items", file=sys.stderr)
    if total_results > len(items):
        # Crossref paginates; a partial page means chapters are missing.
        print(
            f"Warning: only {len(items)} of {total_results} items are present "
            "in the data; the bibliography will be incomplete",
            file=sys.stderr,
        )

    ris_content = generate_ris(items)

    try:
        with open(output_file, 'w', encoding='utf-8') as f:
            f.write(ris_content)
    except OSError as e:
        print(f"Error: Cannot write output file {output_file}: {e}", file=sys.stderr)
        sys.exit(1)

    print(f"RIS bibliography written to: {output_file}", file=sys.stderr)
|
|
|
|
|
# Run the command-line interface only when executed as a script, not on import.
if __name__ == "__main__":
    main()