#!/usr/bin/env python """ Check if an item on archive.org has an acoustid. Usage: ./check_for_acoustid.py {item} Usage with GNU Parallel: cat itemlist.txt | parallel --max-procs=8 --group './check_for_acoustid.py {}' """ import sys import json import urllib def get_iterable(x): return (x,) if not isinstance(x, (tuple, list)) else x def iter_contains_prefix(iter, prefix): for s in iter: if s.startswith(prefix): return True return False def get_url(url): f = urllib.urlopen(url) c = f.read() f.close() return c def get_meta(item): j = get_url("http://archive.org/metadata/" + item) yield json.loads(j); def item_iterator(): filename = 'items' f = open("items") for line in f: yield line.strip() def file_has_audio(file): if (file.get('source') == 'original' and file['format'] in ('VBR MP3', 'AIFF', '24bit Flac', 'Flac', 'Apple Lossless Audio', 'Advanced Audio Coding')): return True item = sys.argv[1] for metadata in get_meta(item): files = metadata.get('files') if not files: continue for file in files: if file_has_audio(file): fexts = file.get('external-identifier', []) found_accoustid = iter_contains_prefix(get_iterable(fexts), 'urn:acoustid') if not found_accoustid: print metadata['metadata']['identifier'] break