Skip to content

Instantly share code, notes, and snippets.

@streeter
Last active July 24, 2016 03:18
Show Gist options
  • Select an option

  • Save streeter/8fedecdeec3d0c7f960a to your computer and use it in GitHub Desktop.

Select an option

Save streeter/8fedecdeec3d0c7f960a to your computer and use it in GitHub Desktop.

Revisions

  1. streeter revised this gist Jul 24, 2016. 1 changed file with 5 additions and 4 deletions.
    9 changes: 5 additions & 4 deletions backprint_scraper.py
    100644 → 100755
    Original file line number Diff line number Diff line change
    @@ -5,18 +5,19 @@

    import requests

    bibs = (897, 898)
    event = 138191
    bibs = (496, )

    url = 'http://www.backprint.com/facchinophotography/135707/{bib}'
    url = 'http://www.backprint.com/facchinophotography/{event}/{bib}'

    thumb_pattern = re.compile(r'http\:\/\/webres.backprint.com/.*?t\.jpg')

    for bib in bibs:
    try:
    res = requests.get(url.format(bib=bib))
    res = requests.get(url.format(event=event, bib=bib))
    res.raise_for_status()
    except requests.exceptions.HTTPError:
    print('[ERROR] Unable to get info for bib {}'.format(bib))
    print('[ERROR] Unable to get info for bib {}'.format(event, bib))
    continue

    # Get all the thumbnail links
  2. streeter created this gist Mar 11, 2016.
    49 changes: 49 additions & 0 deletions backprint_scraper.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,49 @@
    #!/usr/bin/env python

    import os
    import re

    import requests

    # Download the large race photos for a set of bib numbers from
    # backprint.com. For each bib the gallery page is fetched, the thumbnail
    # URLs are pulled out of the HTML with a regex, and the matching large
    # images are saved into a per-bib directory ('photos_<bib>').

    # Event id and bib numbers to fetch; both are substituted into `url`.
    event = 135707
    bibs = (897, 898)

    url = 'http://www.backprint.com/facchinophotography/{event}/{bib}'

    # Dots in the host name are escaped so they match only a literal '.'.
    thumb_pattern = re.compile(r'http://webres\.backprint\.com/.*?t\.jpg')

    # Seconds to wait on the server; without a timeout requests can block
    # indefinitely on an unresponsive host.
    timeout = 30

    for bib in bibs:
        try:
            res = requests.get(url.format(event=event, bib=bib),
                               timeout=timeout)
            res.raise_for_status()
        except requests.exceptions.RequestException:
            # RequestException also covers connection errors and timeouts,
            # not just bad HTTP status codes, so one bad bib cannot abort
            # the whole run.
            print('[ERROR] Unable to get info for bib {}'.format(bib))
            continue

        # Get all the thumbnail links. The decoded text is searched because
        # `res.content` is bytes, which a str pattern cannot scan on Python 3.
        strings = thumb_pattern.findall(res.text)
        if not strings:
            print('[ERROR] Unable to find any matches for the thumbnail pattern!')
            continue

        # Create a list of all the large images. There are images at
        # 'f.jpg', 'h.jpg', and 't.jpg', the size specifier is case-insensitive.
        images = [thumb.replace('t.jpg', 'h.jpg') for thumb in strings]

        # Create a directory to store in
        photo_dir = 'photos_{}'.format(bib)
        if not os.path.isdir(photo_dir):
            os.makedirs(photo_dir)

        for image_url in images:
            filename = os.path.basename(image_url)
            try:
                r = requests.get(image_url, timeout=timeout)
                r.raise_for_status()
            except requests.exceptions.RequestException:
                print('[ERROR] Unable to download the image: {}'.format(image_url))
                continue

            # Write the image to disk in small chunks.
            with open(os.path.join(photo_dir, filename), 'wb') as fd:
                for chunk in r.iter_content(512):
                    fd.write(chunk)
            print('Downloaded ' + filename)