streeter · July 24, 2016 03:18 · Jul 24, 2016 · Mar 11, 2016
diff --git a/backprint_scraper.py b/backprint_scraper.py
@@ -5,18 +5,19 @@
 
 import requests
 
-bibs = (897, 898)
+event = 138191
+bibs = (496, )
 
-url = 'http://www.backprint.com/facchinophotography/135707/{bib}'
+url = 'http://www.backprint.com/facchinophotography/{event}/{bib}'
 
 thumb_pattern = re.compile(r'http\:\/\/webres.backprint.com/.*?t\.jpg')
 
 for bib in bibs:
     try:
-        res = requests.get(url.format(bib=bib))
+        res = requests.get(url.format(event=event, bib=bib))
         res.raise_for_status()
     except requests.exceptions.HTTPError:
-        print('[ERROR] Unable to get info for bib {}'.format(bib))
+        print('[ERROR] Unable to get info for bib {} (event {})'.format(bib, event))
         continue
 
     # Get all the thumbnail links

diff --git a/backprint_scraper.py b/backprint_scraper.py
@@ -0,0 +1,59 @@
+#!/usr/bin/env python
+
+"""Download the 'h.jpg' photos that backprint.com lists for each bib.
+
+For every bib number in ``bibs`` the script requests that bib's page,
+collects the thumbnail URLs found in the response, and saves the 'h.jpg'
+counterpart of each thumbnail into a ``photos_<bib>`` directory.
+"""
+
+import os
+import re
+
+import requests
+
+bibs = (897, 898)
+
+url = 'http://www.backprint.com/facchinophotography/135707/{bib}'
+
+thumb_pattern = re.compile(r'http\:\/\/webres.backprint.com/.*?t\.jpg')
+
+for bib in bibs:
+    try:
+        res = requests.get(url.format(bib=bib))
+        res.raise_for_status()
+    except requests.exceptions.HTTPError:
+        print('[ERROR] Unable to get info for bib {}'.format(bib))
+        continue
+
+    # Get all the thumbnail links
+    # Match against the decoded text, not the raw bytes: the pattern is a
+    # str pattern, and applying it to bytes raises TypeError on Python 3.
+    strings = thumb_pattern.findall(res.text)
+    if not strings:
+        print('[ERROR] Unable to find any matches for the thumbnail pattern!')
+        continue
+
+    # Create a list of all the large images. There are images at
+    # 'f.jpg', 'h.jpg', and 't.jpg'; the size specifier is case-insensitive.
+    images = [thumb.replace('t.jpg', 'h.jpg') for thumb in strings]
+
+    # Create a directory to store in
+    photo_dir = 'photos_{}'.format(bib)
+    if not os.path.isdir(photo_dir):
+        os.makedirs(photo_dir)
+
+    for image_url in images:
+        filename = os.path.basename(image_url)
+        try:
+            r = requests.get(image_url)
+            r.raise_for_status()
+        except requests.exceptions.HTTPError:
+            print('[ERROR] Unable to download the image: {}'.format(image_url))
+            continue
+
+        # Write the image to disk in 512-byte chunks
+        with open(os.path.join(photo_dir, filename), 'wb') as fd:
+            for chunk in r.iter_content(512):
+                fd.write(chunk)
+        print('Downloaded ' + filename)
No results found