k-funk · November 2, 2022 13:40 · Dec 20, 2019 · Dec 18, 2019
diff --git a/scrape_google_photos_album.py b/scrape_google_photos_album.py
@@ -1,3 +1,4 @@
+# inspired by https://medium.com/p/d49f037c8e3c/responses/show (hopefully the regex is updated there when this one breaks)
 # also exists as a django-cms plugin at https://github.com/k-funk/djangocms-scrape-google-photos-album
 
 import logging

diff --git a/scrape_google_photos_album.py b/scrape_google_photos_album.py
@@ -0,0 +1,38 @@
+# also exists as a django-cms plugin at https://github.com/k-funk/djangocms-scrape-google-photos-album
+
+import logging
+import re
+
+import requests
+
+logger = logging.getLogger(__name__)
+
+# originally the minimum length here was 139 chars; not actually sure how long these can be
+REGEX = r"(https:\/\/lh3\.googleusercontent\.com\/[a-zA-Z0-9\-_]{128,})"
+
+
+def get_photos_from_html(html):
+    # first and last elements are the album cover
+    return re.findall(REGEX, html)[1:-1]
+
+
+def get_photo_urls(album_url):
+    logger.info('Scraping Google Photos album at: {}'.format(album_url))
+
+    try:
+        r = requests.get(album_url)
+
+        photo_urls = get_photos_from_html(r.text) or []
+        if not len(photo_urls):
+            raise Exception('No photos found.')
+        logger.info("# of images: {}".format(len(photo_urls)))
+
+        photo_urls.reverse()  # makes the order appear the way it does on the website
+
+        return photo_urls
+    except Exception as err:
+        logger.error('Google Photos scraping failed:\n{}'.format(str(err)))
+    return []
+
+if __name__ == "__main__":
+  print(get_photo_urls('https://photos.app.goo.gl/...'))