# inspired by https://medium.com/p/d49f037c8e3c/responses/show (hopefully the regex is updated there when this one breaks)
# also exists as a django-cms plugin at https://github.com/k-funk/djangocms-scrape-google-photos-album
import logging
import re

import requests

logger = logging.getLogger(__name__)

# Originally this required a minimum of 139 chars; not actually sure how long
# the identifier part of these URLs can be.
REGEX = r"(https:\/\/lh3\.googleusercontent\.com\/[a-zA-Z0-9\-_]{128,})"

# Seconds to wait for the album page before giving up, so a stalled
# connection cannot block the caller indefinitely.
DEFAULT_TIMEOUT = 10


def get_photos_from_html(html):
    """Return the photo URLs found in an album page's HTML.

    Every substring of ``html`` matching ``REGEX`` is collected in document
    order, then the first and last matches are dropped.

    Args:
        html: Text of the album page.

    Returns:
        A list of URL strings; empty when fewer than three URLs match.
    """
    # first and last elements are the album cover
    return re.findall(REGEX, html)[1:-1]


def get_photo_urls(album_url, timeout=DEFAULT_TIMEOUT):
    """Fetch an album page and return the photo URLs it contains.

    This function never raises: any failure (invalid URL, network error,
    HTTP error status, or a page with no photos) is logged at ERROR level
    and reported as an empty list.

    Args:
        album_url: URL of the shared album page to fetch.
        timeout: Value passed to ``requests.get`` as its ``timeout``
            argument. Defaults to ``DEFAULT_TIMEOUT`` seconds.

    Returns:
        The photo URLs in reverse order of their appearance in the page,
        or ``[]`` on failure.
    """
    logger.info('Scraping Google Photos album at: %s', album_url)

    try:
        response = requests.get(album_url, timeout=timeout)
        # Surface 4xx/5xx responses as what they are rather than letting an
        # error page be reported as "No photos found."
        response.raise_for_status()
        photo_urls = get_photos_from_html(response.text)
    except Exception as err:
        # Deliberately broad: callers rely on getting [] instead of an
        # exception, whatever goes wrong while fetching or parsing.
        logger.error('Google Photos scraping failed:\n%s', err)
        return []

    if not photo_urls:
        logger.error('Google Photos scraping failed:\n%s', 'No photos found.')
        return []

    logger.info("# of images: %s", len(photo_urls))
    photo_urls.reverse()  # makes the order appear the way it does on the website
    return photo_urls


if __name__ == "__main__":
    print(get_photo_urls('https://photos.app.goo.gl/...'))