Last active
November 2, 2022 13:40
-
-
Save k-funk/b71d6a685201b96f50fe0a83c0e97aa6 to your computer and use it in GitHub Desktop.
Revisions
-
k-funk revised this gist
Dec 20, 2019. 1 changed file with 1 addition and 0 deletions. There are no files selected for viewing.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode characters.
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,4 @@
+# inspired by https://medium.com/p/d49f037c8e3c/responses/show (hopefully the regex is updated there when this one breaks)
 # also exists as a django-cms plugin at https://github.com/k-funk/djangocms-scrape-google-photos-album
 import logging
-
k-funk created this gist
Dec 18, 2019. There are no files selected for viewing.
# also exists as a django-cms plugin at https://github.com/k-funk/djangocms-scrape-google-photos-album
"""Scrape the image URLs out of a shared Google Photos album page."""

import logging
import re

logger = logging.getLogger(__name__)

# originally this was 139min chars. not actually sure the length they can be,
# so the pattern accepts any token of 128 or more URL-safe characters.
REGEX = r"(https:\/\/lh3\.googleusercontent\.com\/[a-zA-Z0-9\-_]{128,})"

# Seconds to wait on the HTTP request before giving up; without a timeout
# `requests` will block indefinitely on a stalled connection.
DEFAULT_TIMEOUT = 30


def get_photos_from_html(html):
    """Return the photo URLs embedded in an album page's HTML.

    Args:
        html: The album page markup as a string.

    Returns:
        A list of every substring matching ``REGEX``, in document order,
        minus the first and the last match. Fewer than three matches
        therefore yields an empty list.
    """
    # first and last elements are the album cover
    return re.findall(REGEX, html)[1:-1]


def get_photo_urls(album_url, timeout=DEFAULT_TIMEOUT):
    """Fetch an album page and return the photo URLs found in it.

    This is best-effort: any failure (network error, HTTP error status,
    no photos in the page) is logged and reported as an empty list rather
    than raised.

    Args:
        album_url: URL of the album page to download.
        timeout: Seconds to wait for the server, passed straight through to
            ``requests.get``. Defaults to ``DEFAULT_TIMEOUT``.

    Returns:
        The matched photo URLs in reverse document order, or ``[]`` on any
        failure.
    """
    # Imported here rather than at module level so that the pure parsing
    # helper above can be imported where `requests` is not installed.
    import requests

    logger.info('Scraping Google Photos album at: %s', album_url)
    try:
        response = requests.get(album_url, timeout=timeout)
        # Surface 4xx/5xx as the failure reason instead of parsing an error
        # page and misreporting it as "No photos found.".
        response.raise_for_status()
        photo_urls = get_photos_from_html(response.text)
        if not photo_urls:
            raise Exception('No photos found.')
    except Exception as err:
        # Deliberately broad: callers rely on getting a list back, never an
        # exception, whatever went wrong while scraping.
        logger.error('Google Photos scraping failed:\n%s', err)
        return []

    logger.info('# of images: %d', len(photo_urls))
    photo_urls.reverse()  # makes the order appear the way it does on the website
    return photo_urls


if __name__ == "__main__":
    print(get_photo_urls('https://photos.app.goo.gl/...'))