# inspired by https://medium.com/p/d49f037c8e3c/responses/show (hopefully the regex is updated there when this one breaks)
# also exists as a django-cms plugin at https://github.com/k-funk/djangocms-scrape-google-photos-album

import logging
import re

import requests

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Matches https://lh3.googleusercontent.com/ URLs whose trailing token is at
# least 128 characters drawn from letters, digits, "-" and "_".
# The minimum length was originally 139 characters; it is not actually known
# how long (or short) these tokens can be.
REGEX = r"(https:\/\/lh3\.googleusercontent\.com\/[a-zA-Z0-9\-_]{128,})"


def get_photos_from_html(html):
    """Return the URLs in *html* that match ``REGEX``, minus the two ends.

    The first and the last match are both the album cover, so they are
    dropped; only the matches in between are returned, in document order.
    """
    matched_urls = [found.group(1) for found in re.finditer(REGEX, html)]
    # Slice off the album cover, which shows up as the first and last match.
    without_cover = matched_urls[1:-1]
    return without_cover


def get_photo_urls(album_url, timeout=30):
    """Scrape the photo URLs out of a Google Photos album page.

    This is a best-effort helper: any failure (network error, HTTP error
    status, no matching URLs in the page) is logged and reported to the
    caller as an empty list rather than as an exception.

    Args:
        album_url: URL of the album page to download.
        timeout: Seconds to wait for the server, passed straight to
            ``requests.get``. Without a timeout the request could block
            forever on an unresponsive server.

    Returns:
        A list of photo URLs, reversed relative to their order in the page
        source, or ``[]`` if the album could not be scraped.
    """
    logger.info('Scraping Google Photos album at: %s', album_url)

    try:
        response = requests.get(album_url, timeout=timeout)
        # Fail with the real HTTP error instead of scraping an error page
        # and misreporting it as "No photos found.".
        response.raise_for_status()
        photo_urls = get_photos_from_html(response.text)
    except Exception as err:
        # Deliberately broad: callers rely on this function never raising.
        logger.error('Google Photos scraping failed:\n%s', err)
        return []

    if not photo_urls:
        logger.error('Google Photos scraping failed:\nNo photos found.')
        return []

    logger.info("# of images: %s", len(photo_urls))

    photo_urls.reverse()  # makes the order appear the way it does on the website
    return photo_urls
    
if __name__ == "__main__":
    # Manual run: scrape the album behind the (placeholder) link and print
    # whatever list of URLs comes back.
    scraped_urls = get_photo_urls('https://photos.app.goo.gl/...')
    print(scraped_urls)