Skip to content

Instantly share code, notes, and snippets.

@takidog
Last active December 30, 2020 06:35
Show Gist options
  • Select an option

  • Save takidog/e1dee25fb87c8a81ecb3370360274d77 to your computer and use it in GitHub Desktop.

Select an option

Save takidog/e1dee25fb87c8a81ecb3370360274d77 to your computer and use it in GitHub Desktop.

Revisions

  1. takidog revised this gist Dec 30, 2020. 1 changed file with 2 additions and 1 deletion.
    3 changes: 2 additions & 1 deletion crawler.py
    Original file line number Diff line number Diff line change
    @@ -11,7 +11,8 @@ def download_from_list(path: str, prefix: str, urls: list):
    img_request = requests.get(url_info['image_url'])
    sub_filename = '.png'
    if url_info.get("type", "") == 'animation':
    -sub_filename = '.gif'
    +# animation type is apng
    +sub_filename = '_apng.png'
    with open(f"{path}{prefix}{url_info['image_id']}{sub_filename}", 'wb') as e:
    e.write(img_request.content)

  2. takidog revised this gist Dec 29, 2020. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion crawler.py
    Original file line number Diff line number Diff line change
    @@ -77,5 +77,5 @@ def get_urls_from_store(store_url: str) -> list:
    download_from_list(
    path="download/",
    prefix="animate_",
    -urls=static_urls
    +urls=animation_sticker_urls
    )
  3. takidog created this gist Dec 29, 2020.
    81 changes: 81 additions & 0 deletions crawler.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,81 @@
    import requests
    from lxml import etree
    import json


    def download_from_list(path: str, prefix: str, urls: list):
        """Download every image described in ``urls`` and write it to disk.

        Each file is written to ``f"{path}{prefix}{image_id}{suffix}"``.
        ``path`` is used as a plain string prefix, so it must include its
        trailing separator (for example ``"download/"``).

        Args:
            path (str): leading part of every output file name.
            prefix (str): text placed between ``path`` and the image id.
            urls (list): list of dicts with "image_id" and "image_url" keys
                and an optional "type" key; "animation" selects the APNG
                file suffix, anything else gets ".png".
        """
        # Just for debug: axel or any other downloader could be used instead.
        for url_info in urls:
            image_url = url_info.get('image_url')
            if not image_url:
                # Entries can carry a None/empty URL (get_urls_from_store
                # stores None when the preview has no "animationUrl");
                # requests.get would raise on those, so skip them.
                continue

            img_request = requests.get(image_url)

            if url_info.get("type", "") == 'animation':
                # Animated stickers are APNG data, not GIF, so keep a .png
                # extension and tag the name instead.
                sub_filename = '_apng.png'
            else:
                sub_filename = '.png'

            target = f"{path}{prefix}{url_info['image_id']}{sub_filename}"
            with open(target, 'wb') as image_file:
                image_file.write(img_request.content)


    def get_urls_from_store(store_url: str) -> tuple:
        """Collect static and animated sticker image entries from a store page.

        The page is fetched, every ``<li>`` element carrying a
        ``data-preview`` attribute is located, and the JSON stored in that
        attribute is read for its "id", "staticUrl" and "animationUrl" keys.

        Args:
            store_url (str): store url.

        Raises:
            ValueError: the page request did not return status code 200.

        Returns:
            tuple: ``(static_urls, animation_sticker_urls)``. Both are lists
            with one dict per sticker, for example::

                {
                    "type": "static",
                    "image_id": "1234",
                    "image_url": "https://stick"
                }

            The animation list uses ``"type": "animation"``. "image_id" and
            "image_url" are None when the preview JSON lacks the key.
        """
        html = requests.get(store_url)
        if html.status_code != 200:
            raise ValueError("Get store html error.")

        # Parse the page and select every <li> that has a data-preview
        # attribute.
        tree = etree.HTML(html.text)
        preview_nodes = tree.xpath("//li[@data-preview]")

        animation_sticker_urls = []
        static_urls = []

        for node in preview_nodes:
            raw_preview = node.get("data-preview")
            if not raw_preview:
                # Attribute present but empty: nothing to decode.
                continue

            img_info = json.loads(raw_preview)
            image_id = img_info.get("id", None)
            static_urls.append(
                {
                    "type": "static",
                    "image_id": image_id,
                    "image_url": img_info.get("staticUrl", None),
                }
            )
            animation_sticker_urls.append(
                {
                    "type": "animation",
                    "image_id": image_id,
                    "image_url": img_info.get("animationUrl", None),
                }
            )

        return static_urls, animation_sticker_urls


    if __name__ == "__main__":
        import os

        # open() does not create missing directories, so make sure the
        # output directory exists before any file is written.
        os.makedirs("download/", exist_ok=True)

        # NOTE(review): the store URL is an empty string here; presumably it
        # has to be filled in with a real store page before running - confirm.
        static_urls, animation_sticker_urls = get_urls_from_store(
            "")
        download_from_list(
            path="download/",
            prefix="static_",
            urls=static_urls
        )
        # The animated pass must use the animation entries; passing
        # static_urls again would only re-download the static images
        # under the "animate_" prefix.
        download_from_list(
            path="download/",
            prefix="animate_",
            urls=animation_sticker_urls
        )