Skip to content

Instantly share code, notes, and snippets.

@palindrom615
Last active May 1, 2020 18:44
Show Gist options
  • Select an option

  • Save palindrom615/d16082ad8d9dd6841eaf20b6e9c1f6f7 to your computer and use it in GitHub Desktop.

Select an option

Save palindrom615/d16082ad8d9dd6841eaf20b6e9c1f6f7 to your computer and use it in GitHub Desktop.

Revisions

  1. palindrom615 revised this gist May 1, 2020. 3 changed files with 97 additions and 49 deletions.
    3 changes: 3 additions & 0 deletions .gitignore
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,3 @@
    .vscode
    img
    .DS_Store
    49 changes: 0 additions & 49 deletions app.py
    Original file line number Diff line number Diff line change
    @@ -1,49 +0,0 @@
    #!/usr/bin/env python3

    from pip._internal import main as pipmain

    pipmain(['install', 'requests'])

    import requests
    # from multiprocessing.pool import ThreadPool
    # from multiprocessing import Pool
    import os, sys

    def pager(cid, sq):
    pager_url = "https://entertain.naver.com/photo/issueItemList.json"
    page = 1
    entries = ['__dummy_initial_value__']
    while entries:
    print('page: ' + str(page), end="\r")
    params = {'cid': cid, 'page': page}
    entries = requests.get(pager_url, params=params).json()['results'][0]['thumbnails']
    page += 1
    yield [entry['thumbUrl'].split('?')[0] for entry in entries if sq in entry['title']]

    def download(url, download_path):
    name = download_path + '/' + url.split('/')[-1]
    with requests.get(url) as res:
    with open(name, 'wb') as file:
    for chunk in res:
    file.write(chunk)
    return name

    def main():
    #cid = sys.argv[1]
    CID = "1047153"
    idol_name = sys.argv[1]
    download_path = os.path.dirname(os.path.realpath(__file__)) + '/img/'+ idol_name

    if not os.path.exists(download_path):
    os.makedirs(download_path)

    # with ThreadPool(8) as t:
    for urls in pager(CID, idol_name):
    if urls:
    file=open(download_path + '/urls.txt', 'a')
    file.write('\n'.join(urls) + '\n')
    for url in urls:
    download(url, download_path)
    # t.imap_unordered(download, i)

    main()
    94 changes: 94 additions & 0 deletions dl.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,94 @@
    #!/usr/bin/env python3

    from pip._internal import main as pipmain

    pipmain(['install', 'requests'])

    import sys
    import os
    from queue import Queue
    import threading
    # import aiohttp
    # import asyncio
    import requests
    import time

    dlqueue = Queue()


    class ProducerThread(threading.Thread):
    def __init__(self, cid, search_query, total_thread, this_thread_num):
    super(ProducerThread, self).__init__()
    self.cid = cid
    self.search_query = search_query
    self.total_thread = total_thread
    self.this_thread_num = this_thread_num
    return

    def run(self):
    cid = self.cid
    search_query = self.search_query
    pager_url = "https://entertain.naver.com/photo/issueItemList.json"
    page = self.this_thread_num
    while True:
    print('page: ' + str(page), end="\r")
    params = {'cid': cid, 'page': page}
    entries = requests.get(pager_url, params=params).json()[
    'results'][0]['thumbnails']
    if not entries:
    break
    page += self.total_thread
    urls = [entry['thumbUrl'].split('?')[0]
    for entry in entries if search_query in entry['title']]
    for url in urls:
    dlqueue.put(url)
    return


    class ConsumerThread(threading.Thread):
    def __init__(self, download_path):
    super(ConsumerThread, self).__init__()
    self.download_path = download_path
    return

    def run(self):
    while not dlqueue.empty():
    url = dlqueue.get()
    download(url, self.download_path)
    return

    def download(url, download_path):
    name = download_path + '/' + url.split('/')[-1]
    with requests.get(url) as res:
    with open(name, 'wb') as file:
    for chunk in res:
    file.write(chunk)
    return name

async def download_async(url, download_path):
    # NOTE(review): dead/broken code — `aiohttp` is never imported (its
    # import at the top of the file is commented out), so calling this
    # coroutine raises NameError.  Also `for chunk in res` is a synchronous
    # loop over an aiohttp response; streaming an aiohttp body appears to
    # require an `async for` over the response content — confirm against
    # the aiohttp docs before reviving this.
    filename = download_path + '/' + url.split('/')[-1]
    async with aiohttp.ClientSession() as session:
        async with session.get(url) as res:
            with open(filename, 'wb') as file:
                for chunk in res:
                    file.write(chunk)
    return filename

    if __name__ == '__main__':
    #cid = sys.argv[1]
    CID = "1047153"
    idol_name = sys.argv[1]
    download_path = os.path.dirname(
    os.path.realpath(__file__)) + '/img/' + idol_name

    if not os.path.exists(download_path):
    os.makedirs(download_path)
    total_thread = 8
    for i in range(total_thread):
    p = ProducerThread(cid=CID, search_query=idol_name, total_thread=total_thread, this_thread_num=i)
    c = ConsumerThread(download_path=download_path)

    p.start()
    time.sleep(2)
    c.start()
    time.sleep(2)
  2. palindrom615 revised this gist Sep 3, 2019. 1 changed file with 35 additions and 28 deletions.
    63 changes: 35 additions & 28 deletions app.py
    Original file line number Diff line number Diff line change
    @@ -1,42 +1,49 @@
    import requests
    from multiprocessing.pool import ThreadPool
    from multiprocessing import Pool
    import os, sys
    #!/usr/bin/env python3

    URL = "https://entertain.naver.com/photo/issueItemList.json"
    #cid = sys.argv[1]
    cid = "1047153"
    i_name = sys.argv[1]
    from pip._internal import main as pipmain

    this_dir = os.path.dirname(os.path.realpath(__file__))
    dl_dir = this_dir + '/img/'+ i_name
    pipmain(['install', 'requests'])

    if not os.path.exists(dl_dir):
    os.makedirs(dl_dir)
    import requests
    # from multiprocessing.pool import ThreadPool
    # from multiprocessing import Pool
    import os, sys

    def pager(cid):
    def pager(cid, sq):
    pager_url = "https://entertain.naver.com/photo/issueItemList.json"
    page = 1
    while True:
    entries = ['__dummy_initial_value__']
    while entries:
    print('page: ' + str(page), end="\r")
    params = {'cid': cid, 'page': page}
    entries = requests.get(URL, params=params).json()['results'][0]['thumbnails']
    if len(entries) == 0:
    break
    entries = requests.get(pager_url, params=params).json()['results'][0]['thumbnails']
    page += 1
    yield [x['thumbUrl'].split('?')[0] for x in entries if i_name in x['title']]
    yield [entry['thumbUrl'].split('?')[0] for entry in entries if sq in entry['title']]

    def download(url):
    name = dl_dir + '/' + url.split('/')[-1]
    print(name)
    def download(url, download_path):
    name = download_path + '/' + url.split('/')[-1]
    with requests.get(url) as res:
    with open(name, 'wb') as file:
    for chunk in res:
    file.write(chunk)
    return name

    # with ThreadPool(8) as t:
    for i in pager(cid):
    file=open(dl_dir + '/urls.txt', 'a')
    file.write('\n' + '\n'.join(i))
    for url in i:
    download(url)
    # t.imap_unordered(download, i)
    def main():
    #cid = sys.argv[1]
    CID = "1047153"
    idol_name = sys.argv[1]
    download_path = os.path.dirname(os.path.realpath(__file__)) + '/img/'+ idol_name

    if not os.path.exists(download_path):
    os.makedirs(download_path)

    # with ThreadPool(8) as t:
    for urls in pager(CID, idol_name):
    if urls:
    file=open(download_path + '/urls.txt', 'a')
    file.write('\n'.join(urls) + '\n')
    for url in urls:
    download(url, download_path)
    # t.imap_unordered(download, i)

    main()
  3. palindrom615 revised this gist Sep 3, 2019. 1 changed file with 9 additions and 8 deletions.
    17 changes: 9 additions & 8 deletions app.py
    Original file line number Diff line number Diff line change
    @@ -4,11 +4,12 @@
    import os, sys

    URL = "https://entertain.naver.com/photo/issueItemList.json"
    cid = sys.argv[1]
    # cid = "1058865"
    #cid = sys.argv[1]
    cid = "1047153"
    i_name = sys.argv[1]

    this_dir = os.path.dirname(sys.argv[0])
    dl_dir = this_dir + '/img/'+ cid
    this_dir = os.path.dirname(os.path.realpath(__file__))
    dl_dir = this_dir + '/img/'+ i_name

    if not os.path.exists(dl_dir):
    os.makedirs(dl_dir)
    @@ -17,15 +18,15 @@ def pager(cid):
    page = 1
    while True:
    params = {'cid': cid, 'page': page}
    response = requests.get(URL, params=params).json()
    res = list(map(lambda x: x['thumbUrl'].split('?')[0], response['results'][0]['thumbnails']))
    yield res
    if len(res) == 0:
    entries = requests.get(URL, params=params).json()['results'][0]['thumbnails']
    if len(entries) == 0:
    break
    page += 1
    yield [x['thumbUrl'].split('?')[0] for x in entries if i_name in x['title']]

    def download(url):
    name = dl_dir + '/' + url.split('/')[-1]
    print(name)
    with requests.get(url) as res:
    with open(name, 'wb') as file:
    for chunk in res:
  4. palindrom615 created this gist Sep 2, 2019.
    41 changes: 41 additions & 0 deletions app.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,41 @@
import requests
from multiprocessing.pool import ThreadPool
from multiprocessing import Pool
import os, sys

# Endpoint that lists photo entries for a Naver entertainment channel.
URL = "https://entertain.naver.com/photo/issueItemList.json"
cid = sys.argv[1]
# cid = "1058865"

# Images are saved under ./img/<cid> next to the script.
# NOTE(review): os.path.dirname(sys.argv[0]) can be "" when the script is
# invoked by bare name, which makes dl_dir a relative path; a later
# revision of this gist switched to os.path.realpath(__file__).
this_dir = os.path.dirname(sys.argv[0])
dl_dir = this_dir + '/img/'+ cid

if not os.path.exists(dl_dir):
    os.makedirs(dl_dir)

def pager(cid):
    """Yield one list of thumbnail URLs (query strings stripped) per page.

    Stops after the first page whose result list is empty.  Note the empty
    list is yielded *before* the loop breaks, so the caller also receives
    one trailing [].
    """
    page = 1
    while True:
        params = {'cid': cid, 'page': page}
        response = requests.get(URL, params=params).json()
        res = list(map(lambda x: x['thumbUrl'].split('?')[0], response['results'][0]['thumbnails']))
        yield res
        if len(res) == 0:
            break
        page += 1

def download(url):
    """Download *url* into the module-level dl_dir and return the file path.

    The file name is the last path segment of the URL; the response body
    is written chunk by chunk.  The request is not streamed, so the whole
    body is fetched before iteration begins.
    """
    name = dl_dir + '/' + url.split('/')[-1]
    with requests.get(url) as res:
        with open(name, 'wb') as file:
            for chunk in res:
                file.write(chunk)
    return name

# with ThreadPool(8) as t:
# Drive the scrape: log each page's URLs, then download them serially.
for i in pager(cid):
    # NOTE(review): the log handle is opened each page and never closed —
    # this relies on interpreter cleanup to flush; a `with` block would
    # be safer.
    file=open(dl_dir + '/urls.txt', 'a')
    file.write('\n' + '\n'.join(i))
    for url in i:
        download(url)
    # t.imap_unordered(download, i)