Last active
May 1, 2020 18:44
-
-
Save palindrom615/d16082ad8d9dd6841eaf20b6e9c1f6f7 to your computer and use it in GitHub Desktop.
Revisions
-
palindrom615 revised this gist
May 1, 2020 . 3 changed files with 97 additions and 49 deletions. There are no files selected for viewing
#!/usr/bin/env python3
"""Download photos from a Naver Entertain photo issue, filtered by title.

Usage: <this script> IDOL_NAME

Producer threads page through the issue's item list and queue the image URL
of every entry whose title contains IDOL_NAME.  Consumer threads download the
queued URLs into ``img/IDOL_NAME`` next to this script.
"""
# NOTE: the gist revision that introduced this file also added a three-line
# ignore file listing `.vscode`, `img` and `.DS_Store` (presumably
# `.gitignore` -- confirm against the gist).
import os
import subprocess
import sys
import threading
import time
from queue import Queue

try:
    import requests
except ImportError:
    # pip has no supported in-process API (`pip._internal` may change without
    # notice), so bootstrap the dependency through the command line instead,
    # and only when it is actually missing.
    subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'requests'])
    import requests

PAGER_URL = "https://entertain.naver.com/photo/issueItemList.json"
REQUEST_TIMEOUT = 30          # seconds; a stalled connection must not hang a thread
CHUNK_SIZE = 64 * 1024        # bytes written to disk per iteration
START_STAGGER_SECONDS = 2     # pause between producer start-ups (from the original)

# Sentinel put on the queue once per consumer to tell it to stop.
_STOP = None

# URLs found by the producers, waiting to be downloaded by the consumers.
dlqueue = Queue()


class ProducerThread(threading.Thread):
    """Scan every ``total_thread``-th page of an issue and queue matching URLs.

    Args:
        cid: Issue id sent to the pager endpoint as ``cid``.
        search_query: Substring an entry's ``title`` must contain.
        total_thread: Number of producers; also the page stride.
        this_thread_num: Zero-based index of this producer.
    """

    def __init__(self, cid, search_query, total_thread, this_thread_num):
        super().__init__()
        self.cid = cid
        self.search_query = search_query
        self.total_thread = total_thread
        self.this_thread_num = this_thread_num

    def run(self):
        """Fetch pages until an empty one is returned, queueing matching URLs."""
        # Pages are assumed to be 1-based (TODO confirm against the endpoint):
        # starting producer 0 at page 0 risks an empty first response, which
        # would make it stop at once and skip its whole share of pages.
        page = self.this_thread_num + 1
        while True:
            print('page: ' + str(page), end="\r")
            params = {'cid': self.cid, 'page': page}
            response = requests.get(
                PAGER_URL, params=params, timeout=REQUEST_TIMEOUT)
            response.raise_for_status()
            entries = response.json()['results'][0]['thumbnails']
            if not entries:
                break
            page += self.total_thread
            for entry in entries:
                if self.search_query in entry['title']:
                    # Drop the query string from the thumbnail URL.
                    dlqueue.put(entry['thumbUrl'].split('?')[0])


class ConsumerThread(threading.Thread):
    """Download queued URLs into ``download_path`` until a stop sentinel arrives."""

    def __init__(self, download_path):
        super().__init__()
        self.download_path = download_path

    def run(self):
        """Process the queue; one failed download does not stop the thread."""
        while True:
            # Blocking get + sentinel: checking `empty()` before `get()` races
            # with the other consumers and would also end this thread while
            # the producers are still paging.
            url = dlqueue.get()
            if url is _STOP:
                break
            try:
                download(url, self.download_path)
            except (requests.RequestException, OSError) as err:
                print('failed to download {}: {}'.format(url, err),
                      file=sys.stderr)


def download(url, download_path):
    """Stream ``url`` into ``download_path`` and return the written file's path.

    The file is named after the last path segment of ``url``.

    Raises:
        requests.RequestException: on connection errors, timeouts or a
            non-success HTTP status (no file is created in that case).
        OSError: if the target file cannot be written.
    """
    name = os.path.join(download_path, url.split('/')[-1])
    with requests.get(url, stream=True, timeout=REQUEST_TIMEOUT) as res:
        res.raise_for_status()
        with open(name, 'wb') as file:
            for chunk in res.iter_content(chunk_size=CHUNK_SIZE):
                file.write(chunk)
    return name


async def download_async(url, download_path):
    """Asynchronous variant of :func:`download` built on ``aiohttp``.

    ``aiohttp`` is imported here rather than at module level so the threaded
    downloader keeps working when the package is not installed.
    """
    import aiohttp

    filename = os.path.join(download_path, url.split('/')[-1])
    async with aiohttp.ClientSession() as session:
        async with session.get(url) as res:
            res.raise_for_status()
            with open(filename, 'wb') as file:
                # The response body is an async stream; a plain `for` loop
                # cannot iterate it.
                async for chunk in res.content.iter_chunked(CHUNK_SIZE):
                    file.write(chunk)
    return filename


def main():
    """Parse the command line, start the worker threads and wait for them."""
    if len(sys.argv) < 2:
        sys.exit('usage: {} IDOL_NAME'.format(sys.argv[0]))

    CID = "1047153"
    idol_name = sys.argv[1]
    download_path = os.path.join(
        os.path.dirname(os.path.realpath(__file__)), 'img', idol_name)
    os.makedirs(download_path, exist_ok=True)

    total_thread = 8

    # Consumers start first; they simply block until URLs arrive.
    consumers = [ConsumerThread(download_path=download_path)
                 for _ in range(total_thread)]
    for consumer in consumers:
        consumer.start()

    producers = []
    for i in range(total_thread):
        producer = ProducerThread(cid=CID, search_query=idol_name,
                                  total_thread=total_thread, this_thread_num=i)
        producer.start()
        producers.append(producer)
        # Kept from the original script, presumably so the requests do not
        # all start at the same moment.
        time.sleep(START_STAGGER_SECONDS)

    # Only after every producer has finished is the queue content final, so
    # only then may the consumers be told to stop.
    for producer in producers:
        producer.join()
    for _ in consumers:
        dlqueue.put(_STOP)
    for consumer in consumers:
        consumer.join()


if __name__ == '__main__':
    main()
palindrom615 revised this gist
Sep 3, 2019 . 1 changed file with 35 additions and 28 deletions. There are no files selected for viewing
#!/usr/bin/env python3
"""Download photos from a Naver Entertain photo issue, filtered by title.

Usage: <this script> IDOL_NAME

Pages through the issue's item list, appends the image URL of every entry
whose title contains IDOL_NAME to ``img/IDOL_NAME/urls.txt`` and downloads
each image into that directory.
"""
import os
import subprocess
import sys

try:
    import requests
except ImportError:
    # pip has no supported in-process API (`pip._internal` may change without
    # notice), so bootstrap the dependency through the command line instead,
    # and only when it is actually missing.
    subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'requests'])
    import requests

PAGER_URL = "https://entertain.naver.com/photo/issueItemList.json"
REQUEST_TIMEOUT = 30      # seconds; a stalled connection must not hang the run
CHUNK_SIZE = 64 * 1024    # bytes written to disk per iteration


def pager(cid, sq):
    """Yield, page by page, the image URLs whose entry title contains ``sq``.

    Args:
        cid: Issue id sent to the pager endpoint as ``cid``.
        sq: Substring an entry's ``title`` must contain.

    Yields:
        One list of URLs per non-empty page (the list itself may be empty
        when no title on that page matches).  Iteration ends at the first
        page that has no entries.
    """
    page = 1
    while True:
        print('page: ' + str(page), end="\r")
        params = {'cid': cid, 'page': page}
        response = requests.get(
            PAGER_URL, params=params, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
        entries = response.json()['results'][0]['thumbnails']
        if not entries:
            return
        page += 1
        # Drop the query string from each thumbnail URL.
        yield [entry['thumbUrl'].split('?')[0]
               for entry in entries if sq in entry['title']]


def download(url, download_path):
    """Stream ``url`` into ``download_path`` and return the written file's path.

    The file is named after the last path segment of ``url``.

    Raises:
        requests.RequestException: on connection errors, timeouts or a
            non-success HTTP status (no file is created in that case).
        OSError: if the target file cannot be written.
    """
    name = os.path.join(download_path, url.split('/')[-1])
    with requests.get(url, stream=True, timeout=REQUEST_TIMEOUT) as res:
        res.raise_for_status()
        with open(name, 'wb') as file:
            for chunk in res.iter_content(chunk_size=CHUNK_SIZE):
                file.write(chunk)
    return name


def main():
    """Record and download every matching image of the hard-coded issue."""
    if len(sys.argv) < 2:
        sys.exit('usage: {} IDOL_NAME'.format(sys.argv[0]))

    CID = "1047153"
    idol_name = sys.argv[1]
    download_path = os.path.join(
        os.path.dirname(os.path.realpath(__file__)), 'img', idol_name)
    os.makedirs(download_path, exist_ok=True)

    # Open the URL log once and let `with` close it; reopening it for every
    # page without closing leaked one file handle per page.
    with open(os.path.join(download_path, 'urls.txt'), 'a') as url_log:
        for urls in pager(CID, idol_name):
            if not urls:
                continue
            url_log.write('\n'.join(urls) + '\n')
            # Make the log durable before the slow downloads start.
            url_log.flush()
            for url in urls:
                download(url, download_path)


if __name__ == '__main__':
    main()
palindrom615 revised this gist
Sep 3, 2019 . 1 changed file with 9 additions and 8 deletions. There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -4,11 +4,12 @@ import os, sys URL = "https://entertain.naver.com/photo/issueItemList.json" #cid = sys.argv[1] cid = "1047153" i_name = sys.argv[1] this_dir = os.path.dirname(os.path.realpath(__file__)) dl_dir = this_dir + '/img/'+ i_name if not os.path.exists(dl_dir): os.makedirs(dl_dir) @@ -17,15 +18,15 @@ def pager(cid): page = 1 while True: params = {'cid': cid, 'page': page} entries = requests.get(URL, params=params).json()['results'][0]['thumbnails'] if len(entries) == 0: break page += 1 yield [x['thumbUrl'].split('?')[0] for x in entries if i_name in x['title']] def download(url): name = dl_dir + '/' + url.split('/')[-1] print(name) with requests.get(url) as res: with open(name, 'wb') as file: for chunk in res: -
palindrom615 created this gist
Sep 2, 2019 . There are no files selected for viewing
"""Download every photo of a Naver Entertain photo issue.

Usage: <this script> CID

Pages through the issue's item list, appends each page's image URLs to
``img/CID/urls.txt`` next to this script and downloads the images into the
same directory.
"""
import os
import sys
# TODO: downloads are sequential; ThreadPool/Pool are imported but not used yet.
from multiprocessing import Pool
from multiprocessing.pool import ThreadPool

import requests

URL = "https://entertain.naver.com/photo/issueItemList.json"
REQUEST_TIMEOUT = 30      # seconds; a stalled connection must not hang the run
CHUNK_SIZE = 64 * 1024    # bytes written to disk per iteration

if len(sys.argv) < 2:
    sys.exit('usage: {} CID'.format(sys.argv[0]))

cid = sys.argv[1]
# Resolve the script's real location: `os.path.dirname(sys.argv[0])` is ''
# when the script is started as `python script.py`, which turned the target
# into `/img/<cid>` at the filesystem root.
this_dir = os.path.dirname(os.path.realpath(__file__))
dl_dir = os.path.join(this_dir, 'img', cid)
os.makedirs(dl_dir, exist_ok=True)


def pager(cid):
    """Yield one list of image URLs per page of issue ``cid``.

    Iteration ends at the first page that has no thumbnails.
    """
    page = 1
    while True:
        params = {'cid': cid, 'page': page}
        response = requests.get(URL, params=params, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
        thumbnails = response.json()['results'][0]['thumbnails']
        # Drop the query string from each thumbnail URL.
        urls = [item['thumbUrl'].split('?')[0] for item in thumbnails]
        if not urls:
            break
        yield urls
        page += 1


def download(url):
    """Stream ``url`` into the module-level ``dl_dir`` and return the file path.

    The file is named after the last path segment of ``url``.

    Raises:
        requests.RequestException: on connection errors, timeouts or a
            non-success HTTP status (no file is created in that case).
        OSError: if the target file cannot be written.
    """
    name = os.path.join(dl_dir, url.split('/')[-1])
    with requests.get(url, stream=True, timeout=REQUEST_TIMEOUT) as res:
        res.raise_for_status()
        with open(name, 'wb') as file:
            for chunk in res.iter_content(chunk_size=CHUNK_SIZE):
                file.write(chunk)
    return name


# Open the URL log once and let `with` close it; reopening it for every page
# without closing leaked one file handle per page.
with open(os.path.join(dl_dir, 'urls.txt'), 'a') as url_log:
    for page_urls in pager(cid):
        # One URL per line, each newline-terminated, so appended runs never
        # start with or leave behind a blank line.
        url_log.write('\n'.join(page_urls) + '\n')
        url_log.flush()
        for url in page_urls:
            download(url)