Skip to content

Instantly share code, notes, and snippets.

@palindrom615
Last active May 1, 2020 18:44
Show Gist options
  • Select an option

  • Save palindrom615/d16082ad8d9dd6841eaf20b6e9c1f6f7 to your computer and use it in GitHub Desktop.

Select an option

Save palindrom615/d16082ad8d9dd6841eaf20b6e9c1f6f7 to your computer and use it in GitHub Desktop.

Revisions

  1. palindrom615 revised this gist May 1, 2020. 3 changed files with 97 additions and 49 deletions.
    3 changes: 3 additions & 0 deletions .gitignore
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,3 @@
    .vscode
    img
    .DS_Store
    49 changes: 0 additions & 49 deletions app.py
    Original file line number Diff line number Diff line change
    @@ -1,49 +0,0 @@
    #!/usr/bin/env python3

    from pip._internal import main as pipmain

    pipmain(['install', 'requests'])

    import requests
    # from multiprocessing.pool import ThreadPool
    # from multiprocessing import Pool
    import os, sys

    def pager(cid, sq):
    pager_url = "https://entertain.naver.com/photo/issueItemList.json"
    page = 1
    entries = ['__dummy_initial_value__']
    while entries:
    print('page: ' + str(page), end="\r")
    params = {'cid': cid, 'page': page}
    entries = requests.get(pager_url, params=params).json()['results'][0]['thumbnails']
    page += 1
    yield [entry['thumbUrl'].split('?')[0] for entry in entries if sq in entry['title']]

    def download(url, download_path):
    name = download_path + '/' + url.split('/')[-1]
    with requests.get(url) as res:
    with open(name, 'wb') as file:
    for chunk in res:
    file.write(chunk)
    return name

    def main():
    #cid = sys.argv[1]
    CID = "1047153"
    idol_name = sys.argv[1]
    download_path = os.path.dirname(os.path.realpath(__file__)) + '/img/'+ idol_name

    if not os.path.exists(download_path):
    os.makedirs(download_path)

    # with ThreadPool(8) as t:
    for urls in pager(CID, idol_name):
    if urls:
    file=open(download_path + '/urls.txt', 'a')
    file.write('\n'.join(urls) + '\n')
    for url in urls:
    download(url, download_path)
    # t.imap_unordered(download, i)

    main()
    94 changes: 94 additions & 0 deletions dl.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,94 @@
    #!/usr/bin/env python3

    from pip._internal import main as pipmain

    pipmain(['install', 'requests'])

    import sys
    import os
    from queue import Queue
    import threading
    # import aiohttp
    # import asyncio
    import requests
    import time

    dlqueue = Queue()


    class ProducerThread(threading.Thread):
    def __init__(self, cid, search_query, total_thread, this_thread_num):
    super(ProducerThread, self).__init__()
    self.cid = cid
    self.search_query = search_query
    self.total_thread = total_thread
    self.this_thread_num = this_thread_num
    return

    def run(self):
    cid = self.cid
    search_query = self.search_query
    pager_url = "https://entertain.naver.com/photo/issueItemList.json"
    page = self.this_thread_num
    while True:
    print('page: ' + str(page), end="\r")
    params = {'cid': cid, 'page': page}
    entries = requests.get(pager_url, params=params).json()[
    'results'][0]['thumbnails']
    if not entries:
    break
    page += self.total_thread
    urls = [entry['thumbUrl'].split('?')[0]
    for entry in entries if search_query in entry['title']]
    for url in urls:
    dlqueue.put(url)
    return


    class ConsumerThread(threading.Thread):
    def __init__(self, download_path):
    super(ConsumerThread, self).__init__()
    self.download_path = download_path
    return

    def run(self):
    while not dlqueue.empty():
    url = dlqueue.get()
    download(url, self.download_path)
    return

    def download(url, download_path):
    name = download_path + '/' + url.split('/')[-1]
    with requests.get(url) as res:
    with open(name, 'wb') as file:
    for chunk in res:
    file.write(chunk)
    return name

async def download_async(url, download_path):
    # NOTE(review): dead/broken code — `aiohttp` is never imported (its
    # import at the top of the file is commented out), so calling this
    # coroutine raises NameError.  Also `for chunk in res` is a synchronous
    # loop over an aiohttp response; streaming an aiohttp body appears to
    # require an `async for` over the response content — confirm against
    # the aiohttp docs before reviving this.
    filename = download_path + '/' + url.split('/')[-1]
    async with aiohttp.ClientSession() as session:
        async with session.get(url) as res:
            with open(filename, 'wb') as file:
                for chunk in res:
                    file.write(chunk)
    return filename

    if __name__ == '__main__':
    #cid = sys.argv[1]
    CID = "1047153"
    idol_name = sys.argv[1]
    download_path = os.path.dirname(
    os.path.realpath(__file__)) + '/img/' + idol_name

    if not os.path.exists(download_path):
    os.makedirs(download_path)
    total_thread = 8
    for i in range(total_thread):
    p = ProducerThread(cid=CID, search_query=idol_name, total_thread=total_thread, this_thread_num=i)
    c = ConsumerThread(download_path=download_path)

    p.start()
    time.sleep(2)
    c.start()
    time.sleep(2)
  2. palindrom615 revised this gist Sep 3, 2019. 1 changed file with 35 additions and 28 deletions.
    63 changes: 35 additions & 28 deletions app.py
    Original file line number Diff line number Diff line change
    @@ -1,42 +1,49 @@
    import requests
    from multiprocessing.pool import ThreadPool
    from multiprocessing import Pool
    import os, sys
    #!/usr/bin/env python3

    URL = "https://entertain.naver.com/photo/issueItemList.json"
    #cid = sys.argv[1]
    cid = "1047153"
    i_name = sys.argv[1]
    from pip._internal import main as pipmain

    this_dir = os.path.dirname(os.path.realpath(__file__))
    dl_dir = this_dir + '/img/'+ i_name
    pipmain(['install', 'requests'])

    if not os.path.exists(dl_dir):
    os.makedirs(dl_dir)
    import requests
    # from multiprocessing.pool import ThreadPool
    # from multiprocessing import Pool
    import os, sys

    def pager(cid):
    def pager(cid, sq):
    pager_url = "https://entertain.naver.com/photo/issueItemList.json"
    page = 1
    while True:
    entries = ['__dummy_initial_value__']
    while entries:
    print('page: ' + str(page), end="\r")
    params = {'cid': cid, 'page': page}
    entries = requests.get(URL, params=params).json()['results'][0]['thumbnails']
    if len(entries) == 0:
    break
    entries = requests.get(pager_url, params=params).json()['results'][0]['thumbnails']
    page += 1
    yield [x['thumbUrl'].split('?')[0] for x in entries if i_name in x['title']]
    yield [entry['thumbUrl'].split('?')[0] for entry in entries if sq in entry['title']]

    def download(url):
    name = dl_dir + '/' + url.split('/')[-1]
    print(name)
    def download(url, download_path):
    name = download_path + '/' + url.split('/')[-1]
    with requests.get(url) as res:
    with open(name, 'wb') as file:
    for chunk in res:
    file.write(chunk)
    return name

    # with ThreadPool(8) as t:
    for i in pager(cid):
    file=open(dl_dir + '/urls.txt', 'a')
    file.write('\n' + '\n'.join(i))
    for url in i:
    download(url)
    # t.imap_unordered(download, i)
    def main():
    #cid = sys.argv[1]
    CID = "1047153"
    idol_name = sys.argv[1]
    download_path = os.path.dirname(os.path.realpath(__file__)) + '/img/'+ idol_name

    if not os.path.exists(download_path):
    os.makedirs(download_path)

    # with ThreadPool(8) as t:
    for urls in pager(CID, idol_name):
    if urls:
    file=open(download_path + '/urls.txt', 'a')
    file.write('\n'.join(urls) + '\n')
    for url in urls:
    download(url, download_path)
    # t.imap_unordered(download, i)

    main()
  3. palindrom615 revised this gist Sep 3, 2019. 1 changed file with 9 additions and 8 deletions.
    17 changes: 9 additions & 8 deletions app.py
    Original file line number Diff line number Diff line change
    @@ -4,11 +4,12 @@
    import os, sys

    URL = "https://entertain.naver.com/photo/issueItemList.json"
    cid = sys.argv[1]
    # cid = "1058865"
    #cid = sys.argv[1]
    cid = "1047153"
    i_name = sys.argv[1]

    this_dir = os.path.dirname(sys.argv[0])
    dl_dir = this_dir + '/img/'+ cid
    this_dir = os.path.dirname(os.path.realpath(__file__))
    dl_dir = this_dir + '/img/'+ i_name

    if not os.path.exists(dl_dir):
    os.makedirs(dl_dir)
    @@ -17,15 +18,15 @@ def pager(cid):
    page = 1
    while True:
    params = {'cid': cid, 'page': page}
    response = requests.get(URL, params=params).json()
    res = list(map(lambda x: x['thumbUrl'].split('?')[0], response['results'][0]['thumbnails']))
    yield res
    if len(res) == 0:
    entries = requests.get(URL, params=params).json()['results'][0]['thumbnails']
    if len(entries) == 0:
    break
    page += 1
    yield [x['thumbUrl'].split('?')[0] for x in entries if i_name in x['title']]

    def download(url):
    name = dl_dir + '/' + url.split('/')[-1]
    print(name)
    with requests.get(url) as res:
    with open(name, 'wb') as file:
    for chunk in res:
  4. palindrom615 created this gist Sep 2, 2019.
    41 changes: 41 additions & 0 deletions app.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,41 @@
import requests
from multiprocessing.pool import ThreadPool
from multiprocessing import Pool
import os, sys

# Endpoint that lists photo entries for a Naver entertainment channel.
URL = "https://entertain.naver.com/photo/issueItemList.json"
cid = sys.argv[1]
# cid = "1058865"

# Images are saved under ./img/<cid> next to the script.
# NOTE(review): os.path.dirname(sys.argv[0]) can be "" when the script is
# invoked by bare name, which makes dl_dir a relative path; a later
# revision of this gist switched to os.path.realpath(__file__).
this_dir = os.path.dirname(sys.argv[0])
dl_dir = this_dir + '/img/'+ cid

if not os.path.exists(dl_dir):
    os.makedirs(dl_dir)

def pager(cid):
    """Yield one list of thumbnail URLs (query strings stripped) per page.

    Stops after the first page whose result list is empty.  Note the empty
    list is yielded *before* the loop breaks, so the caller also receives
    one trailing [].
    """
    page = 1
    while True:
        params = {'cid': cid, 'page': page}
        response = requests.get(URL, params=params).json()
        res = list(map(lambda x: x['thumbUrl'].split('?')[0], response['results'][0]['thumbnails']))
        yield res
        if len(res) == 0:
            break
        page += 1

def download(url):
    """Download *url* into the module-level dl_dir and return the file path.

    The file name is the last path segment of the URL; the response body
    is written chunk by chunk.  The request is not streamed, so the whole
    body is fetched before iteration begins.
    """
    name = dl_dir + '/' + url.split('/')[-1]
    with requests.get(url) as res:
        with open(name, 'wb') as file:
            for chunk in res:
                file.write(chunk)
    return name

# with ThreadPool(8) as t:
# Drive the scrape: log each page's URLs, then download them serially.
for i in pager(cid):
    # NOTE(review): the log handle is opened each page and never closed —
    # this relies on interpreter cleanup to flush; a `with` block would
    # be safer.
    file=open(dl_dir + '/urls.txt', 'a')
    file.write('\n' + '\n'.join(i))
    for url in i:
        download(url)
    # t.imap_unordered(download, i)