Skip to content

Instantly share code, notes, and snippets.

@PathToLife
Created April 17, 2020 18:14
Show Gist options
  • Select an option

  • Save PathToLife/e7945b11d07c2032b2ed6d5e0b753834 to your computer and use it in GitHub Desktop.

Select an option

Save PathToLife/e7945b11d07c2032b2ed6d5e0b753834 to your computer and use it in GitHub Desktop.

Revisions

  1. PathToLife created this gist Apr 17, 2020.
    41 changes: 41 additions & 0 deletions threaded_downloader.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,41 @@
    import os
    import requests
    from time import time as timer
    from typing import List, Tuple
    from multiprocessing.pool import ThreadPool
    import shutil


    def download_file(entry: Tuple[str, str]):
        """Download one URL to a local path, skipping files that already exist.

        The payload is streamed to ``<path>.crdownload`` and only moved to
        ``path`` once it has been written completely, so an interrupted
        transfer never leaves a truncated file at the final location.

        Args:
            entry: ``(path, uri)`` pair -- destination file path and source URL.

        Returns:
            ``path``, whether or not anything was downloaded (the file already
            existed, or the server answered with a status other than 200).

        Raises:
            requests.RequestException: If the HTTP request or streaming fails.
            OSError: If the directory or file cannot be created or written.
        """
        path, uri = entry

        # A bare filename has an empty dirname, and os.makedirs('') raises
        # FileNotFoundError, so only create the directory when there is one.
        dirname = os.path.dirname(path)
        if dirname:
            # exist_ok tolerates another worker creating the directory first.
            os.makedirs(dirname, exist_ok=True)

        if os.path.exists(path):
            return path

        partial_path = path + '.crdownload'

        # The context manager releases the connection on every exit path,
        # including the non-200 early return and any streaming error.
        with requests.get(uri, stream=True) as r:
            if r.status_code != 200:
                return path
            try:
                with open(partial_path, 'wb') as f:
                    # Iterating the response directly yields 128-byte chunks;
                    # larger chunks mean far fewer write calls.
                    for chunk in r.iter_content(chunk_size=64 * 1024):
                        f.write(chunk)
                shutil.move(partial_path, path)
            except BaseException:
                # Do not leave a stale partial file behind; the original error
                # is what the caller needs to see, so cleanup is best-effort.
                try:
                    os.remove(partial_path)
                except OSError:
                    pass
                raise
        return path


    def bulk_download(path_url_list: List[Tuple[str, str]], num_threads: int = 8):
        """Download many files concurrently and print each path as it completes.

        Every entry is handed to ``download_file`` on a pool of worker threads.
        Results are printed in completion order, not input order, followed by
        the total elapsed wall-clock time in seconds.

        Args:
            path_url_list: ``(path, uri)`` pairs -- destination file path and
                source URL for each download.
            num_threads: Number of worker threads in the pool (default 8).
        """
        start = timer()

        print(f"Downloading {len(path_url_list)} files...")

        # Leaving the ``with`` block terminates the pool, so the results must
        # be consumed inside it; this also stops the worker threads if a
        # download raises instead of leaking them.
        with ThreadPool(num_threads) as pool:
            for res in pool.imap_unordered(download_file, path_url_list):
                print(res)

        print(f"Elapsed Time: {timer() - start}")