Skip to content

Instantly share code, notes, and snippets.

@jianjieluo
Last active September 27, 2020 08:13
Show Gist options
  • Select an option

  • Save jianjieluo/7b5f07992fe408a6201a1a90d25dc8ce to your computer and use it in GitHub Desktop.

Select an option

Save jianjieluo/7b5f07992fe408a6201a1a90d25dc8ce to your computer and use it in GitHub Desktop.

Revisions

  1. Jianjie(JJ) Luo revised this gist Sep 27, 2020. 1 changed file with 3 additions and 3 deletions.
    6 changes: 3 additions & 3 deletions download_tgif_test.py
    Original file line number Diff line number Diff line change
    @@ -42,14 +42,14 @@ def main():
    if not os.path.exists(output_dir):
    os.makedirs(output_dir)

    total_len = min(len(lines), args.end - args.begin)
    total_len = min(len(lines), args.end - args.begin + 1)
    with tqdm(total=total_len, ascii=True) as pbar:
    for i, line in enumerate(lines):
    pbar.update(1)

    if i < args.begin or i > args.end:
    continue

    pbar.update(1)

    gid = '%s_%d' % (split, i)
    gifUrl = line

  2. Jianjie(JJ) Luo created this gist Sep 25, 2020.
    84 changes: 84 additions & 0 deletions download_tgif_test.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,84 @@
    import os
    import sys
    import csv
    import argparse
    import numpy as np
    import urllib.request
    #from urllib.request import urlopen
    import socket
    socket.setdefaulttimeout(10)
    from urllib.parse import quote
    import urllib.request, urllib.error
    import requests
    from tqdm import tqdm

    # Root directory for downloaded GIFs; main() joins it with the split name
    # taken from the URL-list file name to form the output directory.
    OUTPUT_ROOT = 'gifs'

    def parse_args(argv=None):
        """Parse the command-line options of the GIF downloader.

        Args:
            argv: Optional list of argument strings. When None (the default)
                argparse falls back to ``sys.argv[1:]``, which is what the
                original zero-argument call did.

        Returns:
            argparse.Namespace with three attributes:
                url:   path of the text file to read (default
                       'data/splits/val.txt').
                begin: integer start index (default 0).
                end:   integer end index (default 80000).
        """
        parser = argparse.ArgumentParser(
            description='Arg parser'
        )
        parser.add_argument('--url', default='data/splits/val.txt', type=str,
                            help='path of the text file listing the GIF URLs')
        parser.add_argument('--begin', default=0, type=int,
                            help='index of the first line to process')
        parser.add_argument('--end', default=80000, type=int,
                            help='index of the last line to process')
        return parser.parse_args(argv)

    def main():
        """Download the GIFs listed in the URL file given on the command line.

        Reads one URL per line from ``args.url`` and downloads the lines whose
        index lies in ``[args.begin, args.end]`` (both ends inclusive) into
        ``OUTPUT_ROOT/<split>/<split>_<index>.<ext>``, where ``<split>`` is the
        URL file's base name up to its first dot. Files that already exist are
        skipped, so an interrupted run can be resumed.

        Failures are reported on stdout. HTTP errors are also appended to
        ``bad_gid_HTTP_<begin>.txt`` (gid and status code) and URL errors to
        ``bad_gid_URL_<begin>.txt`` (gid only).
        """
        args = parse_args()
        print(args)

        # Install a browser-like User-Agent for every urllib request made below.
        opener = urllib.request.build_opener()
        opener.addheaders = [('User-Agent', 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1941.0 Safari/537.36')]
        urllib.request.install_opener(opener)

        with open(args.url) as fid:
            lines = [line.strip() for line in fid]

        split = os.path.basename(args.url).split('.')[0]
        output_dir = os.path.join(OUTPUT_ROOT, split)
        os.makedirs(output_dir, exist_ok=True)

        # Clamp the inclusive [begin, end] window to the lines that exist so
        # the progress-bar total equals the number of items actually visited.
        first = max(args.begin, 0)
        last = min(args.end, len(lines) - 1)
        total_len = max(0, last - first + 1)

        http_log = 'bad_gid_HTTP_' + str(args.begin) + '.txt'
        url_log = 'bad_gid_URL_' + str(args.begin) + '.txt'

        # Context managers guarantee the failure logs are flushed and closed
        # even when the run is interrupted.
        with open(http_log, 'w') as fid1, open(url_log, 'w') as fid2:
            with tqdm(total=total_len, ascii=True) as pbar:
                for i in range(first, last + 1):
                    pbar.update(1)

                    gid = '%s_%d' % (split, i)
                    gifUrl = lines[i]

                    # Keep a .gif/.gifv suffix from the URL; default to .gif.
                    url_ext = gifUrl.split('.')[-1]
                    ext = url_ext if url_ext in ('gifv', 'gif') else 'gif'
                    out_path = os.path.join(output_dir, gid + '.' + ext)

                    if os.path.exists(out_path):
                        continue

                    # Download to a temporary name and rename on success, so a
                    # failed or timed-out transfer never leaves a truncated
                    # file that a later run would mistake for a finished one.
                    tmp_path = out_path + '.part'
                    try:
                        urllib.request.urlretrieve(
                            quote(gifUrl, safe=':/=&?'), tmp_path)
                        os.replace(tmp_path, out_path)
                    except urllib.error.HTTPError as e:
                        print('HTTPError: {}'.format(e.code))
                        fid1.write(gid + '\t' + str(e.code) + '\n')
                    except urllib.error.URLError as e:
                        print('URLError: {}'.format(e.reason))
                        fid2.write(gid + '\n')
                    except Exception:
                        # Best effort: report and move on. Unlike a bare
                        # except, this lets Ctrl-C / SystemExit stop the run.
                        print('Can not download the ' + gid + ' gif')
                    finally:
                        if os.path.exists(tmp_path):
                            os.remove(tmp_path)

        print('finish')

    # Run the downloader only when this file is executed as a script,
    # not when it is imported as a module.
    if __name__ == '__main__':
        main()