Last active
September 27, 2020 08:13
-
-
Save jianjieluo/7b5f07992fe408a6201a1a90d25dc8ce to your computer and use it in GitHub Desktop.
Revisions
-
Jianjie(JJ) Luo revised this gist
Sep 27, 2020. 1 changed file with 3 additions and 3 deletions. There are no files selected for viewing.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -42,14 +42,14 @@ def main(): if not os.path.exists(output_dir): os.makedirs(output_dir) total_len = min(len(lines), args.end - args.begin + 1) with tqdm(total=total_len, ascii=True) as pbar: for i, line in enumerate(lines): if i < args.begin or i > args.end: continue pbar.update(1) gid = '%s_%d' % (split, i) gifUrl = line -
Jianjie(JJ) Luo created this gist
Sep 25, 2020. There are no files selected for viewing.
"""Download the GIFs listed in a split file, one URL per line.

Line ``i`` (0-based) of the file given by ``--url`` is saved as
``gifs/<split>/<split>_<i>.<ext>``, where ``<split>`` is the file's base name
up to its first dot.  Only line indices in the inclusive range
``[--begin, --end]`` are processed, and targets that already exist on disk
are skipped, so an interrupted run can simply be restarted.

Failed downloads are recorded in ``bad_gid_HTTP_<begin>.txt`` (HTTP errors,
tab-separated with the status code) and ``bad_gid_URL_<begin>.txt`` (other
URL errors).  Both log files are truncated at the start of every run.
"""

import os
import sys
import csv
import argparse
import socket
import urllib.error
import urllib.request
from urllib.parse import quote

import numpy as np
import requests
from tqdm import tqdm

# Applies to every socket opened afterwards, including the ones urllib uses,
# so a stalled server cannot hang the whole run.
socket.setdefaulttimeout(10)

OUTPUT_ROOT = 'gifs'

# Browser-like User-Agent installed on the global urllib opener in main().
USER_AGENT = ('Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 '
              '(KHTML, like Gecko) Chrome/36.0.1941.0 Safari/537.36')


def parse_args():
    """Parse the command-line options.

    Returns:
        argparse.Namespace with ``url`` (path of the URL list file),
        ``begin`` and ``end`` (first and last line index to download,
        both inclusive).
    """
    parser = argparse.ArgumentParser(
        description='Arg parser'
    )
    parser.add_argument('--url', default='data/splits/val.txt', type=str,
                        help='text file with one GIF URL per line')
    parser.add_argument('--begin', default=0, type=int,
                        help='first line index to download (inclusive)')
    parser.add_argument('--end', default=80000, type=int,
                        help='last line index to download (inclusive)')
    return parser.parse_args()


def _index_range(num_lines, begin, end):
    """Return the line indices in ``[begin, end]`` that exist in the file.

    The result is clamped to ``[0, num_lines)`` and is empty when the
    requested window does not overlap the file at all.
    """
    start = max(begin, 0)
    # max(start, ...) keeps the range empty (never negative-indexed) when
    # ``end`` lies before ``start``.
    stop = max(start, min(end + 1, num_lines))
    return range(start, stop)


def _gif_extension(url):
    """Return 'gif' or 'gifv' from the URL's last dot-suffix, else 'gif'."""
    suffix = url.split('.')[-1]
    return suffix if suffix in ('gif', 'gifv') else 'gif'


def _download(url, out_path):
    """Fetch ``url`` into ``out_path`` without ever leaving a partial file.

    The data is written to ``out_path + '.part'`` and moved into place only
    after the transfer completed.  Otherwise a truncated file from a timed
    out transfer would be mistaken for a finished download on the next run.

    Raises:
        urllib.error.HTTPError, urllib.error.URLError, or any other
        exception raised by ``urllib.request.urlretrieve``.
    """
    tmp_path = out_path + '.part'
    try:
        # NOTE: '%' is not in the safe set, so a URL that is already
        # percent-encoded gets encoded a second time.
        urllib.request.urlretrieve(quote(url, safe=':/=&?'), tmp_path)
        os.replace(tmp_path, out_path)
    finally:
        # Only present when the transfer failed part-way through.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)


def main():
    """Download every selected GIF and log the ones that fail."""
    args = parse_args()
    print(args)

    # urlretrieve() uses the globally installed opener, so the header has to
    # be set there.
    opener = urllib.request.build_opener()
    opener.addheaders = [('User-Agent', USER_AGENT)]
    urllib.request.install_opener(opener)

    with open(args.url) as fid:
        lines = [line.strip() for line in fid]

    split = os.path.basename(args.url).split('.')[0]
    output_dir = os.path.join(OUTPUT_ROOT, split)
    os.makedirs(output_dir, exist_ok=True)

    indices = _index_range(len(lines), args.begin, args.end)
    http_log_path = 'bad_gid_HTTP_' + str(args.begin) + '.txt'
    url_log_path = 'bad_gid_URL_' + str(args.begin) + '.txt'

    with open(http_log_path, 'w') as http_log, \
            open(url_log_path, 'w') as url_log:
        with tqdm(total=len(indices), ascii=True) as pbar:
            for i in indices:
                # One tick per selected line, including already-downloaded
                # ones, so the bar always ends at its total.
                pbar.update(1)

                gif_url = lines[i]
                gid = '%s_%d' % (split, i)
                out_path = os.path.join(
                    output_dir, gid + '.' + _gif_extension(gif_url))
                if os.path.exists(out_path):
                    continue

                try:
                    _download(gif_url, out_path)
                except urllib.error.HTTPError as e:
                    # Must come first: HTTPError is a subclass of URLError.
                    print('HTTPError: {}'.format(e.code))
                    http_log.write(gid + '\t' + str(e.code) + '\n')
                except urllib.error.URLError as e:
                    print('URLError: {}'.format(e.reason))
                    url_log.write(gid + '\n')
                except Exception as e:
                    # Best effort: keep going on any other failure, but let
                    # KeyboardInterrupt/SystemExit stop the run.
                    print('Can not download the ' + gid + ' gif: ' + repr(e))

    print('finish')


if __name__ == '__main__':
    main()