-
-
Save azam/4de9426ff3beb379ae9d to your computer and use it in GitHub Desktop.
Revisions
-
azamshul revised this gist
Sep 6, 2014 . 1 changed file with 46 additions and 31 deletions. There are no files selected for viewing.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -7,29 +7,44 @@ import shutil import urllib2 import socket import json import time import os USERNAME = "your_username_goes_here" TMP_FILE_NAME = "tmpfile" MAX_RETRIES = 5 SLEEP_TIME = 2 TIMEOUT = 5000 page = 1 has_more_page = True # Target Page api = "https://api.twitpic.com/2/users/show.json?username=%s&page=" % USERNAME # Get the data about the target page while has_more_page: print "Processing page: " + str(page) has_page_error = True for i in range(MAX_RETRIES): try: raw_data = urllib2.urlopen(api + str(page), timeout=TIMEOUT) has_page_error = False if i > 0: print "Retry successful page: " + str(page) break except urllib2.URLError, e: print "Failed retrieving page: " + str(page) time.sleep(SLEEP_TIME) except socket.timeout: print "Timeout retrieving page: " + str(page) time.sleep(SLEEP_TIME) if has_page_error: has_more_page = False break json_data = json.load(raw_data) @@ -41,12 +56,6 @@ # Get the info about each image on the page images = json_data["images"] page += 1 for item in images: @@ -57,24 +66,30 @@ file_name = file_id + "." 
+ file_type if not os.path.exists(file_name): for i in range(MAX_RETRIES): # Remove temp file if exists try: os.remove(TMP_FILE_NAME) except OSError: pass try: # Save the file to temporary file req = urllib2.urlopen(file_url, timeout=TIMEOUT) with open(TMP_FILE_NAME, "wb") as tmp_file: shutil.copyfileobj(req, tmp_file) # Rename to actual file os.rename(TMP_FILE_NAME, file_name) # Set the file time os.utime(file_name,(file_time, file_time)) if i > 0: print "Retry successful for image ID: " + file_id break except urllib2.URLError, e: print "Failed retrieving image ID: " + file_id time.sleep(SLEEP_TIME) except socket.timeout: print "Timeout retrieving image ID: " + file_id time.sleep(SLEEP_TIME) -
azamshul revised this gist
Sep 5, 2014 . 1 changed file with 20 additions and 10 deletions. There are no files selected for viewing.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -5,7 +5,7 @@ # # License: MIT import shutil import urllib2 import json import time @@ -25,7 +25,12 @@ # Get the data about the target page while has_more_page: print "Processing page: " + str(page) try: raw_data = urllib2.urlopen(api + str(page)) except urllib2.URLError, e: print "Failed retrieving page: " + str(page) break json_data = json.load(raw_data) # Save the page data @@ -58,13 +63,18 @@ except OSError: pass try: # Save the file to temporary file req = urllib2.urlopen(file_url) with open(TMP_FILE_NAME, "wb") as tmp_file: shutil.copyfileobj(req, tmp_file) # Rename to actual file os.rename(TMP_FILE_NAME, file_name) # Set the file time os.utime(file_name,(file_time, file_time)) except urllib2.URLError, e: print "Failed retrieving image ID: " + file_id else: print "Skipped image ID: " + file_id -
azamshul revised this gist
Sep 5, 2014 . 1 changed file with 32 additions and 7 deletions. There are no files selected for viewing.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -12,14 +12,19 @@ import os USERNAME = "your_username_goes_here" TMP_FILE_NAME = "tmpfile" page = 1 has_more_page = True photo_count = -1 processed_photo_count = 0 # Target Page api = "https://api.twitpic.com/2/users/show.json?username=%s&page=" % USERNAME # Get the data about the target page while has_more_page: print "Processing page: " + str(page) raw_data = urllib2.urlopen(api + str(page)) json_data = json.load(raw_data) @@ -31,15 +36,35 @@ # Get the info about each image on the page images = json_data["images"] # Update photo count photo_count = int(json_data["photo_count"]) processed_photo_count += len(images) # Check if there is more page has_more_page = processed_photo_count < photo_count page += 1 for item in images: file_id = item["short_id"] file_type = item["type"] file_time = time.mktime(time.strptime(item["timestamp"], "%Y-%m-%d %H:%M:%S")) file_url = "https://twitpic.com/show/full/"+file_id file_name = file_id + "." + file_type if not os.path.exists(file_name): # Remove temp file if exists try: os.remove(TMP_FILE_NAME) except OSError: pass # Save the file to temporary file urllib.urlretrieve (file_url, TMP_FILE_NAME) # Rename to actual file os.rename(TMP_FILE_NAME, file_name) # Set the file time os.utime(file_name,(file_time, file_time)) else: print "Skipped image ID: " + file_id -
hugs revised this gist
Sep 5, 2014 . 1 changed file with 3 additions and 1 deletion. There are no files selected for viewing.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -2,6 +2,8 @@ # # A cleaned-up fork of Terence Eden's original archiver: # http://shkspr.mobi/blog/2013/08/exporting-twitpic-images-python/ # # License: MIT import urllib import urllib2 @@ -33,7 +35,7 @@ file_id = item["short_id"] file_type = item["type"] file_time = time.mktime(time.strptime(item["timestamp"], "%Y-%m-%d %H:%M:%S")) file_url = "https://twitpic.com/show/full/"+file_id file_name = file_id + "." + file_type # Save the file -
hugs revised this gist
Sep 5, 2014 . 1 changed file with 1 addition and 1 deletion. There are no files selected for viewing.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -33,7 +33,7 @@ file_id = item["short_id"] file_type = item["type"] file_time = time.mktime(time.strptime(item["timestamp"], "%Y-%m-%d %H:%M:%S")) file_url = "http://twitpic.com/show/full/"+file_id file_name = file_id + "." + file_type # Save the file -
hugs revised this gist
Sep 5, 2014 . 1 changed file with 4 additions and 4 deletions. There are no files selected for viewing.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -9,7 +9,7 @@ import time import os USERNAME = "your_username_goes_here" NUMBER_OF_PAGES_TO_DOWNLOAD = 5 # Target Page @@ -22,18 +22,18 @@ json_data = json.load(raw_data) # Save the page data page_file = open("page-%s.json" % page,"w") page_file.write(json.dumps(json_data, indent=2)) page_file.close() # Get the info about each image on the page images = json_data["images"] for item in images: file_id = item["short_id"] file_type = item["type"] file_time = time.mktime(time.strptime(item["timestamp"], "%Y-%m-%d %H:%M:%S")) file_url = "https://twitpic.com/show/full/"+file_id file_name = file_id + "." + file_type # Save the file -
hugs revised this gist
Sep 5, 2014 . 1 changed file with 1 addition and 1 deletion. There are no files selected for viewing.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,4 +1,4 @@ # Archive your Twitpic photos and metadata # # A cleaned-up fork of Terence Eden's original archiver: # http://shkspr.mobi/blog/2013/08/exporting-twitpic-images-python/ -
hugs revised this gist
Sep 5, 2014 . 1 changed file with 1 addition and 0 deletions. There are no files selected for viewing.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -21,6 +21,7 @@ raw_data = urllib2.urlopen(api + str(page)) json_data = json.load(raw_data) # Save the page data page_file = open('page-%s.json' % page,'w') page_file.write(json.dumps(json_data, indent=2)) page_file.close() -
hugs revised this gist
Sep 5, 2014 . 1 changed file with 1 addition and 1 deletion. There are no files selected for viewing.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,6 +1,6 @@ # Archive your TwitPic photos and metadata # # A cleaned-up fork of Terence Eden's original archiver: # http://shkspr.mobi/blog/2013/08/exporting-twitpic-images-python/ import urllib -
hugs revised this gist
Sep 5, 2014 . 1 changed file with 3 additions and 1 deletion. There are no files selected for viewing.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,5 +1,7 @@ # Archive your TwitPic photos and metadata # # A cleaned-up fork of Terence Eden's original exporter: # http://shkspr.mobi/blog/2013/08/exporting-twitpic-images-python/ import urllib import urllib2 -
hugs revised this gist
Sep 5, 2014 . 1 changed file with 0 additions and 2 deletions. There are no files selected for viewing.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -4,8 +4,6 @@ import urllib import urllib2 import json import time import os -
hugs created this gist
Sep 5, 2014 . There are no files selected for viewing.
# Archive your TwitPic photos and metadata
# A fork of Terence Eden's http://shkspr.mobi/blog/2013/08/exporting-twitpic-images-python/
#
# For each of the first NUMBER_OF_PAGES_TO_DOWNLOAD pages of USERNAME's
# Twitpic listing this script:
#   1. fetches the page's JSON from the Twitpic API,
#   2. saves it, pretty-printed, as "page-<n>.json" in the current directory,
#   3. downloads every image listed under the page's "images" key to
#      "<short_id>.<type>" and sets the file's access/modification time to
#      the image's "timestamp" value.
#
# A failure on one page or one image is reported and skipped so that the
# rest of the archive is still attempted.

import urllib
import urllib2
import json
import collections
import HTMLParser
import time
import os

USERNAME = 'your_username_goes_here'
NUMBER_OF_PAGES_TO_DOWNLOAD = 5

# Target Page
api = "https://api.twitpic.com/2/users/show.json?username=%s&page=" % USERNAME

# Get the data about the target page
for page in range(1, NUMBER_OF_PAGES_TO_DOWNLOAD + 1):
    print(page)

    # urllib2.URLError derives from IOError; json.load raises ValueError on
    # a malformed body. Either way, skip this page and try the next one.
    try:
        raw_data = urllib2.urlopen(api + str(page))
        try:
            json_data = json.load(raw_data)
        finally:
            # Always release the HTTP connection, even if parsing fails.
            raw_data.close()
    except (IOError, ValueError) as e:
        print("Failed retrieving page: " + str(page) + " (" + str(e) + ")")
        continue

    # Save the page data; `with` closes the file even if the write raises.
    with open('page-%s.json' % page, 'w') as page_file:
        page_file.write(json.dumps(json_data, indent=2))

    # Get the info about each image on the page
    images = json_data["images"]

    for item in images:
        file_id = item['short_id']
        file_type = item["type"]
        # The timestamp string is parsed with a fixed format and converted
        # with time.mktime, i.e. it is interpreted as local time.
        file_time = time.mktime(time.strptime(item["timestamp"], "%Y-%m-%d %H:%M:%S"))
        file_url = "http://twitpic.com/show/full/" + file_id
        file_name = file_id + "." + file_type

        # Save the file; urlretrieve signals download problems with IOError.
        try:
            urllib.urlretrieve(file_url, file_name)
        except IOError as e:
            print("Failed retrieving image ID: " + file_id + " (" + str(e) + ")")
            continue

        # Set the file time
        os.utime(file_name, (file_time, file_time))