# Archive your Twitpic photos and metadata
#
# A cleaned-up fork of Terence Eden's original archiver:
# http://shkspr.mobi/blog/2013/08/exporting-twitpic-images-python/
#
# License: MIT
#
# Usage: set USERNAME below and run the script with Python 2 (it relies on
# urllib2). Each API page is written to page-<n>.json and each image to
# <short_id>.<type>, both relative to the current working directory. Images
# whose target file already exists are skipped, so an interrupted run can
# simply be started again.

import shutil
import urllib2
import socket
import json
import time
import os

USERNAME = "your_username_goes_here"
TMP_FILE_NAME = "tmpfile"
MAX_RETRIES = 5
SLEEP_TIME = 2
# NOTE(review): urllib2.urlopen() takes its timeout in seconds, so 5000 means
# roughly 83 minutes per blocking socket operation. If milliseconds were
# intended, the matching value would be 5 -- confirm before changing.
TIMEOUT = 5000

page = 1
has_more_page = True

# Target Page
api = "https://api.twitpic.com/2/users/show.json?username=%s&page=" % USERNAME


def _remove_tmp_file():
    """Delete the temporary download file; a missing file is not an error."""
    try:
        os.remove(TMP_FILE_NAME)
    except OSError:
        pass


def _fetch_page(page_number):
    """Download and decode one page of the user's image listing.

    Makes up to MAX_RETRIES attempts, sleeping SLEEP_TIME seconds after each
    failed one.

    Returns the decoded JSON document, or None when every attempt failed.
    """
    for attempt in range(MAX_RETRIES):
        try:
            response = urllib2.urlopen(api + str(page_number), timeout=TIMEOUT)
            try:
                # Read the body inside the retry loop: the read itself can
                # time out, not just the initial connection.
                data = json.load(response)
            finally:
                response.close()
        except urllib2.URLError:
            print("Failed retrieving page: " + str(page_number))
            time.sleep(SLEEP_TIME)
        except socket.timeout:
            print("Timeout retrieving page: " + str(page_number))
            time.sleep(SLEEP_TIME)
        else:
            if attempt > 0:
                print("Retry successful page: " + str(page_number))
            return data
    return None


def _download_image(item):
    """Download the full-size image described by one listing entry.

    `item` is one element of a page's "images" list; its "short_id", "type"
    and "timestamp" keys are read. The image is saved as <short_id>.<type>
    and its access/modification times are set from the timestamp. Nothing is
    downloaded when that file already exists.
    """
    file_id = item["short_id"]
    file_type = item["type"]
    # NOTE(review): mktime() interprets the parsed value as local time;
    # confirm that matches the time zone of the API's timestamps.
    file_time = time.mktime(
        time.strptime(item["timestamp"], "%Y-%m-%d %H:%M:%S"))
    file_url = "https://twitpic.com/show/full/" + file_id
    file_name = file_id + "." + file_type

    if os.path.exists(file_name):
        return

    for attempt in range(MAX_RETRIES):
        # A previous attempt may have left a partial download behind.
        _remove_tmp_file()

        try:
            response = urllib2.urlopen(file_url, timeout=TIMEOUT)
            try:
                # Download to a temporary name so that an interrupted
                # transfer is never mistaken for a finished image.
                with open(TMP_FILE_NAME, "wb") as tmp_file:
                    shutil.copyfileobj(response, tmp_file)
            finally:
                response.close()
            # Rename to actual file
            os.rename(TMP_FILE_NAME, file_name)
            # Set the file time
            os.utime(file_name, (file_time, file_time))
        except urllib2.URLError:
            print("Failed retrieving image ID: " + file_id)
            time.sleep(SLEEP_TIME)
        except socket.timeout:
            print("Timeout retrieving image ID: " + file_id)
            time.sleep(SLEEP_TIME)
        else:
            if attempt > 0:
                print("Retry successful for image ID: " + file_id)
            return

    # Every attempt failed: say so and do not leave a partial file around.
    _remove_tmp_file()
    print("Giving up on image ID: " + file_id)


# Walk the listing page by page until a page cannot be fetched or has no
# images.
while has_more_page:
    print("Processing page: " + str(page))

    json_data = _fetch_page(page)
    if json_data is None:
        # All retries failed; this is also how the end of the listing shows
        # up when the API answers an out-of-range page with an HTTP error.
        has_more_page = False
        break

    # Save the page data
    with open("page-%s.json" % page, "w") as page_file:
        page_file.write(json.dumps(json_data, indent=2))

    # Get the info about each image on the page. A missing or empty list is
    # treated as the end of the listing instead of raising KeyError or
    # requesting further pages forever.
    images = json_data.get("images") or []

    page += 1

    for item in images:
        _download_image(item)

    has_more_page = bool(images)