Skip to content

Instantly share code, notes, and snippets.

@azam
Forked from hugs/archive-twitpic-data.py
Last active August 29, 2015 14:06
Show Gist options
  • Select an option

  • Save azam/4de9426ff3beb379ae9d to your computer and use it in GitHub Desktop.

Select an option

Save azam/4de9426ff3beb379ae9d to your computer and use it in GitHub Desktop.
Twitpic Image and Data Archiver. Adds exception handling, a timeout, and retry support to the page-info and image-file downloads.
# Archive your Twitpic photos and metadata
#
# A cleaned-up fork of Terence Eden's original archiver:
# http://shkspr.mobi/blog/2013/08/exporting-twitpic-images-python/
#
# License: MIT
import shutil
import urllib2
import json
import time
import os
# --- Configuration ---------------------------------------------------------
USERNAME = "your_username_goes_here"
# Downloads are written here first and renamed once complete, so an
# interrupted transfer never leaves a partial file under its final name.
TMP_FILE_NAME = "tmpfile"
# Seconds a single blocking network operation may take before it is
# abandoned; without a timeout a stalled connection hangs the script forever.
TIMEOUT_SECONDS = 60

# --- Paging state ----------------------------------------------------------
page = 1
has_more_page = True
photo_count = -1
processed_photo_count = 0

# Target Page (the page number is appended for every request)
api = "https://api.twitpic.com/2/users/show.json?username=%s&page=" % USERNAME


def _remove_if_exists(path):
    """Delete *path*, silently ignoring any OSError (e.g. it does not exist)."""
    try:
        os.remove(path)
    except OSError:
        pass


def _fetch_page(page_number):
    """Fetch one listing page from the API and return the decoded JSON.

    Raises urllib2.URLError (or another IOError) when the request fails and
    ValueError when the response body is not valid JSON.
    """
    response = urllib2.urlopen(api + str(page_number), timeout=TIMEOUT_SECONDS)
    try:
        return json.load(response)
    finally:
        # urllib2 responses are not context managers; close explicitly.
        response.close()


def _download_image(file_url, file_name, file_time):
    """Download *file_url* to *file_name* and stamp it with *file_time*.

    The data goes to TMP_FILE_NAME first and is renamed afterwards.
    Raises IOError/OSError (including urllib2.URLError and socket errors)
    when the transfer or any file operation fails.
    """
    # Remove temp file if exists (left over from an earlier failed run)
    _remove_if_exists(TMP_FILE_NAME)
    response = urllib2.urlopen(file_url, timeout=TIMEOUT_SECONDS)
    try:
        # Save the file to temporary file
        with open(TMP_FILE_NAME, "wb") as tmp_file:
            shutil.copyfileobj(response, tmp_file)
    finally:
        response.close()
    # Rename to actual file
    os.rename(TMP_FILE_NAME, file_name)
    # Set the file time (both access and modification time)
    os.utime(file_name, (file_time, file_time))


# Get the data about the target page
while has_more_page:
    print("Processing page: " + str(page))
    try:
        json_data = _fetch_page(page)
    except (EnvironmentError, ValueError):
        # Network failure, timeout, or a body that is not JSON: stop paging.
        print("Failed retrieving page: " + str(page))
        break

    # Save the page data
    with open("page-%s.json" % page, "w") as page_file:
        page_file.write(json.dumps(json_data, indent=2))

    # Get the info about each image on the page
    images = json_data["images"]

    # Update photo count
    photo_count = int(json_data["photo_count"])
    processed_photo_count += len(images)

    # Check whether there are more pages. An empty page can never advance
    # the running total, so treat it as the end instead of looping forever.
    has_more_page = bool(images) and processed_photo_count < photo_count
    page += 1

    for item in images:
        file_id = item["short_id"]
        file_type = item["type"]
        file_name = file_id + "." + file_type

        # Already archived on a previous run: nothing to do.
        if os.path.exists(file_name):
            print("Skipped image ID: " + file_id)
            continue

        # mktime() interprets the parsed timestamp as local time.
        file_time = time.mktime(
            time.strptime(item["timestamp"], "%Y-%m-%d %H:%M:%S"))
        file_url = "https://twitpic.com/show/full/" + file_id

        try:
            _download_image(file_url, file_name, file_time)
        except EnvironmentError:
            # EnvironmentError covers urllib2.URLError, socket errors and
            # timeouts raised mid-transfer, and local file errors, so one bad
            # image does not abort the whole archive run.
            _remove_if_exists(TMP_FILE_NAME)
            print("Failed retrieving image ID: " + file_id)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment