@azam
Forked from hugs/archive-twitpic-data.py
Last active August 29, 2015 14:06

Revisions

  1. azamshul revised this gist Sep 6, 2014. 1 changed file with 46 additions and 31 deletions.
@@ -7,29 +7,44 @@
 import shutil
 import urllib2
+import socket
 import json
 import time
 import os
 
 USERNAME = "your_username_goes_here"
 TMP_FILE_NAME = "tmpfile"
+MAX_RETRIES = 5
+SLEEP_TIME = 2
+TIMEOUT = 5000
 
 page = 1
 has_more_page = True
 photo_count = -1
 processed_photo_count = 0
 
 # Target Page
 api = "https://api.twitpic.com/2/users/show.json?username=%s&page=" % USERNAME
 
 # Get the data about the target page
 while has_more_page:
     print "Processing page: " + str(page)
-    try:
-        raw_data = urllib2.urlopen(api + str(page))
-    except urllib2.URLError, e:
-        print "Failed retrieving page: " + str(page)
-        break
+    has_page_error = True
+    for i in range(MAX_RETRIES):
+        try:
+            raw_data = urllib2.urlopen(api + str(page), timeout=TIMEOUT)
+            has_page_error = False
+            if i > 0:
+                print "Retry successful page: " + str(page)
+            break
+        except urllib2.URLError, e:
+            print "Failed retrieving page: " + str(page)
+            time.sleep(SLEEP_TIME)
+        except socket.timeout:
+            print "Timeout retrieving page: " + str(page)
+            time.sleep(SLEEP_TIME)
+    if has_page_error:
+        has_more_page = False
+        break
 
     json_data = json.load(raw_data)

@@ -41,12 +56,6 @@
     # Get the info about each image on the page
     images = json_data["images"]
 
-    # Update photo count
-    photo_count = int(json_data["photo_count"])
-    processed_photo_count += len(images)
-
-    # Check if there is more page
-    has_more_page = processed_photo_count < photo_count
-    page += 1
 
     for item in images:
@@ -57,24 +66,30 @@
         file_name = file_id + "." + file_type
 
         if not os.path.exists(file_name):
-            # Remove temp file if exists
-            try:
-                os.remove(TMP_FILE_NAME)
-            except OSError:
-                pass
-
-            try:
-                # Save the file to temporary file
-                req = urllib2.urlopen(file_url)
-                with open(TMP_FILE_NAME, "wb") as tmp_file:
-                    shutil.copyfileobj(req, tmp_file)
-
-                # Rename to actual file
-                os.rename(TMP_FILE_NAME, file_name)
-
-                # Set the file time
-                os.utime(file_name,(file_time, file_time))
-            except urllib2.URLError, e:
-                print "Failed retrieving image ID: " + file_id
+            for i in range(MAX_RETRIES):
+                # Remove temp file if exists
+                try:
+                    os.remove(TMP_FILE_NAME)
+                except OSError:
+                    pass
+                try:
+                    # Save the file to temporary file
+                    req = urllib2.urlopen(file_url, timeout=TIMEOUT)
+                    with open(TMP_FILE_NAME, "wb") as tmp_file:
+                        shutil.copyfileobj(req, tmp_file)
+
+                    # Rename to actual file
+                    os.rename(TMP_FILE_NAME, file_name)
+
+                    # Set the file time
+                    os.utime(file_name,(file_time, file_time))
+
+                    if i > 0:
+                        print "Retry successful for image ID: " + file_id
+                    break
+                except urllib2.URLError, e:
+                    print "Failed retrieving image ID: " + file_id
+                    time.sleep(SLEEP_TIME)
+                except socket.timeout:
+                    print "Timeout retrieving image ID: " + file_id
+                    time.sleep(SLEEP_TIME)
         else:
             print "Skipped image ID: " + file_id
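
This revision replaces the single try/except around each network call with a bounded retry loop: up to MAX_RETRIES attempts per URL, a fixed SLEEP_TIME pause between attempts, and an explicit timeout on every urlopen call, for the page fetch and the image download alike. The middle hunk also removes the photo_count bookkeeping, leaving the retry failure path (which sets has_more_page to False) as the way the page loop ends. The sketch below factors the same retry pattern into one helper; the helper name fetch_with_retries is mine, not part of the gist. Note that urllib2's timeout argument is in seconds, so the gist's TIMEOUT = 5000 allows each attempt up to 5000 seconds; the sketch uses 5.

    import socket
    import time
    import urllib2

    MAX_RETRIES = 5
    SLEEP_TIME = 2   # seconds to pause between attempts
    TIMEOUT = 5      # per-attempt socket timeout, in seconds

    def fetch_with_retries(url):
        # Return an open response, or None once every attempt has failed.
        for attempt in range(MAX_RETRIES):
            try:
                response = urllib2.urlopen(url, timeout=TIMEOUT)
                if attempt > 0:
                    print "Retry successful: " + url
                return response
            except urllib2.URLError:
                print "Failed retrieving: " + url
                time.sleep(SLEEP_TIME)
            except socket.timeout:
                print "Timeout retrieving: " + url
                time.sleep(SLEEP_TIME)
        return None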
  2. azamshul revised this gist Sep 5, 2014. 1 changed file with 20 additions and 10 deletions.
@@ -5,7 +5,7 @@
 #
 # License: MIT
 
-import urllib
+import shutil
 import urllib2
 import json
 import time
@@ -25,7 +25,12 @@
 # Get the data about the target page
 while has_more_page:
     print "Processing page: " + str(page)
-    raw_data = urllib2.urlopen(api + str(page))
+    try:
+        raw_data = urllib2.urlopen(api + str(page))
+    except urllib2.URLError, e:
+        print "Failed retrieving page: " + str(page)
+        break
 
     json_data = json.load(raw_data)
 
     # Save the page data
@@ -58,13 +63,18 @@
             except OSError:
                 pass
 
-            # Save the file to temporary file
-            urllib.urlretrieve (file_url, TMP_FILE_NAME)
-
-            # Rename to actual file
-            os.rename(TMP_FILE_NAME, file_name)
-
-            # Set the file time
-            os.utime(file_name,(file_time, file_time))
+            try:
+                # Save the file to temporary file
+                req = urllib2.urlopen(file_url)
+                with open(TMP_FILE_NAME, "wb") as tmp_file:
+                    shutil.copyfileobj(req, tmp_file)
+
+                # Rename to actual file
+                os.rename(TMP_FILE_NAME, file_name)
+
+                # Set the file time
+                os.utime(file_name,(file_time, file_time))
+            except urllib2.URLError, e:
+                print "Failed retrieving image ID: " + file_id
         else:
             print "Skipped image ID: " + file_id
  3. azamshul revised this gist Sep 5, 2014. 1 changed file with 32 additions and 7 deletions.
@@ -12,14 +12,19 @@
 import os
 
 USERNAME = "your_username_goes_here"
-NUMBER_OF_PAGES_TO_DOWNLOAD = 5
+TMP_FILE_NAME = "tmpfile"
 
+page = 1
+has_more_page = True
+photo_count = -1
+processed_photo_count = 0
+
 # Target Page
 api = "https://api.twitpic.com/2/users/show.json?username=%s&page=" % USERNAME
 
 # Get the data about the target page
-for page in range(1, NUMBER_OF_PAGES_TO_DOWNLOAD+1):
-    print page
+while has_more_page:
+    print "Processing page: " + str(page)
     raw_data = urllib2.urlopen(api + str(page))
     json_data = json.load(raw_data)

@@ -31,15 +36,35 @@
     # Get the info about each image on the page
     images = json_data["images"]
 
+    # Update photo count
+    photo_count = int(json_data["photo_count"])
+    processed_photo_count += len(images)
+
+    # Check if there is more page
+    has_more_page = processed_photo_count < photo_count
+    page += 1
+
     for item in images:
         file_id = item["short_id"]
         file_type = item["type"]
         file_time = time.mktime(time.strptime(item["timestamp"], "%Y-%m-%d %H:%M:%S"))
         file_url = "https://twitpic.com/show/full/"+file_id
         file_name = file_id + "." + file_type
 
-        # Save the file
-        urllib.urlretrieve (file_url, file_name)
+        if not os.path.exists(file_name):
+            # Remove temp file if exists
+            try:
+                os.remove(TMP_FILE_NAME)
+            except OSError:
+                pass
 
-        # Set the file time
-        os.utime(file_name,(file_time, file_time))
+            # Save the file to temporary file
+            urllib.urlretrieve (file_url, TMP_FILE_NAME)
+
+            # Rename to actual file
+            os.rename(TMP_FILE_NAME, file_name)
+
+            # Set the file time
+            os.utime(file_name,(file_time, file_time))
+        else:
+            print "Skipped image ID: " + file_id
4. @hugs revised this gist Sep 5, 2014. 1 changed file with 3 additions and 1 deletion.
@@ -2,6 +2,8 @@
 #
 # A cleaned-up fork of Terence Eden's original archiver:
 # http://shkspr.mobi/blog/2013/08/exporting-twitpic-images-python/
+#
+# License: MIT
 
 import urllib
 import urllib2
@@ -33,7 +35,7 @@
         file_id = item["short_id"]
         file_type = item["type"]
         file_time = time.mktime(time.strptime(item["timestamp"], "%Y-%m-%d %H:%M:%S"))
-        file_url = "http://twitpic.com/show/full/"+file_id
+        file_url = "https://twitpic.com/show/full/"+file_id
         file_name = file_id + "." + file_type
 
         # Save the file
5. @hugs revised this gist Sep 5, 2014. 1 changed file with 1 addition and 1 deletion.
@@ -33,7 +33,7 @@
         file_id = item["short_id"]
         file_type = item["type"]
         file_time = time.mktime(time.strptime(item["timestamp"], "%Y-%m-%d %H:%M:%S"))
-        file_url = "https://twitpic.com/show/full/"+file_id
+        file_url = "http://twitpic.com/show/full/"+file_id
         file_name = file_id + "." + file_type
 
         # Save the file
6. @hugs revised this gist Sep 5, 2014. 1 changed file with 4 additions and 4 deletions.
@@ -9,7 +9,7 @@
 import time
 import os
 
-USERNAME = 'your_username_goes_here'
+USERNAME = "your_username_goes_here"
 NUMBER_OF_PAGES_TO_DOWNLOAD = 5
 
 # Target Page
@@ -22,18 +22,18 @@
     json_data = json.load(raw_data)
 
     # Save the page data
-    page_file = open('page-%s.json' % page,'w')
+    page_file = open("page-%s.json" % page,"w")
     page_file.write(json.dumps(json_data, indent=2))
     page_file.close()
 
     # Get the info about each image on the page
     images = json_data["images"]
 
     for item in images:
-        file_id = item['short_id']
+        file_id = item["short_id"]
         file_type = item["type"]
         file_time = time.mktime(time.strptime(item["timestamp"], "%Y-%m-%d %H:%M:%S"))
-        file_url = "http://twitpic.com/show/full/"+file_id
+        file_url = "https://twitpic.com/show/full/"+file_id
         file_name = file_id + "." + file_type
 
         # Save the file
7. @hugs revised this gist Sep 5, 2014. 1 changed file with 1 addition and 1 deletion.
@@ -1,4 +1,4 @@
-# Archive your TwitPic photos and metadata
+# Archive your Twitpic photos and metadata
 #
 # A cleaned-up fork of Terence Eden's original archiver:
 # http://shkspr.mobi/blog/2013/08/exporting-twitpic-images-python/
8. @hugs revised this gist Sep 5, 2014. 1 changed file with 1 addition and 0 deletions.
@@ -21,6 +21,7 @@
     raw_data = urllib2.urlopen(api + str(page))
     json_data = json.load(raw_data)
 
+    # Save the page data
     page_file = open('page-%s.json' % page,'w')
     page_file.write(json.dumps(json_data, indent=2))
     page_file.close()
9. @hugs revised this gist Sep 5, 2014. 1 changed file with 1 addition and 1 deletion.
@@ -1,6 +1,6 @@
 # Archive your TwitPic photos and metadata
 #
-# A cleaned-up fork of Terence Eden's original exporter:
+# A cleaned-up fork of Terence Eden's original archiver:
 # http://shkspr.mobi/blog/2013/08/exporting-twitpic-images-python/
 
 import urllib
10. @hugs revised this gist Sep 5, 2014. 1 changed file with 3 additions and 1 deletion.
@@ -1,5 +1,7 @@
 # Archive your TwitPic photos and metadata
-# A fork of Terence Eden's http://shkspr.mobi/blog/2013/08/exporting-twitpic-images-python/
+#
+# A cleaned-up fork of Terence Eden's original exporter:
+# http://shkspr.mobi/blog/2013/08/exporting-twitpic-images-python/
 
 import urllib
 import urllib2
11. @hugs revised this gist Sep 5, 2014. 1 changed file with 0 additions and 2 deletions.
@@ -4,8 +4,6 @@
 import urllib
 import urllib2
 import json
-import collections
-import HTMLParser
 import time
 import os

12. @hugs created this gist Sep 5, 2014.
@@ -0,0 +1,42 @@
+# Archive your TwitPic photos and metadata
+# A fork of Terence Eden's http://shkspr.mobi/blog/2013/08/exporting-twitpic-images-python/
+
+import urllib
+import urllib2
+import json
+import collections
+import HTMLParser
+import time
+import os
+
+USERNAME = 'your_username_goes_here'
+NUMBER_OF_PAGES_TO_DOWNLOAD = 5
+
+# Target Page
+api = "https://api.twitpic.com/2/users/show.json?username=%s&page=" % USERNAME
+
+# Get the data about the target page
+for page in range(1, NUMBER_OF_PAGES_TO_DOWNLOAD+1):
+    print page
+    raw_data = urllib2.urlopen(api + str(page))
+    json_data = json.load(raw_data)
+
+    page_file = open('page-%s.json' % page,'w')
+    page_file.write(json.dumps(json_data, indent=2))
+    page_file.close()
+
+    # Get the info about each image on the page
+    images = json_data["images"]
+
+    for item in images:
+        file_id = item['short_id']
+        file_type = item["type"]
+        file_time = time.mktime(time.strptime(item["timestamp"], "%Y-%m-%d %H:%M:%S"))
+        file_url = "http://twitpic.com/show/full/"+file_id
+        file_name = file_id + "." + file_type
+
+        # Save the file
+        urllib.urlretrieve (file_url, file_name)
+
+        # Set the file time
+        os.utime(file_name,(file_time, file_time))
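
One detail of the original script that every later revision keeps is stamping each saved image with its Twitpic upload time: the API's timestamp string is parsed with time.strptime, converted to epoch seconds with time.mktime, and written to the file as both access and modification time via os.utime. A small isolated illustration with made-up values; note that time.mktime interprets the parsed time in the local timezone, so if the API reports UTC the stamped time may be offset:

    import os
    import time

    stamp = "2014-09-05 12:34:56"   # hypothetical API timestamp value
    epoch = time.mktime(time.strptime(stamp, "%Y-%m-%d %H:%M:%S"))
    os.utime("abc123.jpg", (epoch, epoch))   # sets (atime, mtime)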