andrecurvello · November 30, 2015 03:22 · Oct 17, 2014 · Oct 17, 2014
diff --git a/tweets.py b/tweets.py
@@ -4,24 +4,24 @@
 import json, sys, glob, os
 
 
-
 def main(args):
 	if len(args) < 2:
 		print "usage: python tweets.py #hashtag"
+		print ""
 		sys.exit(1)
 	hashtag = args[1]
-	lastfilenum = lastFile()
-	lastfile = "tweets." + str(lastfilenum) + ".json"
+	lastfilenum = lastFile(hashtag)
+	lastfile = hashtag + "-tweets." + str(lastfilenum) + ".json"
 	tID = 0
 	if lastfilenum == 0:
 		currentfile = lastfile
 	else:
 		tID = mostRecentTweetID(lastfile)
-		currentfile = "tweets."+ str(int(lastfilenum) + 1) + ".json"
+		currentfile = hashtag + "-tweets."+ str(int(lastfilenum) + 1) + ".json"
 	getTweets(currentfile, tID, hashtag)
 
-def lastFile():
-	files = versorted(glob.glob("tweets.*.json"))
+def lastFile(hashtag):
+	files = versorted(glob.glob(hashtag + "-tweets.*.json"))
 	if len(files) == 0:
 		return 0
 	lastfilenum = files[len(files)-1].split('.')[1]
@@ -47,9 +47,9 @@ def getTweets(currentfile, tID, hashtag):
 		CONSUMER_SECRET,
 		ACCESS_TOKEN_KEY,
 		ACCESS_TOKEN_SECRET)
-
-	r = TwitterRestPager(api, 'search/tweets', {'q':hashtag, 'count':100})
 
+	r = TwitterRestPager(api, 'search/tweets', {'q':hashtag, 'count':100})
+	msgs = 1
 	file = open(currentfile, 'w')
 	for item in r.get_iterator():
 		if 'text' in item:
@@ -59,6 +59,7 @@ def getTweets(currentfile, tID, hashtag):
 			file.write("\n")
 			sys.stdout.write('\r' + str(msgs) + " " + str(item['created_at']))
 			sys.stdout.flush()
+			msgs = msgs + 1
 		elif 'message' in item and item['code'] == 88:
 			print 'SUSPEND, RATE LIMIT EXCEEDED: %s\n' % item['message']
 			break

diff --git a/tweets.py b/tweets.py
@@ -0,0 +1,100 @@
+from TwitterAPI import TwitterAPI
+from TwitterAPI import TwitterRestPager
+from natsort import versorted
+import json, sys, glob, os
+
+
+
+def main(args):
+	"""Fetch tweets for the hashtag in args[1] into the next tweets.N.json file.
+
+	args is an argv-style list. If no hashtag is given, the usage line is
+	printed and the process exits with status 1. Returns None otherwise.
+	"""
+	if len(args) < 2:
+		print("usage: python tweets.py #hashtag")
+		sys.exit(1)
+	hashtag = args[1]
+	lastfilenum = lastFile()
+	lastfile = "tweets." + str(lastfilenum) + ".json"
+	tID = 0
+	if lastfilenum == 0:
+		# lastFile() found no earlier dump: write tweets.0.json, no id cutoff.
+		currentfile = lastfile
+	else:
+		# Resume after the id stored at the top of the previous dump.
+		tID = mostRecentTweetID(lastfile)
+		currentfile = "tweets." + str(int(lastfilenum) + 1) + ".json"
+	getTweets(currentfile, tID, hashtag)
+
+def lastFile():
+	"""Return the number of the last tweets.N.json matched by glob.
+
+	Returns the int 0 when nothing matches; otherwise the N component of the
+	last name in natsort's versorted() order, as a string.
+	"""
+	files = versorted(glob.glob("tweets.*.json"))
+	if not files:
+		return 0
+	return files[-1].split('.')[1]
+
+def mostRecentTweetID(lastfile):
+	"""Return the 'id' of the JSON document on the first line of lastfile.
+
+	Returns 0 when the first line is blank, so an empty dump imposes no cutoff.
+	NOTE(review): presumably the first line is the newest tweet because the
+	search pager yields newest first -- confirm against the TwitterAPI docs.
+	"""
+	# 'with' closes the handle even if reading fails.
+	with open(lastfile, 'r') as dump:
+		line = dump.readline()
+	if not line.strip():
+		return 0
+	return json.loads(line)['id']
+
+def getTweets(currentfile, tID, hashtag):
+	"""Save search results for hashtag to currentfile, one JSON document per line.
+
+	Paging stops at the first tweet whose id is <= tID, or when an item
+	carries a 'message' and code 88 (reported as rate limit exceeded).
+	currentfile is removed again if no tweet was written.
+	"""
+	CONSUMER_KEY = 'YOURCONSUMERKEY'
+	CONSUMER_SECRET = 'YOURCONSUMERSECRET'
+	ACCESS_TOKEN_KEY = 'YOURACCESSTOKENKEY'
+	ACCESS_TOKEN_SECRET = 'YOURACCESSTOKENSECRET'
+
+	api = TwitterAPI(
+		CONSUMER_KEY,
+		CONSUMER_SECRET,
+		ACCESS_TOKEN_KEY,
+		ACCESS_TOKEN_SECRET)
+
+	r = TwitterRestPager(api, 'search/tweets', {'q':hashtag, 'count':100})
+
+	# Tweets written so far; also the counter shown on the progress line.
+	msgs = 0
+	try:
+		with open(currentfile, 'w') as out:
+			for item in r.get_iterator():
+				if 'text' in item:
+					if item['id'] <= tID:
+						break
+					out.write(json.dumps(item))
+					out.write("\n")
+					msgs = msgs + 1
+					sys.stdout.write('\r' + str(msgs) + " " + str(item['created_at']))
+					sys.stdout.flush()
+				elif 'message' in item and 'code' in item and item['code'] == 88:
+					print('SUSPEND, RATE LIMIT EXCEEDED: %s\n' % item['message'])
+					break
+	finally:
+		# The file is closed by now, so an empty one can be deleted safely,
+		# including when the download was interrupted by an exception.
+		if msgs == 0 and os.path.exists(currentfile):
+			os.remove(currentfile)
+
+if __name__ == "__main__":
+	# main() returns None, which sys.exit() reports as status 0. Passing
+	# "not main(...)" made every successful run exit with status 1.
+	sys.exit(main(sys.argv))