"""Archive tweets matching a hashtag into numbered JSON-lines files.

Usage: python tweets.py #hashtag

Each run writes the newly fetched tweets to
``<hashtag>-tweets.<n>.json`` (one JSON object per line), where ``<n>`` is
one more than the highest number already present in the working directory
(or 0 on the first run).  The id stored on the first line of the previous
file is used as the cut-off so that already archived tweets are not
written again.
"""
import glob
import json
import os
import sys

from TwitterAPI import TwitterAPI
from TwitterAPI import TwitterRestPager
from natsort import versorted


def main(args):
    """Fetch tweets for the hashtag given as ``args[1]``.

    ``args`` is expected to look like ``sys.argv``.  When no hashtag is
    supplied, a usage message is printed and the process exits with
    status 1.
    """
    if len(args) < 2:
        print("usage: python tweets.py #hashtag")
        print("")
        sys.exit(1)

    hashtag = args[1]
    lastfilenum = lastFile(hashtag)
    lastfile = hashtag + "-tweets." + str(lastfilenum) + ".json"

    tID = 0
    if lastfilenum == 0:
        # lastFile() returns the integer 0 only when no archive exists yet,
        # so start with file number 0 and no cut-off id.
        currentfile = lastfile
    else:
        tID = mostRecentTweetID(lastfile)
        currentfile = (hashtag + "-tweets."
                       + str(int(lastfilenum) + 1) + ".json")

    getTweets(currentfile, tID, hashtag)


def lastFile(hashtag):
    """Return the number of the newest archive file for ``hashtag``.

    Returns the integer 0 when no ``<hashtag>-tweets.*.json`` file exists,
    otherwise the number component of the last file name (as a string) in
    ``versorted`` order.
    """
    files = versorted(glob.glob(hashtag + "-tweets.*.json"))
    if not files:
        return 0
    # Take the number counting from the right ("<prefix>.<n>.json") so that
    # dots inside the hashtag itself cannot shift the field position.
    return files[-1].rsplit('.', 2)[-2]


def mostRecentTweetID(lastfile):
    """Return the ``id`` of the tweet stored on the first line of ``lastfile``.

    getTweets() writes items in the order the API yields them and stops at
    the first already-seen id, so this assumes the first line holds the
    newest tweet of that file.
    """
    with open(lastfile, 'r') as tweets:
        first_line = tweets.readline()
    return json.loads(first_line)['id']


def getTweets(currentfile, tID, hashtag):
    """Write tweets matching ``hashtag`` with id greater than ``tID``.

    Tweets are appended to ``currentfile`` as one JSON document per line and
    a progress counter is shown on stdout.  Fetching stops at the first
    tweet whose id is ``<= tID`` or when the API reports error code 88
    (rate limit exceeded).  If nothing was written, ``currentfile`` is
    removed again so that no empty archive is left behind.

    The credential constants below are placeholders and must be replaced
    with real values.
    """
    CONSUMER_KEY = 'YOURCONSUMERKEY'
    CONSUMER_SECRET = 'YOURCONSUMERSECRET'
    ACCESS_TOKEN_KEY = 'YOURACCESSTOKENKEY'
    ACCESS_TOKEN_SECRET = 'YOURACCESSTOKENSECRET'

    api = TwitterAPI(
        CONSUMER_KEY,
        CONSUMER_SECRET,
        ACCESS_TOKEN_KEY,
        ACCESS_TOKEN_SECRET)

    pager = TwitterRestPager(api, 'search/tweets',
                             {'q': hashtag, 'count': 100})

    written = 0
    out = open(currentfile, 'w')
    try:
        for item in pager.get_iterator():
            if 'text' in item:
                if item['id'] <= tID:
                    break
                out.write(json.dumps(item))
                out.write("\n")
                written += 1
                sys.stdout.write(
                    '\r' + str(written) + " " + str(item['created_at']))
                sys.stdout.flush()
            elif 'message' in item and item.get('code') == 88:
                print('SUSPEND, RATE LIMIT EXCEEDED: %s\n' % item['message'])
                break
    finally:
        # Always close before removing: deleting a file that is still open
        # fails on Windows.  Cleaning up here also covers the case where the
        # iteration raised, so an empty file cannot confuse the next run.
        out.close()
        if written == 0:
            os.remove(currentfile)


if __name__ == "__main__":
    # main() returns None on success, which sys.exit() maps to status 0.
    sys.exit(main(sys.argv))