# Don't keep this script in public_html! It contains Twitter API credentials.
#
# Downloads a user's recent timeline through the `twitter` package and writes
# one row per tweet to "<screen_name>_timeline.csv".

import csv
import json  # only needed by the optional debugging dump mentioned in main()
import sys
import time  # only needed if you add the optional sleep between API calls

# The csv module wants UTF-8 byte strings and a binary file on Python 2, but
# text strings and a text file opened with newline='' on Python 3.
_PY2 = sys.version_info[0] == 2
_TEXT_TYPE = type(u"")

# 16 pages of 200 tweets = 3,200, which the original author notes is all this
# API makes available.
PAGE_SIZE = 200
MAX_PAGES = 16

CSV_HEADER = [
    'created_at', 'user-screen_name', 'text', 'coordinates lng',
    'coordinates lat', 'place', 'user-location', 'user-geo_enabled',
    'user-lang', 'user-time_zone', 'user-statuses_count',
    'user-followers_count', 'user-created_at',
]


def twitter_user_timeline(twitter_api, q):
    '''Return the tweets of screen name ``q``, newest first.

    The newest tweet ID is fetched first, then the timeline is paged
    downwards from it, PAGE_SIZE tweets at a time, for at most MAX_PAGES
    pages.

    twitter_api -- object exposing ``statuses.user_timeline(...)``
    q           -- the screen name to download

    Returns a list of tweet dictionaries; an empty list when the account has
    no tweets. Every tweet ID is printed as it is collected.
    '''
    newest = twitter_api.statuses.user_timeline(screen_name=q, count=1)
    if not newest:
        # Nothing to paginate from (the original indexed [0] and crashed).
        return []

    max_id = newest[0]['id']
    print(max_id)

    statuses = []
    for _ in range(MAX_PAGES):
        # NOTE(review): Twitter's documented flag appears to be `include_rts`;
        # `include_retweets` is kept unchanged here -- confirm it is honoured.
        page = twitter_api.statuses.user_timeline(
            screen_name=q, count=PAGE_SIZE, include_retweets=False,
            max_id=max_id)
        if not page:
            # Timeline exhausted: stop instead of spending further API calls.
            break
        statuses.extend(page)
        # If you are being rate limited, add a 5 minute rest between API
        # calls here: time.sleep(300)
        for tweet in page:
            print(tweet['id'])  # progress output
        # max_id is inclusive, so step just below the oldest tweet seen;
        # otherwise every page would repeat the last tweet of the previous one.
        max_id = min(tweet['id'] for tweet in page) - 1
    return statuses


# Helper functions: clean data, unpack dictionaries.

def _csv_text(text):
    '''Return ``text`` in the form the csv module expects on this Python.'''
    if _PY2 and isinstance(text, _TEXT_TYPE):
        return text.encode('utf-8')
    return text


def getVal(val):
    '''Return a CSV-ready value: ints/bools as-is, "" for empty values.'''
    if isinstance(val, int):  # also covers bool, which subclasses int
        return val
    if not val:
        return ""
    return _csv_text(val)


def getLng(val):
    '''Return the longitude of a coordinates dict, or None if not a dict.'''
    if isinstance(val, dict):
        return val['coordinates'][0]
    return None


def getLat(val):
    '''Return the latitude of a coordinates dict, or None if not a dict.'''
    if isinstance(val, dict):
        return val['coordinates'][1]
    return None


def getPlace(val):
    '''Return the full name of a place dict, or None if not a dict.'''
    if isinstance(val, dict):
        return _csv_text(val['full_name'])
    return None


def _tweet_row(tweet):
    '''Build the CSV row for one tweet, in CSV_HEADER column order.'''
    user = tweet['user']
    return [
        tweet['created_at'],
        getVal(user['screen_name']),
        getVal(tweet['text']),
        getLng(tweet['coordinates']),
        getLat(tweet['coordinates']),
        getPlace(tweet['place']),
        getVal(user['location']),
        getVal(user['geo_enabled']),
        getVal(user['lang']),
        getVal(user['time_zone']),
        getVal(user['statuses_count']),
        getVal(user['followers_count']),
        getVal(user['created_at']),
    ]


def _open_csv_for_writing(path):
    '''Open ``path`` for csv output in the mode this Python's csv needs.'''
    if _PY2:
        return open(path, 'wb')
    return open(path, 'w', newline='', encoding='utf-8')


def _write_timeline_csv(path, tweets):
    '''Write the header and one row per tweet to ``path``.'''
    # The with-block guarantees the file is flushed and closed.
    with _open_csv_for_writing(path) as csvfile:
        csvwriter = csv.writer(csvfile)
        csvwriter.writerow(CSV_HEADER)
        for tweet in tweets:
            csvwriter.writerow(_tweet_row(tweet))


# == OAuth Authentication ==
# The consumer keys can be found on your application's Details page.
consumer_key = ""
consumer_secret = ""
# Create an access token under the "Your access token" section.
access_token = ""
access_token_secret = ""


def main():
    '''Download the sample account's timeline and save it as CSV.'''
    # Imported here so the helpers above can be imported (and tested) on a
    # machine that does not have the third-party `twitter` package installed.
    import twitter

    auth = twitter.oauth.OAuth(access_token, access_token_secret,
                               consumer_key, consumer_secret)
    twitter_api = twitter.Twitter(auth=auth)
    # Per the original author: this should prevent rate limit errors, but it
    # only sleeps for 30 seconds, so an extra sleep may still be needed.
    twitter_api.retry = True

    # Sample usage
    q = "David_Cameron"
    results = twitter_user_timeline(twitter_api, q)
    print(len(results))

    # To inspect one sample result, slice the list and dump it, e.g.:
    #   print(json.dumps(results[0], indent=1))

    _write_timeline_csv(q + '_timeline.csv', results)
    print("done")


if __name__ == "__main__":
    main()