Last active: December 15, 2015, 12:01
Revisions
revox renamed this gist on Dec 15, 2015 (1 changed file with 4 additions and 6 deletions):
@@ -1,6 +1,5 @@
 # don't keep this script in public_html!!!!
-import twitter
-import sys, json, csv, time
+import twitter, sys, json, csv, time
 
 def twitter_user_timeline(twitter_api, q):
     ''' get last tweet ID so we can paginate down from that, 200 at a time, to 3,200
@@ -16,9 +15,8 @@ def twitter_user_timeline(twitter_api, q):
         statuses += user_timeline
         # time.sleep(300)  ## 5 minute rest between api calls, uncomment this if you're being limited
         for tweet in user_timeline:
-            # print tweet['text']  ## print the tweet
-            ids.append(tweet['id'])  ## append tweet IDs
-            print tweet['id']
+            ids.append(tweet['id'])  ## append those tweet IDs
+            print tweet['id']  # I like to watch
     return statuses
 
 ''' helper functions, clean data, unpack dictionaries '''
@@ -60,7 +58,7 @@ def getPlace(val):
 twitter_api.retry = True  # should prevent rate limit errors, but sleep anyway as this only sleeps for 30 seconds
 
 # Sample usage
-q = "NordfrontSE"
+q = "David_Cameron"
 results = twitter_user_timeline(twitter_api, q)
 print len(results)
 # Show one sample search result by slicing the list...
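For context, the hunk above sits inside the gist's timeline-paging loop: v1.1 statuses/user_timeline returns at most 200 tweets per call and roughly the last 3,200 in total, so each call passes the oldest ID collected so far as max_id. Below is a minimal standalone sketch of that pattern; fetch_page is a hypothetical stand-in for the real API call:

def fetch_page(max_id, count=200):
    # Hypothetical stand-in for twitter_api.statuses.user_timeline();
    # returns a fake page of descending tweet IDs for demonstration.
    return [{'id': i} for i in range(max_id, max(max_id - count, 0), -1)]

def paged_timeline(newest_id, pages=16):
    statuses, ids = [], [newest_id]
    for _ in range(pages):
        page = fetch_page(max_id=ids[-1])
        if not page:
            break
        statuses += page
        # max_id is inclusive, so the anchor tweet comes back once per page;
        # the oldest ID seen ends up at ids[-1], ready for the next call
        ids += [t['id'] for t in page]
    return statuses

print(len(paged_timeline(3200)))

Because max_id is inclusive, the gist's statuses list picks up one duplicate tweet per page boundary in the same way; it is worth deduplicating on tweet['id'] before analysis.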
revox created this gist on Dec 15, 2015:

# don't keep this script in public_html!!!!
import twitter
import sys, json, csv, time

def twitter_user_timeline(twitter_api, q):
    ''' get last tweet ID so we can paginate down from that, 200 at a time, to 3,200
    '''
    user_timeline = twitter_api.statuses.user_timeline(screen_name=q, count=1)
    print user_timeline[0]['id']
    ids = [user_timeline[0]['id']]
    statuses = []
    for i in range(0, 16):  ## iterate through all tweets available with this API = 3,200
        ## tweet extract method with the last list item as the max_id
        ## (NB: the v1.1 flag for excluding retweets is include_rts; include_retweets is likely ignored)
        user_timeline = twitter_api.statuses.user_timeline(screen_name=q, count=200,
                            include_retweets=False, max_id=ids[-1])  # a negative index counts from the end, not the start, of the array
        statuses += user_timeline
        # time.sleep(300)  ## 5 minute rest between api calls, uncomment this if you're being limited
        for tweet in user_timeline:
            # print tweet['text']  ## print the tweet
            ids.append(tweet['id'])  ## append tweet IDs
            print tweet['id']
    return statuses

''' helper functions, clean data, unpack dictionaries '''
def getVal(val):
    clean = ""
    if isinstance(val, bool):
        return val
    if isinstance(val, int):
        return val
    if val:
        clean = val.encode('utf-8')
    return clean

def getLng(val):
    if isinstance(val, dict):
        return val['coordinates'][0]

def getLat(val):
    if isinstance(val, dict):
        return val['coordinates'][1]

def getPlace(val):
    if isinstance(val, dict):
        return val['full_name'].encode('utf-8')

# == OAuth Authentication ==
# The consumer keys can be found on your application's Details page
consumer_key = ""
consumer_secret = ""
# Create an access token under the "Your access token" section
access_token = ""
access_token_secret = ""

auth = twitter.oauth.OAuth(access_token, access_token_secret,
                           consumer_key, consumer_secret)
twitter_api = twitter.Twitter(auth=auth)
twitter_api.retry = True  # should prevent rate limit errors, but sleep anyway as this only sleeps for 30 seconds

# Sample usage
q = "NordfrontSE"
results = twitter_user_timeline(twitter_api, q)
print len(results)
# Show one sample search result by slicing the list...
# print json.dumps(results[0], indent=1)

csvfile = open(q + '_timeline.csv', 'w')
csvwriter = csv.writer(csvfile)
csvwriter.writerow(['created_at', 'user-screen_name', 'text',
                    'coordinates lng', 'coordinates lat', 'place',
                    'user-location', 'user-geo_enabled', 'user-lang',
                    'user-time_zone', 'user-statuses_count',
                    'user-followers_count', 'user-created_at'])
for tweet in results:
    csvwriter.writerow([tweet['created_at'],
                        getVal(tweet['user']['screen_name']),
                        getVal(tweet['text']),
                        getLng(tweet['coordinates']),
                        getLat(tweet['coordinates']),
                        getPlace(tweet['place']),
                        getVal(tweet['user']['location']),
                        getVal(tweet['user']['geo_enabled']),
                        getVal(tweet['user']['lang']),
                        getVal(tweet['user']['time_zone']),
                        getVal(tweet['user']['statuses_count']),
                        getVal(tweet['user']['followers_count']),
                        getVal(tweet['user']['created_at'])])
print "done"
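Not part of the gist itself, but the output can be sanity-checked along these lines. This is a minimal sketch, assuming the script above has already run with q = "NordfrontSE" (so the output file is NordfrontSE_timeline.csv); the column names come from the header row the script writes:

import csv

# Hypothetical read-back check for the CSV written by the script above;
# the filename follows the pattern q + '_timeline.csv'.
with open('NordfrontSE_timeline.csv') as f:
    rows = list(csv.DictReader(f))

# getLng()/getLat() leave these columns blank when a tweet has no point
# coordinates, so a non-empty value means the tweet was geotagged.
geotagged = [r for r in rows if r['coordinates lng']]
print('%d tweets, %d with coordinates' % (len(rows), len(geotagged)))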
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,98 @@ # dont keep this script in public_html!!!! import twitter import sys,json,csv, time def twitter_user_timeline(twitter_api, q): ''' get last tweet ID so we can paginate down from that 200 at a time to 3,200 ''' user_timeline = twitter_api.statuses.user_timeline(screen_name=q,count=1) print user_timeline[0]['id'] ids = [user_timeline[0]['id']] statuses = [] for i in range(0, 16): ## iterate through all tweets available with thsi API = 3,200 ## tweet extract method with the last list item as the max_id user_timeline = twitter_api.statuses.user_timeline(screen_name=q, count=200, include_retweets=False, max_id=ids[-1]) # note a negative index means counting from end not the start of the array statuses += user_timeline # time.sleep(300) ## 5 minute rest between api calls, uncomment this if your being limited for tweet in user_timeline: # print tweet['text'] ## print the tweet ids.append(tweet['id']) ## append tweet id's print tweet['id'] return statuses ''' helper functions, clean data, unpack dictionaries ''' def getVal(val): clean = "" if isinstance(val, bool): return val if isinstance(val, int): return val if val: clean = val.encode('utf-8') return clean def getLng(val): if isinstance(val, dict): return val['coordinates'][0] def getLat(val): if isinstance(val, dict): return val['coordinates'][1] def getPlace(val): if isinstance(val, dict): return val['full_name'].encode('utf-8') # == OAuth Authentication == # The consumer keys can be found on your application's Details consumer_key="" consumer_secret="" # Create an access token under the the "Your access token" section access_token="" access_token_secret="" auth = twitter.oauth.OAuth(access_token, access_token_secret, consumer_key, consumer_secret) twitter_api = twitter.Twitter(auth=auth) twitter_api.retry = True # should prevent rate limit errors but sleep anyway as this only sleeps for 30 seconds # Sample usage q = "NordfrontSE" results = twitter_user_timeline(twitter_api, q) print len(results) # Show one sample search result by slicing the list... # print json.dumps(results[0], indent=1) csvfile = open(q + '_timeline.csv', 'w') csvwriter = csv.writer(csvfile) csvwriter.writerow(['created_at', 'user-screen_name', 'text', 'coordinates lng', 'coordinates lat', 'place', 'user-location', 'user-geo_enabled', 'user-lang', 'user-time_zone', 'user-statuses_count', 'user-followers_count', 'user-created_at']) for tweet in results: csvwriter.writerow([tweet['created_at'], getVal(tweet['user']['screen_name']), getVal(tweet['text']), getLng(tweet['coordinates']), getLat(tweet['coordinates']), getPlace(tweet['place']), getVal(tweet['user']['location']), getVal(tweet['user']['geo_enabled']), getVal(tweet['user']['lang']), getVal(tweet['user']['time_zone']), getVal(tweet['user']['statuses_count']), getVal(tweet['user']['followers_count']), getVal(tweet['user']['created_at']) ]) print "done"