@revox
Last active December 15, 2015 12:01

Revisions

  1. revox renamed this gist Dec 15, 2015. 1 changed file with 4 additions and 6 deletions.
    10 changes: 4 additions & 6 deletions twitter_suer_timeline.py → twitter_user_timeline.py
    @@ -1,6 +1,5 @@
     # don't keep this script in public_html!!!!
    -import twitter
    -import sys,json,csv, time
    +import twitter, sys,json,csv, time
     
     def twitter_user_timeline(twitter_api, q):
         ''' get last tweet ID so we can paginate down from that, 200 at a time, to 3,200
    @@ -16,9 +15,8 @@ def twitter_user_timeline(twitter_api, q):
             statuses += user_timeline
             # time.sleep(300) ## 5 minute rest between api calls, uncomment this if you're being limited
             for tweet in user_timeline:
    -            # print tweet['text'] ## print the tweet
    -            ids.append(tweet['id']) ## append tweet IDs
    -            print tweet['id']
    +            ids.append(tweet['id']) ## append those tweet IDs
    +            print tweet['id'] # I like to watch
         return statuses
     
     ''' helper functions, clean data, unpack dictionaries '''
    @@ -60,7 +58,7 @@ def getPlace(val):
     twitter_api.retry = True # should prevent rate limit errors but sleep anyway as this only sleeps for 30 seconds
     
     # Sample usage
    -q = "NordfrontSE"
    +q = "David_Cameron"
     results = twitter_user_timeline(twitter_api, q)
     print len(results)
     # Show one sample search result by slicing the list...
  2. revox created this gist Dec 15, 2015.
    98 changes: 98 additions & 0 deletions twitter_suer_timeline.py
    @@ -0,0 +1,98 @@
    # don't keep this script in public_html!!!!
    import twitter
    import sys,json,csv, time

    def twitter_user_timeline(twitter_api, q):
        ''' get last tweet ID so we can paginate down from that, 200 at a time, to 3,200
        '''
        user_timeline = twitter_api.statuses.user_timeline(screen_name=q, count=1)
        print user_timeline[0]['id']
        ids = [user_timeline[0]['id']]
        statuses = []
        for i in range(0, 16): ## iterate through all tweets available with this API = 16 pages x 200 = 3,200
            ## tweet extract method with the last list item as the max_id
            user_timeline = twitter_api.statuses.user_timeline(screen_name=q,
                count=200, include_rts=False, max_id=ids[-1]) # include_rts is the v1.1 parameter name; a negative index counts from the end, not the start, of the list
            statuses += user_timeline
            # time.sleep(300) ## 5 minute rest between api calls, uncomment this if you're being limited
            for tweet in user_timeline:
                # print tweet['text'] ## print the tweet
                ids.append(tweet['id']) ## append tweet IDs
                print tweet['id']
        return statuses
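    
    # Aside: max_id is inclusive, so each pass re-fetches the tweet at ids[-1] and
    # statuses picks up one duplicate per page. A minimal de-duplication pass (a
    # hypothetical dedupe_statuses helper, not part of the original gist) could be:
    def dedupe_statuses(statuses):
        seen = set()  # tweet ids already encountered
        unique = []   # first occurrence of each tweet, in order
        for s in statuses:
            if s['id'] not in seen:
                seen.add(s['id'])
                unique.append(s)
        return unique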

    ''' helper functions, clean data, unpack dictionaries '''
    def getVal(val):
        clean = ""
        if isinstance(val, bool):
            return val
        if isinstance(val, int):
            return val
        if val:
            clean = val.encode('utf-8')
        return clean
    
    def getLng(val):
        if isinstance(val, dict):
            return val['coordinates'][0]
    
    def getLat(val):
        if isinstance(val, dict):
            return val['coordinates'][1]
    
    def getPlace(val):
        if isinstance(val, dict):
            return val['full_name'].encode('utf-8')
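    
    # Tweet coordinates arrive as a GeoJSON point with longitude first and latitude
    # second, which is why getLng and getLat index positions 0 and 1. A quick
    # illustration with a made-up payload:
    #   sample = {'type': 'Point', 'coordinates': [-0.1276, 51.5074]}
    #   getLng(sample)  ->  -0.1276
    #   getLat(sample)  ->  51.5074
    #   getLng(None)    ->  None (non-geotagged tweets fall through and return None)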

    # == OAuth Authentication ==
    # The consumer keys can be found on your application's Details page
    consumer_key = ""
    consumer_secret = ""
    
    # Create an access token under the "Your access token" section
    access_token = ""
    access_token_secret = ""
    auth = twitter.oauth.OAuth(access_token,
                               access_token_secret,
                               consumer_key,
                               consumer_secret)
    twitter_api = twitter.Twitter(auth=auth)
    twitter_api.retry = True # should prevent rate limit errors, but sleep anyway as this only sleeps for 30 seconds
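    
    # Given the public_html warning at the top, one way to keep real keys out of
    # the file is to read them from environment variables instead of hard-coding
    # them. A minimal sketch (the variable names are illustrative, not from the
    # original gist); run it before the auth step above:
    #   import os
    #   consumer_key = os.environ.get('TWITTER_CONSUMER_KEY', '')
    #   consumer_secret = os.environ.get('TWITTER_CONSUMER_SECRET', '')
    #   access_token = os.environ.get('TWITTER_ACCESS_TOKEN', '')
    #   access_token_secret = os.environ.get('TWITTER_ACCESS_TOKEN_SECRET', '')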

    # Sample usage
    q = "NordfrontSE"
    results = twitter_user_timeline(twitter_api, q)
    print len(results)
    # Show one sample search result by slicing the list...
    # print json.dumps(results[0], indent=1)
    csvfile = open(q + '_timeline.csv', 'w')
    csvwriter = csv.writer(csvfile)
    csvwriter.writerow(['created_at',
                        'user-screen_name',
                        'text',
                        'coordinates lng',
                        'coordinates lat',
                        'place',
                        'user-location',
                        'user-geo_enabled',
                        'user-lang',
                        'user-time_zone',
                        'user-statuses_count',
                        'user-followers_count',
                        'user-created_at'])
    for tweet in results:
        csvwriter.writerow([tweet['created_at'],
                            getVal(tweet['user']['screen_name']),
                            getVal(tweet['text']),
                            getLng(tweet['coordinates']),
                            getLat(tweet['coordinates']),
                            getPlace(tweet['place']),
                            getVal(tweet['user']['location']),
                            getVal(tweet['user']['geo_enabled']),
                            getVal(tweet['user']['lang']),
                            getVal(tweet['user']['time_zone']),
                            getVal(tweet['user']['statuses_count']),
                            getVal(tweet['user']['followers_count']),
                            getVal(tweet['user']['created_at'])
                            ])
    print "done"