#!/usr/bin/env python
# encoding: utf-8

import tweepy  # https://github.com/tweepy/tweepy
import csv

# Twitter API credentials
consumer_key = ""
consumer_secret = ""
access_key = ""
access_secret = ""


def get_all_tweets(screen_name):
    # Twitter only allows access to a user's most recent 3240 tweets with this method

    # authorize twitter, initialize tweepy
    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_key, access_secret)
    api = tweepy.API(auth)

    # initialize a list to hold all the tweepy Tweets
    alltweets = []

    # make initial request for most recent tweets (200 is the maximum allowed count)
    new_tweets = api.user_timeline(screen_name=screen_name, count=200)

    # save most recent tweets
    alltweets.extend(new_tweets)

    # save the id of the oldest tweet less one
    oldest = alltweets[-1].id - 1

    # keep grabbing tweets until there are no tweets left to grab
    while len(new_tweets) > 0:
        print("getting tweets before {}".format(oldest))

        # all subsequent requests use the max_id param to prevent duplicates
        new_tweets = api.user_timeline(screen_name=screen_name, count=200, max_id=oldest)

        # save most recent tweets
        alltweets.extend(new_tweets)

        # update the id of the oldest tweet less one
        oldest = alltweets[-1].id - 1

        print("...{} tweets downloaded so far".format(len(alltweets)))

    # transform the tweepy tweets into a 2D array that will populate the csv
    outtweets = [[tweet.id_str, tweet.created_at, tweet.text.encode("utf-8")] for tweet in alltweets]

    # write the csv
    with open('{}_tweets.csv'.format(screen_name), 'w') as f:
        writer = csv.writer(f)
        writer.writerow(["id", "created_at", "text"])
        writer.writerows(outtweets)

    print('{}_tweets.csv was successfully created.'.format(screen_name))


if __name__ == '__main__':
    # pass in the username of the account you want to download
    get_all_tweets("J_tsar")
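
As an aside: tweepy also provides a Cursor helper that takes care of the max_id bookkeeping done by hand above. A minimal, untested sketch, assuming the same credentials and subject to the same ~3,200-tweet limit:

import tweepy

# fill in the same credentials used in the script above
consumer_key = ""
consumer_secret = ""
access_key = ""
access_secret = ""

auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_key, access_secret)
api = tweepy.API(auth)

# Cursor paginates with max_id internally, yielding one Status object at a time
alltweets = list(tweepy.Cursor(api.user_timeline, screen_name="J_tsar", count=200).items())
print("{} tweets downloaded".format(len(alltweets)))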
Amazing. How do I remove "b'" from showing up before every tweet?
try this: https://www.kaggle.com/code/asimislam/text-mining-wordcloud-from-tweets-ripharambe
Don't you need elevated access to do this stuff?
Not at the time I wrote it. It uses only web scraping, not an API. It's been a long time since I've used it, though, so things might be a little different right now.
That "b'" prefix means the text is a bytes object. You can convert it back to a string with the .decode() method, like below:
name_byte = b'Alice'
name_str = name_byte.decode()
print(name_str)
> Alice
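
Applied to the script above, the simplest fix is to write tweet.text without the .encode("utf-8") call, since csv.writer in Python 3 expects str. If you already have a CSV full of b'...' strings, here is a rough cleanup sketch (not recently tested; 'J_tsar_tweets.csv' is just the example output file from the script):

import ast
import csv

# read back a CSV whose text column was written as the repr of a bytes object
with open('J_tsar_tweets.csv') as f:
    for row in csv.DictReader(f):
        text = row["text"]
        if text.startswith("b'") or text.startswith('b"'):
            # turn the stored bytes literal back into a real str
            text = ast.literal_eval(text).decode("utf-8")
        print(text)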
Can I use this to get the URLs of tweets ?
It's been a while since I used it, but if I remember correctly, if the tweet link is https://twitter.com/Twitter/status/1577730467436138524, the tweet id is the long number at the end: 1577730467436138524. So you can just build 'https://twitter.com/' + screen_name + '/status/' + id and it will work.
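
For example, rebuilding URLs from the CSV the script writes (the handle is just the one from the example above):

import csv

screen_name = "J_tsar"  # the account used in the example script
with open('{}_tweets.csv'.format(screen_name)) as f:
    for row in csv.DictReader(f):
        # the id column plus the screen name is enough to reconstruct the tweet URL
        print('https://twitter.com/{}/status/{}'.format(screen_name, row["id"]))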