-
Star
(105)
You must be signed in to star a gist -
Fork
(72)
You must be signed in to fork a gist
-
-
Save bonzanini/af0463b927433c73784d to your computer and use it in GitHub Desktop.
# Twitter API credentials read by the download script (as config.<name>).
# Replace each placeholder below with the value for your own application.
consumer_key = "your-consumer-key"
consumer_secret = "your-consumer-secret"
access_token = "your-access-token"
access_secret = "your-access-secret"
| # To run this code, first edit config.py with your configuration, then: | |
| # | |
| # mkdir data | |
| # python twitter_stream_download.py -q apple -d data | |
| # | |
| # It will produce the list of tweets for the query "apple" | |
| # in the file data/stream_apple.json | |
| import tweepy | |
| from tweepy import Stream | |
| from tweepy import OAuthHandler | |
| from tweepy.streaming import StreamListener | |
| import time | |
| import argparse | |
| import string | |
| import config | |
| import json | |
def get_parser():
    """Get parser for command line arguments.

    Options:
    -q / --query -- the query/filter term (default: '-')
    -d / --data-dir -- the output/data directory (default: 'data')

    Return:
    argparse.ArgumentParser -- the configured parser
    """
    parser = argparse.ArgumentParser(description="Twitter Downloader")
    parser.add_argument("-q",
                        "--query",
                        dest="query",
                        help="Query/Filter",
                        default='-')
    # Without a default, omitting -d left data_dir as None, and the output
    # path became the literal 'None/stream_<query>.json'.
    parser.add_argument("-d",
                        "--data-dir",
                        dest="data_dir",
                        help="Output/Data Directory (default: %(default)s)",
                        default='data')
    return parser
class MyListener(StreamListener):
    """Custom StreamListener for streaming data.

    Every message received is appended to the file
    "<data_dir>/stream_<query>.json", where the query is first converted
    into a safe file-name fragment by format_filename().
    """

    def __init__(self, data_dir, query):
        """Compute the output file path for this listener.

        Arguments:
        data_dir -- directory that holds the output file
        query -- the query string; used to build the output file name
        """
        # Initialise the base class as well, so that any state it sets up
        # is present on this instance.
        super(MyListener, self).__init__()
        query_fname = format_filename(query)
        self.outfile = "%s/stream_%s.json" % (data_dir, query_fname)

    def on_data(self, data):
        """Append one received message to the output file and print it.

        Arguments:
        data -- the message text to store
        Return:
        Boolean -- always True, including when the write fails
        """
        try:
            with open(self.outfile, 'a') as f:
                f.write(data)
                print(data)
                return True
        except Exception as e:
            # Catch Exception rather than BaseException so KeyboardInterrupt
            # and SystemExit still propagate and can stop the program.
            print("Error on_data: %s" % str(e))
            # Pause briefly before handling the next message.
            time.sleep(5)
        return True

    def on_error(self, status):
        """Print the reported error status and return True.

        Arguments:
        status -- the error status passed in by the caller
        """
        print(status)
        return True
def format_filename(fname):
    """Turn a file name into a safe string.

    Every character of the input is passed through convert_valid() and the
    results are concatenated in their original order.

    Arguments:
    fname -- the file name to convert
    Return:
    String -- converted file name
    """
    return ''.join(map(convert_valid, fname))
def convert_valid(one_char):
    """Replace a character with '_' unless it is allowed.

    Allowed characters are '-', '_', '.', the ASCII letters and the digits.

    Arguments:
    one_char -- the char to convert
    Return:
    Character -- the input if allowed, otherwise '_'
    """
    allowed = "-_.%s%s" % (string.ascii_letters, string.digits)
    return one_char if one_char in allowed else '_'
@classmethod
def parse(cls, api, raw):
    """Parse via cls.first_parse and attach the raw payload as JSON text.

    NOTE(review): nothing in the visible file calls this function or
    attaches it to a class; presumably it is a leftover meant to be bound
    to a model class that provides first_parse -- confirm before relying
    on it.

    Arguments:
    api -- passed through unchanged to cls.first_parse
    raw -- the raw payload; also serialised with json.dumps
    Return:
    the object returned by cls.first_parse, with a 'json' attribute added
    """
    parsed = cls.first_parse(api, raw)
    parsed.json = json.dumps(raw)
    return parsed
if __name__ == '__main__':
    # Read the query and the output directory from the command line.
    cli_parser = get_parser()
    cli_args = cli_parser.parse_args()

    # Authenticate with the credentials stored in config.py.
    oauth = OAuthHandler(config.consumer_key, config.consumer_secret)
    oauth.set_access_token(config.access_token, config.access_secret)
    # Kept from the original; not used by the remaining statements here.
    api = tweepy.API(oauth)

    # Stream messages matching the query; the listener writes them to disk.
    listener = MyListener(cli_args.data_dir, cli_args.query)
    stream = Stream(oauth, listener)
    stream.filter(track=[cli_args.query])
I'm using Python 3.7.0 and downloaded Tweepy 3.6.0
After running config.py (which finishes successfully) and doing the `mkdir data` step, I get the following error when running twitter_stream_download.py:
```
C:\Users\pbajp\Git\datasci_course_materials\assignment1\alternate>python twitter_stream_download.py -q apple -d data
Traceback (most recent call last):
  File "twitter_stream_download.py", line 9, in <module>
    import tweepy
  File "C:\Users\pbajp\AppData\Local\Programs\Python\Python37\lib\site-packages\tweepy\__init__.py", line 17, in <module>
    from tweepy.streaming import Stream, StreamListener
  File "C:\Users\pbajp\AppData\Local\Programs\Python\Python37\lib\site-packages\tweepy\streaming.py", line 358
    def start(self, async):
                    ^
SyntaxError: invalid syntax
```
Can anyone guide me on next steps to debug?
@rsathishr
It is "write" not "Write"
Hello everyone.
First of all, thank you for your work @bonzanini !
I'm trying to search for tweets from two weeks ago until now. Can I adapt your code to do that?
Works fine. Thank you for your work, @bonzanini!
For those who are facing the following error:
Error on_data: [Errno 2] No such file or directory: 'None/stream_-.json'
You have to create a folder named "data" in the same directory, for the code to work. Hope this helps.
maybe I am totally missing something, but it sure seems to me that the script is totally functional without import json or the @classmethod
For those who are facing the following error:
Error on_data: [Errno 2] No such file or directory: 'None/stream_-.json'
You have to create a folder named "data" in the same directory, for the code to work. Hope this helps.
@arnabghose997, I still face the same problem after creating the folder 'data' in the same directory.
Any idea how to resolve this error, please?
runfile('C:/Users/chhaj/OneDrive/Desktop/test4 tweet search.py', wdir='C:/Users/chhaj/OneDrive/Desktop')
Error on_data: [Errno 2] No such file or directory: 'None/stream_-.json'
Error on_data: [Errno 2] No such file or directory: 'None/stream_-.json'
Error on_data: [Errno 2] No such file or directory: 'None/stream_-.json'
Error on_data: [Errno 2] No such file or directory: 'None/stream_-.json'
Error on_data: [Errno 2] No such file or directory: 'None/stream_-.json'
Error on_data: [Errno 2] No such file or directory: 'None/stream_-.json'
Error on_data: [Errno 2] No such file or directory: 'None/stream_-.json'
Error on_data: [Errno 2] No such file or directory: 'None/stream_-.json'
Error on_data: [Errno 2] No such file or directory: 'None/stream_-.json'
Error on_data: [Errno 2] No such file or directory: 'None/stream_-.json'
Error on_data: [Errno 2] No such file or directory: 'None/stream_-.json'
Error on_data: [Errno 2] No such file or directory: 'None/stream_-.json'
Error on_data: [Errno 2] No such file or directory: 'None/stream_-.json'
Error on_data: [Errno 2] No such file or directory: 'None/stream_-.json'
Error on_data: [Errno 2] No such file or directory: 'None/stream_-.json'
Error on_data: [Errno 2] No such file or directory: 'None/stream_-.json'
Error on_data: [Errno 2] No such file or directory: 'None/stream_-.json'
Error on_data: [Errno 2] No such file or directory: 'None/stream_-.json'
Error on_data: [Errno 2] No such file or directory: 'None/stream_-.json'
Error on_data: [Errno 2] No such file or directory: 'None/stream_-.json'
Error on_data: [Errno 2] No such file or directory: 'None/stream_-.json'
Error on_data: [Errno 2] No such file or directory: 'None/stream_-.json'
Error on_data: [Errno 2] No such file or directory: 'None/stream_-.json'
Error on_data: [Errno 2] No such file or directory: 'None/stream_-.json'
Error on_data: [Errno 2] No such file or directory: 'None/stream_-.json'
Traceback (most recent call last):
File "", line 1, in
runfile('C:/Users/chhaj/OneDrive/Desktop/test4 tweet search.py', wdir='C:/Users/chhaj/OneDrive/Desktop')
File "C:\Users\chhaj\Anaconda3\lib\site-packages\spyder_kernels\customize\spydercustomize.py", line 827, in runfile
execfile(filename, namespace)
File "C:\Users\chhaj\Anaconda3\lib\site-packages\spyder_kernels\customize\spydercustomize.py", line 110, in execfile
exec(compile(f.read(), filename, 'exec'), namespace)
File "C:/Users/chhaj/OneDrive/Desktop/test4 tweet search.py", line 95, in
twitter_stream.filter(track=[args.query])
File "C:\Users\chhaj\Anaconda3\lib\site-packages\tweepy\streaming.py", line 453, in filter
self._start(is_async)
File "C:\Users\chhaj\Anaconda3\lib\site-packages\tweepy\streaming.py", line 368, in _start
self._run()
File "C:\Users\chhaj\Anaconda3\lib\site-packages\tweepy\streaming.py", line 269, in _run
self._read_loop(resp)
File "C:\Users\chhaj\Anaconda3\lib\site-packages\tweepy\streaming.py", line 331, in _read_loop
self._data(next_status_obj)
File "C:\Users\chhaj\Anaconda3\lib\site-packages\tweepy\streaming.py", line 303, in _data
if self.listener.on_data(data) is False:
File "C:/Users/chhaj/OneDrive/Desktop/test4 tweet search.py", line 50, in on_data
time.sleep(5)
KeyboardInterrupt
Worked just fine from cmd with Python 3.8.5; I just needed to create a data sub-folder within the assignment.
@pbajpai2 I don't know if you've fixed that one yet. If you use a different IDE/interpreter when editing the two files, that might be the problem. In my case, I used Anaconda, so I had to use the Anaconda Prompt to run it properly.
@markgillis0 Unfortunately, exact phrase matching is not supported by the Twitter streaming API yet (see https://dev.twitter.com/streaming/overview/request-parameters#track). On the other hand, it is supported by the search API.
I was thinking something very similar to this original comment about multiple character searches. What's the probability that this is a feature used elsewhere? If the probability is high, how would you begin to build it out? Ballpark estimates are fine.
@SjorsG
Are you sure you have a file called "config.py" in the same folder, that has a variable in it that's called "consumer_key", that has your key assigned to it?
consumer_key = 'YOURCONSUMERKEYHERE'