Skip to content

Instantly share code, notes, and snippets.

@michaelort33
Forked from vickyqian/twitter crawler.txt
Last active April 9, 2019 19:34
Show Gist options
  • Save michaelort33/13982ee6491acb6cb1f7eee35f765933 to your computer and use it in GitHub Desktop.

Revisions

  1. Michael Ort revised this gist Apr 9, 2019. No changes.
  2. Michael Ort revised this gist Apr 9, 2019. No changes.
  3. Michael Ort revised this gist Apr 9, 2019. 4 changed files with 0 additions and 1461695 deletions.
    Binary file modified all.pkl
    Binary file not shown.
    1,456,096 changes: 0 additions & 1,456,096 deletions all_tweet_history.csv
    0 additions, 1,456,096 deletions not shown because the diff is too large. Please use a local Git client to view these changes.
    5,599 changes: 0 additions & 5,599 deletions personalities_3_words.csv
    0 additions, 5,599 deletions not shown because the diff is too large. Please use a local Git client to view these changes.
    Binary file modified personalities_3_words.pkl
    Binary file not shown.
  4. @FredPintFed FredPintFed revised this gist Feb 6, 2019. 1 changed file with 0 additions and 0 deletions.
    Binary file added personalities_3_words.pkl
    Binary file not shown.
  5. @FredPintFed FredPintFed revised this gist Feb 6, 2019. 3 changed files with 0 additions and 0 deletions.
    Binary file removed 6.pkl
    Binary file not shown.
    File renamed without changes.
    File renamed without changes.
  6. @FredPintFed FredPintFed revised this gist Feb 6, 2019. 3 changed files with 1456104 additions and 1 deletion.
    9 changes: 8 additions & 1 deletion TweetScrape.py
    Original file line number Diff line number Diff line change
    @@ -12,6 +12,10 @@
    import time
    import numpy as np

    #import os
    #os.chdir('/home/michael/Documents/DeeperSignals')


    ####input your credentials here
    consumer_key = 'kmflAhDlFLYvVLzUhUTv5Agbw'
    consumer_secret = 'D8Giq3GMjIMaBQN0Qw63tsoHpajA7hNKy2ouo1XeqQP46SP38C'
    @@ -39,7 +43,7 @@
    chunks = int(np.floor(len(all_names)/500))-1
    chunk_size=500
    idx = 0
    for i in range(7,chunks):
    for i in range(chunks):
    some_names = all_names.loc[i*chunk_size:(i+1)*500-1,]
    for name in tqdm(some_names):
    status_cursor = tweepy.Cursor(api.user_timeline, screen_name=name, count=200,tweet_mode='extended')
    @@ -52,3 +56,6 @@

    all_history = pd.DataFrame(past_tweets).transpose()
    all_history.to_pickle('all.pkl')



    1,456,096 changes: 1,456,096 additions & 0 deletions all.csv
    1,456,096 additions, 0 deletions not shown because the diff is too large. Please use a local Git client to view these changes.
    Binary file modified all.pkl
    Binary file not shown.
  7. @FredPintFed FredPintFed revised this gist Feb 6, 2019. 5 changed files with 5639 additions and 16 deletions.
    1 change: 1 addition & 0 deletions .~lock.tweets.csv#
    Original file line number Diff line number Diff line change
    @@ -0,0 +1 @@
    ,michael,michaelxps,04.02.2019 14:14,file:///home/michael/.config/libreoffice/4;
    Binary file added 6.pkl
    Binary file not shown.
    55 changes: 39 additions & 16 deletions TweetScrape.py
    Original file line number Diff line number Diff line change
    @@ -7,25 +7,48 @@
    """

    import tweepy
    import csv
    import pandas as pd
    from tqdm import tqdm
    import time
    import numpy as np

    ####input your credentials here
    consumer_key = ''
    consumer_secret = ''
    access_token = ''
    access_token_secret = ''
    consumer_key = 'kmflAhDlFLYvVLzUhUTv5Agbw'
    consumer_secret = 'D8Giq3GMjIMaBQN0Qw63tsoHpajA7hNKy2ouo1XeqQP46SP38C'
    access_token = '4266439228-U0ySetwuTNEtz3ZGdyPWKVrfIGKY866EMIrzNbN'
    access_token_secret = 'CVynCeCX22NtK8iygTHGWjota2JhGTfPgk2CO1MUnPfBK'

    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    api = tweepy.API(auth,wait_on_rate_limit=True)
    #####United Airlines
    # Open/Create a file to append data
    csvFile = open('ua.csv', 'a')
    #Use csv Writer
    csvWriter = csv.writer(csvFile)

    for tweet in tweepy.Cursor(api.search,q="#unitedAIRLINES",count=100,
    api = tweepy.API(auth,wait_on_rate_limit=True,timeout=1000)

    personality = {}
    for idx,tweet in enumerate(tweepy.Cursor(api.search,q="#YourPersonalityIn3Words",count=100,
    lang="en",
    since="2017-04-03").items():
    print (tweet.created_at, tweet.text)
    csvWriter.writerow([tweet.created_at, tweet.text.encode('utf-8')])
    since="2017-04-03").items()):
    personality[idx]=[tweet.created_at, tweet.text.encode('utf-8'),tweet.author.screen_name]

    persons = pd.DataFrame(personality).transpose()

    persons.to_csv('persons.csv',index=False)

    past_tweets = {}

    all_names = persons.loc[:,2]

    chunks = int(np.floor(len(all_names)/500))-1
    chunk_size=500
    idx = 0
    for i in range(7,chunks):
    some_names = all_names.loc[i*chunk_size:(i+1)*500-1,]
    for name in tqdm(some_names):
    status_cursor = tweepy.Cursor(api.user_timeline, screen_name=name, count=200,tweet_mode='extended')
    status_list = status_cursor.iterator.next()
    for each_tweet in range(len(status_list)):
    text=status_list[each_tweet]._json['full_text']
    past_tweets[idx]=[name,text]
    idx += 1
    time.sleep(900)

    all_history = pd.DataFrame(past_tweets).transpose()
    all_history.to_pickle('all.pkl')
    Binary file added all.pkl
    Binary file not shown.
    5,599 changes: 5,599 additions & 0 deletions persons.csv
    5,599 additions, 0 deletions not shown because the diff is too large. Please use a local Git client to view these changes.
  8. @FredPintFed FredPintFed created this gist Feb 4, 2019.
    31 changes: 31 additions & 0 deletions TweetScrape.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,31 @@
    #!/usr/bin/env python3
    # -*- coding: utf-8 -*-
    """
    Created on Mon Feb 4 12:37:38 2019
    @author: michael

    Crawl tweets matching the #unitedAIRLINES hashtag via the Twitter
    search API and append them to ua.csv, one row per tweet:
    (created_at, text).
    """

    import tweepy
    import csv
    import pandas as pd
    ####input your credentials here
    consumer_key = ''
    consumer_secret = ''
    access_token = ''
    access_token_secret = ''

    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    # wait_on_rate_limit makes tweepy sleep automatically when Twitter
    # throttles the search endpoint instead of raising.
    api = tweepy.API(auth,wait_on_rate_limit=True)
    #####United Airlines
    # Open the output once with an explicit UTF-8 encoding and newline=''
    # (the csv module's documented requirement for writers); the `with`
    # block guarantees the handle is closed even if the crawl raises,
    # fixing the unclosed-file leak in the original.
    with open('ua.csv', 'a', encoding='utf-8', newline='') as csvFile:
        #Use csv Writer
        csvWriter = csv.writer(csvFile)

        for tweet in tweepy.Cursor(api.search,q="#unitedAIRLINES",count=100,
                                   lang="en",
                                   since="2017-04-03").items():
            print (tweet.created_at, tweet.text)
            # Write the text as str: under Python 3 (per the shebang),
            # .encode('utf-8') would make csv store the bytes repr b'...'.
            csvWriter.writerow([tweet.created_at, tweet.text])