Skip to content

Instantly share code, notes, and snippets.

@michaelort33
Forked from vickyqian/twitter crawler.txt
Last active April 9, 2019 19:34
Show Gist options
  • Save michaelort33/13982ee6491acb6cb1f7eee35f765933 to your computer and use it in GitHub Desktop.

Revisions

  1. Michael Ort revised this gist Apr 9, 2019. No changes.
  2. Michael Ort revised this gist Apr 9, 2019. No changes.
  3. Michael Ort revised this gist Apr 9, 2019. 4 changed files with 0 additions and 1461695 deletions.
    Binary file modified all.pkl
    Binary file not shown.
    1,456,096 changes: 0 additions & 1,456,096 deletions all_tweet_history.csv
    0 additions, 1,456,096 deletions not shown because the diff is too large. Please use a local Git client to view these changes.
    5,599 changes: 0 additions & 5,599 deletions personalities_3_words.csv
    0 additions, 5,599 deletions not shown because the diff is too large. Please use a local Git client to view these changes.
    Binary file modified personalities_3_words.pkl
    Binary file not shown.
  4. @FredPintFed FredPintFed revised this gist Feb 6, 2019. 1 changed file with 0 additions and 0 deletions.
    Binary file added personalities_3_words.pkl
    Binary file not shown.
  5. @FredPintFed FredPintFed revised this gist Feb 6, 2019. 3 changed files with 0 additions and 0 deletions.
    Binary file removed 6.pkl
    Binary file not shown.
    File renamed without changes.
    File renamed without changes.
  6. @FredPintFed FredPintFed revised this gist Feb 6, 2019. 3 changed files with 1456104 additions and 1 deletion.
    9 changes: 8 additions & 1 deletion TweetScrape.py
    Original file line number Diff line number Diff line change
    @@ -12,6 +12,10 @@
    import time
    import numpy as np

    #import os
    #os.chdir('/home/michael/Documents/DeeperSignals')


    ####input your credentials here
    consumer_key = 'kmflAhDlFLYvVLzUhUTv5Agbw'
    consumer_secret = 'D8Giq3GMjIMaBQN0Qw63tsoHpajA7hNKy2ouo1XeqQP46SP38C'
    @@ -39,7 +43,7 @@
    chunks = int(np.floor(len(all_names)/500))-1
    chunk_size=500
    idx = 0
    for i in range(7,chunks):
    for i in range(chunks):
    some_names = all_names.loc[i*chunk_size:(i+1)*500-1,]
    for name in tqdm(some_names):
    status_cursor = tweepy.Cursor(api.user_timeline, screen_name=name, count=200,tweet_mode='extended')
    @@ -52,3 +56,6 @@

    all_history = pd.DataFrame(past_tweets).transpose()
    all_history.to_pickle('all.pkl')



    1,456,096 changes: 1,456,096 additions & 0 deletions all.csv
    1,456,096 additions, 0 deletions not shown because the diff is too large. Please use a local Git client to view these changes.
    Binary file modified all.pkl
    Binary file not shown.
  7. @FredPintFed FredPintFed revised this gist Feb 6, 2019. 5 changed files with 5639 additions and 16 deletions.
    1 change: 1 addition & 0 deletions .~lock.tweets.csv#
    Original file line number Diff line number Diff line change
    @@ -0,0 +1 @@
    ,michael,michaelxps,04.02.2019 14:14,file:///home/michael/.config/libreoffice/4;
    Binary file added 6.pkl
    Binary file not shown.
    55 changes: 39 additions & 16 deletions TweetScrape.py
    Original file line number Diff line number Diff line change
    @@ -7,25 +7,48 @@
    """

    import tweepy
    import csv
    import pandas as pd
    from tqdm import tqdm
    import time
    import numpy as np

    ####input your credentials here
    consumer_key = ''
    consumer_secret = ''
    access_token = ''
    access_token_secret = ''
    consumer_key = 'kmflAhDlFLYvVLzUhUTv5Agbw'
    consumer_secret = 'D8Giq3GMjIMaBQN0Qw63tsoHpajA7hNKy2ouo1XeqQP46SP38C'
    access_token = '4266439228-U0ySetwuTNEtz3ZGdyPWKVrfIGKY866EMIrzNbN'
    access_token_secret = 'CVynCeCX22NtK8iygTHGWjota2JhGTfPgk2CO1MUnPfBK'

    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    api = tweepy.API(auth,wait_on_rate_limit=True)
    #####United Airlines
    # Open/Create a file to append data
    csvFile = open('ua.csv', 'a')
    #Use csv Writer
    csvWriter = csv.writer(csvFile)

    for tweet in tweepy.Cursor(api.search,q="#unitedAIRLINES",count=100,
    api = tweepy.API(auth,wait_on_rate_limit=True,timeout=1000)

    personality = {}
    for idx,tweet in enumerate(tweepy.Cursor(api.search,q="#YourPersonalityIn3Words",count=100,
    lang="en",
    since="2017-04-03").items():
    print (tweet.created_at, tweet.text)
    csvWriter.writerow([tweet.created_at, tweet.text.encode('utf-8')])
    since="2017-04-03").items()):
    personality[idx]=[tweet.created_at, tweet.text.encode('utf-8'),tweet.author.screen_name]

    persons = pd.DataFrame(personality).transpose()

    persons.to_csv('persons.csv',index=False)

    past_tweets = {}

    all_names = persons.loc[:,2]

    chunks = int(np.floor(len(all_names)/500))-1
    chunk_size=500
    idx = 0
    for i in range(7,chunks):
    some_names = all_names.loc[i*chunk_size:(i+1)*500-1,]
    for name in tqdm(some_names):
    status_cursor = tweepy.Cursor(api.user_timeline, screen_name=name, count=200,tweet_mode='extended')
    status_list = status_cursor.iterator.next()
    for each_tweet in range(len(status_list)):
    text=status_list[each_tweet]._json['full_text']
    past_tweets[idx]=[name,text]
    idx += 1
    time.sleep(900)

    all_history = pd.DataFrame(past_tweets).transpose()
    all_history.to_pickle('all.pkl')
    Binary file added all.pkl
    Binary file not shown.
    5,599 changes: 5,599 additions & 0 deletions persons.csv
    5,599 additions, 0 deletions not shown because the diff is too large. Please use a local Git client to view these changes.
  8. @FredPintFed FredPintFed created this gist Feb 4, 2019.
    31 changes: 31 additions & 0 deletions TweetScrape.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,31 @@
    #!/usr/bin/env python3
    # -*- coding: utf-8 -*-
    """
    Created on Mon Feb 4 12:37:38 2019
    @author: michael

    Crawl tweets matching the #unitedAIRLINES hashtag via the Twitter
    search API and append them to ua.csv, one row per tweet:
    (created_at, text).
    """

    import tweepy
    import csv
    import pandas as pd
    ####input your credentials here
    consumer_key = ''
    consumer_secret = ''
    access_token = ''
    access_token_secret = ''

    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    # wait_on_rate_limit makes tweepy sleep automatically when Twitter
    # throttles the search endpoint instead of raising.
    api = tweepy.API(auth,wait_on_rate_limit=True)
    #####United Airlines
    # Open the output once with an explicit UTF-8 encoding and newline=''
    # (the csv module's documented requirement for writers); the `with`
    # block guarantees the handle is closed even if the crawl raises,
    # fixing the unclosed-file leak in the original.
    with open('ua.csv', 'a', encoding='utf-8', newline='') as csvFile:
        #Use csv Writer
        csvWriter = csv.writer(csvFile)

        for tweet in tweepy.Cursor(api.search,q="#unitedAIRLINES",count=100,
                                   lang="en",
                                   since="2017-04-03").items():
            print (tweet.created_at, tweet.text)
            # Write the text as str: under Python 3 (per the shebang),
            # .encode('utf-8') would make csv store the bytes repr b'...'.
            csvWriter.writerow([tweet.created_at, tweet.text])