"""Flask app that mines frequent multiword expressions from tweets.

Tweets come either from an uploaded text file or from Twitter searches for
a list of trend names. They are tagged by an external part-of-speech tagger
script, and tag n-grams matching a fixed pattern list are counted and scored.
"""
import collections
import json
import re
import subprocess

import tweepy
from flask import Flask, render_template, request
from langdetect import detect

app = Flask(__name__)
app.config['UPLOAD_FOLDER'] = './'

# Command line of the external tagger; the tweets file path is appended.
_TAGGER_COMMAND = [
    "../ark-tweet-nlp-0.3.2/runTagger.sh",
    "--no-confidence",
    "--input-format", "text",
    "--output-format", "pretsv",
    "--quiet",
]

# Tags whose words are counted individually (used to discount multiwords).
# NOTE(review): presumably codes from the ARK Twitter POS tagset -- confirm.
_CONTENT_TAGS = frozenset("NAVRPO")

# Tag sequences (2 to 4 tags) that qualify as a multiword expression. Every
# tag is a single character, so each pattern is written as a compact string
# and expanded into a tuple of tags for O(1) membership tests.
_MULTIWORD_PATTERNS = frozenset(
    tuple(pattern)
    for pattern in (
        # two tags
        "^^ N^ ^N NN AN A^ VN V^ VT RV "
        # three tags
        "VTT VTP VDN VD^ NON ^ON NO^ ^O^ DDN DD^ "
        "NNN NN^ N^N ^NN N^^ ^N^ ^^N ^^^ "
        "ANN AN^ A^N A^^ NAN ^A^ NA^ ^AN AAN AA^ "
        "NPN ^PN NP^ ^P^ "
        # four tags
        "NPAN ^PAN NPA^ ^PA^ NPDN ^PDN NPD^ ^PD^ "
        "NPNN ^PNN NP^N NPN^ NP^^ ^PN^ ^P^N ^P^^ "
        "NNPN NNP^ N^PN ^NPN ^^PN ^NP^ N^P^ ^^P^"
    ).split()
)


def get_twitter_api():
    """Build an authenticated tweepy API client from ``keys.txt``.

    ``keys.txt`` must hold, one per line and in this order: consumer key,
    consumer secret, access token, access token secret. Blank lines are
    ignored so a trailing newline does not break the parse.

    Raises:
        ValueError: if the file does not contain exactly four values.
    """
    with open('keys.txt') as stream:
        keys = [line.strip() for line in stream if line.strip()]
    if len(keys) != 4:
        raise ValueError(
            "keys.txt must contain exactly 4 non-empty lines, found {0}".format(len(keys)))
    consumer_key, consumer_secret, access_token, access_token_secret = keys
    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    return tweepy.API(auth)


@app.route('/')
def home():
    """Render the landing page."""
    return render_template('home.html')


@app.route('/trends')
def get_trends():
    """Return the names of the current trends for the US WOEID as a JSON list."""
    api = get_twitter_api()
    US_WOEID = '23424977'
    response = api.trends_place(id=US_WOEID)
    trends_names = [x['name'] for x in response[0]['trends']]
    return json.dumps(trends_names)


def _fetch_trend_tweets(queries, max_tweets=20):
    """Search Twitter for each query; return tweet texts, or None on failure."""
    api = get_twitter_api()
    tweets = []
    try:
        for i, query in enumerate(queries):
            print("Getting trend {0}/{1}".format(i + 1, len(queries)))
            # The search parameter is ``lang``; the previous ``language``
            # keyword is not a documented search parameter.
            cursor = tweepy.Cursor(api.search, lang='en', q=query)
            # Collapse internal whitespace: the tagger reads one tweet per
            # line and its output is tab-separated.
            tweets += [" ".join(status.text.split())
                       for status in cursor.items(max_tweets)]
    except Exception as exc:
        # Best effort: any fetch failure discards the partial result.
        print("Failed to fetch tweets: {0}".format(exc))
        return None
    return tweets


def _run_tagger(tweets_file):
    """Run the external tagger on ``tweets_file`` and return its stdout as text."""
    completed = subprocess.run(_TAGGER_COMMAND + [tweets_file],
                               stdout=subprocess.PIPE)
    return completed.stdout.decode()


def _parse_tagger_output(output):
    """Return (tokens, tags) string pairs, one per tagger output line.

    Each line is expected to be tab-separated with the tokenized tweet first
    and its tags second; any further fields are ignored. Parsing line by line
    keeps one malformed line from misaligning all the following tweets.
    """
    pairs = []
    for line in output.split("\n"):
        fields = line.split("\t")
        if len(fields) >= 2:
            pairs.append((fields[0], fields[1]))
    return pairs


def _count_words_and_multiwords(tagged_tweets):
    """Count content words and pattern-matching n-grams over tagged tweets.

    Returns:
        (word_counts, multiword_counts): two ``defaultdict(int)`` objects
        keyed by lower-cased word and by space-joined lower-cased n-gram.
    """
    word_counts = collections.defaultdict(int)
    multiword_counts = collections.defaultdict(int)
    last_tweet = ''
    for tweet, tag_line in tagged_tweets:
        # Skip a tweet identical to the one immediately before it.
        if tweet != last_tweet:
            # Up to three previous (word, tag) pairs, oldest first.
            previous = collections.deque(maxlen=3)
            # zip() stops at the shorter sequence, so a tweet with fewer
            # tags than tokens no longer raises IndexError.
            for raw_word, tag in zip(tweet.split(), tag_line.split()):
                word = raw_word.lower()
                if tag in _CONTENT_TAGS:
                    word_counts[word] += 1
                window = list(previous) + [(word, tag)]
                for size in (2, 3, 4):
                    if len(window) < size:
                        break
                    ngram = window[-size:]
                    if tuple(t for _, t in ngram) in _MULTIWORD_PATTERNS:
                        multiword_counts[" ".join(w for w, _ in ngram)] += 1
                previous.append((word, tag))
        last_tweet = tweet
    return word_counts, multiword_counts


def _score_multiwords(multiword_counts, word_counts):
    """Score each multiword and keep positive-scoring ones detected as English.

    The score starts at the multiword's own count; for every component word
    present in ``word_counts`` it is reduced by how many more times that word
    occurred than the multiword did.
    """
    scores = {}
    for phrase, count in multiword_counts.items():
        score = count
        for word in phrase.split():
            if word in word_counts:
                score -= word_counts[word] - count
        if score <= 0:
            continue
        try:
            lang = detect(phrase)
        except Exception:
            # Language detection failed for this phrase: drop it.
            continue
        if lang == 'en':
            scores[phrase] = score
    return scores


@app.route('/search_trends', methods=['GET', 'POST'])
def search_trends():
    """Extract scored multiword expressions from uploaded or searched tweets.

    If the request carries a ``file`` upload, its lines are used as tweets.
    Otherwise the form field ``data`` must be a JSON list of search queries,
    and tweets are fetched from Twitter for each of them.

    Returns:
        A JSON object mapping multiword expression to score, ordered by
        descending score.
    """
    upload = request.files.get('file')
    if upload:
        tweets = [x.decode().strip() for x in upload.stream.readlines()]
        tweets_file = 'tweets_upload.txt'
    else:
        tweets_file = 'tweets_trends.txt'
        tweets = _fetch_trend_tweets(json.loads(request.form['data']))

    # When no tweets were obtained, the file is left untouched and the tagger
    # runs on whatever it held from a previous request (original behavior).
    if tweets:
        # UTF-8 matches how the tagger output is decoded afterwards.
        with open(tweets_file, 'w', encoding='utf-8') as stream:
            stream.write("\n".join(tweets))

    tagged_tweets = _parse_tagger_output(_run_tagger(tweets_file))
    word_counts, multiword_counts = _count_words_and_multiwords(tagged_tweets)
    scores = _score_multiwords(multiword_counts, word_counts)

    ordered_scores = collections.OrderedDict(
        sorted(scores.items(), key=lambda item: item[1], reverse=True))
    return json.dumps(ordered_scores)


if __name__ == '__main__':
    # NOTE: debug mode enables the interactive debugger; development only.
    app.debug = True
    app.run()