"""Count word frequencies in a tweet dump and split them by sentiment lexicon.

Reads ``positive.txt``, ``negative.txt`` and ``data.txt`` from the current
directory and writes three ``word,count`` CSV files sorted by descending
count:

* ``word_count.csv``    -- every word found in ``data.txt``
* ``word_positive.csv`` -- words that appear in the positive lexicon
* ``word_negative.csv`` -- words that appear in the negative lexicon only

The code is written to run unchanged on Python 2.7 and Python 3.
"""
import json
import re
from collections import Counter

# Punctuation, digits, possessive "'s" and stray apostrophes removed before
# tokenising.  Raw string so that ``\.`` and ``\d`` reach the regex engine
# instead of being treated as (invalid) string escapes.
_STRIP_PATTERN = re.compile(r"[,\.()\":;!@#$%^&*\d]|'s|'")


def tokenize(text):
    """Return the lower-cased words of *text* with punctuation/digits removed.

    Splitting is done on any run of whitespace, so consecutive spaces, tabs
    and newlines never produce empty-string tokens.
    """
    # Strip first, lower-case second: the pattern only matches a lower-case
    # "'s", so the order affects how an upper-case "'S" is handled.
    return _STRIP_PATTERN.sub('', text).lower().split()


def load_words(path):
    """Read the file at *path* and return its contents as a list of tokens."""
    with open(path) as handle:
        return tokenize(handle.read())


def count_words(words, positive, negative):
    """Tally *words* overall and per sentiment lexicon.

    Args:
        words: iterable of tokens to count.
        positive: container of positive words (a ``set`` keeps lookups O(1)).
        negative: container of negative words.

    Returns:
        A ``(total, positive_counts, negative_counts)`` tuple of ``Counter``
        objects.  A word present in both lexicons is counted as positive only.
    """
    total = Counter()
    positive_counts = Counter()
    negative_counts = Counter()
    for word in words:
        total[word] += 1
        if word in positive:
            positive_counts[word] += 1
        elif word in negative:
            negative_counts[word] += 1
    return total, positive_counts, negative_counts


def write_counts(path, counts):
    """Write *counts* (a ``Counter``) to *path* as ``word,count`` lines.

    Lines are ordered by descending count.
    """
    with open(path, 'w') as handle:
        handle.writelines(
            '%s,%s\n' % (word, count) for word, count in counts.most_common()
        )


def main():
    """Load the lexicons, count the words in ``data.txt``, write the CSVs."""
    # Sets instead of lists: each membership test in the counting loop would
    # otherwise scan the whole lexicon.
    positive = set(load_words('positive.txt'))
    # Sanity checks on the loaded lexicons.  Single-argument print() calls
    # produce the same output on Python 2 and Python 3.
    print('cool is in the positive list:  %s' % ('cool' in positive))

    negative = set(load_words('negative.txt'))
    print('abrade is in the negative list:  %s' % ('abrade' in negative))
    # Must be checked against the *negative* lexicon to match the message.
    print('cool is in the negative list:  %s' % ('cool' in negative))

    total, positive_counts, negative_counts = count_words(
        load_words('data.txt'), positive, negative
    )

    write_counts('word_count.csv', total)
    write_counts('word_positive.csv', positive_counts)
    write_counts('word_negative.csv', negative_counts)


if __name__ == '__main__':
    main()