Skip to content

Instantly share code, notes, and snippets.

@paeltech
Forked from octoparse/c5a44b9288ab-code.py
Created December 31, 2019 13:04
Show Gist options
  • Save paeltech/eee5b71e17a1dc11dbf8db15cd926ba6 to your computer and use it in GitHub Desktop.
Save paeltech/eee5b71e17a1dc11dbf8db15cd926ba6 to your computer and use it in GitHub Desktop.

Revisions

  1. @octoparse octoparse created this gist Apr 16, 2019.
    67 changes: 67 additions & 0 deletions c5a44b9288ab-code.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,67 @@
    import re
    import json

    # Load the positive-word lexicon from positive.txt into p_list.
    with open('positive.txt') as f:
        p_txt = f.read()
    # Remove punctuation, digits, possessive 's and stray apostrophes.
    # Raw string so the regex escapes are not treated as string escapes.
    p_txt = re.sub(r"[,.()\":;!@#$%^&*\d]|'s|'", '', p_txt)
    # split() with no separator splits on any whitespace run and never yields
    # empty strings, so blank lines or repeated spaces cannot put '' into the
    # lexicon (which would make empty tokens look like positive words).
    p_list = p_txt.lower().split()
    # Sanity check: report whether 'cool' is in the positive list.
    # Single-argument print(...) runs on both Python 2 and Python 3.
    print('cool is in the positive list:  %s' % ('cool' in p_list))

    # Load the negative-word lexicon from negative.txt into n_list.
    with open('negative.txt') as f:
        n_txt = f.read()
    # Remove punctuation, digits, possessive 's and stray apostrophes.
    # Raw string so the regex escapes are not treated as string escapes.
    n_txt = re.sub(r"[,.()\":;!@#$%^&*\d]|'s|'", '', n_txt)
    # split() with no separator splits on any whitespace run and never yields
    # empty strings, so blank lines or repeated spaces cannot put '' into the
    # lexicon (which would make empty tokens look like negative words).
    n_list = n_txt.lower().split()
    # Sanity check: report whether 'abrade' is in the negative list.
    # Single-argument print(...) runs on both Python 2 and Python 3.
    print('abrade is in the negative list:  %s' % ('abrade' in n_list))
    # Sanity check: report whether 'cool' is in the negative list. This must
    # look in n_list; the message is about the negative lexicon.
    print('cool is in the negative list:  %s' % ('cool' in n_list))

    # Read the tweets from data.txt and break them into lowercase words.
    with open('data.txt') as f:
        txt = f.read()
    # Remove punctuation, digits, possessive 's and stray apostrophes.
    # Raw string so the regex escapes are not treated as string escapes.
    txt = re.sub(r"[,.()\":;!@#$%^&*\d]|'s|'", '', txt)
    # split() with no separator splits on any whitespace run and drops empty
    # strings, so the empty token '' is never counted as a word.
    word_list = txt.lower().split()

    # Tally word frequencies: every word, positive-lexicon words, and
    # negative-lexicon words.
    word_count_dict = {}
    word_count_positive = {}
    word_count_negative = {}

    # Build sets once so each membership test is a hash lookup instead of a
    # linear scan of the lexicon list for every word.
    positive_words = set(p_list)
    negative_words = set(n_list)

    for word in word_list:
        # Every word contributes to the overall frequency table.
        word_count_dict[word] = word_count_dict.get(word, 0) + 1
        # A word found in both lexicons is counted as positive only, because
        # the positive check comes first.
        if word in positive_words:
            word_count_positive[word] = word_count_positive.get(word, 0) + 1
        elif word in negative_words:
            word_count_negative[word] = word_count_negative.get(word, 0) + 1

    # Turn each tally into a list of (word, count) pairs ordered from the
    # highest count to the lowest.
    list_dict = list(word_count_dict.items())
    list_dict.sort(key=lambda pair: pair[1], reverse=True)

    list_positive = list(word_count_positive.items())
    list_positive.sort(key=lambda pair: pair[1], reverse=True)

    list_negative = list(word_count_negative.items())
    list_negative.sort(key=lambda pair: pair[1], reverse=True)

    # Write each ranked tally to its own CSV file, one "word,count" row per
    # line, in the order the rows appear in the list.
    for csv_name, ranked in (
        ('word_count.csv', list_dict),
        ('word_positive.csv', list_positive),
        ('word_negative.csv', list_negative),
    ):
        with open(csv_name, 'w') as f1:
            for word, count in ranked:
                f1.write('%s,%s\n' % (word, str(count)))