Skip to content

Instantly share code, notes, and snippets.

@mmmayo13
Last active November 7, 2019 11:58
Show Gist options
  • Select an option

  • Save mmmayo13/4fb6adb245532622d3e9ec12a8bb1f29 to your computer and use it in GitHub Desktop.

Select an option

Save mmmayo13/4fb6adb245532622d3e9ec12a8bb1f29 to your computer and use it in GitHub Desktop.

Revisions

  1. mmmayo13 revised this gist Nov 7, 2019. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion vocabulary.py
    Original file line number Diff line number Diff line change
    @@ -8,7 +8,7 @@ def __init__(self, name):
    self.word2index = {}
    self.word2count = {}
    self.index2word = {PAD_token: "PAD", SOS_token: "SOS", EOS_token: "EOS"}
    - self.num_words = 3 # Count SOS, EOS, PAD
    + self.num_words = 3
    self.num_sentences = 0
    self.longest_sentence = 0

  2. mmmayo13 created this gist Nov 7, 2019.
    41 changes: 41 additions & 0 deletions vocabulary.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,41 @@
    class Vocabulary:
        """Bidirectional word <-> integer-index mapping built from sentences.

        Three indices are reserved for special tokens (padding,
        start-of-sentence, end-of-sentence); every new word added afterwards
        receives the next free index. Per-word occurrence counts, the number
        of sentences seen, and the length (in tokens) of the longest sentence
        are tracked as sentences are added.
        """

        PAD_token = 0  # Used for padding short sentences
        SOS_token = 1  # Start-of-sentence token
        EOS_token = 2  # End-of-sentence token

        def __init__(self, name: str) -> None:
            """Create an empty vocabulary holding only the reserved tokens.

            Args:
                name: Label stored on the instance as ``self.name``.
            """
            self.name = name
            self.word2index = {}
            self.word2count = {}
            # The token constants are class attributes, so they must be
            # reached through ``self`` (class scope is not visible from
            # inside a method body; bare names would raise NameError).
            self.index2word = {
                self.PAD_token: "PAD",
                self.SOS_token: "SOS",
                self.EOS_token: "EOS",
            }
            # Next free index; starts after the reserved PAD/SOS/EOS entries.
            self.num_words = len(self.index2word)
            self.num_sentences = 0
            self.longest_sentence = 0

        def add_word(self, word: str) -> None:
            """Register one occurrence of *word*.

            A word seen for the first time is assigned the next free index
            and a count of 1; a known word only has its count incremented.
            """
            if word not in self.word2index:
                # First entry of word into vocabulary
                self.word2index[word] = self.num_words
                self.word2count[word] = 1
                self.index2word[self.num_words] = word
                self.num_words += 1
            else:
                # Word exists; increase word count
                self.word2count[word] += 1

        def add_sentence(self, sentence: str) -> None:
            """Add every token of *sentence* and update sentence statistics.

            Tokens are obtained with ``sentence.split(' ')`` (single-space
            delimiter), so consecutive spaces produce empty-string tokens.
            This is kept deliberately so that indices stay aligned with
            callers that tokenise the same way.
            """
            words = sentence.split(' ')
            for word in words:
                self.add_word(word)
            # Track the longest sentence seen so far, measured in tokens.
            self.longest_sentence = max(self.longest_sentence, len(words))
            # Count the number of sentences
            self.num_sentences += 1

        def to_word(self, index: int) -> str:
            """Return the word stored at *index*.

            Raises:
                KeyError: If *index* is not in the vocabulary.
            """
            return self.index2word[index]

        def to_index(self, word: str) -> int:
            """Return the index assigned to *word*.

            Raises:
                KeyError: If *word* has not been added. The reserved tokens
                    "PAD", "SOS" and "EOS" are only in the index-to-word
                    table, so they also raise unless added as ordinary words.
            """
            return self.word2index[word]