Last active
November 7, 2019 11:58
-
-
Save mmmayo13/4fb6adb245532622d3e9ec12a8bb1f29 to your computer and use it in GitHub Desktop.
Revisions
-
mmmayo13 revised this gist
Nov 7, 2019 · 1 changed file with 1 addition and 1 deletion. There are no files selected for viewing.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -8,7 +8,7 @@ def __init__(self, name): self.word2index = {} self.word2count = {} self.index2word = {PAD_token: "PAD", SOS_token: "SOS", EOS_token: "EOS"} self.num_words = 3 self.num_sentences = 0 self.longest_sentence = 0 -
mmmayo13 created this gist
Nov 7, 2019 · There are no files selected for viewing.
class Vocabulary:
    """Word <-> index mapping with per-word counts and sentence statistics.

    Three special tokens are reserved at indices 0-2, so the first real word
    added to the vocabulary receives index 3.
    """

    PAD_token = 0   # Used for padding short sentences
    SOS_token = 1   # Start-of-sentence token
    EOS_token = 2   # End-of-sentence token

    def __init__(self, name: str) -> None:
        """Create an empty vocabulary labelled *name*."""
        self.name = name
        self.word2index = {}
        self.word2count = {}
        # The token constants live in the class namespace, which is not
        # visible as bare names inside a method; they must be reached through
        # ``self`` (or the class), otherwise a NameError is raised here.
        self.index2word = {
            self.PAD_token: "PAD",
            self.SOS_token: "SOS",
            self.EOS_token: "EOS",
        }
        self.num_words = 3  # Count SOS, EOS, PAD
        self.num_sentences = 0
        self.longest_sentence = 0

    def add_word(self, word: str) -> None:
        """Register one occurrence of *word*, assigning an index if it is new."""
        if word not in self.word2index:
            # First entry of word into vocabulary: it takes the next free index
            self.word2index[word] = self.num_words
            self.word2count[word] = 1
            self.index2word[self.num_words] = word
            self.num_words += 1
        else:
            # Word exists; increase word count
            self.word2count[word] += 1

    def add_sentence(self, sentence: str) -> None:
        """Add every space-separated token of *sentence* to the vocabulary.

        Also updates ``longest_sentence`` (measured in tokens) and increments
        ``num_sentences``.
        """
        # Splitting on a single space is kept deliberately: consecutive spaces
        # (or an empty sentence) produce empty-string tokens, which are added
        # and counted like any other word.
        words = sentence.split(' ')
        for word in words:
            self.add_word(word)
        if len(words) > self.longest_sentence:
            # This is the longest sentence seen so far
            self.longest_sentence = len(words)
        # Count the number of sentences
        self.num_sentences += 1

    def to_word(self, index: int) -> str:
        """Return the word stored at *index*; raises KeyError if unknown."""
        return self.index2word[index]

    def to_index(self, word: str) -> int:
        """Return the index assigned to *word*; raises KeyError if unknown."""
        return self.word2index[word]