Created
September 16, 2024 11:41
-
-
Save btbytes/2ed6c8f5a73b2e61a45653b4e15ae9ac to your computer and use it in GitHub Desktop.
Revisions
-
btbytes created this gist
Sep 16, 2024 .There are no files selected for viewing
# Toy bigram text generator: learns which words follow which in a sample
# sentence, then produces new text by repeatedly picking a random successor
# of the current word.
import random
from collections import defaultdict

# Sample text
text = "the cat sat on the mat the dog sat on the floor"

# Tokenize the text
words = text.split()

# Build bigram model: each word maps to the list of words that directly
# follow it in the sample. Repeated successors are kept in the list, so
# random.choice() picks a successor in proportion to how often it occurs.
bigrams = defaultdict(list)
for word, follower in zip(words, words[1:]):
    bigrams[word].append(follower)


# Function to generate text
def generate_text(start_word, num_words):
    """Generate up to ``num_words`` words by walking the bigram table.

    Args:
        start_word: First word of the output. It is always included in the
            result, even if it never appears in the sample text.
        num_words: Maximum number of words in the output, counting
            ``start_word``. Values of 1 or less yield just ``start_word``.

    Returns:
        The generated words joined by single spaces. The result is shorter
        than ``num_words`` when the walk reaches a word that has no recorded
        successor in ``bigrams``.
    """
    current_word = start_word
    result = [current_word]
    for _ in range(num_words - 1):
        # Test membership instead of indexing: indexing a defaultdict with a
        # missing key would insert an empty list and random.choice([]) raises.
        if current_word not in bigrams:
            break
        current_word = random.choice(bigrams[current_word])
        result.append(current_word)
    return ' '.join(result)


# Generate text
print(generate_text("the", 5))