"""Porter-stem then WordNet-lemmatize a tokenized sentence and print the result."""
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
# Explicit import instead of `from nltk.stem.porter import *` — PorterStemmer
# is the only name this script uses from that module.
from nltk.stem.porter import PorterStemmer

stemmer = PorterStemmer()

# NOTE(review): `sentence` is not defined in this snippet — it must exist
# before this point (confirm against the surrounding context / caller).
tokens = word_tokenize(sentence)

# Stemming: reduce each token to its Porter stem (e.g. "following" -> "follow").
stemmed_tokens = [stemmer.stem(word) for word in tokens]

# Lemmatization: map each stemmed token to its WordNet lemma.
# (Stems are often not dictionary words, so many pass through unchanged.)
lemmatizer = WordNetLemmatizer()
nltk_lemma_list = [lemmatizer.lemmatize(word) for word in stemmed_tokens]

print("Stemming + Lemmatization:")
print(nltk_lemma_list)

""" Stemming + Lemmatization: ['follow', 'the', 'debut', 'of', 'bitcoin', 'futur', 'etf', 'in', 'the', 'unit', 'state', ',', 'the', 'crypto', 'market', 'is', 'abuzz', 'with', 'talk', 'of', 'an', 'impend', 'ether', 'etf.speak', 'on', 'a', 'show', 'on', 'cnbc', ',', 'michael', 'sonnenshein', ',', 'ceo', 'of', 'grayscal', '--', 'an', 'asset', 'manag', 'compani', 'with', '$', '52', 'billion', 'in', 'asset', 'under', 'manag', '--', 'say', 'it', 'is', 'possibl', '.', 'He', 'said', 'it', "'stand", 'to', 'reason', "'", 'the', 'secur', 'and', 'exchang', 'committe', '(', 'sec', ')', 'will', 'proactiv', 'consid', 'bring', 'ethereum', 'etf', 'and', 'other', 'similar', 'product', 'in', 'the', 'US', 'market.canada', 'alreadi', 'ha', 'bitcoin', ',', ' ethereum', 'etfswhil', 'US', 'regul', 'have', 'allow', 'bitcoin', 'futur', 'etf', 'to', 'be', 'trade', 'on', 'the', 'exchang', ',', 'canada', 'ha', 'allow', 'both', 'bitcoin', 'and', 'ethereum', 'etf', '.'] """