import pandas as pd
import pyLDAvis
import pyLDAvis.sklearn
from sklearn.decomposition import LatentDirichletAllocation
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer

# Topic-model the reliable articles only
news_content = df[df.category == 'reliable'].news_content.tolist()

# Shared vectorizer settings: tokens of 3+ ASCII letters, English stop words removed,
# terms in more than half the documents or fewer than 10 of them dropped
tf_vectorizer = CountVectorizer(strip_accents='unicode',
                                stop_words='english',
                                lowercase=True,
                                token_pattern=r'\b[a-zA-Z]{3,}\b',
                                max_df=0.5,
                                min_df=10)
tfidf_vectorizer = TfidfVectorizer(**tf_vectorizer.get_params())
dtm_tfidf = tfidf_vectorizer.fit_transform(news_content)

# 20-topic LDA on the tf-idf document-term matrix
# (older scikit-learn releases name this parameter n_topics instead of n_components)
lda_tfidf = LatentDirichletAllocation(n_components=20, random_state=0)
lda_tfidf.fit(dtm_tfidf)

# Pass the *fitted* tfidf_vectorizer so pyLDAvis can recover the vocabulary
vis_data = pyLDAvis.sklearn.prepare(lda_tfidf, dtm_tfidf, tfidf_vectorizer)
pyLDAvis.display(vis_data)
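
Alongside the interactive pyLDAvis view, the fitted model's components_ matrix can be ranked per topic as a quick text-only sanity check. The snippet below is a minimal sketch, not part of the original pipeline: the n_top_words value is an illustrative choice, and it assumes a scikit-learn release where the fitted vectorizer exposes get_feature_names_out() (older releases use get_feature_names()).

# Minimal sketch: print the highest-weight terms for each fitted LDA topic.
# Assumes lda_tfidf and tfidf_vectorizer from the cell above are already fitted.
n_top_words = 10  # illustrative choice

terms = tfidf_vectorizer.get_feature_names_out()  # get_feature_names() on older scikit-learn
for topic_idx, weights in enumerate(lda_tfidf.components_):
    # argsort ascending, reverse, keep the n_top_words largest weights
    top_terms = [terms[i] for i in weights.argsort()[::-1][:n_top_words]]
    print(f"Topic {topic_idx:2d}: {' '.join(top_terms)}")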