Last active
October 30, 2020 05:20
-
-
Save lucinda-lim/478ab20a313e8cf7ed43fd616ec38ae2 to your computer and use it in GitHub Desktop.
Revisions
-
Lim Cai Xian Lucinda renamed this gist
Oct 30, 2020 . 1 changed file with 0 additions and 0 deletions.There are no files selected for viewing
File renamed without changes. -
Lim Cai Xian Lucinda revised this gist
Oct 29, 2020 . 1 changed file with 4 additions and 1 deletion.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -25,4 +25,7 @@ plt.plot(x, coherence_values) plt.xlabel("Num Topics") plt.ylabel("Coherence score") plt.legend(("coherence_values"), loc='best') plt.show() ##print values for m, cv in zip(x, coherence_values): print("Num Topics =", m, " has Coherence Value of", round(cv, 4)) -
Lim Cai Xian Lucinda created this gist
Oct 29, 2020 .There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,28 @@ def compute_coherence_values(dictionary, corpus, texts, limit, start=2, step=3): coherence_values = [] model_list = [] for num_topics in range(start, limit, step): model = gensim.models.ldamodel.LdaModel(corpus=corpus, id2word=id2word, num_topics=num_topics, random_state=100, update_every=1, chunksize=100, passes=10, alpha='auto', per_word_topics=True) model_list.append(model) coherencemodel = CoherenceModel(model=model, texts=texts, dictionary=dictionary, coherence='c_v') coherence_values.append(coherencemodel.get_coherence()) return model_list, coherence_values ## ---------------------------------------------------------------------------------------------- model_list, coherence_values = compute_coherence_values(dictionary=id2word, corpus=corpus, texts=data_lemmatized, start=2, limit=21, step=1) ## visualize limit=21; start=2; step=1; x = range(start, limit, step) plt.plot(x, coherence_values) plt.xlabel("Num Topics") plt.ylabel("Coherence score") plt.legend(("coherence_values"), loc='best') plt.show()