lucinda-lim · October 30, 2020 05:20 · Oct 30, 2020 · Oct 29, 2020 · Oct 29, 2020
diff --git a/topic_coherence b/topic_coherence.py
diff --git a/topic_coherence b/topic_coherence
@@ -25,4 +25,7 @@ plt.plot(x, coherence_values)
 plt.xlabel("Num Topics")
 plt.ylabel("Coherence score")
-plt.legend(("coherence_values"), loc='best')
-plt.show()
+plt.legend(("coherence_values",), loc='best')  # trailing comma: a one-element tuple of labels, not a bare string
+plt.show()
+## print the coherence score obtained for each number of topics
+for m, cv in zip(x, coherence_values):
+    print("Num Topics =", m, " has Coherence Value of", round(cv, 4))
diff --git a/topic_coherence b/topic_coherence
@@ -0,0 +1,28 @@
+def compute_coherence_values(dictionary, corpus, texts, limit, start=2, step=3):
+    """Train one LDA model per topic count and score each with c_v coherence.
+
+    Topic counts are range(start, limit, step). `dictionary` is passed to both
+    LdaModel (as id2word) and CoherenceModel, not read from a global id2word.
+    Returns (model_list, coherence_values), two lists in topic-count order.
+    """
+    model_list, coherence_values = [], []
+    for num_topics in range(start, limit, step):
+        model = gensim.models.ldamodel.LdaModel(
+            corpus=corpus, id2word=dictionary, num_topics=num_topics,
+            random_state=100, update_every=1, chunksize=100, passes=10,
+            alpha='auto', per_word_topics=True)
+        model_list.append(model)
+        coherencemodel = CoherenceModel(model=model, texts=texts, dictionary=dictionary, coherence='c_v')
+        coherence_values.append(coherencemodel.get_coherence())
+    return model_list, coherence_values
+## ----------------------------------------------------------------------------------------------
+## topic counts to try; defined once so the training call and the plot's x-axis cannot drift apart
+start, limit, step = 2, 21, 1
+model_list, coherence_values = compute_coherence_values(dictionary=id2word, corpus=corpus, texts=data_lemmatized, start=start, limit=limit, step=step)
+## visualize
+x = range(start, limit, step)
+plt.plot(x, coherence_values)
+plt.xlabel("Num Topics")
+plt.ylabel("Coherence score")
+plt.legend(("coherence_values"), loc='best')
+plt.show()
No results found