Skip to content

Instantly share code, notes, and snippets.

@lucinda-lim
Last active October 30, 2020 05:20
Show Gist options
  • Save lucinda-lim/478ab20a313e8cf7ed43fd616ec38ae2 to your computer and use it in GitHub Desktop.
Save lucinda-lim/478ab20a313e8cf7ed43fd616ec38ae2 to your computer and use it in GitHub Desktop.
def compute_coherence_values(dictionary, corpus, texts, limit, start=2, step=3):
    """Train one LDA model per candidate topic count and score its coherence.

    For every ``num_topics`` in ``range(start, limit, step)`` an LdaModel is
    trained on ``corpus`` and evaluated with a ``c_v`` CoherenceModel.

    Args:
        dictionary: Id-to-word mapping handed to LdaModel (as ``id2word``)
            and to CoherenceModel (presumably a gensim Dictionary -- confirm
            against the caller).
        corpus: Corpus handed to LdaModel.
        texts: Texts handed to CoherenceModel for the ``c_v`` measure.
        limit: Exclusive upper bound on the number of topics.
        start: First number of topics to try.
        step: Increment between successive numbers of topics.

    Returns:
        A ``(model_list, coherence_values)`` tuple of parallel lists: the
        trained models and their coherence scores, in the order of
        ``range(start, limit, step)``.
    """
    model_list = []
    coherence_values = []
    for num_topics in range(start, limit, step):
        model = gensim.models.ldamodel.LdaModel(
            corpus=corpus,
            # Use the argument, not a module-level `id2word`, so the model is
            # trained with the same dictionary it is later scored against.
            id2word=dictionary,
            num_topics=num_topics,
            random_state=100,  # fixed seed passed on every iteration
            update_every=1,
            chunksize=100,
            passes=10,
            alpha='auto',
            per_word_topics=True,
        )
        model_list.append(model)
        coherencemodel = CoherenceModel(
            model=model, texts=texts, dictionary=dictionary, coherence='c_v'
        )
        coherence_values.append(coherencemodel.get_coherence())
    return model_list, coherence_values
## ----------------------------------------------------------------------------------------------
# Candidate topic counts. Defined once, before the sweep, so the models that
# are trained and the x-axis of the plot below cannot drift apart.
start = 2
limit = 21
step = 1
model_list, coherence_values = compute_coherence_values(
    dictionary=id2word,
    corpus=corpus,
    texts=data_lemmatized,
    start=start,
    limit=limit,
    step=step,
)

## visualize
x = range(start, limit, step)
plt.plot(x, coherence_values)
plt.xlabel("Num Topics")
plt.ylabel("Coherence score")
# The trailing comma makes this a one-element tuple. A bare parenthesised
# string is iterated character by character, labelling the line just "c".
plt.legend(("coherence_values",), loc='best')
plt.show()

## print values
for m, cv in zip(x, coherence_values):
    print("Num Topics =", m, " has Coherence Value of", round(cv, 4))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment