def compute_coherence_values(dictionary, corpus, texts, limit, start=2, step=3): coherence_values = [] model_list = [] for num_topics in range(start, limit, step): model = gensim.models.ldamodel.LdaModel(corpus=corpus, id2word=id2word, num_topics=num_topics, random_state=100, update_every=1, chunksize=100, passes=10, alpha='auto', per_word_topics=True) model_list.append(model) coherencemodel = CoherenceModel(model=model, texts=texts, dictionary=dictionary, coherence='c_v') coherence_values.append(coherencemodel.get_coherence()) return model_list, coherence_values ## ---------------------------------------------------------------------------------------------- model_list, coherence_values = compute_coherence_values(dictionary=id2word, corpus=corpus, texts=data_lemmatized, start=2, limit=21, step=1) ## visualize limit=21; start=2; step=1; x = range(start, limit, step) plt.plot(x, coherence_values) plt.xlabel("Num Topics") plt.ylabel("Coherence score") plt.legend(("coherence_values"), loc='best') plt.show() ##print values for m, cv in zip(x, coherence_values): print("Num Topics =", m, " has Coherence Value of", round(cv, 4))