lucinda-lim · October 30, 2020 05:20
diff --git a/topic_coherence.py b/topic_coherence.py
 def compute_coherence_values(dictionary, corpus, texts, limit, start=2, step=3):
     """Train one LDA model per candidate topic count and score its coherence.

     A model is fitted for every ``num_topics`` in ``range(start, limit, step)``
     and scored with a ``CoherenceModel`` using the ``'c_v'`` measure.

     Args:
         dictionary: Passed to ``LdaModel`` as ``id2word`` and to
             ``CoherenceModel`` as ``dictionary``.
         corpus: Passed to ``LdaModel`` as ``corpus``.
         texts: Passed to ``CoherenceModel`` as ``texts``.
         limit: Exclusive upper bound of the topic counts tried.
         start: First topic count tried.
         step: Increment between successive topic counts.

     Returns:
         A ``(model_list, coherence_values)`` tuple of two parallel lists:
         the fitted models and their coherence scores, in the order the
         topic counts were tried. Both are empty when the range is empty.
     """
     coherence_values = []
     model_list = []
     for num_topics in range(start, limit, step):
         # Use the `dictionary` argument, not a module-level `id2word`, so the
         # LDA model and the coherence model always share the same vocabulary
         # and the function does not depend on a global being defined.
         model = gensim.models.ldamodel.LdaModel(
             corpus=corpus,
             id2word=dictionary,
             num_topics=num_topics,
             random_state=100,  # fixed seed for repeatable runs
             update_every=1,
             chunksize=100,
             passes=10,
             alpha='auto',
             per_word_topics=True,
         )
         model_list.append(model)
         coherencemodel = CoherenceModel(model=model, texts=texts, dictionary=dictionary, coherence='c_v')
         coherence_values.append(coherencemodel.get_coherence())
     return model_list, coherence_values
 ## ----------------------------------------------------------------------------------------------
 ## Sweep bounds are defined once so the training call and the plot's x-axis cannot drift apart.
 limit = 21
 start = 2
 step = 1
 model_list, coherence_values = compute_coherence_values(dictionary=id2word, corpus=corpus, texts=data_lemmatized, start=start, limit=limit, step=step)

 ## visualize
 x = range(start, limit, step)
 plt.plot(x, coherence_values)
 plt.xlabel("Num Topics")
 plt.ylabel("Coherence score")
 ## The trailing comma makes this a one-element tuple of labels; without it the
 ## parentheses are just grouping and the argument is a plain string.
 plt.legend(("coherence_values",), loc='best')
 plt.show()
 ##print values
 for m, cv in zip(x, coherence_values):
     print("Num Topics =", m, " has Coherence Value of", round(cv, 4))
	def compute_coherence_values(dictionary, corpus, texts, limit, start=2, step=3):
	    """Fit an LDA model for each candidate topic count and collect coherence scores.

	    For every ``num_topics`` in ``range(start, limit, step)`` an ``LdaModel``
	    is trained and then scored by a ``CoherenceModel`` with ``coherence='c_v'``.

	    Args:
	        dictionary: Used as ``id2word`` for ``LdaModel`` and as
	            ``dictionary`` for ``CoherenceModel``.
	        corpus: Used as ``corpus`` for ``LdaModel``.
	        texts: Used as ``texts`` for ``CoherenceModel``.
	        limit: Topic counts stop before this value (exclusive).
	        start: First topic count tried.
	        step: Gap between consecutive topic counts.

	    Returns:
	        ``(model_list, coherence_values)``: the fitted models and their
	        coherence scores as two index-aligned lists. Both are empty if the
	        range yields no topic counts.
	    """
	    coherence_values = []
	    model_list = []
	    for num_topics in range(start, limit, step):
	        # Pass the `dictionary` parameter instead of reaching for a global
	        # `id2word`; the function then works with whatever vocabulary the
	        # caller supplies and matches the one given to CoherenceModel.
	        model = gensim.models.ldamodel.LdaModel(
	            corpus=corpus,
	            id2word=dictionary,
	            num_topics=num_topics,
	            random_state=100,  # fixed seed for repeatable runs
	            update_every=1,
	            chunksize=100,
	            passes=10,
	            alpha='auto',
	            per_word_topics=True,
	        )
	        model_list.append(model)
	        coherencemodel = CoherenceModel(model=model, texts=texts, dictionary=dictionary, coherence='c_v')
	        coherence_values.append(coherencemodel.get_coherence())
	    return model_list, coherence_values
	## ----------------------------------------------------------------------------------------------
	## One set of sweep bounds feeds both the training call and the plot axis.
	limit = 21
	start = 2
	step = 1
	model_list, coherence_values = compute_coherence_values(dictionary=id2word, corpus=corpus, texts=data_lemmatized, start=start, limit=limit, step=step)

	## visualize
	x = range(start, limit, step)
	plt.plot(x, coherence_values)
	plt.xlabel("Num Topics")
	plt.ylabel("Coherence score")
	## ("coherence_values") is only a parenthesised string; the trailing comma
	## is what turns it into a one-element tuple of legend labels.
	plt.legend(("coherence_values",), loc='best')
	plt.show()
	##print values
	for m, cv in zip(x, coherence_values):
	    print("Num Topics =", m, " has Coherence Value of", round(cv, 4))