dpasch01 · June 15, 2017 10:31
diff --git a/LDA Parameter Tuning with R b/LDA Parameter Tuning with R
 #!/usr/bin/env Rscript
 args <- commandArgs(trailingOnly = TRUE)

 # USAGE: $ Rscript --vanilla ldatuning.r input.txt 10 200

 # https://cran.r-project.org/web/packages/ldatuning/vignettes/topics.html
 library("ldatuning")
 library(tm)
 library(SnowballC)

 # The script accepts exactly 3 parameters: the path of the text file the
 # tuning is applied to (every line is read as one document), followed by the
 # minimum and the maximum number of topics to be checked.
 if (length(args) != 3) {
   stop("Please provide the required parameters for LDA Tuning.", call. = FALSE)
 }

 if (!file.exists(args[1])) {
   stop("The file provided does not exist.", call. = FALSE)
 }

 lines <- readLines(args[1])
 if (length(lines) == 0) {
   stop("The file provided is empty.", call. = FALSE)
 }

 # as.integer() turns non-numeric input into NA (with a warning); the warning
 # is silenced because the NA is reported right below with a clearer message.
 from_topics <- suppressWarnings(as.integer(args[2]))
 to_topics <- suppressWarnings(as.integer(args[3]))
 if (is.na(from_topics) || is.na(to_topics)) {
   stop("The minimum and maximum number of topics must be integers.",
        call. = FALSE)
 }

 # LDA needs at least 2 topics, and an inverted range would make the topic
 # sequence built further down fail with an unrelated seq() error.
 if (from_topics < 2L || to_topics < from_topics) {
   stop("The topic range must satisfy 2 <= minimum <= maximum.", call. = FALSE)
 }

 # Following is the processing of the text file: every line becomes a document
 # of a corpus, the text is normalised (lower-casing, stop word, punctuation
 # and number removal, stemming, whitespace squeezing) and the corpus is
 # converted into a DTM - Document Term Matrix object.
 doc.vec <- VectorSource(lines)
 doc.corpus <- Corpus(doc.vec)
 summary(doc.corpus)

 # tolower() is a base function, not a tm transformation, so it is wrapped in
 # content_transformer() to keep the documents valid for DocumentTermMatrix().
 doc.corpus <- tm_map(doc.corpus, content_transformer(tolower))
 # Stop words are removed before the punctuation: entries such as "don't"
 # contain an apostrophe and would no longer match once it has been stripped.
 doc.corpus <- tm_map(doc.corpus, removeWords, stopwords("english"))
 doc.corpus <- tm_map(doc.corpus, removePunctuation)
 doc.corpus <- tm_map(doc.corpus, removeNumbers)

 doc.corpus <- tm_map(doc.corpus, stemDocument)
 doc.corpus <- tm_map(doc.corpus, stripWhitespace)

 DTM <- DocumentTermMatrix(doc.corpus)

 # Remove any empty entries in the vector space so as for the LDATuning to work.
 rowTotals <- apply(DTM, 1, sum)
 DTM <- DTM[rowTotals > 0, ]

 # Fail early with a clear message instead of handing an empty matrix to LDA.
 if (nrow(DTM) == 0) {
   stop("No document contains any term after preprocessing.", call. = FALSE)
 }

 # Run the LDATuning for every candidate number of topics in the requested
 # range, using the Griffiths2004, CaoJuan2009, Arun2010 and Deveaud2014
 # metrics with Gibbs sampling on 2 cores, then plot the results.

 topic_grid <- seq(from_topics, to_topics, by = 1)
 tuning_metrics <- c("Griffiths2004", "CaoJuan2009", "Arun2010", "Deveaud2014")
 # Fixed seed so that repeated runs on the same input are comparable.
 gibbs_control <- list(seed = 77)

 result <- FindTopicsNumber(
   DTM,
   topics = topic_grid,
   metrics = tuning_metrics,
   method = "Gibbs",
   control = gibbs_control,
   mc.cores = 2L,
   verbose = TRUE
 )

 FindTopicsNumber_plot(result)
	#!/usr/bin/env Rscript
	args <- commandArgs(trailingOnly = TRUE)

	# USAGE: $ Rscript --vanilla ldatuning.r input.txt 10 200

	# https://cran.r-project.org/web/packages/ldatuning/vignettes/topics.html
	library("ldatuning")
	library(tm)
	library(SnowballC)

	# The script accepts exactly 3 parameters: the path of the text file the
	# tuning is applied to (every line is read as one document), followed by the
	# minimum and the maximum number of topics to be checked.
	if (length(args) != 3) {
	  stop("Please provide the required parameters for LDA Tuning.", call. = FALSE)
	}

	if (!file.exists(args[1])) {
	  stop("The file provided does not exist.", call. = FALSE)
	}

	lines <- readLines(args[1])
	if (length(lines) == 0) {
	  stop("The file provided is empty.", call. = FALSE)
	}

	# as.integer() turns non-numeric input into NA (with a warning); the warning
	# is silenced because the NA is reported right below with a clearer message.
	from_topics <- suppressWarnings(as.integer(args[2]))
	to_topics <- suppressWarnings(as.integer(args[3]))
	if (is.na(from_topics) || is.na(to_topics)) {
	  stop("The minimum and maximum number of topics must be integers.",
	       call. = FALSE)
	}

	# LDA needs at least 2 topics, and an inverted range would make the topic
	# sequence built further down fail with an unrelated seq() error.
	if (from_topics < 2L || to_topics < from_topics) {
	  stop("The topic range must satisfy 2 <= minimum <= maximum.", call. = FALSE)
	}

	# Following is the processing of the text file: every line becomes a document
	# of a corpus, the text is normalised (lower-casing, stop word, punctuation
	# and number removal, stemming, whitespace squeezing) and the corpus is
	# converted into a DTM - Document Term Matrix object.
	doc.vec <- VectorSource(lines)
	doc.corpus <- Corpus(doc.vec)
	summary(doc.corpus)

	# tolower() is a base function, not a tm transformation, so it is wrapped in
	# content_transformer() to keep the documents valid for DocumentTermMatrix().
	doc.corpus <- tm_map(doc.corpus, content_transformer(tolower))
	# Stop words are removed before the punctuation: entries such as "don't"
	# contain an apostrophe and would no longer match once it has been stripped.
	doc.corpus <- tm_map(doc.corpus, removeWords, stopwords("english"))
	doc.corpus <- tm_map(doc.corpus, removePunctuation)
	doc.corpus <- tm_map(doc.corpus, removeNumbers)

	doc.corpus <- tm_map(doc.corpus, stemDocument)
	doc.corpus <- tm_map(doc.corpus, stripWhitespace)

	DTM <- DocumentTermMatrix(doc.corpus)

	# Remove any empty entries in the vector space so as for the LDATuning to work.
	rowTotals <- apply(DTM, 1, sum)
	DTM <- DTM[rowTotals > 0, ]

	# Fail early with a clear message instead of handing an empty matrix to LDA.
	if (nrow(DTM) == 0) {
	  stop("No document contains any term after preprocessing.", call. = FALSE)
	}

	# Run the LDATuning for every candidate number of topics in the requested
	# range, using the Griffiths2004, CaoJuan2009, Arun2010 and Deveaud2014
	# metrics with Gibbs sampling on 2 cores, then plot the results.

	topic_grid <- seq(from_topics, to_topics, by = 1)
	tuning_metrics <- c("Griffiths2004", "CaoJuan2009", "Arun2010", "Deveaud2014")
	# Fixed seed so that repeated runs on the same input are comparable.
	gibbs_control <- list(seed = 77)

	result <- FindTopicsNumber(
	  DTM,
	  topics = topic_grid,
	  metrics = tuning_metrics,
	  method = "Gibbs",
	  control = gibbs_control,
	  mc.cores = 2L,
	  verbose = TRUE
	)

	FindTopicsNumber_plot(result)
No results found