mendelevium · August 29, 2015 14:14
diff --git a/r_tricks.r b/r_tricks.r
 # Run an R script
 # From the shell (this is a shell command, not R code, so it stays a comment):
 #   Rscript myscript.R
 # From within an R session:
 source('myscript.R')

 ### Use getopt to parse arguments!!!
 library("getopt")

 # Option specification: one row per option, with the columns
 #   long name, short flag, argument mask, data type
 # Argument mask values:
 #   0: no argument
 #   1: required argument
 #   2: optional argument
 spec <- matrix(
   c(
     "input",     "i", 1, "character",
     "outPrefix", "o", 1, "character",
     "help",      "h", 0, "logical"
   ),
   ncol = 4,
   byrow = TRUE
 )
 opt <- getopt(spec)

 # When the help flag was given, print the usage message and quit
 if (!is.null(opt$help)) {
   cat(getopt(spec, usage = TRUE))
   q(status = 1)
 }
 ### end getopt parsing

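 # the "las" argument of plotting functions sets the orientation of axis labels
 # (0 = parallel to the axis, 1 = horizontal, 2 = perpendicular, 3 = vertical)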

 # simple reading of a tab-delimited file into a data frame
 df <- read.delim('tab_delim_file.txt',  # file to read
                  sep='\t',  # tab delimiter (already the read.delim default)
                  row.names=1)  # use the first column as the row names

 # simple stat info
 # sd() needs a vector and fails on a whole data frame, so apply it per column
 vapply(df, sd, numeric(1))  # standard deviation of each (numeric) column

 rowSums(df)  # sum by rows

 # divide columns by column sums
 dfNorm <- sweep(df, 2, colSums(df), "/")

 # merge two data frames (similar to pandas)
 merged_df <- merge(df1, df2,
                    by.x="foo", by.y="bar",  # key column names, given as strings
                    all=TRUE)  # full outer join; without all=TRUE it is an inner join

 # read a single named attribute off an object
 attr(myObj, "attrname")

 # turn a factor with number-like labels into numbers
 # (go through character so the labels are used, not the internal level codes)
 myNumeric <- as.numeric(as.character(myFactor))

 # inspect or replace the dimension names of a data.frame
 colnames(df)                    # current column names
 colnames(df) <- c("Col1", ...)  # assign column names ("..." = remaining names)
 rownames(df)                    # current row names
 rownames(df) <- myVar           # assign row names

 # read a simple list of genes (one gene name per line)
 mygenes <- read.table("single_column.txt", header=FALSE)
 # select the only column in the file; as.character() guards against the column
 # being a factor, which would index rows by level code instead of by name
 mygenes <- as.character(mygenes[,1])

 # select rows by rownames
 df[mygenes,]

 # select rows by column value
 df[df$total>10,]

 # check if an element is in a vector
 'b' %in% myvector  # a single TRUE/FALSE
 df$col1 %in% myvector  # returns a boolean vector, one entry per element of col1

 # get the names attribute of an object (e.g. element or column names)
 names(myobj)

 # print the structure of a data frame, including each column's type
 str(df)

 # concatenate two strings together
 paste0('Hello ', 'world')  # paste0() is paste() with sep=''

 # combine data frames by rows/cols
 rbind(df1, df2)  # row wise: stack df2 under df1 (same columns)
 cbind(df1, df2)  # column wise: put df2 beside df1 (same number of rows)

 # subsets of data
 df.sub <- subset(df, Column1<13)  # rows where the condition is NA are dropped
 df.sub <- df[df$Column1<13,]  # same filter, but NA conditions yield all-NA rows

 # reorder factor levels by a summary statistic (handy for ordered plots):
 # the spray levels are sorted by the median insect count of each spray
 bymedian <- reorder(InsectSprays$spray, InsectSprays$count, FUN = median)

 # build a length-10 vector filled with NA
 na_vec <- rep(NA, times = 10)

 # sampling
 sample(10)  # shuffle the integers 1..10
 sample(c(1, 7, 3, 9), size = 5, replace = TRUE)  # 5 draws, repeats allowed
 df <- df[sample(nrow(df)), ]  # put the rows of df in random order

 # KDE: kernel density estimate of one column
 density(df$Column1)

 # save plots: open a graphics device, draw, then close the device
 png()  # open a PNG file device; pdf(), jpeg(), etc work the same way
 hist(df$Column1)  # drawn into the open device instead of on screen
 dev.off()  # close the device so the file is written out

 # simple R ML
 data(iris)
 # which(): indices of the TRUE entries; here every 5th row becomes test data
 testidx <- which(seq_len(nrow(iris)) %% 5 == 0)

 # negative indices drop rows: train on everything not in testidx
 iristrain <- iris[-testidx, ]
 # the held-out rows form the test set used by the model examples below
 iristest <- iris[testidx, ]

 # naive bayes
 library(klaR)  # provides NaiveBayes(); loaded before use like rpart and e1071
 nbmodel <- NaiveBayes(Species ~ ., data=iristrain)
 prediction <- predict(nbmodel, iristest[, -5])  # column 5 is Species, the label
 attributes(prediction)  # check attributes of object
 table(prediction$class, iristest[, 5])  # get confusion matrix (predicted vs actual)

 # decision tree
 library(rpart)
 treemodel <- rpart(Species ~ ., data = iristrain)  # fit a classification tree
 plot(treemodel)                                    # draw the tree skeleton
 text(treemodel, use.n = TRUE, cex = 0.6)           # label the nodes
 prediction <- predict(treemodel, newdata = iristest, type = "class")
 table(prediction, iristest$Species)                # confusion matrix

 # svm
 library(e1071)
 svmpred <- svm(Species ~ ., data = iristrain)
 prediction <- predict(svmpred, iristest)
 table(prediction, iristest$Species)  # confusion matrix
 # 2-D view of the fit; the two unplotted predictors are held at fixed values
 plot(
   svmpred, iris, Petal.Width ~ Petal.Length,
   slice = list(Sepal.Width = 3, Sepal.Length = 4)
 )
 # grid search over gamma and cost
 tune <- tune.svm(
   Species ~ ., data = iristrain,
   gamma = 10^(-5:0), cost = 10^(0:5)
 )
 summary(tune)  # reports the best parameters

 # roc curve inputs: a score per test row plus the true binary label
 # (NaiveBayes() comes from the klaR package)
 nbmodel <- NaiveBayes(Species ~ ., data=iristrain)
 prediction <- predict(nbmodel, iristest[, -5])
 # the posterior probability of the "virginica" class is used as the score
 score <- prediction$posterior[, "virginica"]
 actual_class <- iristest$Species == 'virginica'
	# Run an R script
	# From the shell (this is a shell command, not R code, so it stays a comment):
	#   Rscript myscript.R
	# From within an R session:
	source('myscript.R')

	### Use getopt to parse arguments!!!
	library("getopt")

	# Option specification: one row per option, with the columns
	#   long name, short flag, argument mask, data type
	# Argument mask values:
	#   0: no argument
	#   1: required argument
	#   2: optional argument
	spec <- matrix(
	  c(
	    "input",     "i", 1, "character",
	    "outPrefix", "o", 1, "character",
	    "help",      "h", 0, "logical"
	  ),
	  ncol = 4,
	  byrow = TRUE
	)
	opt <- getopt(spec)

	# When the help flag was given, print the usage message and quit
	if (!is.null(opt$help)) {
	  cat(getopt(spec, usage = TRUE))
	  q(status = 1)
	}
	### end getopt parsing

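	# the "las" argument of plotting functions sets the orientation of axis labels
	# (0 = parallel to the axis, 1 = horizontal, 2 = perpendicular, 3 = vertical)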

	# simple reading of a tab-delimited file into a data frame
	df <- read.delim('tab_delim_file.txt', # file to read
	                 sep='\t', # tab delimiter (already the read.delim default)
	                 row.names=1) # use the first column as the row names

	# simple stat info
	# sd() needs a vector and fails on a whole data frame, so apply it per column
	vapply(df, sd, numeric(1)) # standard deviation of each (numeric) column

	rowSums(df) # sum by rows

	# divide columns by column sums
	dfNorm <- sweep(df, 2, colSums(df), "/")

	# merge two data frames (similar to pandas)
	merged_df <- merge(df1, df2,
	                   by.x="foo", by.y="bar", # key column names, given as strings
	                   all=TRUE) # full outer join; without all=TRUE it is an inner join

	# read a single named attribute off an object
	attr(myObj, "attrname")

	# turn a factor with number-like labels into numbers
	# (go through character so the labels are used, not the internal level codes)
	myNumeric <- as.numeric(as.character(myFactor))

	# inspect or replace the dimension names of a data.frame
	colnames(df)                    # current column names
	colnames(df) <- c("Col1", ...)  # assign column names ("..." = remaining names)
	rownames(df)                    # current row names
	rownames(df) <- myVar           # assign row names

	# read a simple list of genes (one gene name per line)
	mygenes <- read.table("single_column.txt", header=FALSE)
	# select the only column in the file; as.character() guards against the column
	# being a factor, which would index rows by level code instead of by name
	mygenes <- as.character(mygenes[,1])

	# select rows by rownames
	df[mygenes,]

	# select rows by column value
	df[df$total>10,]

	# check if an element is in a vector
	'b' %in% myvector # a single TRUE/FALSE
	df$col1 %in% myvector # returns a boolean vector, one entry per element of col1

	# get the names attribute of an object (e.g. element or column names)
	names(myobj)

	# print the structure of a data frame, including each column's type
	str(df)

	# concatenate two strings together
	paste0('Hello ', 'world') # paste0() is paste() with sep=''

	# combine data frames by rows/cols
	rbind(df1, df2) # row wise: stack df2 under df1 (same columns)
	cbind(df1, df2) # column wise: put df2 beside df1 (same number of rows)

	# subsets of data
	df.sub <- subset(df, Column1<13) # rows where the condition is NA are dropped
	df.sub <- df[df$Column1<13,] # same filter, but NA conditions yield all-NA rows

	# reorder factor levels by a summary statistic (handy for ordered plots):
	# the spray levels are sorted by the median insect count of each spray
	bymedian <- reorder(InsectSprays$spray, InsectSprays$count, FUN = median)

	# build a length-10 vector filled with NA
	na_vec <- rep(NA, times = 10)

	# sampling
	sample(10) # shuffle the integers 1..10
	sample(c(1, 7, 3, 9), size = 5, replace = TRUE) # 5 draws, repeats allowed
	df <- df[sample(nrow(df)), ] # put the rows of df in random order

	# KDE: kernel density estimate of one column
	density(df$Column1)

	# save plots: open a graphics device, draw, then close the device
	png() # open a PNG file device; pdf(), jpeg(), etc work the same way
	hist(df$Column1) # drawn into the open device instead of on screen
	dev.off() # close the device so the file is written out

	# simple R ML
	data(iris)
	# which(): indices of the TRUE entries; here every 5th row becomes test data
	testidx <- which(seq_len(nrow(iris)) %% 5 == 0)

	# negative indices drop rows: train on everything not in testidx
	iristrain <- iris[-testidx, ]
	# the held-out rows form the test set used by the model examples below
	iristest <- iris[testidx, ]

	# naive bayes
	library(klaR) # provides NaiveBayes(); loaded before use like rpart and e1071
	nbmodel <- NaiveBayes(Species ~ ., data=iristrain)
	prediction <- predict(nbmodel, iristest[, -5]) # column 5 is Species, the label
	attributes(prediction) # check attributes of object
	table(prediction$class, iristest[, 5]) # get confusion matrix (predicted vs actual)

	# decision tree
	library(rpart)
	treemodel <- rpart(Species ~ ., data = iristrain)  # fit a classification tree
	plot(treemodel)                                    # draw the tree skeleton
	text(treemodel, use.n = TRUE, cex = 0.6)           # label the nodes
	prediction <- predict(treemodel, newdata = iristest, type = "class")
	table(prediction, iristest$Species)                # confusion matrix

	# svm
	library(e1071)
	svmpred <- svm(Species ~ ., data = iristrain)
	prediction <- predict(svmpred, iristest)
	table(prediction, iristest$Species) # confusion matrix
	# 2-D view of the fit; the two unplotted predictors are held at fixed values
	plot(
	  svmpred, iris, Petal.Width ~ Petal.Length,
	  slice = list(Sepal.Width = 3, Sepal.Length = 4)
	)
	# grid search over gamma and cost
	tune <- tune.svm(
	  Species ~ ., data = iristrain,
	  gamma = 10^(-5:0), cost = 10^(0:5)
	)
	summary(tune) # reports the best parameters

	# roc curve inputs: a score per test row plus the true binary label
	# (NaiveBayes() comes from the klaR package)
	nbmodel <- NaiveBayes(Species ~ ., data=iristrain)
	prediction <- predict(nbmodel, iristest[, -5])
	# the posterior probability of the "virginica" class is used as the score
	score <- prediction$posterior[, "virginica"]
	actual_class <- iristest$Species == 'virginica'