# Run an R script
# From the command line (this is a shell command, not R code):
#   Rscript myscript.R
# From within an R session:
source("myscript.R")

### Parse command line arguments with getopt
library(getopt)

# Each row of the spec describes one flag:
#   long name, short name, argument mask, data type
# Argument mask values:
#   0: the flag takes no argument
#   1: the flag takes a required argument
#   2: the flag takes an optional argument
spec <- rbind(
  c("input", "i", 1, "character"),
  c("outPrefix", "o", 1, "character"),
  c("help", "h", 0, "logical")
)
opt <- getopt(spec)

# When the help flag was given, print the usage message and exit
if (!is.null(opt$help)) {
  cat(getopt(spec, usage = TRUE))
  quit(status = 1)
}
### end getopt parsing

# the "las" graphical parameter sets the orientation of axis tick labels
# (0 = parallel to the axis, 1 = horizontal, 2 = perpendicular, 3 = vertical)

# Read a tab-delimited file into a data frame
df <- read.delim(
  "tab_delim_file.txt", # path of the file to read
  sep = "\t",           # fields are separated by tabs
  row.names = 1         # take the row names from the first column
)

# simple stat info
# sd() expects a numeric vector (calling it on a whole data frame is an
# error in current R), so it is applied to each column separately
vapply(df, sd, numeric(1))  # standard deviation of every column

rowSums(df)  # sum by rows

# Scale every column so that it sums to 1: sweep() walks over the columns
# (MARGIN = 2) and divides each one by its own column total
dfNorm <- sweep(df, MARGIN = 2, STATS = colSums(df), FUN = "/")

# merge two data frames (similar to pandas)
# by.x / by.y specify the key column(s) of df1 / df2 respectively
merged_df <- merge(df1, df2,
                   by.x = foo, by.y = bar,
                   all = TRUE)  # without all = TRUE it is an inner join

# Look up a single named attribute of an object
attr(myObj, which = "attrname")

# Turn a factor whose labels are numbers into a numeric vector.
# Going through as.character() converts the labels themselves; as.numeric()
# applied directly to a factor would return the internal integer codes.
myNumeric <- as.numeric(as.character(myFactor))

# Inspect or replace the row/column names of a data frame
colnames(df)                    # current column names
colnames(df) <- c("Col1", ...)  # assign new column names
rownames(df)                    # current row names
rownames(df) <- myVar           # assign new row names

# read a simple list of genes from a single-column file without a header
# stringsAsFactors = FALSE keeps the names as character strings on every R
# version; a factor would be used as integer codes when indexing rows
mygenes <- read.table("single_column.txt", header = FALSE,
                      stringsAsFactors = FALSE)
mygenes <- mygenes[, 1]  # select the only column in the file

# Pick the rows whose row names are listed in mygenes
df[mygenes, ]

# Pick the rows that satisfy a condition on one column
df[df$total > 10, ]

# Membership tests with %in%
"b" %in% myvector      # a single TRUE/FALSE
df$col1 %in% myvector  # logical vector, one entry per element of df$col1

# Names of an object's elements (for a data frame: its column names)
names(myobj)

# Compact overview of a data frame's structure, including column types
str(object = df)

# concatenate two strings together
# paste0() joins its arguments with no separator
paste0("Hello ", "world")

# combine data frames by stacking rows or binding columns
rbind  # row wise: rbind(df1, df2) appends the rows of df2 below df1
cbind  # column wise: cbind(df1, df2) places the columns of df2 beside df1

# Two ways to keep only the rows where Column1 is below 13.
# subset() drops rows where the test is NA ...
df.sub <- subset(df, subset = Column1 < 13)
# ... whereas logical indexing returns such rows filled with NA
df.sub <- df[df$Column1 < 13, ]

# Reorder the levels of a factor by a per-level summary (here the median
# count of each spray) so that plots show the groups in that order
bymedian <- reorder(InsectSprays$spray, InsectSprays$count, FUN = median)

# Build a length-10 vector filled with NA
na_vec <- rep(NA, times = 10)

# sampling
sample(10)  # random permutation of 1..10
sample(c(1, 7, 3, 9), 5, replace = TRUE)  # draw 5 elements with replacement
# randomly shuffle order of rows; drop = FALSE keeps the result a data frame
# even when df has only one column
df <- df[sample(nrow(df)), , drop = FALSE]

# KDE (kernel density estimate of one column)
density(df$Column1)

# save plots: open a file graphics device, draw, then close the device
png()  # pdf(), jpeg(), etc; a file name can be passed as the first argument
hist(df$Column1)  # drawn into the open device rather than on screen
dev.off()  # close the device so the file is completely written

# simple R ML
data(iris)
# which() returns the positions of the TRUE values: here every 5th row
testidx <- which(seq_len(nrow(iris)) %% 5 == 0)

# hold out the selected rows for testing ...
iristest <- iris[testidx, ]
# ... and grab everything not in testidx for training
iristrain <- iris[-testidx, ]

# Naive Bayes classifier
# NOTE(review): NaiveBayes() is presumably klaR::NaiveBayes — confirm that
# the klaR package is loaded before running this section
nbmodel <- NaiveBayes(Species ~ ., data = iristrain)
prediction <- predict(nbmodel, iristest[, -5])  # column 5 (the label) is left out
attributes(prediction)                          # check attributes of object
table(prediction$class, iristest[, 5])          # get confusion matrix

# decision tree
library(rpart)
treemodel <- rpart(Species ~ ., data = iristrain)
plot(treemodel)  # plot decision tree
# add text labels to the tree; use.n = TRUE also shows the observation
# counts at each node
text(treemodel, use.n = TRUE, cex = 0.6)
prediction <- predict(treemodel, newdata = iristest, type = "class")
table(prediction, iristest$Species)  # print confusion matrix

# Support vector machine (e1071)
library(e1071)
svmpred <- svm(Species ~ ., data = iristrain)
prediction <- predict(svmpred, iristest)
table(prediction, iristest$Species)  # confusion matrix
# Plot the fitted model over the two petal measurements, with the sepal
# measurements held fixed at the given slice values
plot(
  svmpred, iris, Petal.Width ~ Petal.Length,
  slice = list(Sepal.Width = 3, Sepal.Length = 4)
)
# Grid search over gamma (1e-5 .. 1) and cost (1 .. 1e5)
tune <- tune.svm(Species ~ ., data = iristrain,
                 gamma = 10^(-5:0), cost = 10^(0:5))
summary(tune)  # get best parameters

# roc curve
# NOTE(review): NaiveBayes() is presumably klaR::NaiveBayes — confirm that
# the klaR package is loaded
nbmodel <- NaiveBayes(Species ~ ., data = iristrain)
prediction <- predict(nbmodel, iristest[, -5])
# the posterior probability of the "virginica" class is the ranking score
score <- prediction$posterior[, "virginica"]
# TRUE where the true species is virginica (the positive class)
actual_class <- iristest$Species == "virginica"