# How to run an R script:
#   Rscript myscript.R       # from the command line
#   source("myscript.R")     # from within an R session

### Use getopt to parse arguments!!!
library("getopt")

# Set up the command line arguments. Each row of the spec matrix is
#   long name, short flag, argument mask, data type
# where the argument mask is
#   0: no argument
#   1: required argument
#   2: optional argument
spec <- matrix(
  c(
    "input",     "i", 1, "character",
    "outPrefix", "o", 1, "character",
    "help",      "h", 0, "logical"
  ),
  byrow = TRUE,
  ncol = 4
)
opt <- getopt(spec)

# print out help msg
if (!is.null(opt$help)) {
  cat(getopt(spec, usage = TRUE))
  q(status = 1)
}
### end getopt parsing

# the "las" argument in plotting can adjust axis labels

# simple reading of data frame
df <- read.delim(
  "tab_delim_file.txt", # file to read
  sep = "\t",           # tab delimiter
  row.names = 1         # use the first column as the row names
)

# simple stat info
# sd() does not accept a whole data frame, so apply it column by column
# (assumes every column of df is numeric)
vapply(df, sd, numeric(1)) # standard deviation of each column
rowSums(df)                # sum by rows

# divide columns by column sums
dfNorm <- sweep(df, 2, colSums(df), "/")

# merge two data frames (similar to pandas)
# by.x / by.y name the key columns of df1 / df2 ("foo" and "bar" are
# placeholders); without all = TRUE it is an inner join
merged_df <- merge(df1, df2, by.x = "foo", by.y = "bar", all = TRUE)

# get attributes from certain objects
attr(myObj, "attrname")

# convert integer factor into numeric
# (go through as.character() so the labels are converted rather than the
# internal level codes)
myNumeric <- as.numeric(as.character(myFactor))

# get/set the names of a data.frame
colnames(df) # return column names
# set column names: supply one name per column ("Col1", "Col2" are placeholders)
colnames(df) <- c("Col1", "Col2")
# (the colnames(df) <- ... assignment just above sets the column names)

# get/set the row names of a data.frame
rownames(df)          # return row names
rownames(df) <- myVar # set row names

# read a simple list of genes
# stringsAsFactors = FALSE keeps the names as character strings; a factor would
# index df by its integer level codes instead of by row name (the default
# before R 4.0)
mygenes <- read.table("single_column.txt", header = FALSE,
                      stringsAsFactors = FALSE)
mygenes <- mygenes[, 1] # select the only column in the file

# select rows by rownames
df[mygenes, ]

# select rows by column value
df[df$total > 10, ]

# check if an element is in a vector
"b" %in% myvector
df$col1 %in% myvector # returns a boolean vector

# get name of an object
names(myobj)

# print data types of dataframe
str(df)

# concatenate two strings together
paste0("Hello ", "world")

# merge data frames by rows/cols (df1, df2 are placeholders)
rbind(df1, df2) # row wise
cbind(df1, df2) # column wise

# subsets of data
# (subset() drops rows where the condition is NA; logical indexing with [
# returns an all-NA row for each of them instead)
df.sub <- subset(df, Column1 < 13)
df.sub <- df[df$Column1 < 13, ]

# reorder factors for plotting
bymedian <- with(InsectSprays, reorder(spray, count, FUN = median))

# make a vector of NA's
na_vec <- rep(NA, 10)

# sampling
sample(10)                               # random permutation of 1..10
sample(c(1, 7, 3, 9), 5, replace = TRUE) # sample with replacement 5 elements
# randomly shuffle order of rows; drop = FALSE keeps a one-column df a df
df <- df[sample(nrow(df)), , drop = FALSE]

# KDE
density(df$Column1)

# save plots
png() # pdf(), jpeg(), etc
hist(df$Column1)
dev.off()

# simple R ML
data(iris)

# which function: hold out every 5th row as the test set
testidx <- which(seq_len(nrow(iris)) %% 5 == 0)

# grab everything not in ids for training, and the held-out ids for testing
iristrain <- iris[-testidx, ]
iristest <- iris[testidx, ]

# naive bayes
library(klaR) # provides NaiveBayes()
nbmodel <- NaiveBayes(Species ~ ., data = iristrain)
prediction <- predict(nbmodel, iristest[, -5])
attributes(prediction)                 # check attributes of object
table(prediction$class, iristest[, 5]) # get confusion matrix

# decision tree
library(rpart)
treemodel <- rpart(Species ~ ., data = iristrain)
plot(treemodel)                         # plot decision tree
text(treemodel, use.n = TRUE, cex = .6) # add texts for decision tree
prediction <- predict(treemodel, newdata = iristest, type = "class")
table(prediction, iristest$Species)     # print confusion matrix

# svm
library(e1071)
svmpred <- svm(Species ~ ., data = iristrain)
prediction <- predict(svmpred, iristest)
table(prediction, iristest$Species)
# the two predictors not on the plot axes are held fixed via slice
plot(svmpred, iris, Petal.Width ~ Petal.Length,
     slice = list(Sepal.Width = 3, Sepal.Length = 4))
tune <- tune.svm(Species ~ ., data = iristrain,
                 gamma = 10^(-5:0), cost = 10^(0:5))
summary(tune) # get best parameters

# roc curve
nbmodel <- NaiveBayes(Species ~ ., data = iristrain)
prediction <- predict(nbmodel, iristest[, -5])
# use the posterior probability of the "virginica" class as the score
score <- prediction$posterior[, "virginica"]
actual_class <- iristest$Species == "virginica"