-
-
Save mendelevium/81a33c6089560963b82c to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Run r script | |
| Rscript myscript.R # from command line | |
| source('myscript.R') # within R | |
| ### Parse command-line arguments with getopt | |
| library('getopt') | |
| # one row per option: long name, short flag, argument mask, value type | |
| # argument mask values: | |
| # 0: no argument | |
| # 1: required argument | |
| # 2: optional argument | |
| spec <- matrix( | |
| c('input', 'i', 1, "character", | |
| 'outPrefix', 'o', 1, "character", | |
| 'help', 'h', 0, "logical"), | |
| ncol=4, byrow=TRUE) | |
| opt <- getopt(spec) | |
| # when the help option was supplied, print the usage text and exit | |
| if ( !is.null(opt$help) ) { | |
| cat(getopt(spec, usage=TRUE)) | |
| quit(status=1) | |
| } | |
| ### end getopt parsing | |
| # the "las" argument in plotting sets the orientation of axis labels (0 = parallel to axis, 1 = horizontal, 2 = perpendicular, 3 = vertical) | |
| # simple reading of data frame | |
| df <- read.delim('tab_delim_file.txt', # file to read | |
| sep='\t', # tab delimiter | |
| row.names=1) # use first column as the row names | |
| # simple stat info | |
| vapply(df, sd, numeric(1)) # standard deviation of each column (sd(df) itself errors on a data frame) | |
| rowSums(df) # sum by rows | |
| # divide columns by column sums | |
| dfNorm <- sweep(df, 2, colSums(df), "/") | |
| # merge two data frames (similar to pandas) | |
| merged_df <- merge(df1, df2, | |
| by.x=foo, by.y=bar, # key column(s) of df1 / df2 | |
| all=TRUE) # full outer join; without all=TRUE it is an inner join | |
| # get attributes from certain objects | |
| attr(myObj, "attrname") | |
| # convert integer factor into numeric | |
| myNumeric <- as.numeric(as.character(myFactor)) | |
| # get/set the names of a data.frame | |
| colnames(df) # return column names | |
| colnames(df) <- c('Col1', ...) # set column names | |
| rownames(df) # return row names | |
| rownames(df) <- myVar # set row names | |
| # read a simple list of genes | |
| mygenes <- read.table("single_column.txt", header=FALSE) | |
| mygenes <- mygenes[,1] # select the only column in the file | |
| # select rows by rownames | |
| df[mygenes,] | |
| # select rows by column value | |
| df[df$total>10,] | |
| # check if an element is in a vector | |
| 'b' %in% myvector | |
| df$col1 %in% myvector # returns a boolean vector | |
| # get name of an object | |
| names(myobj) | |
| # print data types of dataframe | |
| str(df) | |
| # concatenate two strings together (paste0 is paste with sep='') | |
| paste0('Hello ', 'world') | |
| # merge data frames by rows/cols | |
| rbind(df1, df2) # row wise: stack the rows of df2 below df1 | |
| cbind(df1, df2) # column wise: put the columns of df2 next to df1 | |
| # subsets of data | |
| df.sub <- subset(df, Column1<13) | |
| df.sub <- df[df$Column1<13,] | |
| # reorder factors for plotting | |
| bymedian <- with(InsectSprays, reorder(spray, count, FUN=median)) | |
| # make a vector of NA's | |
| na_vec <- rep(NA, 10) | |
| # sampling | |
| sample(10) # random permutation of 1..10 | |
| sample(c(1, 7, 3, 9), 5, replace=TRUE) # sample with replacement 5 elements | |
| df <- df[sample(nrow(df)),] # randomly shuffle order of rows | |
| # KDE | |
| density(df$Column1) | |
| # save plots | |
| png() # pdf(), jpeg(), etc | |
| hist(df$Column1) | |
| dev.off() | |
| # simple R ML | |
| library(klaR) # provides NaiveBayes() | |
| data(iris) | |
| # which function: indices of every 5th row, held out as the test set | |
| testidx <- which(seq_len(nrow(iris)) %% 5 == 0) | |
| iristest <- iris[testidx,] | |
| # grab everything not in ids | |
| iristrain <- iris[-testidx,] | |
| # naive bayes | |
| nbmodel <- NaiveBayes(Species~., data=iristrain) | |
| prediction <- predict(nbmodel, iristest[,-5]) # predict without column 5 (the label compared against below) | |
| attributes(prediction) # check attributes of object | |
| table(prediction$class, iristest[,5]) # get confusion matrix | |
| # decision tree | |
| library(rpart) | |
| treemodel <- rpart(Species~., data=iristrain) | |
| plot(treemodel) # plot decision tree | |
| text(treemodel, use.n=TRUE, cex=.6) # add texts for decision tree | |
| prediction <- predict(treemodel, newdata=iristest, type='class') | |
| table(prediction, iristest$Species) # print confusion matrix | |
| # support vector machine | |
| library(e1071) | |
| svmpred <- svm(Species ~ ., data = iristrain) # fit on the training rows | |
| prediction <- predict(svmpred, iristest) | |
| table(prediction, iristest$Species) # confusion matrix | |
| # plot the fit over the petal measurements, holding the sepal dimensions at the slice values | |
| plot(svmpred, iris, Petal.Width ~ Petal.Length, | |
| slice = list(Sepal.Width = 3, Sepal.Length = 4)) | |
| # try every combination of the listed gamma and cost values | |
| tune <- tune.svm(Species ~ ., data = iristrain, | |
| gamma = 10^(-5:0), cost = 10^(0:5)) | |
| summary(tune) # get best parameters | |
| # roc curve | |
| nbmodel <- NaiveBayes(Species~., data=iristrain) | |
| prediction <- predict(nbmodel, iristest[,-5]) | |
| # posterior probability of the 'virginica' class, used as the score | |
| score <- prediction$posterior[, "virginica"] | |
| # TRUE where the true species is virginica | |
| actual_class <- iristest$Species=='virginica' |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment