mendelevium · August 29, 2015 14:14 · Jan 28, 2015 · Nov 12, 2014 · Nov 12, 2014 · Aug 13, 2014
diff --git a/r_tricks.r b/r_tricks.r
@@ -1,6 +1,6 @@
 # Run r script
 Rscript myscript.R  # from command line
-souce('myscript.R')  # within R
+source('myscript.R')  # within R
 
 ### Use getopt to parse arguments!!!
 library('getopt')

diff --git a/r_tricks.r b/r_tricks.r
@@ -46,6 +46,9 @@ merged_df <- merge(df1, df2,
 # get attributes from certain objects
 attr(myObj, "attrname")
 
+# convert integer factor into numeric
+myNumeric <- as.numeric(as.character(myFactor))
+
 # get/set the names of a data.frame
 colnames(df)  # return column names
 colnames(df) <- c('Col1', ...)  # set column names

diff --git a/r_tricks.r b/r_tricks.r
@@ -42,7 +42,10 @@ dfNorm <- sweep(df, 2, colSums(df), "/")
 merged_df <- merge(df1, df2, 
                    by.x=foo, by.y=bar,
                    all=T)  # without all=T it is an inner join
-
+
+# get attributes from certain objects
+attr(myObj, "attrname")
+
 # get/set the names of a data.frame
 colnames(df)  # return column names
 colnames(df) <- c('Col1', ...)  # set column names

diff --git a/r_tricks.r b/r_tricks.r
@@ -37,6 +37,11 @@ rowSums(df)  # sum by rows
 
 # divide columns by column sums
 dfNorm <- sweep(df, 2, colSums(df), "/")
+
+# merge two data frames (similar to pandas)
+merged_df <- merge(df1, df2, 
+                   by.x=foo, by.y=bar,
+                   all=T)  # without all=T it is an inner join
 
 # get/set the names of a data.frame
 colnames(df)  # return column names

diff --git a/r_tricks.r b/r_tricks.r
@@ -23,15 +23,16 @@ if ( !is.null(opt$help) ) {
 }
 ### end getopt parsing
 
-# getopt package will parse command line arguments
-
 # the "las" argument in plotting can adjust axis labels
 
 # simple reading of data frame
 df <- read.delim('tab_delim_file.txt',  # file to read
                  sep='\t',  # tab delimiter
                  row.names=1)  # use first column as the row names
-
+
+# simple stat info
+sd(df)  # standard deviation
+
 rowSums(df)  # sum by rows
 
 # divide columns by column sums

diff --git a/r_tricks.r b/r_tricks.r
@@ -6,6 +6,9 @@ souce('myscript.R')  # within R
 library('getopt')
 
 # set up command line arguments
+# 0: no argument
+# 1: required argument
+# 2: optional argument
 spec = matrix(c(
    'input', 'i', 1, "character",
    'outPrefix', 'o', 1, "character",

diff --git a/r_tricks.r b/r_tricks.r
@@ -2,6 +2,24 @@
 Rscript myscript.R  # from command line
 souce('myscript.R')  # within R
 
+### Use getopt to parse arguments!!!
+library('getopt')
+
+# set up command line arguments
+spec = matrix(c(
+   'input', 'i', 1, "character",
+   'outPrefix', 'o', 1, "character",
+   'help', 'h', 0, "logical"
+), byrow=TRUE, ncol=4);
+opt = getopt(spec);
+
+# print out help msg
+if ( !is.null(opt$help) ) {
+    cat(getopt(spec, usage=TRUE));
+    q(status=1);
+}
+### end getopt parsing
+
 # getopt package will parse command line arguments
 
 # the "las" argument in plotting can adjust axis labels

diff --git a/r_tricks.r b/r_tricks.r
@@ -42,6 +42,9 @@ names(myobj)
 # print data types of dataframe
 str(df)
 
+# concatenate two strings together
+paste('Hello ', 'world', sep='')
+
 # merge data frames by rows/cols
 rbind  # row wise
 cbind  # column wise

diff --git a/r_tricks.r b/r_tricks.r
@@ -50,6 +50,9 @@ cbind  # column wise
 df.sub <- subset(df, Column1<13)
 df.sub <- df[df$Column1<13,]
 
+# reorder factors for plotting
+bymedian <- with(InsectSprays, reorder(spray, count, FUN=median))
+
 # make a vector of NA's
 na_vec <- rep(NA, 10)
 

diff --git a/r_tricks.r b/r_tricks.r
@@ -4,6 +4,8 @@ souce('myscript.R')  # within R
 
 # getopt package will parse command line arguments
 
+# the "las" argument in plotting can adjust axis labels
+
 # simple reading of data frame
 df <- read.delim('tab_delim_file.txt',  # file to read
                  sep='\t',  # tab delimiter

diff --git a/r_tricks.r b/r_tricks.r
@@ -48,6 +48,9 @@ cbind  # column wise
 df.sub <- subset(df, Column1<13)
 df.sub <- df[df$Column1<13,]
 
+# make a vector of NA's
+na_vec <- rep(NA, 10)
+
 # sampling
 sample(10)  # random permutation of 1..10
 sample(c(1, 7, 3, 9), 5, replace=TRUE)  # sample with replacement 5 elements

diff --git a/r_tricks.r b/r_tricks.r
@@ -28,7 +28,7 @@ mygenes <- mygenes[,1]  # select the only column in the file
 df[mygenes,]
 
 # select rows by column value
-df[df$total > 10,]
+df[df$total>10,]
 
 # check if an element is in a vector
 'b' %in% myvector
@@ -48,6 +48,11 @@ cbind  # column wise
 df.sub <- subset(df, Column1<13)
 df.sub <- df[df$Column1<13,]
 
+# sampling
+sample(10)  # random permutation of 1..10
+sample(c(1, 7, 3, 9), 5, replace=TRUE)  # sample with replacement 5 elements
+df <- df[sample(nrow(df)),]  # randomly shuffle order of rows
+
 # KDE
 density(df$Column1)
 

diff --git a/r_tricks.r b/r_tricks.r
@@ -11,6 +11,9 @@ df <- read.delim('tab_delim_file.txt',  # file to read
 
 rowSums(df)  # sum by rows
 
+# divide columns by column sums
+dfNorm <- sweep(df, 2, colSums(df), "/")
+
 # get/set the names of a data.frame
 colnames(df)  # return column names
 colnames(df) <- c('Col1', ...)  # set column names

diff --git a/r_tricks.r b/r_tricks.r
@@ -51,4 +51,42 @@ density(df$Column1)
 # save plots
 png()  # pdf(), jpeg(), etc
 hist(df$Column1)
-dev.off()
+dev.off()
+
+# simple R ML
+data(iris)
+# which function
+testidx <- which(1:length(iris[,1])%%5==0)
+
+# grab everything not in ids
+iristrain<-iris[-testidx,]
+
+# naive bayes
+nbmodel<-NaiveBayes(Species~., data=iristrain)
+prediction<-predict(nbmodel, iristest[,-5])
+attributes(prediction)  # check attributes of object
+table(prediction$class, iristest[,5])  # get confusion matrix
+
+# decision tree
+library(rpart)
+treemodel<-rpart(Species~., data=iristrain)
+plot(treemodel)  # plot decision tree
+text(treemodel, use.n=T, cex=.6)  # add texts for decision tree
+prediction<-predict(treemodel, newdata=iristest, type='class')
+table(prediction, iristest$Species)  # print confusion matrix
+
+#svm
+library(e1071)
+svmpred<-svm(Species~., data=iristrain)
+prediction<-predict(svmpred, iristest)
+table(prediction, iristest$Species)
+plot(svmpred, iris, Petal.Width~Petal.Length, slice=list(Sepal.Width=3, Sepal.Length=4))
+tune<-tune.svm(Species~., data=iristrain, gamma=10^(-5:0), cost=10^(0:5))
+summary(tune)  # get best parameters
+
+# roc curve
+nbmodel<-NaiveBayes(Species~., data=iristrain)
+prediction<-predict(nbmodel, iristest[,-5])
+score<-prediction$poserior[,c("verginica")]
+score<-nbprediction$poserior[,c("virginica")]
+actual_class<-iristest$Species=='virginica'
diff --git a/r_tricks.r b/r_tricks.r
@@ -1,3 +1,9 @@
+# Run r script
+Rscript myscript.R  # from command line
+souce('myscript.R')  # within R
+
+# getopt package will parse command line arguments
+
 # simple reading of data frame
 df <- read.delim('tab_delim_file.txt',  # file to read
                  sep='\t',  # tab delimiter

diff --git a/r_tricks.r b/r_tricks.r
@@ -29,4 +29,20 @@ df$col1 %in% myvector  # returns a boolean vector
 names(myobj)
 
 # print data types of dataframe
-str(df)
+str(df)
+
+# merge data frames by rows/cols
+rbind  # row wise
+cbind  # column wise
+
+# subsets of data
+df.sub <- subset(df, Column1<13)
+df.sub <- df[df$Column1<13,]
+
+# KDE
+density(df$Column1)
+
+# save plots
+png()  # pdf(), jpeg(), etc
+hist(df$Column1)
+dev.off()
diff --git a/r_tricks.r b/r_tricks.r
@@ -18,6 +18,9 @@ mygenes <- mygenes[,1]  # select the only column in the file
 # select rows by rownames
 df[mygenes,]
 
+# select rows by column value
+df[df$total > 10,]
+
 # check if an element is in a vector
 'b' %in% myvector
 df$col1 %in% myvector  # returns a boolean vector

diff --git a/r_tricks.r b/r_tricks.r
@@ -23,4 +23,7 @@ df[mygenes,]
 df$col1 %in% myvector  # returns a boolean vector
 
 # get name of an object
-names(myobj)
+names(myobj)
+
+# print data types of dataframe
+str(df)
diff --git a/r_tricks.r b/r_tricks.r
@@ -20,4 +20,7 @@ df[mygenes,]
 
 # check if an element is in a vector
 'b' %in% myvector
-df$col1 %in% myvector  # returns a boolean vector
+df$col1 %in% myvector  # returns a boolean vector
+
+# get name of an object
+names(myobj)
diff --git a/r_tricks.r b/r_tricks.r
@@ -16,4 +16,8 @@ mygenes <- read.table("single_column.txt", header=FALSE)
 mygenes <- mygenes[,1]  # select the only column in the file
 
 # select rows by rownames
-df[mygenes,]
+df[mygenes,]
+
+# check if an element is in a vector
+'b' %in% myvector
+df$col1 %in% myvector  # returns a boolean vector
diff --git a/r_tricks.r b/r_tricks.r
@@ -9,4 +9,11 @@ rowSums(df)  # sum by rows
 colnames(df)  # return column names
 colnames(df) <- c('Col1', ...)  # set column names
 rownames(df)  # return row names
-rownames(df) <- myVar  # set row names
+rownames(df) <- myVar  # set row names
+
+# read a simple list of genes
+mygenes <- read.table("single_column.txt", header=FALSE)
+mygenes <- mygenes[,1]  # select the only column in the file
+
+# select rows by rownames
+df[mygenes,]
diff --git a/r_tricks.r b/r_tricks.r
@@ -3,4 +3,10 @@ df <- read.delim('tab_delim_file.txt',  # file to read
                  sep='\t',  # tab delimiter
                  row.names=1)  # use first column as the row names
 
-rowSums(df)  # sum by rows
+rowSums(df)  # sum by rows
+
+# get/set the names of a data.frame
+colnames(df)  # return column names
+colnames(df) <- c('Col1', ...)  # set column names
+rownames(df)  # return row names
+rownames(df) <- myVar  # set row names
diff --git a/r_tricks.r b/r_tricks.r
@@ -0,0 +1,6 @@
+# simple reading of data frame
+df <- read.delim('tab_delim_file.txt',  # file to read
+                 sep='\t',  # tab delimiter
+                 row.names=1)  # use first column as the row names
+
+rowSums(df)  # sum by rows
No results found