Skip to content

Instantly share code, notes, and snippets.

@mendelevium
Forked from ctokheim/r_tricks.r
Last active August 29, 2015 14:14
Show Gist options
  • Save mendelevium/81a33c6089560963b82c to your computer and use it in GitHub Desktop.
Save mendelevium/81a33c6089560963b82c to your computer and use it in GitHub Desktop.
# Run an R script
# From the command line (this is a shell command, not R code):
#   Rscript myscript.R
# From within an R session:
source("myscript.R")
### Use getopt to parse command-line arguments
library("getopt")

# Option specification, one row per option. The third entry in each row
# says whether the option takes an argument:
#   0: no argument
#   1: required argument
#   2: optional argument
spec <- rbind(
  c("input", "i", 1, "character"),
  c("outPrefix", "o", 1, "character"),
  c("help", "h", 0, "logical")
)
opt <- getopt(spec)

# If the help option was supplied, print the usage message and quit
if (!is.null(opt$help)) {
  cat(getopt(spec, usage = TRUE))
  q(status = 1)
}
### end getopt parsing
# the "las" argument in plotting can adjust axis labels
# Read a tab-delimited file into a data frame
df <- read.delim(
  "tab_delim_file.txt",  # file to read
  sep = "\t",            # tab delimiter
  row.names = 1          # use the first column as the row names
)
# Simple summary statistics
# sd() works on a vector; calling it on a whole data frame is an error in
# current R, so it is applied column by column here.
# NOTE(review): assumes every column of df is numeric -- confirm.
vapply(df, sd, numeric(1))  # standard deviation of each column
rowSums(df)                 # sum of each row
# Divide every column by its column sum
dfNorm <- sweep(df, 2, colSums(df), "/")
# Merge two data frames (similar to pandas)
# NOTE(review): foo and bar are unquoted, so they must be variables holding
# the key column names of df1 and df2 -- confirm, or quote them.
merged_df <- merge(
  df1, df2,
  by.x = foo, by.y = bar,
  all = TRUE  # without all = TRUE it is an inner join
)
# get a single named attribute ("attrname") from an object
attr(myObj, "attrname")
# convert a factor whose labels are numbers into a numeric vector;
# going through as.character() converts the labels, whereas as.numeric()
# directly on a factor would return its internal integer codes
myNumeric <- as.numeric(as.character(myFactor))
# get/set the row and column names of a data.frame
colnames(df) # return column names
colnames(df) <- c('Col1', ...) # set column names ("..." is a placeholder for the remaining names)
rownames(df) # return row names
rownames(df) <- myVar # set row names to the values in myVar
# Read a simple list of genes from a single-column file without a header.
# stringsAsFactors = FALSE keeps the identifiers as character strings: before
# R 4.0 read.table() turned them into a factor by default, and indexing a
# data frame with a factor selects rows by its integer codes, not by name.
mygenes <- read.table("single_column.txt", header = FALSE,
                      stringsAsFactors = FALSE)
mygenes <- mygenes[, 1]  # select the only column in the file
# Select rows by row names
df[mygenes, ]
# Keep the rows whose "total" column is greater than 10
df[df$total > 10, ]
# Membership tests with %in%
"b" %in% myvector      # is this single element in the vector?
df$col1 %in% myvector  # returns a boolean vector, one entry per element of df$col1
# get the names of an object's elements (for a data frame, its column names)
names(myobj)
# print the structure of a data frame, including the type of each column
str(df)
# Concatenate two strings; paste0() joins its arguments with no separator
paste0("Hello ", "world")
# merge data frames by rows/cols
# (bare function names listed for reference; call them as rbind(a, b) / cbind(a, b))
rbind # row wise
cbind # column wise
# Two ways to keep only the rows where Column1 is below 13.
# subset() drops rows where the condition is NA; logical indexing with `[`
# keeps them as all-NA rows.
df.sub <- subset(df, Column1 < 13)
df.sub <- df[df$Column1 < 13, ]
# Reorder the levels of a factor by the median of a second variable, so that
# plots show the groups in that order
bymedian <- reorder(InsectSprays$spray, InsectSprays$count, FUN = median)
# Build a vector of ten NA values
na_vec <- rep(NA, times = 10)
# Random sampling
sample(10)                                # random permutation of 1..10
sample(c(1, 7, 3, 9), 5, replace = TRUE)  # draw 5 elements with replacement
df <- df[sample(nrow(df)), ]              # randomly shuffle the order of rows
# KDE (kernel density estimate) of one column
density(df$Column1)
# save plots: open a graphics device, draw, then close the device
png() # pdf(), jpeg(), etc
hist(df$Column1)
dev.off() # closes the device opened above
# Simple R machine learning on the built-in iris data set
data(iris)
# which(): indices of every 5th row, held out as the test set
testidx <- which(seq_len(nrow(iris)) %% 5 == 0)
# Test set: the held-out rows (used by the model snippets that follow)
iristest <- iris[testidx, ]
# Training set: everything not in testidx
iristrain <- iris[-testidx, ]
# Naive Bayes classifier
library(klaR)  # provides NaiveBayes()
nbmodel <- NaiveBayes(Species ~ ., data = iristrain)
# Predict on the test rows with column 5 (the Species label) dropped
prediction <- predict(nbmodel, iristest[, -5])
attributes(prediction)                  # check attributes of object
table(prediction$class, iristest[, 5])  # get confusion matrix
# Decision tree
library(rpart)
treemodel <- rpart(Species ~ ., data = iristrain)
plot(treemodel)                            # plot decision tree
text(treemodel, use.n = TRUE, cex = 0.6)   # add text labels to the plotted tree
prediction <- predict(treemodel, newdata = iristest, type = "class")
table(prediction, iristest$Species)        # print confusion matrix
# Support vector machine
library(e1071)
# Note: despite its name, svmpred holds the fitted model, not predictions
svmpred <- svm(Species ~ ., data = iristrain)
prediction <- predict(svmpred, iristest)
table(prediction, iristest$Species)
# Plot the model over the two petal dimensions, holding the sepal
# dimensions at the fixed values given in `slice`
plot(
  svmpred, iris, Petal.Width ~ Petal.Length,
  slice = list(Sepal.Width = 3, Sepal.Length = 4)
)
# Try every combination of the listed gamma and cost values
tune <- tune.svm(
  Species ~ ., data = iristrain,
  gamma = 10^(-5:0), cost = 10^(0:5)
)
summary(tune)  # get best parameters
# ROC curve inputs
nbmodel <- NaiveBayes(Species ~ ., data = iristrain)
prediction <- predict(nbmodel, iristest[, -5])
# Score each test row by its posterior probability of being "virginica"
score <- prediction$posterior[, "virginica"]
# TRUE for the test rows whose true species is virginica
actual_class <- iristest$Species == "virginica"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment