Skip to content

Instantly share code, notes, and snippets.

@mendelevium
Forked from ctokheim/r_tricks.r
Last active August 29, 2015 14:14
Show Gist options
  • Select an option

  • Save mendelevium/81a33c6089560963b82c to your computer and use it in GitHub Desktop.

Select an option

Save mendelevium/81a33c6089560963b82c to your computer and use it in GitHub Desktop.

Revisions

  1. mendelevium revised this gist Jan 28, 2015. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion r_tricks.r
    Original file line number Diff line number Diff line change
    @@ -1,6 +1,6 @@
    # Run r script
    Rscript myscript.R # from command line
    souce('myscript.R') # within R
    source('myscript.R') # within R

    ### Use getopt to parse arguments!!!
    library('getopt')
  2. @ctokheim ctokheim revised this gist Nov 12, 2014. 1 changed file with 3 additions and 0 deletions.
    3 changes: 3 additions & 0 deletions r_tricks.r
    Original file line number Diff line number Diff line change
    @@ -46,6 +46,9 @@ merged_df <- merge(df1, df2,
    # get attributes from certain objects
    attr(myObj, "attrname")

    # convert integer factor into numeric
    myNumeric <- as.numeric(as.character(myFactor))

    # get/set the names of a data.frame
    colnames(df) # return column names
    colnames(df) <- c('Col1', ...) # set column names
  3. @ctokheim ctokheim revised this gist Nov 12, 2014. 1 changed file with 4 additions and 1 deletion.
    5 changes: 4 additions & 1 deletion r_tricks.r
    Original file line number Diff line number Diff line change
    @@ -42,7 +42,10 @@ dfNorm <- sweep(df, 2, colSums(df), "/")
    # by.x / by.y name the key column(s) in df1 / df2 (foo and bar are
    # placeholders); all=TRUE keeps unmatched rows from both data frames
    merged_df <- merge(df1, df2,
                       by.x=foo, by.y=bar,
                       all=TRUE) # without all=TRUE it is an inner join


    # get attributes from certain objects
    attr(myObj, "attrname")

    # get/set the names of a data.frame
    colnames(df) # return column names
    colnames(df) <- c('Col1', ...) # set column names
  4. @ctokheim ctokheim revised this gist Aug 13, 2014. 1 changed file with 5 additions and 0 deletions.
    5 changes: 5 additions & 0 deletions r_tricks.r
    Original file line number Diff line number Diff line change
    @@ -37,6 +37,11 @@ rowSums(df) # sum by rows

    # divide columns by column sums
    dfNorm <- sweep(df, 2, colSums(df), "/")

    # merge two data frames (similar to pandas)
    # by.x / by.y name the key column(s) in df1 / df2 (foo and bar are
    # placeholders); all=TRUE keeps unmatched rows from both data frames
    merged_df <- merge(df1, df2,
                       by.x=foo, by.y=bar,
                       all=TRUE) # without all=TRUE it is an inner join

    # get/set the names of a data.frame
    colnames(df) # return column names
  5. @ctokheim ctokheim revised this gist Jun 10, 2014. 1 changed file with 4 additions and 3 deletions.
    7 changes: 4 additions & 3 deletions r_tricks.r
    Original file line number Diff line number Diff line change
    @@ -23,15 +23,16 @@ if ( !is.null(opt$help) ) {
    }
    ### end getopt parsing

    # getopt package will parse command line arguments

    # the "las" argument in plotting can adjust axis labels

    # simple reading of data frame
    # row.names=1 takes the row names from the first column of the file,
    # so that column does not appear as a data column in df
    df <- read.delim('tab_delim_file.txt', # file to read
    sep='\t', # tab delimiter
    row.names=1) # use first column as the row names


    # simple stat info
    # sd() no longer accepts a whole data frame (defunct since R 3.0.0),
    # so compute it column by column; assumes every column is numeric
    vapply(df, sd, numeric(1)) # standard deviation of each column

    rowSums(df) # sum by rows

    # divide columns by column sums
  6. @ctokheim ctokheim revised this gist Jun 9, 2014. 1 changed file with 3 additions and 0 deletions.
    3 changes: 3 additions & 0 deletions r_tricks.r
    Original file line number Diff line number Diff line change
    @@ -6,6 +6,9 @@ souce('myscript.R') # within R
    library('getopt')

    # set up command line arguments
    # 0: no argument
    # 1: required argument
    # 2: optional argument
    spec = matrix(c(
    'input', 'i', 1, "character",
    'outPrefix', 'o', 1, "character",
  7. @ctokheim ctokheim revised this gist Jun 9, 2014. 1 changed file with 18 additions and 0 deletions.
    18 changes: 18 additions & 0 deletions r_tricks.r
    Original file line number Diff line number Diff line change
    @@ -2,6 +2,24 @@
    Rscript myscript.R # from command line
    souce('myscript.R') # within R

    ### Use getopt to parse arguments!!!
    library('getopt')

    # set up command line arguments
    # each row of the spec is: long name, short flag, argument mask, data type
    # argument mask: 0 = no argument, 1 = required argument, 2 = optional argument
    spec <- matrix(c(
      'input', 'i', 1, "character",
      'outPrefix', 'o', 1, "character",
      'help', 'h', 0, "logical"
    ), byrow=TRUE, ncol=4)
    opt <- getopt(spec)

    # print out help msg
    # opt$help is only set when the help flag was passed on the command line
    if ( !is.null(opt$help) ) {
      cat(getopt(spec, usage=TRUE))
      q(status=1)
    }
    ### end getopt parsing

    # getopt package will parse command line arguments

    # the "las" argument in plotting can adjust axis labels
  8. @ctokheim ctokheim revised this gist Jun 9, 2014. 1 changed file with 3 additions and 0 deletions.
    3 changes: 3 additions & 0 deletions r_tricks.r
    Original file line number Diff line number Diff line change
    @@ -42,6 +42,9 @@ names(myobj)
    # print data types of dataframe
    str(df)

    # concatenate two strings together (paste0 is paste with sep='')
    paste0('Hello ', 'world')

    # merge data frames by rows/cols
    rbind # row wise
    cbind # column wise
  9. @ctokheim ctokheim revised this gist Jun 9, 2014. 1 changed file with 3 additions and 0 deletions.
    3 changes: 3 additions & 0 deletions r_tricks.r
    Original file line number Diff line number Diff line change
    @@ -50,6 +50,9 @@ cbind # column wise
    df.sub <- subset(df, Column1<13)
    df.sub <- df[df$Column1<13,]

    # reorder factors for plotting
    bymedian <- with(InsectSprays, reorder(spray, count, FUN=median))

    # make a vector of NA's
    na_vec <- rep(NA, 10)

  10. @ctokheim ctokheim revised this gist May 30, 2014. 1 changed file with 2 additions and 0 deletions.
    2 changes: 2 additions & 0 deletions r_tricks.r
    Original file line number Diff line number Diff line change
    @@ -4,6 +4,8 @@ souce('myscript.R') # within R

    # getopt package will parse command line arguments

    # the "las" argument in plotting can adjust axis labels

    # simple reading of data frame
    df <- read.delim('tab_delim_file.txt', # file to read
    sep='\t', # tab delimiter
  11. @ctokheim ctokheim revised this gist May 29, 2014. 1 changed file with 3 additions and 0 deletions.
    3 changes: 3 additions & 0 deletions r_tricks.r
    Original file line number Diff line number Diff line change
    @@ -48,6 +48,9 @@ cbind # column wise
    df.sub <- subset(df, Column1<13)
    df.sub <- df[df$Column1<13,]

    # make a vector of NA's
    na_vec <- rep(NA, 10)

    # sampling
    sample(10) # random permutation of 1..10
    sample(c(1, 7, 3, 9), 5, replace=TRUE) # sample with replacement 5 elements
  12. @ctokheim ctokheim revised this gist May 29, 2014. 1 changed file with 6 additions and 1 deletion.
    7 changes: 6 additions & 1 deletion r_tricks.r
    Original file line number Diff line number Diff line change
    @@ -28,7 +28,7 @@ mygenes <- mygenes[,1] # select the only column in the file
    df[mygenes,]

    # select rows by column value
    df[df$total > 10,]
    df[df$total>10,]

    # check if an element is in a vector
    'b' %in% myvector
    @@ -48,6 +48,11 @@ cbind # column wise
    df.sub <- subset(df, Column1<13)
    df.sub <- df[df$Column1<13,]

    # sampling
    sample(10) # random permutation of 1..10
    sample(c(1, 7, 3, 9), 5, replace=TRUE) # sample with replacement 5 elements
    df <- df[sample(nrow(df)),] # randomly shuffle order of rows

    # KDE
    density(df$Column1)

  13. @ctokheim ctokheim revised this gist Apr 24, 2014. 1 changed file with 3 additions and 0 deletions.
    3 changes: 3 additions & 0 deletions r_tricks.r
    Original file line number Diff line number Diff line change
    @@ -11,6 +11,9 @@ df <- read.delim('tab_delim_file.txt', # file to read

    rowSums(df) # sum by rows

    # divide columns by column sums
    dfNorm <- sweep(df, 2, colSums(df), "/")

    # get/set the names of a data.frame
    colnames(df) # return column names
    colnames(df) <- c('Col1', ...) # set column names
  14. @ctokheim ctokheim revised this gist Mar 26, 2014. 1 changed file with 39 additions and 1 deletion.
    40 changes: 39 additions & 1 deletion r_tricks.r
    Original file line number Diff line number Diff line change
    @@ -51,4 +51,42 @@ density(df$Column1)
    # save plots
    png() # pdf(), jpeg(), etc
    hist(df$Column1)
    dev.off()
    dev.off()

    # simple R ML
    library(klaR) # provides NaiveBayes() and its predict() method
    data(iris)
    # which function: hold out every 5th row as the test set
    testidx <- which(seq_len(nrow(iris)) %% 5 == 0)

    # grab everything not in ids
    iristrain <- iris[-testidx, ]
    # the held-out rows, used for prediction and the confusion matrix
    iristest <- iris[testidx, ]

    # naive bayes
    nbmodel <- NaiveBayes(Species~., data=iristrain)
    prediction <- predict(nbmodel, iristest[,-5]) # column 5 of iris is Species
    attributes(prediction) # check attributes of object
    table(prediction$class, iristest[,5]) # get confusion matrix

    # decision tree
    library(rpart)
    treemodel <- rpart(Species~., data=iristrain)
    plot(treemodel) # plot decision tree
    # use.n=TRUE also labels each node with its observation counts
    text(treemodel, use.n=TRUE, cex=.6) # add texts for decision tree
    prediction <- predict(treemodel, newdata=iristest, type='class')
    table(prediction, iristest$Species) # print confusion matrix

    # support vector machine (e1071)
    library(e1071)
    # fit on the training rows, then predict the held-out rows
    svmpred <- svm(Species ~ ., data = iristrain)
    prediction <- predict(svmpred, iristest)
    table(prediction, iristest$Species)
    # 2-D view of the classifier; the two unplotted features are held fixed
    plot(
      svmpred, iris, Petal.Width ~ Petal.Length,
      slice = list(Sepal.Width = 3, Sepal.Length = 4)
    )
    # grid search over gamma and cost
    tune <- tune.svm(
      Species ~ .,
      data = iristrain,
      gamma = 10^(-5:0),
      cost = 10^(0:5)
    )
    summary(tune) # get best parameters

    # roc curve
    nbmodel <- NaiveBayes(Species~., data=iristrain)
    prediction <- predict(nbmodel, iristest[,-5])
    # posterior holds one column of class probabilities per species;
    # the virginica column is the score for the positive class
    score <- prediction$posterior[, "virginica"]
    # TRUE where the held-out row really is virginica
    actual_class <- iristest$Species=='virginica'
  15. @ctokheim ctokheim revised this gist Feb 26, 2014. 1 changed file with 6 additions and 0 deletions.
    6 changes: 6 additions & 0 deletions r_tricks.r
    Original file line number Diff line number Diff line change
    @@ -1,3 +1,9 @@
    # Run r script
    Rscript myscript.R # from command line
    souce('myscript.R') # within R

    # getopt package will parse command line arguments

    # simple reading of data frame
    df <- read.delim('tab_delim_file.txt', # file to read
    sep='\t', # tab delimiter
  16. @ctokheim ctokheim revised this gist Feb 26, 2014. 1 changed file with 17 additions and 1 deletion.
    18 changes: 17 additions & 1 deletion r_tricks.r
    Original file line number Diff line number Diff line change
    @@ -29,4 +29,20 @@ df$col1 %in% myvector # returns a boolean vector
    names(myobj)

    # print data types of dataframe
    str(df)
    str(df)

    # merge data frames by rows/cols
    rbind # row wise
    cbind # column wise

    # subsets of data
    df.sub <- subset(df, Column1<13)
    df.sub <- df[df$Column1<13,]

    # KDE
    density(df$Column1)

    # save plots
    png() # pdf(), jpeg(), etc
    hist(df$Column1)
    dev.off()
  17. @ctokheim ctokheim revised this gist Dec 22, 2013. 1 changed file with 3 additions and 0 deletions.
    3 changes: 3 additions & 0 deletions r_tricks.r
    Original file line number Diff line number Diff line change
    @@ -18,6 +18,9 @@ mygenes <- mygenes[,1] # select the only column in the file
    # select rows by rownames
    df[mygenes,]

    # select rows by column value
    df[df$total > 10,]

    # check if an element is in a vector
    'b' %in% myvector
    df$col1 %in% myvector # returns a boolean vector
  18. @ctokheim ctokheim revised this gist Dec 13, 2013. 1 changed file with 4 additions and 1 deletion.
    5 changes: 4 additions & 1 deletion r_tricks.r
    Original file line number Diff line number Diff line change
    @@ -23,4 +23,7 @@ df[mygenes,]
    df$col1 %in% myvector # returns a boolean vector

    # get name of an object
    names(myobj)
    names(myobj)

    # print data types of dataframe
    str(df)
  19. @ctokheim ctokheim revised this gist Dec 11, 2013. 1 changed file with 4 additions and 1 deletion.
    5 changes: 4 additions & 1 deletion r_tricks.r
    Original file line number Diff line number Diff line change
    @@ -20,4 +20,7 @@ df[mygenes,]

    # check if an element is in a vector
    'b' %in% myvector
    df$col1 %in% myvector # returns a boolean vector
    df$col1 %in% myvector # returns a boolean vector

    # get name of an object
    names(myobj)
  20. @ctokheim ctokheim revised this gist Dec 11, 2013. 1 changed file with 5 additions and 1 deletion.
    6 changes: 5 additions & 1 deletion r_tricks.r
    Original file line number Diff line number Diff line change
    @@ -16,4 +16,8 @@ mygenes <- read.table("single_column.txt", header=FALSE)
    mygenes <- mygenes[,1] # select the only column in the file

    # select rows by rownames
    df[mygenes,]
    df[mygenes,]

    # check if an element is in a vector
    'b' %in% myvector
    df$col1 %in% myvector # returns a boolean vector
  21. @ctokheim ctokheim revised this gist Dec 11, 2013. 1 changed file with 8 additions and 1 deletion.
    9 changes: 8 additions & 1 deletion r_tricks.r
    Original file line number Diff line number Diff line change
    @@ -9,4 +9,11 @@ rowSums(df) # sum by rows
    colnames(df) # return column names
    colnames(df) <- c('Col1', ...) # set column names
    rownames(df) # return row names
    rownames(df) <- myVar # set row names
    rownames(df) <- myVar # set row names

    # read a simple list of genes
    mygenes <- read.table("single_column.txt", header=FALSE)
    mygenes <- mygenes[,1] # select the only column in the file

    # select rows by rownames
    df[mygenes,]
  22. @ctokheim ctokheim revised this gist Dec 11, 2013. 1 changed file with 7 additions and 1 deletion.
    8 changes: 7 additions & 1 deletion r_tricks.r
    Original file line number Diff line number Diff line change
    @@ -3,4 +3,10 @@ df <- read.delim('tab_delim_file.txt', # file to read
    sep='\t', # tab delimiter
    row.names=1) # use first column as the row names

    rowSums(df) # sum by rows
    rowSums(df) # sum by rows

    # get/set the names of a data.frame
    colnames(df) # return column names
    colnames(df) <- c('Col1', ...) # set column names
    rownames(df) # return row names
    rownames(df) <- myVar # set row names
  23. @ctokheim ctokheim created this gist Dec 11, 2013.
    6 changes: 6 additions & 0 deletions r_tricks.r
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,6 @@
    # simple reading of data frame
    # row.names=1 takes the row names from the first column of the file,
    # so that column does not appear as a data column in df
    df <- read.delim('tab_delim_file.txt', # file to read
    sep='\t', # tab delimiter
    row.names=1) # use first column as the row names

    rowSums(df) # sum by rows