Skip to content

Instantly share code, notes, and snippets.

@foriin
Created December 16, 2019 12:33
Show Gist options
  • Save foriin/2fd062d671eca30becaf2a6442b22e3a to your computer and use it in GitHub Desktop.
Save foriin/2fd062d671eca30becaf2a6442b22e3a to your computer and use it in GitHub Desktop.

Revisions

  1. foriin created this gist Dec 16, 2019.
    71 changes: 71 additions & 0 deletions RNA-seq example
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,71 @@
    library(GenomicRanges)
    library(DESeq2)
    library(data.table)
    library(dplyr)
    library(stringr)
    library(gplots)
    library(tximport)
    library(openxlsx)

    # Genes were taken from flybase I think, selecting only GENEs from full annotation
    # E.g. ftp://ftp.flybase.net/releases/FB2019_05/dmel_r6.30/gff/dmel-all-r6.30.gff.gz
    genes <- fread("~/Database/dmel/Annotations/dmel-gene-r5.57.gff") %>%
    dplyr::select(c(1, 4, 5, 7, 9)) %>%
    setNames(c("chr", "start", "end", "strand", "attr")) %>%
    mutate(id = sub('^ID=(FBgn[0-9]+);.*', '\\1', attr),
    chr = paste0("chr", chr), gene_name = sub('.*Name=([^;]+);.*', '\\1', attr),
    tss = ifelse(strand == "+", start, end)) %>%
    dplyr::select(-attr)

    genes.gr <- GRanges(
    seqnames = Rle(genes$chr),
    ranges = IRanges(
    start = genes$start,
    end = genes$end,
    names = genes$gene_name
    )
    )


    # IMPORT SALMON OUTPUT
    files <- paste0("/home/artem/IMG/Projects/HP1.ovaries/rnaseq/salmon_out/190325defset/170914_HSGA.Shevelev_RNA_2017.",
    c("control1.trim", "control2.trim", "KD1.trim", "KD2.trim"))
    files <- file.path(files, "quant.genes.sf")
    names(files) <- c("WT1", "WT2", "HP1KD1", "HP1KD2")
    txi.hp1 <- tximport(files, "salmon", txOut = T)

    head(txi.hp1$counts)

    # DESEQ2
    samples <- data.frame(row.names = names(files), conditions = c("wt", "wt", "kd", "kd"), orig = "evrogen")

    ddsTxi <- DESeqDataSetFromTximport(txi.hp1, colData = samples, design =~ conditions)

    keep <- rowSums(counts(ddsTxi)) >= 10

    ddsTxi <- ddsTxi[keep,]
    ddsTxi$conditions <- relevel(ddsTxi$conditions, ref = "wt")

    dds <- DESeq(ddsTxi)
    res <- results(dds)

    resultsNames(dds)

    resLFC <- lfcShrink(dds, coef = "conditions_kd_vs_wt", type = "apeglm",
    lfcThreshold = 1)
    pdf("./plots/MA.plot.pdf")
    DESeq2::plotMA(resLFC, alpha = 0.05, ylim=c(-6, 6))
    dev.off()

    summary(res, alpha = 0.05)

    res.df <- as.data.frame(res) %>% mutate(id = rownames(res))
    res.df <- merge(genes, res.df, by = "id", all.y = T)
    write.xlsx(res.df, file = "./tables/DESeq2.hp1kd.results.xlsx", dec = ",")
    # res.df %>% filter(is.na(gene_name)) %>% View

    res.df.tss.gr <- makeGRangesFromDataFrame(res.df %>%
    mutate(start = tss,
    end = tss) %>%
    filter(-tss, padj < 0.05),
    keep.extra.columns = T)