Skip to content

Instantly share code, notes, and snippets.

@AmyOlex
Created September 25, 2017 17:27
Show Gist options
  • Save AmyOlex/b764c2983fc289db5cd0a5fcd079e27e to your computer and use it in GitHub Desktop.
Save AmyOlex/b764c2983fc289db5cd0a5fcd079e27e to your computer and use it in GitHub Desktop.
Code to identify the PAM50 subtypes of samples using TCGAbiolinks.
## Instructions for working with subtypes in TCGAbiolinks: https://www.bioconductor.org/packages/devel/bioc/vignettes/TCGAbiolinks/inst/doc/tcgaBiolinks.html#tcgaquery_subtype:_working_with_molecular_subtypes_data
## Note: the subtypes come from a 2012 paper, so they do not cover ALL TCGA samples.
## https://support.bioconductor.org/p/91855/
## NOTE: this script used to source the legacy biocLite.R bootstrap script over
## plain HTTP. Bioconductor has retired that installer in favour of BiocManager,
## and executing remote code fetched over an unencrypted connection is unsafe,
## so it is no longer run here. Install the dependency once, outside this
## script, with:
##   install.packages("BiocManager"); BiocManager::install("TCGAbiolinks")
if (!requireNamespace("TCGAbiolinks", quietly = TRUE)) {
  stop(
    "Package 'TCGAbiolinks' is required. Install it with ",
    "BiocManager::install(\"TCGAbiolinks\").",
    call. = FALSE
  )
}
library(TCGAbiolinks)
#### RNASeq Gene Expression
## query.gene <- GDCquery(project="TCGA-BRCA", sample.type="Primary solid Tumor", data.category="Transcriptome Profiling", data.type="Gene Expression Quantification", workflow.type="HTSeq - Counts")
## GDCdownload(query.gene)
## data.gene <- GDCprepare(query.gene, summarizedExperiment = FALSE)
#### DNA Methylation
## Query the GDC for TCGA-BRCA DNA methylation files on the two Illumina array
## platforms listed below, download them, and load them with
## summarizedExperiment = FALSE.
## NOTE(review): `legacy = TRUE` requests the GDC legacy archive; confirm that
## the installed TCGAbiolinks version still accepts this argument.
query.met <- GDCquery(
  project = "TCGA-BRCA",
  data.category = "DNA methylation",
  platform = c(
    "Illumina Human Methylation 450",
    "Illumina Human Methylation 27"
  ),
  legacy = TRUE
)
GDCdownload(query.met)
data.met <- GDCprepare(query.met, summarizedExperiment = FALSE)
## Subtype annotations for the BRCA cohort, and the subset of patients that
## have a (non-missing) PAM50 call.
BRCA_path_subtypes <- TCGAquery_subtype(tumor = "brca")
pam50 <- BRCA_path_subtypes[
  !is.na(BRCA_path_subtypes$PAM50.mRNA),
  c("patient", "PAM50.mRNA")
]

## Drop the first three columns of the prepared table.
## NOTE(review): presumably these are probe annotation columns and the rest are
## one column per sample barcode -- confirm against the GDCprepare output.
metData.df <- data.met[, -(1:3)]
sort(names(metData.df))

## Keep columns whose barcode has "01" at characters 14-15.
tumors <- which(substr(names(metData.df), 14, 15) == "01")
metData.tumors <- metData.df[, tumors]

## Keep columns whose 12-character barcode prefix appears in the subtype table.
subtyped <- which(
  substr(names(metData.tumors), 1, 12) %in% BRCA_path_subtypes$patient
)
metData.subtyped <- metData.tumors[, subtyped]

## Build a barcode -> patient lookup and inner-join it with the PAM50 calls,
## so that only samples with a PAM50 call are retained.
my_names <- setNames(as.data.frame(names(metData.subtyped)), "barcode")
my_names[["patient"]] <- substr(my_names[["barcode"]], 1, 12)
metadata <- merge(my_names, pam50, by = "patient", sort = FALSE)
row.names(metadata) <- metadata[["barcode"]]

## Reorder/restrict the methylation columns to match the rows of `metadata`.
metData.subtyped2 <- metData.subtyped[, row.names(metadata)]
## Keep only probes (rows) with no missing value in any retained sample.
metData.subtyped3 <- metData.subtyped2[!is.na(rowSums(metData.subtyped2)), ]

## Probes to plot against PAM50 subtype.
probes_of_interest <- c("cg07054526", "cg19531713", "cg19088651")

## Indexing a data frame by a row name that does not exist silently returns an
## all-NA row with a mangled name, which later surfaces as an obscure
## "object not found" error in the plotting code. Fail early and clearly when a
## requested probe is absent or was removed by the NA filter above.
missing_probes <- setdiff(probes_of_interest, row.names(metData.subtyped3))
if (length(missing_probes) > 0) {
  stop(
    "Requested probe(s) not available after removing probes with missing ",
    "values: ", paste(missing_probes, collapse = ", "),
    call. = FALSE
  )
}

## Transpose so that samples are rows and the selected probes are columns.
metData.subset <- as.data.frame(
  t(metData.subtyped3[probes_of_interest, , drop = FALSE])
)
## Columns of metData.subtyped2 were selected in the row order of `metadata`,
## so the PAM50 calls line up with the rows of metData.subset by position.
metData.subset$pam50 <- metadata$PAM50.mRNA
## Draw one boxplot per probe, grouping the values in metData.subset by the
## `pam50` column; each plot is titled with the probe ID and uses the same
## five-colour palette. Wrapped in local() so no helper variables are left in
## the workspace.
local({
  subtype_palette <- c("red", "magenta", "purple2", "skyblue", "darkgreen")
  for (probe_id in c("cg07054526", "cg19531713", "cg19088651")) {
    # Builds the formula `<probe_id> ~ pam50`.
    plot_formula <- reformulate("pam50", response = probe_id)
    boxplot(plot_formula, metData.subset, main = probe_id, col = subtype_palette)
  }
})
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment