## Instructions for working with subtypes in TCGAbiolinks:
## https://www.bioconductor.org/packages/devel/bioc/vignettes/TCGAbiolinks/inst/doc/tcgaBiolinks.html#tcgaquery_subtype:_working_with_molecular_subtypes_data
## Note: the subtypes are from the 2012 paper, so they do not include ALL TCGA
## samples.
## https://support.bioconductor.org/p/91855/
##
## Purpose: download TCGA-BRCA DNA methylation data, keep primary-tumor samples
## that have a PAM50 subtype call, and draw one boxplot per selected CpG probe
## with the samples grouped by PAM50 subtype.

## This script used to source http://www.bioconductor.org/biocLite.R at this
## point. That is the retired Bioconductor installer (replaced by BiocManager),
## it ran remote code over plain HTTP on every execution, and nothing below
## calls biocLite(). Install the dependency once, outside this script:
##   install.packages("BiocManager")
##   BiocManager::install("TCGAbiolinks")
library(TCGAbiolinks)

#### RNASeq Gene Expression
## query.gene <- GDCquery(project = "TCGA-BRCA",
##                        sample.type = "Primary solid Tumor",
##                        data.category = "Transcriptome Profiling",
##                        data.type = "Gene Expression Quantification",
##                        workflow.type = "HTSeq - Counts")
## GDCdownload(query.gene)
## data.gene <- GDCprepare(query.gene, summarizedExperiment = FALSE)

#### DNA Methylation
## NOTE(review): `legacy = TRUE` targets the GDC legacy archive -- confirm the
## installed TCGAbiolinks version still accepts this argument.
query.met <- GDCquery(
  project = c("TCGA-BRCA"),
  legacy = TRUE,
  data.category = "DNA methylation",
  platform = c(
    "Illumina Human Methylation 450",
    "Illumina Human Methylation 27"
  )
)
GDCdownload(query.met)
data.met <- GDCprepare(query.met, summarizedExperiment = FALSE)

BRCA_path_subtypes <- TCGAquery_subtype(tumor = "brca")

## Drop the first three columns so that only per-sample columns remain
## (presumably probe annotation columns -- verify against GDCprepare output).
metData.df <- data.met[, -(1:3)]
sort(names(metData.df))

## Characters 14-15 of a column name hold the sample-type code; keep code "01".
tumors <- which(substr(names(metData.df), start = 14, stop = 15) == "01")
metData.tumors <- metData.df[, tumors]

## Characters 1-12 of a column name are matched against the subtype table's
## `patient` column; keep only samples whose patient appears there.
subtyped <- which(
  substr(names(metData.tumors), start = 1, stop = 12) %in%
    BRCA_path_subtypes$patient
)
metData.subtyped <- metData.tumors[, subtyped]

## Patients with a non-missing PAM50 call.
pam50 <- BRCA_path_subtypes[
  !is.na(BRCA_path_subtypes$PAM50.mRNA),
  c("patient", "PAM50.mRNA")
]

## Map every remaining sample barcode to its patient and PAM50 subtype.
## stringsAsFactors = FALSE keeps barcodes as character on every R version.
my_names <- data.frame(
  barcode = names(metData.subtyped),
  stringsAsFactors = FALSE
)
my_names$patient <- substr(my_names$barcode, start = 1, stop = 12)
metadata <- merge(
  my_names, pam50,
  by.x = "patient", by.y = "patient",
  all.x = FALSE, all.y = FALSE, sort = FALSE
)
row.names(metadata) <- metadata$barcode

## Reorder/select the sample columns to follow `metadata` row order, so the
## subtype labels attached below line up with the samples.
## drop = FALSE keeps a data frame even if only a single sample matched.
metData.subtyped2 <- metData.subtyped[, row.names(metadata), drop = FALSE]

## Keep only probes (rows) with no missing value in any retained sample.
complete_probes <- !is.na(rowSums(metData.subtyped2))
metData.subtyped3 <- metData.subtyped2[complete_probes, , drop = FALSE]

## Probes of interest. Fail early with a clear message if one of them is not
## available (e.g. it was removed by the missing-value filter above); indexing
## by an unknown row name would otherwise silently yield an all-NA row.
probes <- c("cg07054526", "cg19531713", "cg19088651")
missing_probes <- setdiff(probes, row.names(metData.subtyped3))
if (length(missing_probes) > 0) {
  stop(
    "Probe(s) not available after filtering: ",
    paste(missing_probes, collapse = ", "),
    call. = FALSE
  )
}

## Samples in rows, the selected probes in columns, plus the PAM50 label.
metData.subset <- as.data.frame(t(metData.subtyped3[probes, ]))
metData.subset$pam50 <- metadata$PAM50.mRNA

## One boxplot per probe, grouped by PAM50 subtype, with one colour per group.
subtype_cols <- c("red", "magenta", "purple2", "skyblue", "darkgreen")
for (probe in probes) {
  ## reformulate() builds the formula `<probe> ~ pam50`.
  boxplot(
    reformulate("pam50", response = probe),
    data = metData.subset,
    main = probe,
    col = subtype_cols
  )
}