Created
September 25, 2017 17:27
-
-
Save AmyOlex/b764c2983fc289db5cd0a5fcd079e27e to your computer and use it in GitHub Desktop.
Code to identify the PAM50 subtypes samples using TCGABiolinks.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| ## Instructions for working with subtypes in TCGABiolinks: https://www.bioconductor.org/packages/devel/bioc/vignettes/TCGAbiolinks/inst/doc/tcgaBiolinks.html#tcgaquery_subtype:_working_with_molecular_subtypes_data | |
| ## Note: the subtypes come from the 2012 paper, so they do not include ALL TCGA samples. | |
| ## https://support.bioconductor.org/p/91855/ | |
# Load TCGAbiolinks (a Bioconductor package).
# The legacy biocLite.R bootstrap script is intentionally not sourced: it was
# fetched over plain HTTP and executed as-is, it has been superseded by
# BiocManager, and nothing in the visible script calls biocLite().
if (!requireNamespace("TCGAbiolinks", quietly = TRUE)) {
  stop(
    "Package 'TCGAbiolinks' is not installed; install it from Bioconductor, ",
    "e.g. BiocManager::install(\"TCGAbiolinks\").",
    call. = FALSE
  )
}
library(TCGAbiolinks)
| #### RNASeq Gene Expression | |
| ## query.gene <- GDCquery(project="TCGA-BRCA", sample.type="Primary solid Tumor", data.category="Transcriptome Profiling", data.type="Gene Expression Quantification", workflow.type="HTSeq - Counts") | |
| ## GDCdownload(query.gene) | |
| ## data.gene <- GDCprepare(query.gene, summarizedExperiment = FALSE) | |
#### DNA Methylation
# Query, download and load the TCGA-BRCA DNA methylation files for the two
# requested Illumina platforms.
# NOTE(review): `legacy = TRUE` requires an installed TCGAbiolinks version that
# still accepts this argument -- confirm before running.
query.met <- GDCquery(
  project = "TCGA-BRCA",
  legacy = TRUE,
  data.category = "DNA methylation",
  platform = c(
    "Illumina Human Methylation 450",
    "Illumina Human Methylation 27"
  )
)
GDCdownload(query.met)

# summarizedExperiment = FALSE: the code below indexes the result by column
# position and by names(), i.e. it is handled as a plain table.
data.met <- GDCprepare(query.met, summarizedExperiment = FALSE)

# Subtype table for BRCA; its `patient` and `PAM50.mRNA` columns are used below.
BRCA_path_subtypes <- TCGAquery_subtype(tumor = "brca")
# Narrow the methylation table to sample columns that (a) carry the "01"
# sample-type code and (b) belong to a patient listed in the subtype table.
# Every column subset uses drop = FALSE so that a selection of exactly one
# column stays a data frame (otherwise names() below would stop working).

# Drop the first three columns.
# NOTE(review): assumes these three columns are probe annotation rather than
# samples -- confirm against the GDCprepare() output.
metData.df <- data.met[, -(1:3), drop = FALSE]

# Inspection only: shows the sorted column names when evaluated at top level;
# the result is not stored.
sort(names(metData.df))

# Characters 14-15 of each column name are compared with "01"
# (presumably the primary-tumour sample-type code of the TCGA barcode).
tumors <- which(substr(names(metData.df), start = 14, stop = 15) == "01")
metData.tumors <- metData.df[, tumors, drop = FALSE]

# Characters 1-12 of each column name are matched against the patient ids
# of the subtype table.
subtyped <- which(
  substr(names(metData.tumors), start = 1, stop = 12) %in%
    BRCA_path_subtypes$patient
)
metData.subtyped <- metData.tumors[, subtyped, drop = FALSE]
# Build the per-sample annotation table (patient, barcode, PAM50 call) and
# reorder the methylation columns so they follow the rows of that table.

# Subtype rows with a non-missing PAM50 call, reduced to the two columns used.
pam50 <- BRCA_path_subtypes[
  !is.na(BRCA_path_subtypes$PAM50.mRNA),
  c("patient", "PAM50.mRNA")
]

# One row per sample column. stringsAsFactors = FALSE keeps the barcodes as
# character on R versions older than 4.0 as well.
my_names <- data.frame(
  barcode = names(metData.subtyped),
  stringsAsFactors = FALSE
)
my_names$patient <- substr(my_names$barcode, start = 1, stop = 12)

# Inner join on patient: only samples whose patient has a PAM50 call remain.
# sort = FALSE leaves the row order to merge(); the column reorder below
# re-aligns the methylation data with whatever order comes back.
metadata <- merge(my_names, pam50, by = "patient", all = FALSE, sort = FALSE)
row.names(metadata) <- metadata$barcode

# drop = FALSE: a single remaining sample must stay a one-column data frame.
metData.subtyped2 <- metData.subtyped[, row.names(metadata), drop = FALSE]
# Extract the three probes of interest as a samples-by-probes data frame and
# attach the PAM50 call of every sample.

# Keep only probes without a missing value in any sample: rowSums() is NA as
# soon as one sample of the row is NA. drop = FALSE keeps a data frame even
# when only one sample column exists.
metData.subtyped3 <- metData.subtyped2[
  !is.na(rowSums(metData.subtyped2)), ,
  drop = FALSE
]

probes_of_interest <- c("cg07054526", "cg19531713", "cg19088651")

# A probe removed by the filter above (or absent from the data) would be
# returned as an all-NA row named "NA"; report that explicitly instead of
# letting it surface later as a confusing plotting error.
missing_probes <- setdiff(probes_of_interest, row.names(metData.subtyped3))
if (length(missing_probes) > 0) {
  warning(
    "Probe(s) not available after removing rows with missing values: ",
    paste(missing_probes, collapse = ", "),
    call. = FALSE
  )
}

# Transpose so that samples become rows and probes become columns.
metData.subset <- as.data.frame(
  t(metData.subtyped3[probes_of_interest, , drop = FALSE])
)

# Sample columns were ordered by row.names(metadata) earlier in the script,
# so the PAM50 calls line up row by row.
metData.subset$pam50 <- metadata$PAM50.mRNA
# Draw one boxplot per probe, grouped by the pam50 column, with the probe id
# as the title and the same five colours for every plot.
pam50_palette <- c("red", "magenta", "purple2", "skyblue", "darkgreen")
for (probe_id in c("cg07054526", "cg19531713", "cg19088651")) {
  # reformulate() builds the formula `<probe_id> ~ pam50`.
  boxplot(
    reformulate("pam50", response = probe_id),
    metData.subset,
    main = probe_id,
    col = pam50_palette
  )
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment