Last active
September 18, 2020 13:01
-
-
Save grabear/018e86413b19b62a6bb8e72a9adba349 to your computer and use it in GitHub Desktop.
Revisions
-
grabear renamed this gist
Apr 27, 2018 . 1 changed file with 0 additions and 0 deletions.There are no files selected for viewing
File renamed without changes. -
grabear revised this gist
Apr 27, 2018 . 1 changed file with 14 additions and 0 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,14 @@ # This functionality has been tested and a PR has been pulled with phyloseq here: # https://github.com/joey711/phyloseq/pull/854 # # While the function has been vetted, the maintainers are very busy and the PR has not # yet been added to the main package. Below i've added some detail to explain how to parse # your silva data. It's quite easy.... source("parse_silva_taxonomy_128") # import biom data silva_biom <- system.file("extdata", "SILVA_OTU_table.biom", package="phyloseq") # Create phyloseq object using silva parseing function silva_phyloseq <- import_biom(BIOMfilename = silva_biom, parseFunction = parse_taxonomy_silva_128) -
grabear revised this gist
Apr 27, 2018 . 1 changed file with 3 additions and 7 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,11 +1,6 @@ #' @examples #' taxvec1 = c("Root", "k__Bacteria", "p__Firmicutes", "c__Bacilli", "o__Bacillales", "f__Staphylococcaceae") #' parse_taxonomy_default(taxvec1) parse_taxonomy_default = function(char.vec){ # Remove any leading empty space char.vec = gsub("^[[:space:]]{1,}", "", char.vec) @@ -20,8 +15,9 @@ parse_taxonomy_default = function(char.vec){ return(char.vec) } #' @examples #' taxvec3 = c("D_0__Bacteria", "D_1__Firmicutes", "D_2__Bacilli", "D_3__Staphylococcaceae") #' parse_taxonomy_silva(taxvec3) parse_taxonomy_silva_128 <- function(char.vec){ # Use default to assign names to elements in case problem with greengenes prefix char.vec = parse_taxonomy_default(char.vec) -
grabear created this gist
Apr 27, 2018 .There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,66 @@ #' @examples #' taxvec1 = c("Root", "k__Bacteria", "p__Firmicutes", "c__Bacilli", "o__Bacillales", "f__Staphylococcaceae") #' parse_taxonomy_default(taxvec1) #' parse_taxonomy_greengenes(taxvec1) #' taxvec2 = c("Root;k__Bacteria;p__Firmicutes;c__Bacilli;o__Bacillales;f__Staphylococcaceae") #' parse_taxonomy_qiime(taxvec2) #' taxvec3 = c("D_0__Bacteria", "D_1__Firmicutes", "D_2__Bacilli", "D_3__Staphylococcaceae") #' parse_taxonomy_silva(taxvec3) parse_taxonomy_default = function(char.vec){ # Remove any leading empty space char.vec = gsub("^[[:space:]]{1,}", "", char.vec) # Remove any trailing space char.vec = gsub("[[:space:]]{1,}$", "", char.vec) if( length(char.vec) > 0 ){ # Add dummy element (rank) name names(char.vec) = paste("Rank", 1:length(char.vec), sep="") } else { warning("Empty taxonomy vector encountered.") } return(char.vec) } parse_taxonomy_silva_128 <- function(char.vec){ # Use default to assign names to elements in case problem with greengenes prefix char.vec = parse_taxonomy_default(char.vec) # Check for unassigned taxa if (char.vec["Rank1"] == "Unassigned") { char.vec <- c(Rank1="D_0__Unassigned", Rank2="D_1__Unassigned", Rank3="D_2__Unassigned", Rank4="D_3__Unassigned", Rank5="D_4__Unassigned", Rank6="D_5__Unassigned", Rank7="D_6__Unassigned") } # Define the meaning of each prefix according to SILVA taxonomy Tranks = c(D_0="Kingdom", D_1="Phylum", D_2="Class", D_3="Order", D_4="Family", D_5="Genus", D_6="Species") # Check for prefix using regexp, warn if there were none. trim indices, ti ti = grep("[[:alpha:]]\\_[[:digit:]]{1}\\_\\_", char.vec) if( length(ti) == 0L ){ warning( "No silva prefixes were found. \n", "Consider using parse_taxonomy_delfault() instead if true for all OTUs. \n", "Dummy ranks may be included among taxonomic ranks now." ) # Will want to return without further modifying char.vec taxvec = char.vec # Replace names of taxvec according to prefix, if any present... } else { # Format character vectors for Ambiguous taxa if( length(ti) < 7 ){ for (key in names(char.vec)) { if ( char.vec[key] == "Ambiguous_taxa" ) { tax_no <- (as.numeric(substr(key, 5, 5)) - 1) char.vec[key] = sprintf("D_%s__Ambiguous_taxa", tax_no) } } # Reset the trimmed indicies if Ambiguous taxa ti = grep("[[:alpha:]]\\_[[:digit:]]{1}\\_\\_", char.vec) } # Remove prefix using sub-"" regexp, call result taxvec taxvec = gsub("[[:alpha:]]\\_[[:digit:]]{1}\\_\\_", "", char.vec) # Define the ranks that will be replaced repranks = Tranks[substr(char.vec[ti], 1, 3)] # Replace, being sure to avoid prefixes notK present in Tranks names(taxvec)[ti[!is.na(repranks)]] = repranks[!is.na(repranks)] } return(taxvec) }