wilkox · September 26, 2016 01:20
diff --git a/prepare_SPEIC-EASI.R b/prepare_SPEIC-EASI.R
 # Libraries
 library(readr)
 library(SpiecEasi)
 library(phyloseq)
 library(tibble)
 library(igraph)
 library(dplyr)
 library(stringr)
 library(tidyr)
 library(ggplot2)

 # Prepare phyloseq OTU table
 OTUTable <- "OTU_table_clean5.tidy.txt" %>%
  read_tsv %>%
  select(OTU, Sample, Count) %>%
  mutate(OTU = word(OTU, 1, sep = ";")) %>%
  # Select only most abundant OTUs
  # Delete the marked lines below to include all OTUs
  group_by(OTU) %>%                   # <- Delete
  mutate(TotalCount = sum(Count)) %>% # <- Delete
  ungroup %>%                         # <- Delete
  filter(TotalCount >= 1000) %>%      # <- Delete
  select(-TotalCount) %>%             # <- Delete
  spread(OTU, Count, fill = 0) %>%
  as.data.frame %>%
  remove_rownames %>%
  column_to_rownames("Sample") %>%
  as.matrix %>%
  otu_table(taxa_are_rows = FALSE)

 # Prepare phyloseq taxa table
 TaxaTable <- "OTU_table_clean5.tidy.txt" %>%
  read_tsv %>%
  select(OTU, Domain = Kingdom, Phylum, Class, Order, Family, Genus, Species) %>%
  mutate(OTU = word(OTU, 1, sep = ";")) %>%
  unique %>%
  as.data.frame %>%
  remove_rownames %>%
  column_to_rownames("OTU") %>%
  as.matrix %>%
  tax_table

 # Combine OTU and taxonomy into single phyloseq object
 Physeq <- phyloseq(OTUTable, TaxaTable)

 # Run Spiec-Easi. According to the documentation, Spiec-Easi performs
 # normalisation internally so no need to prepare the data.
 SEoutput <- Physeq %>%
  spiec.easi(
    method = 'mb',
    lambda.min.ratio = 1e-2,
    nlambda = 20,
    icov.select.params = list(rep.num=20, ncores=2)
  )

 # Prepare plot
 Graph <- adj2igraph(
    SEoutput$refit,
    vertex.attr = list(name = taxa_names(Physeq))
  )

 # Identify top seven genera by number of edges (i.e. sum of degrees across all
 # OTUs in that genus)
 TopDegreeGenera <- Graph %>%
  degree %>%
  data.frame(OTU = names(.), Degree = .) %>%
  remove_rownames %>%
  as.tbl %>%
  left_join(
    "OTU_table_clean5.tidy.txt" %>%
      read_tsv %>%
      select(
        OTU,
        Domain = Kingdom,
        Phylum,
        Class,
        Order,
        Family,
        Genus,
        Species
      ) %>%
      unique %>%
      mutate(OTU = word(OTU, 1, sep = ";"))
  ) %>%
  group_by(Genus) %>%
  summarise(SumDegree = sum(Degree)) %>%
  arrange(desc(SumDegree)) %>%
  na.omit %>%
  .[1:11, ] %>%
  .$Genus

 # Rewrite taxonomy table in phyloseq object to collapse genera
 tax_table(Physeq) %>%
  as.data.frame %>%
  rownames_to_column("OTU") %>%
  as.tbl %>%
  mutate(Genus = ifelse(
    Genus %in% TopDegreeGenera,
    as.character(Genus),
    "Low-degree genera"
  )) %>%
  as.data.frame %>%
  remove_rownames %>%
  column_to_rownames("OTU") %>%
  as.matrix -> tax_table(Physeq)

 # Draw plot
 Plot <- plot_network(
  Graph,
  Physeq,
  type = "taxa",
  color = "Genus",
  label = NULL
 ) +
  scale_colour_brewer(palette = "Set3")

 # Reorder genus levels to put "Low-degree genera" last
 Plot$data %>%
  as.tbl %>%
  mutate(
    Genus = factor(Genus, levels = c(TopDegreeGenera, "Low-degree genera"))
  ) %>%
  as.data.frame -> Plot$data

 # Add OTU relative abundance data to plot data
 "OTU_table_clean5.tidy.txt" %>%
  read_tsv %>%
  select(OTU, Sample, Count) %>%
  mutate(OTU = word(OTU, 1, sep = ";")) %>%
  complete(OTU, Sample, fill = list(Count = 0)) %>%
  group_by(Sample) %>%
  mutate(RelativeAbundance = Count / sum(Count)) %>%
  ungroup %>%
  group_by(OTU) %>%
  summarise(MeanRelativeAbundance = mean(RelativeAbundance)) %>%
  ungroup %>%
  rename(value = OTU) %>%
  right_join(Plot$data %>% as.tbl) %>%
  as.data.frame -> Plot$data

 # Remove existing geom_point aesthetic from plot
 Plot$layers <- Plot$layers[2]

 # Add new point aesthetic
 Plot <- Plot + geom_point(aes(
  x = x,
  y = y,
  colour = Genus,
  size = MeanRelativeAbundance
 ))

 # Save plot
 ggsave("plot.pdf", plot = Plot, width = 300, height = 150, units = "mm")
	# Libraries
	library(readr)
	library(SpiecEasi)
	library(phyloseq)
	library(tibble)
	library(igraph)
	library(dplyr)
	library(stringr)
	library(tidyr)
	library(ggplot2)

	# Prepare phyloseq OTU table
	OTUTable <- "OTU_table_clean5.tidy.txt" %>%
	read_tsv %>%
	select(OTU, Sample, Count) %>%
	mutate(OTU = word(OTU, 1, sep = ";")) %>%
	# Select only most abundant OTUs
	# Delete the marked lines below to include all OTUs
	group_by(OTU) %>% # <- Delete
	mutate(TotalCount = sum(Count)) %>% # <- Delete
	ungroup %>% # <- Delete
	filter(TotalCount >= 1000) %>% # <- Delete
	select(-TotalCount) %>% # <- Delete
	spread(OTU, Count, fill = 0) %>%
	as.data.frame %>%
	remove_rownames %>%
	column_to_rownames("Sample") %>%
	as.matrix %>%
	otu_table(taxa_are_rows = FALSE)

	# Prepare phyloseq taxa table
	TaxaTable <- "OTU_table_clean5.tidy.txt" %>%
	read_tsv %>%
	select(OTU, Domain = Kingdom, Phylum, Class, Order, Family, Genus, Species) %>%
	mutate(OTU = word(OTU, 1, sep = ";")) %>%
	unique %>%
	as.data.frame %>%
	remove_rownames %>%
	column_to_rownames("OTU") %>%
	as.matrix %>%
	tax_table

	# Combine OTU and taxonomy into single phyloseq object
	Physeq <- phyloseq(OTUTable, TaxaTable)

	# Run Spiec-Easi. According to the documentation, Spiec-Easi performs
	# normalisation internally so no need to prepare the data.
	SEoutput <- Physeq %>%
	spiec.easi(
	method = 'mb',
	lambda.min.ratio = 1e-2,
	nlambda = 20,
	icov.select.params = list(rep.num=20, ncores=2)
	)

	# Prepare plot
	Graph <- adj2igraph(
	SEoutput$refit,
	vertex.attr = list(name = taxa_names(Physeq))
	)

	# Identify top seven genera by number of edges (i.e. sum of degrees across all
	# OTUs in that genus)
	TopDegreeGenera <- Graph %>%
	degree %>%
	data.frame(OTU = names(.), Degree = .) %>%
	remove_rownames %>%
	as.tbl %>%
	left_join(
	"OTU_table_clean5.tidy.txt" %>%
	read_tsv %>%
	select(
	OTU,
	Domain = Kingdom,
	Phylum,
	Class,
	Order,
	Family,
	Genus,
	Species
	) %>%
	unique %>%
	mutate(OTU = word(OTU, 1, sep = ";"))
	) %>%
	group_by(Genus) %>%
	summarise(SumDegree = sum(Degree)) %>%
	arrange(desc(SumDegree)) %>%
	na.omit %>%
	.[1:11, ] %>%
	.$Genus

	# Rewrite taxonomy table in phyloseq object to collapse genera
	tax_table(Physeq) %>%
	as.data.frame %>%
	rownames_to_column("OTU") %>%
	as.tbl %>%
	mutate(Genus = ifelse(
	Genus %in% TopDegreeGenera,
	as.character(Genus),
	"Low-degree genera"
	)) %>%
	as.data.frame %>%
	remove_rownames %>%
	column_to_rownames("OTU") %>%
	as.matrix -> tax_table(Physeq)

	# Draw plot
	Plot <- plot_network(
	Graph,
	Physeq,
	type = "taxa",
	color = "Genus",
	label = NULL
	) +
	scale_colour_brewer(palette = "Set3")

	# Reorder genus levels to put "Low-degree genera" last
	Plot$data %>%
	as.tbl %>%
	mutate(
	Genus = factor(Genus, levels = c(TopDegreeGenera, "Low-degree genera"))
	) %>%
	as.data.frame -> Plot$data

	# Add OTU relative abundance data to plot data
	"OTU_table_clean5.tidy.txt" %>%
	read_tsv %>%
	select(OTU, Sample, Count) %>%
	mutate(OTU = word(OTU, 1, sep = ";")) %>%
	complete(OTU, Sample, fill = list(Count = 0)) %>%
	group_by(Sample) %>%
	mutate(RelativeAbundance = Count / sum(Count)) %>%
	ungroup %>%
	group_by(OTU) %>%
	summarise(MeanRelativeAbundance = mean(RelativeAbundance)) %>%
	ungroup %>%
	rename(value = OTU) %>%
	right_join(Plot$data %>% as.tbl) %>%
	as.data.frame -> Plot$data

	# Remove existing geom_point aesthetic from plot
	Plot$layers <- Plot$layers[2]

	# Add new point aesthetic
	Plot <- Plot + geom_point(aes(
	x = x,
	y = y,
	colour = Genus,
	size = MeanRelativeAbundance
	))

	# Save plot
	ggsave("plot.pdf", plot = Plot, width = 300, height = 150, units = "mm")
No results found