Related article: Drosdowech et al. (2024). Influence of feeding black soldier fly (Hermetia illucens), cricket (Gryllodes sigillatus), and superworm (Zophobas morio) on the gut microbiota of rainbow trout (Oncorhynchus mykiss) In draft

Related GitHub repository: https://github.com/sonjadros/fishgut15i

The R code provided below was used to generate the following figures and table presented in related article:

  • Figure 2
  • Figure 3

1. Load required packages

library(readr)
library(dplyr)
library(stringr)
library(ggplot2)
library(phyloseq)
library(reshape2)
library(cowplot)
library(eoffice)
library(zCompositions)
library(compositions)
library(viridis)
library(Maaslin2)
library(ggrepel)
library(ggiraph)
library(BiocManager)
library(ape)

theme_set(theme_bw())

sessionInfo()
## R version 4.3.2 (2023-10-31)
## Platform: x86_64-apple-darwin20 (64-bit)
## Running under: macOS Ventura 13.6.7
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/lib/libRblas.0.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/lib/libRlapack.dylib;  LAPACK version 3.11.0
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## time zone: America/Toronto
## tzcode source: internal
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] ape_5.8               BiocManager_1.30.23   ggiraph_0.8.10       
##  [4] ggrepel_0.9.5         Maaslin2_1.16.0       viridis_0.6.5        
##  [7] viridisLite_0.4.2     compositions_2.0-8    zCompositions_1.5.0-4
## [10] truncnorm_1.0-9       NADA_1.6-1.1          survival_3.7-0       
## [13] MASS_7.3-60.0.1       eoffice_0.2.2         cowplot_1.1.3        
## [16] reshape2_1.4.4        phyloseq_1.46.0       ggplot2_3.5.1        
## [19] stringr_1.5.1         dplyr_1.1.4           readr_2.1.5          
## 
## loaded via a namespace (and not attached):
##   [1] tensorA_0.36.2.1        rstudioapi_0.16.0       jsonlite_1.8.8         
##   [4] magrittr_2.0.3          magick_2.8.4            rmarkdown_2.27         
##   [7] fs_1.6.4                zlibbioc_1.48.2         ragg_1.3.2             
##  [10] vctrs_0.6.5             multtest_2.58.0         memoise_2.0.1          
##  [13] RCurl_1.98-1.16         askpass_1.2.0           base64enc_0.1-3        
##  [16] htmltools_0.5.8.1       curl_5.2.1              broom_1.0.6            
##  [19] Rhdf5lib_1.24.2         rhdf5_2.46.1            gridGraphics_0.5-1     
##  [22] sass_0.4.9              bslib_0.8.0             htmlwidgets_1.6.4      
##  [25] plyr_1.8.9              plotly_4.10.4           cachem_1.1.0           
##  [28] uuid_1.2-1              igraph_2.0.3            mime_0.12              
##  [31] lifecycle_1.0.4         iterators_1.0.14        pkgconfig_2.0.3        
##  [34] Matrix_1.6-5            R6_2.5.1                fastmap_1.2.0          
##  [37] GenomeInfoDbData_1.2.11 shiny_1.9.1             digest_0.6.36          
##  [40] colorspace_2.1-1        S4Vectors_0.40.2        textshaping_0.4.0      
##  [43] vegan_2.6-6.1           fansi_1.0.6             httr_1.4.7             
##  [46] mgcv_1.9-1              compiler_4.3.2          fontquiver_0.2.1       
##  [49] withr_3.0.1             backports_1.5.0         DBI_1.2.3              
##  [52] biglm_0.9-3             R.utils_2.12.3          bayesm_3.1-6           
##  [55] openssl_2.2.0           biomformat_1.30.0       devEMF_4.5             
##  [58] gfonts_0.2.0            permute_0.9-7           optparse_1.7.5         
##  [61] tools_4.3.2             zip_2.3.1               httpuv_1.6.15          
##  [64] R.oo_1.26.0             glue_1.7.0              nlme_3.1-165           
##  [67] rhdf5filters_1.14.1     promises_1.3.0          grid_4.3.2             
##  [70] cluster_2.1.6           ade4_1.7-22             generics_0.1.3         
##  [73] gtable_0.3.5            tzdb_0.4.0              R.methodsS3_1.8.2      
##  [76] tidyr_1.3.1             data.table_1.15.4       hms_1.1.3              
##  [79] xml2_1.3.6              utf8_1.2.4              XVector_0.42.0         
##  [82] BiocGenerics_0.48.1     foreach_1.5.2           pillar_1.9.0           
##  [85] yulab.utils_0.1.5       later_1.3.2             robustbase_0.99-3      
##  [88] splines_4.3.2           getopt_1.20.4           lattice_0.22-6         
##  [91] tidyselect_1.2.1        rvg_0.3.3               fontLiberation_0.1.0   
##  [94] Biostrings_2.70.3       knitr_1.48              gridExtra_2.3          
##  [97] fontBitstreamVera_0.1.1 IRanges_2.36.0          crul_1.5.0             
## [100] stats4_4.3.2            xfun_0.46               Biobase_2.62.0         
## [103] DEoptimR_1.1-3          stringi_1.8.4           lazyeval_0.2.2         
## [106] yaml_2.3.10             evaluate_0.24.0         codetools_0.2-20       
## [109] httpcode_0.3.0          officer_0.6.6           gdtools_0.3.7          
## [112] tibble_3.2.1            ggplotify_0.1.2         cli_3.6.3              
## [115] xtable_1.8-4            systemfonts_1.1.0       munsell_0.5.1          
## [118] jquerylib_0.1.4         R.devices_2.17.2        Rcpp_1.0.13            
## [121] GenomeInfoDb_1.38.8     parallel_4.3.2          bitops_1.0-8           
## [124] mvtnorm_1.2-5           scales_1.3.0            pcaPP_2.0-4            
## [127] purrr_1.0.2             crayon_1.5.3            flextable_0.9.6        
## [130] rlang_1.1.4

Acquire datasets

Download dataset (TBD)

#source("https://github.com/sonjadros/fishgut15i/tree/main/input")

Import Dataset

# Read count table in
df.counts <- read_tsv("input/merged-data.tsv") %>% #%>% .[-1,] #get rid of qiime2 row if needed
  tibble::column_to_rownames("Taxon") %>%
  dplyr::select(-id, -Sequence, -Confidence) %>%
  filter(!grepl("Mitochondria|Chloroplast", rownames(.))) %>%
  sjmisc::rotate_df()

# Read metadata
df.meta <- read_tsv("input/table_level6.txt") %>% dplyr::select(!matches(";")) %>% mutate(treatment_group = ifelse(treatment == "CON", "CON", "INSECT"))

# Merge tables
df.merge <- merge(df.meta, df.counts, by.x="sample", by.y="row.names", all=FALSE)

# Melt convert formats for plotting and anaylsis
df.melt <- df %>% melt(., id.vars=c("sample", "treatment", "sampletype"))

#------------------------

df.merge.counts <- df.merge %>% filter(sampletype=="Faeces") %>% tibble::column_to_rownames("sample") %>% dplyr::select(matches(";")) %>% sjmisc::rotate_df()
df.merge.meta <- df.merge %>% filter(sampletype=="Faeces") %>% dplyr::select(!matches(";"))

#-------------
# Get rid of rows with all zeros (meaningless data)
df.merge.counts <- df.merge.counts[!rowSums(df.merge.counts)==0,]
# Calculate czm
d.in.czm <- as.data.frame((t(cmultRepl(t(df.merge.counts), label=0, method="CZM", output="prop"))), stringsasfactors=FALSE)
## No. adjusted imputations:  27
# Filter
d.in.czm.filt <- data.frame(d.in.czm[apply(d.in.czm, 1, max)>0,], check.names=F)   #ABUNDANCE CUTOFF
# Calculate CLR
d.in.czm.filt.clr <- t(apply(t(d.in.czm.filt), 1, function(x){log2(x) - mean(log2(x))})) %>% as.data.frame(., stringsasfactors=FALSE) %>% sjmisc::rotate_df()
# Merge CLR + Metadata
d.mer <-merge(df.merge.meta, t(d.in.czm.filt.clr), by.x="sample", by.y="row.names",all=F)

MaasLin2 Stats

d.mer.counts <- d.mer %>% tibble::column_to_rownames("sample") %>% dplyr::select(matches(";")) #%>% sjmisc::rotate_df()
d.mer.meta <- d.mer %>% dplyr::select(!matches(";")) %>% `rownames<-`(.$sample)

maaslin.results <- Maaslin2(d.mer.counts, d.mer.meta, output='maaslin2_output',
                            transform = "none", min_prevalence = 0.3, min_abundance = -50000,
                            plot_heatmap=FALSE, plot_scatter=FALSE, standardize = FALSE,
                            analysis_method="LM", normalization="none", #cores=20
                            fixed_effects= c("treatment_group"), reference=c("treatment,CON"))

maaslin.results.df <- maaslin.results$results %>%
  mutate(sv = str_split_fixed(feature, "[.]", 8)[,1]) %>% #SV names
  mutate(species = str_split_fixed(feature, "[.]", 8)[,8]) %>% #Species names
  mutate(sv_species = paste(sv, species, sep="_")) %>% #SV Species names
  mutate(family = str_split_fixed(feature, "[.]", 8)[,6]) %>% #Family names
  mutate(family_species = paste(sv_species, " (", str_split_fixed(feature, "[.]", 8)[,6], ")", sep="")) #Family species names


maaslin.results.df.allinsect <- maaslin.results.df
write_tsv(maaslin.results.df.allinsect, "output/maaslin2_statistics_NEW_ALL_INSECT.tsv")

Interactive Volcano Plot

maaslin.results.df <- maaslin.results.df.allinsect %>%
  mutate(col_group = case_when(
    pval < 0.05 & coef < -3 ~ "negative",       #Set multi-threshold colouring with case_when function
    pval < 0.05 & coef > 3 ~ "positive",       #Set multi-threshold colouring with case_when function
    TRUE ~ "group1")) %>%
  mutate(sv = str_split_fixed(feature, "[.]", 8)[,1]) %>% #SV names
  mutate(species = str_split_fixed(feature, "[.]", 8)[,8]) #Species names


#------------------------------
#---Plot with Species names
#------------------------------
volcano_plot <- ggplot(maaslin.results.df, aes(x = coef, y = log2(pval), fill=col_group, colour=col_group)) +
  #geom_point(aes(color = col_group))+ # Coloring significant points
  ggiraph::geom_point_interactive(ggplot2::aes(color=col_group, #data_id = group,
                                               tooltip = ggplot2::after_stat({
                                                 paste0(
                                                   "SV: ", stringr::str_split_fixed(maaslin.results.df$feature, "[.]", 8)[,1],
                                                   "\nPhylum: ", stringr::str_split_fixed(maaslin.results.df$feature, "[.]", 8)[,3],
                                                   "\nFamily: ", stringr::str_split_fixed(maaslin.results.df$feature, "[.]", 8)[,6],
                                                   "\nSpecies: ", maaslin.results.df$species,
                                                   "\np-value: ", formatC(maaslin.results.df$pval, format="e", digits=2),
                                                   "\nq-value: ", formatC(maaslin.results.df$qval, format="e", digits=2),
                                                   "\ncoef: ", formatC(maaslin.results.df$coef, format="e", digits=2)
                                                   )
                                               }))) +
  scale_color_manual(values=c(negative = "red3", positive = "steelblue", group1="grey60"),
                     labels=c('ns', 'pval < 0.05 + coef < -3', 'pval < 0.05 + coef > 3')) +         # Grey for non-significant, red for significant
  labs(x = "Log Fold Change (coef)", y = "-Log10 P-value") +
  theme_minimal() +
  scale_y_reverse() +
  theme(legend.position = "right") + guides(colour=guide_legend(title="Key")) +
  # Remove legend if not needed
  # Add labels for significant points
  geom_text_repel(aes(label = ifelse(pval < 0.05 & abs(coef) >3, species, "")), max.overlaps=50, size = 3)


ggiraph::set_girafe_defaults(
  opts_hover_inv = ggiraph::opts_hover_inv(css = "opacity:0.4;"),
  opts_hover = ggiraph::opts_hover(css = "fill:orange;stroke:orange;stroke-width:7;", nearest_distance = 1000),
  #opts_zoom = ggiraph::opts_zoom(min = 1, max = 4),
  opts_tooltip = ggiraph::opts_tooltip(css = "padding:3px;background-color:#333333;color:white;"),
  opts_sizing = ggiraph::opts_sizing(rescale = FALSE),
  opts_toolbar = ggiraph::opts_toolbar(saveaspng = TRUE, pngname="volcano", delay_mouseout = 5000, position="topright", hidden=c("selection", "zoom"))
)

volcano.interactive <- ggiraph::girafe(ggobj = volcano_plot, width_svg = 10, height_svg = 7)
volcano.interactive

Effect Plot of Pooled Insect Treatments

number_of_top_tax <- 1:20
#----------------------------ALLINSECT
maaslin.results.df.filt <- maaslin.results.df.allinsect %>%
  mutate(coef = -log10(pval) * sign(coef)) %>% # HASHTAG OUT TO CONVERT TO NORMAL COEF SCORES
  mutate(col_group = case_when(
    coef < 0 ~ "negative",       #Set multi-threshold colouring with case_when function
    coef > 0 ~ "positive",
    TRUE ~ "ns")) %>%
  #mutate(sv = str_split_fixed(feature, "[.]", 8)[,1]) %>% #SV names
  #mutate(species = str_split_fixed(feature, "[.]", 8)[,8]) %>% #Species names
  #mutate(sv_species = paste(sv, species, sep="_")) %>%
  mutate(family_species = paste(str_split_fixed(feature, "[.]", 8)[,8], " (", str_split_fixed(feature, "[.]", 8)[,6], ")", sep="")) %>% #Family names
  #filter(value == effect.plot.list[i]) %>% #filter based on group
  #filter( abs(coef) >= 5) %>% #Option 1 (COEF) for getting top most important taxa
  #arrange(desc(abs(coef))) %>% .[1:30,] #Option 2 (COEF) for getting top most important taxa (MORE EVEN APPROACH)
  arrange(qval) %>% .[number_of_top_tax,] #Option 1 (qval) for getting top most important taxa (MORE EVEN APPROACH)

effect.plot.allinsect <- ggplot(maaslin.results.df.filt, aes(x=reorder(sv_species,coef), y=coef, fill=col_group)) + geom_bar(stat="identity")+coord_flip()+theme_classic() +
  ggtitle(unique(maaslin.results.df.filt$value)) +
  ylab("-log10(pval)*sign(coef)") +
  ylim(-30,30) +
  geom_hline(yintercept = c(0), linetype = "solid")+
  geom_hline(yintercept = c(-30,-15,15,30), linetype = "dashed") + theme(plot.title = element_text(size=24,hjust=0.5)) +
  scale_fill_manual(values=c(negative = "red3", positive = "steelblue4")) + theme(legend.position="none") +
  theme(axis.title.y = element_blank())

effect.plot.allinsect

Import Dataset

# Read count table in
df.counts <- read_tsv("input/merged-data.tsv") %>% #%>% .[-1,] #get rid of qiime2 row if needed
  tibble::column_to_rownames("Taxon") %>%
  dplyr::select(-id, -Sequence, -Confidence) %>%
  filter(!grepl("Mitochondria|Chloroplast", rownames(.))) %>%
  sjmisc::rotate_df()

# Read metadata
df.meta <- read_tsv("input/table_level6.txt") %>% dplyr::select(!matches(";"))

# Merge tables
df.merge <- merge(df.meta, df.counts, by.x="sample", by.y="row.names", all=FALSE)

# Melt convert formats for plotting and anaylsis
df.melt <- df %>% melt(., id.vars=c("sample", "treatment", "sampletype"))

#------------------------

df.merge.counts <- df.merge %>% filter(sampletype=="Faeces") %>% tibble::column_to_rownames("sample") %>% dplyr::select(matches(";")) %>% sjmisc::rotate_df()
df.merge.meta <- df.merge %>% filter(sampletype=="Faeces") %>% dplyr::select(!matches(";"))

#-------------
# Get rid of rows with all zeros (meaningless data)
df.merge.counts <- df.merge.counts[!rowSums(df.merge.counts)==0,]
# Calculate czm
d.in.czm <- as.data.frame((t(cmultRepl(t(df.merge.counts), label=0, method="CZM", output="prop"))), stringsasfactors=FALSE)
## No. adjusted imputations:  27
# Filter
d.in.czm.filt <- data.frame(d.in.czm[apply(d.in.czm, 1, max)>0,], check.names=F)   #ABUNDANCE CUTOFF
# Calculate CLR
d.in.czm.filt.clr <- t(apply(t(d.in.czm.filt), 1, function(x){log2(x) - mean(log2(x))})) %>% as.data.frame(., stringsasfactors=FALSE) %>% sjmisc::rotate_df()
# Merge CLR + Metadata
d.mer <-merge(df.merge.meta, t(d.in.czm.filt.clr), by.x="sample", by.y="row.names",all=F)

MaAsLin2 Stats For Individual Insect Treatments

d.mer.counts <- d.mer %>% tibble::column_to_rownames("sample") %>% dplyr::select(matches(";")) #%>% sjmisc::rotate_df()
d.mer.meta <- d.mer %>% dplyr::select(!matches(";")) %>% `rownames<-`(.$sample)

maaslin.results <- Maaslin2(d.mer.counts, d.mer.meta, output='maaslin2_output',
                            transform = "none", min_prevalence = 0.3, min_abundance = -50000,
                            plot_heatmap=FALSE, plot_scatter=FALSE, standardize = FALSE,
                            analysis_method="LM", normalization="none", #cores=20
                            fixed_effects= c("treatment"), reference=c("treatment,CON"))

maaslin.results.df <- maaslin.results$results %>%
  mutate(sv = str_split_fixed(feature, "[.]", 8)[,1]) %>% #SV names
  mutate(species = str_split_fixed(feature, "[.]", 8)[,8]) %>% #Species names
  mutate(sv_species = paste(sv, species, sep="_")) %>% #SV Species names
  mutate(family = str_split_fixed(feature, "[.]", 8)[,6]) %>% #Family names
  mutate(family_species = paste(sv_species, " (", str_split_fixed(feature, "[.]", 8)[,6], ")", sep="")) #Family species names

maaslin.results.df.original <- maaslin.results.df

#write_tsv(maaslin.results.df.original, "output/maaslin2_statistics_NEW_ORIGINAL.tsv")

Effect Plots For Individual Insect Treatment

number_of_top_tax <- 1:20

effect.plot.list <- c("BSF", "CR", "SW")

effect.collector.list <- list()
for(i in 1:length(effect.plot.list)){
  print(effect.plot.list[i])
  
  maaslin.results.df.filt <- maaslin.results.df.original %>%
    mutate(coef = -log10(pval) * sign(coef)) %>% # HASHTAG OUT TO CONVERT TO NORMAL COEF SCORES
    mutate(col_group = case_when(
      coef < 0 ~ "negative",       #Set multi-threshold colouring with case_when function
      coef > 0 ~ "positive",
      TRUE ~ "ns")) %>%
    mutate(sv = str_split_fixed(feature, "[.]", 8)[,1]) %>% #SV names
    mutate(species = str_split_fixed(feature, "[.]", 8)[,8]) %>% #Species names
    mutate(sv_species = paste(sv, species, sep="_")) %>%
    filter(value == effect.plot.list[i]) %>% #filter based on group
    #filter( abs(coef) >= 5) %>% #Option 1 (COEF) for getting top most important taxa
    #arrange(desc(abs(coef))) %>% .[1:30,] #Option 2 (COEF) for getting top most important taxa (MORE EVEN APPROACH)
    arrange(pval) %>% .[number_of_top_tax,] #Option 1 (qval) for getting top most important taxa (MORE EVEN APPROACH)
  
  #  filter(pval < 0.05 |  pval < 0.1 & abs(coef) >= 0.5 ) %>% filter(prev >0.1 & abund > 0.0001) %>%
  #  filter(grepl(paste(fungi.list.PP, collapse="|"), species)) %>%
  #  distinct(species, .keep_all=TRUE) %>%
  #  mutate(X7_combined = paste(.$X7, .$value)) #%>% filter( abs(coef_mean) >= 0.2) %>% filter(prev > 0.3)#arrange(desc(abs(coef_mean))) %>% .[1:15,]
  
  effect.plot <- ggplot(maaslin.results.df.filt, aes(x=reorder(sv_species,coef), y=coef, fill=col_group)) + geom_bar(stat="identity")+coord_flip()+theme_classic() +
    ggtitle(unique(maaslin.results.df.filt$value)) +
    ylab("-log10(pval)*sign(coef)") +
    ylim(-30,30) +
    geom_hline(yintercept = c(0), linetype = "solid")+
    #scale_y_continuous(breaks=c(-10,-5,0,5,10), labels=c(-50,-25,0,25,50)) +
    geom_hline(yintercept = c(-30,-15,15,30), linetype = "dashed") + theme(plot.title = element_text(size=24,hjust=0.5)) +
    scale_fill_manual(values=c(negative = "red3", positive = "steelblue4")) + theme(legend.position="none") +
    theme(axis.title.y = element_blank())
  
  effect.collector.list[[paste(effect.plot.list[i])]] <- effect.plot
  
}
## [1] "BSF"
## [1] "CR"
## [1] "SW"
effect.plot.original <- cowplot::plot_grid(effect.collector.list$BSF,
                                           effect.collector.list$CR,
                                           effect.collector.list$SW,
                                           ncol=3)
effect.plot.original

Read Data and Group by any Given Taxonomic Rank

# Read count table in
df.counts <- read_tsv("input/merged-data.tsv") %>% #%>% .[-1,] #get rid of qiime2 row if needed
  tibble::column_to_rownames("Taxon") %>%
  dplyr::select(-id, -Sequence, -Confidence) %>%
  filter(!grepl("Mitochondria|Chloroplast|Acinetobacter|Mesorhizobium", rownames(.))) %>%
  sjmisc::rotate_df()

# Now df.counts contains the filtered data

# Read metadata
df.meta <- read_tsv("input/table_level6.txt") %>% dplyr::select(!matches(";"))

# Merge tables
df.merge <- merge(df.meta, df.counts, by.x="sample", by.y="row.names", all=FALSE)

# Melt convert formats for plotting and anaylsis
df.melt <- df %>% melt(., id.vars=c("sample", "treatment", "sampletype"))

####################################
####################################  Make phyloseq
####################################

OTU.phyloseq = otu_table(as.matrix(df.counts), taxa_are_rows = FALSE)
tax.in <- str_split_fixed(colnames(df.counts), ";", 8)
colnames(tax.in) <- c("SV", "domain", "phylum", "class", "order", "family", "genus", "species")
rownames(tax.in) <- colnames(df.counts)
TAX.phyloseq = tax_table(as.matrix(tax.in))
META.phyloseq = sample_data(data.frame(df.meta[,1:3]))
sample_names(META.phyloseq) <- sample_names(OTU.phyloseq)
ps <-phyloseq(OTU.phyloseq, TAX.phyloseq, META.phyloseq)
TAX.phyloseq
####################################
#################################### Transform data - "whatever you want" level
####################################
level <- "genus"

#Filtering
ps.freq  = transform_sample_counts(ps, function(x) x / sum(x))
ps.freq.filt = filter_taxa(ps.freq, function(x) mean(x) > 1e-2, TRUE) #optional to filter
ps.freq.filt  = transform_sample_counts(ps.freq.filt, function(x) x / sum(x))


###################################################################################################################### Run this chunk, if you want other
otu_table.example <- as.data.frame(as.matrix(otu_table(ps.freq.filt)))
#Make new phyloseq object for "other" counts
#--------------------------------------------------
OTU.phyloseq.other <- as.data.frame(1 - rowSums(otu_table.example))
colnames(OTU.phyloseq.other) <- "SV_010101;Other;Other;Other;Other;Other;Other;Other"
OTU.phyloseq.other = otu_table(as.matrix(OTU.phyloseq.other), taxa_are_rows = FALSE)
tax.in.other <- as.matrix(cbind("SV" = "SV_010101",
                                "domain" = "Other",
                                "phylum" = "Other",
                                "class" = "Other",
                                "order" = "Other",
                                "family" = "Other",
                                "genus" = "Other",
                                "species"= "Other"))
rownames(tax.in.other) <- "SV_010101;Other;Other;Other;Other;Other;Other;Other"
TAX.phyloseq.other = tax_table(as.matrix((tax.in.other)))
ps.other <-phyloseq(OTU.phyloseq.other, TAX.phyloseq.other, META.phyloseq)

#Make filtered + "other" phyloseq objects
#--------------------------------------------------
ps.merge <- merge_phyloseq(ps.freq.filt, ps.other)
ps.freq.filt <- ps.merge

######################################################################################################################




#Group ASVs at a given taxonomic rank
ps.freq.filt <- tax_glom(ps.freq.filt, taxrank=level, NArm=TRUE, bad_empty=c(NA, "", " ", "\t"))

ps.melt <- psmelt(ps.freq.filt)

write_tsv(ps.melt, "output/ps.melt.genus.tsv")

Interactive Taxa Bar Plots

####################################
#################################### Make plots
####################################


#Set themes
sonja.theme <- theme(axis.text.x = element_text(angle = -90, hjust = 0),
                     panel.background = element_blank(),
                     panel.grid.major = element_blank(),
                     panel.grid.minor = element_blank())

#theme(legend.position="none") +

#Plot graphs separately
library(ggiraph)
ggiraph::set_girafe_defaults(
  opts_hover_inv = ggiraph::opts_hover_inv(css = "opacity:0.4;"),
  opts_hover = ggiraph::opts_hover(css = "fill:orange;stroke:orange;stroke-width:7;", nearest_distance = 1000),
  #opts_zoom = ggiraph::opts_zoom(min = 1, max = 4),
  opts_tooltip = ggiraph::opts_tooltip(css = "padding:3px;background-color:#333333;color:white;"),
  opts_sizing = ggiraph::opts_sizing(rescale = FALSE),
  opts_toolbar = ggiraph::opts_toolbar(saveaspng = TRUE, pngname="volcano", delay_mouseout = 5000, position="topright", hidden=c("selection", "zoom"))
)

plot1 <- ggplot(ps.melt %>% filter(treatment == "Day0")
                , aes_string(x = "Sample", y = "Abundance", fill = level)) +
  geom_bar_interactive(aes(tooltip=paste("Genus: ",genus,
                                         "\nAbundance: ", formatC(Abundance,format = "e",digits=2), 
                                         sep="")), stat = "identity", position = "stack", color = "black") +
  facet_grid(~treatment, labeller = labeller(treatment = c(`Day0` = "Day0")))+
  theme_bw() + guides(fill = guide_legend(ncol = 1)) + sonja.theme + theme(legend.position="none")


plot2 <- ggplot(ps.melt %>% filter(treatment == "CON")
                , aes_string(x = "Sample", y = "Abundance", fill = level)) +
  geom_bar_interactive(aes(tooltip=paste("Genus: ",genus,
                                         "\nAbundance: ", formatC(Abundance,format = "e",digits=2), 
                                         sep="")), stat = "identity", position = "stack", color = "black") +
  facet_grid(~treatment, labeller = labeller(treatment = c(`CON` = "Control")))+
  theme_bw() + guides(fill = guide_legend(ncol = 1)) + sonja.theme + theme(legend.position="none")


plot3 <- ggplot(ps.melt %>% filter(treatment == "SW")
                , aes_string(x = "Sample", y = "Abundance", fill = level)) +
  geom_bar_interactive(aes(tooltip=paste("Genus: ",genus,
                                         "\nAbundance: ", formatC(Abundance,format = "e",digits=2), 
                                         sep="")), stat = "identity", position = "stack", color = "black") +
  facet_grid(~treatment, labeller = labeller(treatment = c(`SW` = "Superworm")))+
  theme_bw() + guides(fill = guide_legend(ncol = 1)) + sonja.theme + theme(legend.position="none")


plot4 <- ggplot(ps.melt %>% filter(treatment == "CR")
                , aes_string(x = "Sample", y = "Abundance", fill = level)) +
   geom_bar_interactive(aes(tooltip=paste("Genus: ",genus,
                                         "\nAbundance: ", formatC(Abundance,format = "e",digits=2), 
                                         sep="")), stat = "identity", position = "stack", color = "black") +
  facet_grid(~treatment, labeller = labeller(treatment = c(`CR` = "Cricket")))+
  theme_bw() + guides(fill = guide_legend(ncol = 1)) + sonja.theme + theme(legend.position="none")

plot5 <- ggplot(ps.melt %>% filter(treatment == "BSF")
                , aes_string(x = "Sample", y = "Abundance", fill = level)) + geom_bar_interactive(aes(tooltip=paste("Genus: ",genus,
                                         "\nAbundance: ", formatC(Abundance,format = "e",digits=2), 
                                         sep="")), stat = "identity", position = "stack", color = "black") +
  facet_grid(~treatment, labeller = labeller(treatment = c(`BSF` = "Black soldier fly")))+
  theme_bw() + guides(fill = guide_legend(ncol = 1)) + sonja.theme + theme(legend.position="none")


legend <- ggplot(ps.melt %>% filter(treatment == "BSF")
                 , aes_string(x = "Sample", y = "Abundance", fill = level)) +
  geom_bar(stat = "identity", position = "stack", color = "black") +
  theme_bw() + guides(fill = guide_legend(ncol = 1)) + sonja.theme


#Export graph(s)

final.plot <- cowplot::plot_grid(plot1, plot2, plot3, plot4, plot5,
                   ncol=5)

#final.plot #Regular plot 

ggiraph::girafe(code=print(final.plot)) #Interactive plot
#topptx(file=paste("output/4_by_4_barplots_", level, ".pptx", sep=""), append=FALSE, width=15, height=10, units="cm")

Combined Bar Plots

####################################
####################################  Read count table in
####################################
# Read count table in
df.counts <- read_tsv("input/merged-data.tsv") %>% #%>% .[-1,] #get rid of qiime2 row if needed
  tibble::column_to_rownames("Taxon") %>%
  dplyr::select(-id, -Sequence, -Confidence) %>%
  filter(!grepl("Mitochondria|Chloroplast", rownames(.))) %>%
  sjmisc::rotate_df()

# Read metadata
df.meta <- read_tsv("input/table_level6.txt") %>% dplyr::select(!matches(";"))

# Merge tables
df.merge <- merge(df.meta, df.counts, by.x="sample", by.y="row.names", all=FALSE)

# Melt convert formats for plotting and anaylsis
df.melt <- df %>% melt(., id.vars=c("sample", "treatment", "sampletype"))

####################################
####################################  Make phyloseq
####################################

OTU.phyloseq = otu_table(as.matrix(df.counts), taxa_are_rows = FALSE)
tax.in <- str_split_fixed(colnames(df.counts), ";", 8)
colnames(tax.in) <- c("SV", "domain", "phylum", "class", "order", "family", "genus", "species")
rownames(tax.in) <- colnames(df.counts)
TAX.phyloseq = tax_table(as.matrix(tax.in))
META.phyloseq = sample_data(data.frame(df.meta[,1:3]))
sample_names(META.phyloseq) <- sample_names(OTU.phyloseq)
ps <-phyloseq(OTU.phyloseq, TAX.phyloseq, META.phyloseq)
TAX.phyloseq


####################################
#################################### Transform data - "whatever you want" level
####################################
for.list <- c("phylum", "family", "genus")

collector.list <- list()
for(i in 1:length(for.list)){
  level <- for.list[i]
  
  #Filtering
  ps.freq  = transform_sample_counts(ps, function(x) x / sum(x))
  ps.freq.filt = filter_taxa(ps.freq, function(x) mean(x) > 1e-2, TRUE) #optional to filter
  ps.freq.filt  = transform_sample_counts(ps.freq.filt, function(x) x / sum(x))
  
  
###################################################################################################################### Run this chunk, if you want other
  otu_table.example <- as.data.frame(as.matrix(otu_table(ps.freq.filt)))
  #Make new phyloseq object for "other" counts
  #--------------------------------------------------
  OTU.phyloseq.other <- as.data.frame(1 - rowSums(otu_table.example))
  colnames(OTU.phyloseq.other) <- "SV_010101;other;other;other;other;other;other;other"
  OTU.phyloseq.other = otu_table(as.matrix(OTU.phyloseq.other), taxa_are_rows = FALSE)
  tax.in.other <- as.matrix(cbind("SV" = "SV_010101",
                                  "domain" = "other",
                                  "phylum" = "other",
                                  "class" = "other",
                                  "order" = "other",
                                  "family" = "other",
                                  "genus" = "other",
                                  "species"= "other"))
  rownames(tax.in.other) <- "SV_010101;other;other;other;other;other;other;other"
  TAX.phyloseq.other = tax_table(as.matrix((tax.in.other)))
  ps.other <-phyloseq(OTU.phyloseq.other, TAX.phyloseq.other, META.phyloseq)
  
  #Make filtered + "other" phyloseq objects
  #--------------------------------------------------
  ps.merge <- merge_phyloseq(ps.freq.filt, ps.other)
  ps.freq.filt <- ps.merge
  
######################################################################################################################
  
  
  
  
  #Group ASVs at a given taxonomic rank
  ps.freq.filt <- tax_glom(ps.freq.filt, taxrank=level, NArm=TRUE, bad_empty=c(NA, "", " ", "\t"))
  
  ps.melt <- psmelt(ps.freq.filt)
  
  ####################################
  #################################### Make plots
  ####################################
  
  
  #Set themes
  sonja.theme <- theme(axis.text.x = element_text(angle = -90, hjust = 0),
                       panel.background = element_blank(),
                       panel.grid.major = element_blank(),
                       panel.grid.minor = element_blank())
  
  
  #---------------- DAY 0
  ps.melt.subset <- ps.melt %>% filter(treatment == "Day 0")
  
  plot.day0 <- ggplot(ps.melt %>% filter(treatment == "Day0")
                      , aes_string(x = "Sample", y = "Abundance", fill = level)) +
    geom_bar(stat = "identity", position = "stack", color = "black") +
    facet_grid(~treatment, labeller = labeller(treatment = c(`CON` = "CONTROL")))+
    theme_bw() + guides(fill = guide_legend(ncol = 1)) + sonja.theme + theme(legend.position="none")
  
  
  #---------------- DIET
  #Plot graphs separately
  plot.diet1 <- ggplot(ps.melt %>% arrange(Sample) %>% filter(treatment == "CONdiet") %>%
                         group_by(SV) %>%
                         mutate(Abundance_adj = sum(Abundance)) %>%
                         slice(1) %>%
                         ungroup() %>%
                         mutate(Abundance_adj2 = Abundance_adj/sum(Abundance_adj))
                       , aes_string(x = "Sample", y = "Abundance", fill = level)) +
    geom_bar(stat = "identity", position = "stack", color = "black") +
    facet_grid(~treatment, labeller = labeller(treatment = c(`CON` = "CONTROL")))+
    theme_bw() + guides(fill = guide_legend(ncol = 1)) + sonja.theme + theme(legend.position="none") + 
    theme(axis.title.y = element_blank(), axis.text.y = element_blank())
  #Plot graphs separately
  plot.diet2 <- ggplot(ps.melt %>% arrange(Sample) %>% filter(treatment == "BSFdiet") %>%
                         group_by(SV) %>%
                         mutate(Abundance_adj = sum(Abundance)) %>%
                         slice(1) %>%
                         ungroup() %>%
                         mutate(Abundance_adj2 = Abundance_adj/sum(Abundance_adj))
                       , aes_string(x = "Sample", y = "Abundance", fill = level)) +
    geom_bar(stat = "identity", position = "stack", color = "black") +
    facet_grid(~treatment, labeller = labeller(treatment = c(`CON` = "CONTROL")))+
    theme_bw() + guides(fill = guide_legend(ncol = 1)) + sonja.theme + theme(legend.position="none") + 
    theme(axis.title.y = element_blank(), axis.text.y = element_blank())
  #Plot graphs separately
  plot.diet3 <- ggplot(ps.melt %>% arrange(Sample) %>% filter(treatment == "CRdiet") %>%
                         group_by(SV) %>%
                         mutate(Abundance_adj = sum(Abundance)) %>%
                         slice(1) %>%
                         ungroup() %>%
                         mutate(Abundance_adj2 = Abundance_adj/sum(Abundance_adj))
                       , aes_string(x = "Sample", y = "Abundance", fill = level)) +
    geom_bar(stat = "identity", position = "stack", color = "black") +
    facet_grid(~treatment, labeller = labeller(treatment = c(`CON` = "CONTROL")))+
    theme_bw() + guides(fill = guide_legend(ncol = 1)) + sonja.theme + theme(legend.position="none") + 
    theme(axis.title.y = element_blank(), axis.text.y = element_blank())
  #Plot graphs separately
  plot.diet4 <- ggplot(ps.melt %>% arrange(Sample) %>% filter(treatment == "SWdiet") %>%
                         group_by(SV) %>%
                         mutate(Abundance_adj = sum(Abundance)) %>%
                         slice(1) %>%
                         ungroup() %>%
                         mutate(Abundance_adj2 = Abundance_adj/sum(Abundance_adj))
                       , aes_string(x = "Sample", y = "Abundance", fill = level)) +
    geom_bar(stat = "identity", position = "stack", color = "black") +
    facet_grid(~treatment, labeller = labeller(treatment = c(`CON` = "CONTROL")))+
    theme_bw() + guides(fill = guide_legend(ncol = 1)) + sonja.theme + theme(legend.position="none") + 
    theme(axis.title.y = element_blank(), axis.text.y = element_blank())
  
  #---------------- FISH
  
  plot.fish1 <- ggplot(ps.melt %>% filter(treatment == "CON")
                       , aes_string(x = "Sample", y = "Abundance", fill = level)) +
    geom_bar(stat = "identity", position = "stack", color = "black") +
    facet_grid(~treatment, labeller = labeller(treatment = c(`CON` = "CONTROL")))+
    theme_bw() + guides(fill = guide_legend(ncol = 1)) + sonja.theme + theme(legend.position="none") + 
    theme(axis.title.y = element_blank(), axis.text.y = element_blank())
  
  
  plot.fish2 <- ggplot(ps.melt %>% filter(treatment == "BSF")
                       , aes_string(x = "Sample", y = "Abundance", fill = level)) +
    geom_bar(stat = "identity", position = "stack", color = "black") +
    facet_grid(~treatment, labeller = labeller(treatment = c(`BSF` = "Black Soldier Fly")))+
    theme_bw() + guides(fill = guide_legend(ncol = 1)) + sonja.theme + theme(legend.position="none") + 
    theme(axis.title.y = element_blank(), axis.text.y = element_blank())
  
  plot.fish3 <- ggplot(ps.melt %>% filter(treatment == "CR")
                       , aes_string(x = "Sample", y = "Abundance", fill = level)) +
    geom_bar(stat = "identity", position = "stack", color = "black") +
    facet_grid(~treatment, labeller = labeller(treatment = c(`CR` = "Cricket")))+
    theme_bw() + guides(fill = guide_legend(ncol = 1)) + sonja.theme + theme(legend.position="none") + 
    theme(axis.title.y = element_blank(), axis.text.y = element_blank())
  
  plot.fish4 <- ggplot(ps.melt %>% filter(treatment == "SW")
                       , aes_string(x = "Sample", y = "Abundance", fill = level)) +
    geom_bar(stat = "identity", position = "stack", color = "black") +
    facet_grid(~treatment, labeller = labeller(treatment = c(`SW` = "Superworm")))+
    theme_bw() + guides(fill = guide_legend(ncol = 1)) + sonja.theme + theme(legend.position="none") + 
    theme(axis.title.y = element_blank(), axis.text.y = element_blank())
  
  
  legend <- ggplot(ps.melt %>% filter(treatment == "SW")
                   , aes_string(x = "Sample", y = "Abundance", fill = level)) +
    geom_bar(stat = "identity", position = "stack", color = "black") +
    theme_bw() + guides(fill = guide_legend(ncol = 1)) + sonja.theme
  
  #Export graph(s)
  #-----------------------------------------------------------------------------------------------------
  plot.phylum.day0 <- cowplot::plot_grid(plot.day0, ncol=1)
  plot.phlyum.CON <- cowplot::plot_grid(plot.diet1, plot.fish1, ncol=2, rel_widths=c(0.4, 0.6))
  plot.phlyum.BSF <- cowplot::plot_grid(plot.diet2, plot.fish2, ncol=2, rel_widths=c(0.4, 0.6))
  plot.phlyum.CR <- cowplot::plot_grid(plot.diet3, plot.fish3, ncol=2, rel_widths=c(0.4, 0.6))
  plot.phlyum.SW <- cowplot::plot_grid(plot.diet4, plot.fish4, ncol=2, rel_widths=c(0.4, 0.6))
  
  plot.phylum.combined <- cowplot::plot_grid(plot.phylum.day0,
                                             plot.phlyum.CON,
                                             plot.phlyum.BSF,
                                             plot.phlyum.CR,
                                             plot.phlyum.SW,
                                             ggpubr::as_ggplot(get_legend(legend)),
                                             ncol=6, rel_widths=c(0.15, 0.2, 0.2, 0.2, 0.2, 0.2))
  collector.list[[paste(for.list[i])]] <- plot.phylum.combined
  #-----------------------------------------------------------------------------------------------------
}
collector.list$phylum

collector.list$family

collector.list$genus

# Combine all taxonomic levels
#plot.phylum.combined <- cowplot::plot_grid(collector.list$phylum, collector.list$family, collector.list$genus, ncol=1)
#plot.phylum.combined
#topptx(file=paste("output/combined_barplots", level, ".pptx", sep=""), append=FALSE, width=15, height=10, units="cm")