Microbiota Analysis

Author

Sudarshan A. Shetty

Published

January 8, 2023

In this document, we investigate whether the Shannon diversity and Observed ASVs of samples is correlated to their sequencing depth.

Load libraries

library(phyloseq)
library(microbiome)
library(ggpubr) # stats with viz
library(patchwork) # combine plots 

Define parameters

pseq_path = params$pseq_path
primary_var = params$primary_var

Read Data

pseq <- readRDS(pseq_path)

Check Reads

# add reads/sample to sample_data()
sample_data(pseq)$reads.per.sample <- sample_sums(pseq)
# Compare between groups
pseq |> 
    microbiome::meta() |> 
    ggplot(aes_string(primary_var, "reads.per.sample")) +
    geom_violin(aes_string(color=primary_var))+
    geom_boxplot(aes_string(color=primary_var), width=0.2) +
    labs(x="", y="Reads/sample")+
    theme_bw()

Alpha diversity

# we can add shannon diversity to sample_data(pseq)
sample_data(pseq)$shannon <- microbiome::diversity(pseq, index="shannon")[,1]
sample_data(pseq)$observed <- microbiome::richness(pseq, index="observed")[,1]
p.shannon <- meta(pseq) |> 
    ggplot(aes_string(primary_var, "shannon")) +
    geom_violin(aes_string(color=primary_var))+
    geom_boxplot(aes_string(color=primary_var), width=0.2) +
    labs(x="", y="Shannon Diversity")+
    theme_bw()
p.observed <- meta(pseq) |> 
    ggplot(aes_string(primary_var, "observed")) +
    geom_violin(aes_string(color=primary_var))+
    geom_boxplot(aes_string(color=primary_var), width=0.2) +
    labs(x="", y="Observed ASVs")+
    theme_bw()

(p.shannon | p.observed ) + 
    plot_layout(guides = "collect") +
    plot_annotation(tag_levels = "a")

Correlation

Check if reads per sample is correlated to Shannon diversity and Observed ASVs.

p.shanon.cor <- meta(pseq) |> 
    ggplot(aes_string("reads.per.sample", "shannon", group=primary_var)) +
    geom_point(aes_string(color=primary_var), alpha=0.5) +
    geom_smooth(method = "lm", aes_string(color=primary_var), show.legend = FALSE) +
    ggpubr::stat_cor(method = "spearman", aes_string(color=primary_var), show.legend = FALSE,
                     label.x.npc = 0.5, label.y.npc = 0.3, hjust = 0)+
    labs(x="Reads/sample", y="Shannon Diversity") +
    theme_bw() 

p.observed.cor <- meta(pseq) |> 
    ggplot(aes_string("reads.per.sample", "observed", group=primary_var)) +
    geom_point(aes_string(color=primary_var), alpha=0.5) +
    geom_smooth(method = "lm", aes_string(color=primary_var), show.legend = FALSE) +
    ggpubr::stat_cor(method = "spearman", aes_string(color=primary_var), show.legend = FALSE,
                     label.x.npc = 0.5, label.y.npc = 0.3, hjust = 0) +
    labs(x="Reads/sample", y="Observed ASVs") +
    theme_bw() 


(p.shanon.cor | p.observed.cor) + 
    plot_layout(guides = "collect") +
    plot_annotation(tag_levels = "a")

sessionInfo()
## R version 4.2.1 (2022-06-23 ucrt)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 19044)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=English_United States.utf8 
## [2] LC_CTYPE=English_United States.utf8   
## [3] LC_MONETARY=English_United States.utf8
## [4] LC_NUMERIC=C                          
## [5] LC_TIME=English_United States.utf8    
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] patchwork_1.1.2   ggpubr_0.4.0      microbiome_1.18.0 ggplot2_3.3.6    
## [5] phyloseq_1.40.0  
## 
## loaded via a namespace (and not attached):
##  [1] Biobase_2.56.0         tidyr_1.2.0            jsonlite_1.8.0        
##  [4] splines_4.2.1          foreach_1.5.2          carData_3.0-5         
##  [7] assertthat_0.2.1       stats4_4.2.1           GenomeInfoDbData_1.2.8
## [10] yaml_2.3.5             backports_1.4.1        pillar_1.8.1          
## [13] lattice_0.20-45        glue_1.6.2             digest_0.6.29         
## [16] XVector_0.36.0         ggsignif_0.6.3         colorspace_2.0-3      
## [19] htmltools_0.5.3        Matrix_1.5-1           plyr_1.8.7            
## [22] pkgconfig_2.0.3        broom_1.0.1            zlibbioc_1.42.0       
## [25] purrr_0.3.4            scales_1.2.1           Rtsne_0.16            
## [28] tibble_3.1.7           mgcv_1.8-40            farver_2.1.1          
## [31] car_3.1-0              generics_0.1.3         IRanges_2.30.0        
## [34] ellipsis_0.3.2         withr_2.5.0            BiocGenerics_0.42.0   
## [37] cli_3.3.0              survival_3.3-1         magrittr_2.0.3        
## [40] crayon_1.5.1           evaluate_0.16          fansi_1.0.3           
## [43] nlme_3.1-157           MASS_7.3-57            rstatix_0.7.0         
## [46] vegan_2.6-2            tools_4.2.1            data.table_1.14.2     
## [49] lifecycle_1.0.2        stringr_1.4.1          Rhdf5lib_1.18.2       
## [52] S4Vectors_0.34.0       munsell_0.5.0          cluster_2.1.3         
## [55] Biostrings_2.64.0      ade4_1.7-19            compiler_4.2.1        
## [58] GenomeInfoDb_1.32.2    rlang_1.0.5            rhdf5_2.40.0          
## [61] grid_4.2.1             RCurl_1.98-1.6         iterators_1.0.14      
## [64] rhdf5filters_1.8.0     biomformat_1.24.0      rstudioapi_0.14       
## [67] htmlwidgets_1.5.4      igraph_1.3.1           labeling_0.4.2        
## [70] bitops_1.0-7           rmarkdown_2.16         gtable_0.3.1          
## [73] codetools_0.2-18       multtest_2.52.0        abind_1.4-5           
## [76] DBI_1.1.3              reshape2_1.4.4         R6_2.5.1              
## [79] knitr_1.40             dplyr_1.0.9            fastmap_1.1.0         
## [82] utf8_1.2.2             permute_0.9-7          ape_5.6-2             
## [85] stringi_1.7.6          parallel_4.2.1         Rcpp_1.0.8.3          
## [88] vctrs_0.4.1            tidyselect_1.1.2       xfun_0.31