library(phyloseq)
library(microbiome)
library(ggpubr) # stats with viz
library(patchwork) # combine plots
Microbiota Analysis
In this document, we investigate whether the Shannon diversity and Observed ASVs of samples is correlated to their sequencing depth.
Load libraries
Define parameters
= params$pseq_path
pseq_path = params$primary_var primary_var
Read Data
<- readRDS(pseq_path) pseq
Check Reads
# add reads/sample to sample_data()
sample_data(pseq)$reads.per.sample <- sample_sums(pseq)
# Compare between groups
|>
pseq ::meta() |>
microbiomeggplot(aes_string(primary_var, "reads.per.sample")) +
geom_violin(aes_string(color=primary_var))+
geom_boxplot(aes_string(color=primary_var), width=0.2) +
labs(x="", y="Reads/sample")+
theme_bw()
Alpha diversity
# we can add shannon diversity to sample_data(pseq)
sample_data(pseq)$shannon <- microbiome::diversity(pseq, index="shannon")[,1]
sample_data(pseq)$observed <- microbiome::richness(pseq, index="observed")[,1]
<- meta(pseq) |>
p.shannon ggplot(aes_string(primary_var, "shannon")) +
geom_violin(aes_string(color=primary_var))+
geom_boxplot(aes_string(color=primary_var), width=0.2) +
labs(x="", y="Shannon Diversity")+
theme_bw()
<- meta(pseq) |>
p.observed ggplot(aes_string(primary_var, "observed")) +
geom_violin(aes_string(color=primary_var))+
geom_boxplot(aes_string(color=primary_var), width=0.2) +
labs(x="", y="Observed ASVs")+
theme_bw()
| p.observed ) +
(p.shannon plot_layout(guides = "collect") +
plot_annotation(tag_levels = "a")
Correlation
Check if reads per sample is correlated to Shannon diversity and Observed ASVs.
<- meta(pseq) |>
p.shanon.cor ggplot(aes_string("reads.per.sample", "shannon", group=primary_var)) +
geom_point(aes_string(color=primary_var), alpha=0.5) +
geom_smooth(method = "lm", aes_string(color=primary_var), show.legend = FALSE) +
::stat_cor(method = "spearman", aes_string(color=primary_var), show.legend = FALSE,
ggpubrlabel.x.npc = 0.5, label.y.npc = 0.3, hjust = 0)+
labs(x="Reads/sample", y="Shannon Diversity") +
theme_bw()
<- meta(pseq) |>
p.observed.cor ggplot(aes_string("reads.per.sample", "observed", group=primary_var)) +
geom_point(aes_string(color=primary_var), alpha=0.5) +
geom_smooth(method = "lm", aes_string(color=primary_var), show.legend = FALSE) +
::stat_cor(method = "spearman", aes_string(color=primary_var), show.legend = FALSE,
ggpubrlabel.x.npc = 0.5, label.y.npc = 0.3, hjust = 0) +
labs(x="Reads/sample", y="Observed ASVs") +
theme_bw()
| p.observed.cor) +
(p.shanon.cor plot_layout(guides = "collect") +
plot_annotation(tag_levels = "a")
sessionInfo()
## R version 4.2.1 (2022-06-23 ucrt)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 19044)
##
## Matrix products: default
##
## locale:
## [1] LC_COLLATE=English_United States.utf8
## [2] LC_CTYPE=English_United States.utf8
## [3] LC_MONETARY=English_United States.utf8
## [4] LC_NUMERIC=C
## [5] LC_TIME=English_United States.utf8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] patchwork_1.1.2 ggpubr_0.4.0 microbiome_1.18.0 ggplot2_3.3.6
## [5] phyloseq_1.40.0
##
## loaded via a namespace (and not attached):
## [1] Biobase_2.56.0 tidyr_1.2.0 jsonlite_1.8.0
## [4] splines_4.2.1 foreach_1.5.2 carData_3.0-5
## [7] assertthat_0.2.1 stats4_4.2.1 GenomeInfoDbData_1.2.8
## [10] yaml_2.3.5 backports_1.4.1 pillar_1.8.1
## [13] lattice_0.20-45 glue_1.6.2 digest_0.6.29
## [16] XVector_0.36.0 ggsignif_0.6.3 colorspace_2.0-3
## [19] htmltools_0.5.3 Matrix_1.5-1 plyr_1.8.7
## [22] pkgconfig_2.0.3 broom_1.0.1 zlibbioc_1.42.0
## [25] purrr_0.3.4 scales_1.2.1 Rtsne_0.16
## [28] tibble_3.1.7 mgcv_1.8-40 farver_2.1.1
## [31] car_3.1-0 generics_0.1.3 IRanges_2.30.0
## [34] ellipsis_0.3.2 withr_2.5.0 BiocGenerics_0.42.0
## [37] cli_3.3.0 survival_3.3-1 magrittr_2.0.3
## [40] crayon_1.5.1 evaluate_0.16 fansi_1.0.3
## [43] nlme_3.1-157 MASS_7.3-57 rstatix_0.7.0
## [46] vegan_2.6-2 tools_4.2.1 data.table_1.14.2
## [49] lifecycle_1.0.2 stringr_1.4.1 Rhdf5lib_1.18.2
## [52] S4Vectors_0.34.0 munsell_0.5.0 cluster_2.1.3
## [55] Biostrings_2.64.0 ade4_1.7-19 compiler_4.2.1
## [58] GenomeInfoDb_1.32.2 rlang_1.0.5 rhdf5_2.40.0
## [61] grid_4.2.1 RCurl_1.98-1.6 iterators_1.0.14
## [64] rhdf5filters_1.8.0 biomformat_1.24.0 rstudioapi_0.14
## [67] htmlwidgets_1.5.4 igraph_1.3.1 labeling_0.4.2
## [70] bitops_1.0-7 rmarkdown_2.16 gtable_0.3.1
## [73] codetools_0.2-18 multtest_2.52.0 abind_1.4-5
## [76] DBI_1.1.3 reshape2_1.4.4 R6_2.5.1
## [79] knitr_1.40 dplyr_1.0.9 fastmap_1.1.0
## [82] utf8_1.2.2 permute_0.9-7 ape_5.6-2
## [85] stringi_1.7.6 parallel_4.2.1 Rcpp_1.0.8.3
## [88] vctrs_0.4.1 tidyselect_1.1.2 xfun_0.31