Compiled date: 2024-10-29
Last edited: 2023-12-14
License: GPL-3
Run the following code to install the Bioconductor version of package.
# install.packages("BiocManager")
BiocManager::install("POMA")
library(POMA)
library(ggtext)
library(patchwork)
Let’s create a cleaned SummarizedExperiment
object from the sample data st000336
to explore the normalization effects.
example_data <- st000336 %>%
PomaImpute() # KNN imputation
Loading required namespace: SummarizedExperiment
1 features removed.
example_data
class: SummarizedExperiment
dim: 30 57
metadata(0):
assays(1): ''
rownames(30): x1_methylhistidine x3_methylhistidine ... pyruvate
succinate
rowData names(0):
colnames(57): 1 2 ... 56 57
colData names(2): group steroids
Here we will evaluate the normalization methods that POMA offers on the same SummarizedExperiment
object to compare them (Berg et al. 2006).
none <- PomaNorm(example_data, method = "none")
auto_scaling <- PomaNorm(example_data, method = "auto_scaling")
level_scaling <- PomaNorm(example_data, method = "level_scaling")
log_scaling <- PomaNorm(example_data, method = "log_scaling")
log_transformation <- PomaNorm(example_data, method = "log")
vast_scaling <- PomaNorm(example_data, method = "vast_scaling")
log_pareto <- PomaNorm(example_data, method = "log_pareto")
When we check for the dimension of the data after normalization we can see that all methods have the same effect on data dimension. PomaNorm
only modifies the data dimension when the dataset contains only-zero features or zero-variance features.
dim(SummarizedExperiment::assay(none))
> [1] 30 57
dim(SummarizedExperiment::assay(auto_scaling))
> [1] 30 57
dim(SummarizedExperiment::assay(level_scaling))
> [1] 30 57
dim(SummarizedExperiment::assay(log_scaling))
> [1] 30 57
dim(SummarizedExperiment::assay(log_transformation))
> [1] 30 57
dim(SummarizedExperiment::assay(vast_scaling))
> [1] 30 57
dim(SummarizedExperiment::assay(log_pareto))
> [1] 30 57
Here we can evaluate the normalization effects on samples (Berg et al. 2006).
a <- PomaBoxplots(none,
x = "samples") +
ggplot2::ggtitle("Not Normalized")
b <- PomaBoxplots(auto_scaling,
x = "samples",
theme_params = list(legend_title = FALSE, legend_position = "none")) +
ggplot2::ggtitle("Auto Scaling") +
ggplot2::theme(axis.text.x = ggplot2::element_blank())
c <- PomaBoxplots(level_scaling,
x = "samples",
theme_params = list(legend_title = FALSE, legend_position = "none")) +
ggplot2::ggtitle("Level Scaling") +
ggplot2::theme(axis.text.x = ggplot2::element_blank())
d <- PomaBoxplots(log_scaling,
x = "samples",
theme_params = list(legend_title = FALSE, legend_position = "none")) +
ggplot2::ggtitle("Log Scaling") +
ggplot2::theme(axis.text.x = ggplot2::element_blank())
e <- PomaBoxplots(log_transformation,
x = "samples",
theme_params = list(legend_title = FALSE, legend_position = "none")) +
ggplot2::ggtitle("Log Transformation") +
ggplot2::theme(axis.text.x = ggplot2::element_blank())
f <- PomaBoxplots(vast_scaling,
x = "samples",
theme_params = list(legend_title = FALSE, legend_position = "none")) +
ggplot2::ggtitle("Vast Scaling") +
ggplot2::theme(axis.text.x = ggplot2::element_blank())
g <- PomaBoxplots(log_pareto,
x = "samples",
theme_params = list(legend_title = FALSE, legend_position = "none")) +
ggplot2::ggtitle("Log Pareto") +
ggplot2::theme(axis.text.x = ggplot2::element_blank())
a
(b + c + d) / (e + f + g)
Here we can evaluate the normalization effects on features.
h <- PomaDensity(none,
x = "features",
theme_params = list(legend_title = FALSE, legend_position = "none")) +
ggplot2::ggtitle("Not Normalized")
i <- PomaDensity(auto_scaling,
x = "features",
theme_params = list(legend_title = FALSE, legend_position = "none")) +
ggplot2::ggtitle("Auto Scaling") +
ggplot2::theme(axis.title.x = ggplot2::element_blank(),
axis.title.y = ggplot2::element_blank())
j <- PomaDensity(level_scaling,
x = "features",
theme_params = list(legend_title = FALSE, legend_position = "none")) +
ggplot2::ggtitle("Level Scaling") +
ggplot2::theme(axis.title.x = ggplot2::element_blank(),
axis.title.y = ggplot2::element_blank())
k <- PomaDensity(log_scaling,
x = "features",
theme_params = list(legend_title = FALSE, legend_position = "none")) +
ggplot2::ggtitle("Log Scaling") +
ggplot2::theme(axis.title.x = ggplot2::element_blank(),
axis.title.y = ggplot2::element_blank())
l <- PomaDensity(log_transformation,
x = "features",
theme_params = list(legend_title = FALSE, legend_position = "none")) +
ggplot2::ggtitle("Log Transformation") +
ggplot2::theme(axis.title.x = ggplot2::element_blank(),
axis.title.y = ggplot2::element_blank())
m <- PomaDensity(vast_scaling,
x = "features",
theme_params = list(legend_title = FALSE, legend_position = "none")) +
ggplot2::ggtitle("Vast Scaling") +
ggplot2::theme(axis.title.x = ggplot2::element_blank(),
axis.title.y = ggplot2::element_blank())
n <- PomaDensity(log_pareto,
x = "features",
theme_params = list(legend_title = FALSE, legend_position = "none")) +
ggplot2::ggtitle("Log Pareto") +
ggplot2::theme(axis.title.x = ggplot2::element_blank(),
axis.title.y = ggplot2::element_blank())
h
(i + j + k) / (l + m + n)
sessionInfo()
> R Under development (unstable) (2024-10-21 r87258)
> Platform: x86_64-pc-linux-gnu
> Running under: Ubuntu 24.04.1 LTS
>
> Matrix products: default
> BLAS: /home/biocbuild/bbs-3.21-bioc/R/lib/libRblas.so
> LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.12.0
>
> locale:
> [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
> [3] LC_TIME=en_GB LC_COLLATE=C
> [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
> [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
> [9] LC_ADDRESS=C LC_TELEPHONE=C
> [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
>
> time zone: America/New_York
> tzcode source: system (glibc)
>
> attached base packages:
> [1] stats graphics grDevices utils datasets methods base
>
> other attached packages:
> [1] patchwork_1.3.0 ggtext_0.1.2 POMA_1.17.0 BiocStyle_2.35.0
>
> loaded via a namespace (and not attached):
> [1] SummarizedExperiment_1.37.0 gtable_0.3.6
> [3] impute_1.81.0 xfun_0.48
> [5] bslib_0.8.0 ggplot2_3.5.1
> [7] Biobase_2.67.0 lattice_0.22-6
> [9] vctrs_0.6.5 tools_4.5.0
> [11] generics_0.1.3 stats4_4.5.0
> [13] tibble_3.2.1 fansi_1.0.6
> [15] highr_0.11 pkgconfig_2.0.3
> [17] Matrix_1.7-1 S4Vectors_0.45.0
> [19] lifecycle_1.0.4 GenomeInfoDbData_1.2.13
> [21] stringr_1.5.1 compiler_4.5.0
> [23] farver_2.1.2 tinytex_0.53
> [25] munsell_0.5.1 GenomeInfoDb_1.43.0
> [27] htmltools_0.5.8.1 sass_0.4.9
> [29] yaml_2.3.10 pillar_1.9.0
> [31] crayon_1.5.3 jquerylib_0.1.4
> [33] tidyr_1.3.1 cachem_1.1.0
> [35] DelayedArray_0.33.0 magick_2.8.5
> [37] abind_1.4-8 commonmark_1.9.2
> [39] tidyselect_1.2.1 digest_0.6.37
> [41] stringi_1.8.4 dplyr_1.1.4
> [43] purrr_1.0.2 bookdown_0.41
> [45] labeling_0.4.3 fastmap_1.2.0
> [47] grid_4.5.0 colorspace_2.1-1
> [49] cli_3.6.3 SparseArray_1.7.0
> [51] magrittr_2.0.3 S4Arrays_1.7.0
> [53] utf8_1.2.4 withr_3.0.2
> [55] scales_1.3.0 UCSC.utils_1.3.0
> [57] rmarkdown_2.28 XVector_0.47.0
> [59] httr_1.4.7 matrixStats_1.4.1
> [61] evaluate_1.0.1 knitr_1.48
> [63] GenomicRanges_1.59.0 IRanges_2.41.0
> [65] viridisLite_0.4.2 markdown_1.13
> [67] rlang_1.1.4 gridtext_0.1.5
> [69] Rcpp_1.0.13 glue_1.8.0
> [71] BiocManager_1.30.25 xml2_1.3.6
> [73] BiocGenerics_0.53.0 jsonlite_1.8.9
> [75] R6_2.5.1 MatrixGenerics_1.19.0
> [77] zlibbioc_1.53.0