1 Installation

Run the following code to install the Bioconductor version of package.

# install.packages("BiocManager")
BiocManager::install("POMA")

2 Load Packages

library(POMA)
library(ggtext)
library(patchwork)

3 Load Data and Imputation

Let’s create a cleaned SummarizedExperiment object from the sample data st000336 to explore the normalization effects.

example_data <- st000336 %>% 
  PomaImpute() # KNN imputation
Loading required namespace: SummarizedExperiment
2 features removed.

example_data
class: SummarizedExperiment 
dim: 29 57 
metadata(0):
assays(1): ''
rownames(29): x1_methylhistidine x3_methylhistidine ... pyruvate
  succinate
rowData names(0):
colnames(57): 1 2 ... 56 57
colData names(2): group steroids

4 Normalization

Here we will evaluate the normalization methods that POMA offers on the same SummarizedExperiment object to compare them (Berg et al. 2006).

none <- PomaNorm(example_data, method = "none")
auto_scaling <- PomaNorm(example_data, method = "auto_scaling")
level_scaling <- PomaNorm(example_data, method = "level_scaling")
log_scaling <- PomaNorm(example_data, method = "log_scaling")
log_transformation <- PomaNorm(example_data, method = "log")
vast_scaling <- PomaNorm(example_data, method = "vast_scaling")
log_pareto <- PomaNorm(example_data, method = "log_pareto")

4.1 Normalization effect on data dimensions

When we check for the dimension of the data after normalization we can see that all methods have the same effect on data dimension. PomaNorm only modifies the data dimension when the dataset contains only-zero features or zero-variance features.

dim(SummarizedExperiment::assay(none))
> [1] 29 57
dim(SummarizedExperiment::assay(auto_scaling))
> [1] 29 57
dim(SummarizedExperiment::assay(level_scaling))
> [1] 29 57
dim(SummarizedExperiment::assay(log_scaling))
> [1] 29 57
dim(SummarizedExperiment::assay(log_transformation))
> [1] 29 57
dim(SummarizedExperiment::assay(vast_scaling))
> [1] 29 57
dim(SummarizedExperiment::assay(log_pareto))
> [1] 29 57

4.2 Normalization effect on samples

Here we can evaluate the normalization effects on samples (Berg et al. 2006).

a <- PomaBoxplots(none, 
                  x = "samples") +
  ggplot2::ggtitle("Not Normalized")

b <- PomaBoxplots(auto_scaling, 
                  x = "samples", 
                  theme_params = list(legend_title = FALSE, legend_position = "none")) +
  ggplot2::ggtitle("Auto Scaling") +
  ggplot2::theme(axis.text.x = ggplot2::element_blank())

c <- PomaBoxplots(level_scaling, 
                  x = "samples", 
                  theme_params = list(legend_title = FALSE, legend_position = "none")) +
  ggplot2::ggtitle("Level Scaling") +
  ggplot2::theme(axis.text.x = ggplot2::element_blank())

d <- PomaBoxplots(log_scaling, 
                  x = "samples", 
                  theme_params = list(legend_title = FALSE, legend_position = "none")) +
  ggplot2::ggtitle("Log Scaling") +
  ggplot2::theme(axis.text.x = ggplot2::element_blank())

e <- PomaBoxplots(log_transformation, 
                  x = "samples", 
                  theme_params = list(legend_title = FALSE, legend_position = "none")) +
  ggplot2::ggtitle("Log Transformation") +
  ggplot2::theme(axis.text.x = ggplot2::element_blank())

f <- PomaBoxplots(vast_scaling, 
                  x = "samples", 
                  theme_params = list(legend_title = FALSE, legend_position = "none")) +
  ggplot2::ggtitle("Vast Scaling") +
  ggplot2::theme(axis.text.x = ggplot2::element_blank())

g <- PomaBoxplots(log_pareto, 
                  x = "samples", 
                  theme_params = list(legend_title = FALSE, legend_position = "none")) +
  ggplot2::ggtitle("Log Pareto") +
  ggplot2::theme(axis.text.x = ggplot2::element_blank())

a

(b + c + d) / (e + f + g)

4.3 Normalization effect on features

Here we can evaluate the normalization effects on features.

h <- PomaDensity(none, 
                 x = "features", 
                 theme_params = list(legend_title = FALSE, legend_position = "none")) +
  ggplot2::ggtitle("Not Normalized")

i <- PomaDensity(auto_scaling, 
                 x = "features", 
                 theme_params = list(legend_title = FALSE, legend_position = "none")) +
  ggplot2::ggtitle("Auto Scaling") +
  ggplot2::theme(axis.title.x = ggplot2::element_blank(),
                 axis.title.y = ggplot2::element_blank())

j <- PomaDensity(level_scaling, 
                 x = "features", 
                 theme_params = list(legend_title = FALSE, legend_position = "none")) +
  ggplot2::ggtitle("Level Scaling") +
  ggplot2::theme(axis.title.x = ggplot2::element_blank(),
                 axis.title.y = ggplot2::element_blank())

k <- PomaDensity(log_scaling, 
                 x = "features", 
                 theme_params = list(legend_title = FALSE, legend_position = "none")) +
  ggplot2::ggtitle("Log Scaling") +
  ggplot2::theme(axis.title.x = ggplot2::element_blank(),
                 axis.title.y = ggplot2::element_blank())

l <- PomaDensity(log_transformation, 
                 x = "features", 
                 theme_params = list(legend_title = FALSE, legend_position = "none")) +
  ggplot2::ggtitle("Log Transformation") +
  ggplot2::theme(axis.title.x = ggplot2::element_blank(),
                 axis.title.y = ggplot2::element_blank())

m <- PomaDensity(vast_scaling, 
                 x = "features", 
                 theme_params = list(legend_title = FALSE, legend_position = "none")) +
  ggplot2::ggtitle("Vast Scaling") +
  ggplot2::theme(axis.title.x = ggplot2::element_blank(),
                 axis.title.y = ggplot2::element_blank())

n <- PomaDensity(log_pareto, 
                 x = "features", 
                 theme_params = list(legend_title = FALSE, legend_position = "none")) +
  ggplot2::ggtitle("Log Pareto") +
  ggplot2::theme(axis.title.x = ggplot2::element_blank(),
                 axis.title.y = ggplot2::element_blank())

h

(i + j + k) / (l + m + n)

5 Session Information

sessionInfo()
> R Under development (unstable) (2024-10-21 r87258)
> Platform: x86_64-pc-linux-gnu
> Running under: Ubuntu 24.04.1 LTS
> 
> Matrix products: default
> BLAS:   /home/biocbuild/bbs-3.21-bioc/R/lib/libRblas.so 
> LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.12.0
> 
> locale:
>  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
>  [3] LC_TIME=en_GB              LC_COLLATE=C              
>  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
>  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
>  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
> [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
> 
> time zone: America/New_York
> tzcode source: system (glibc)
> 
> attached base packages:
> [1] stats     graphics  grDevices utils     datasets  methods   base     
> 
> other attached packages:
> [1] patchwork_1.3.0  ggtext_0.1.2     POMA_1.17.6      BiocStyle_2.35.0
> 
> loaded via a namespace (and not attached):
>  [1] SummarizedExperiment_1.37.0 gtable_0.3.6               
>  [3] impute_1.81.0               xfun_0.49                  
>  [5] bslib_0.8.0                 ggplot2_3.5.1              
>  [7] Biobase_2.67.0              lattice_0.22-6             
>  [9] vctrs_0.6.5                 tools_4.5.0                
> [11] generics_0.1.3              stats4_4.5.0               
> [13] tibble_3.2.1                fansi_1.0.6                
> [15] pkgconfig_2.0.3             Matrix_1.7-1               
> [17] S4Vectors_0.45.2            lifecycle_1.0.4            
> [19] GenomeInfoDbData_1.2.13     stringr_1.5.1              
> [21] compiler_4.5.0              farver_2.1.2               
> [23] tinytex_0.54                munsell_0.5.1              
> [25] GenomeInfoDb_1.43.2         htmltools_0.5.8.1          
> [27] sass_0.4.9                  yaml_2.3.10                
> [29] pillar_1.9.0                crayon_1.5.3               
> [31] jquerylib_0.1.4             tidyr_1.3.1                
> [33] cachem_1.1.0                DelayedArray_0.33.2        
> [35] magick_2.8.5                abind_1.4-8                
> [37] commonmark_1.9.2            tidyselect_1.2.1           
> [39] digest_0.6.37               stringi_1.8.4              
> [41] dplyr_1.1.4                 purrr_1.0.2                
> [43] bookdown_0.41               labeling_0.4.3             
> [45] fastmap_1.2.0               grid_4.5.0                 
> [47] colorspace_2.1-1            cli_3.6.3                  
> [49] SparseArray_1.7.2           magrittr_2.0.3             
> [51] S4Arrays_1.7.1              utf8_1.2.4                 
> [53] withr_3.0.2                 scales_1.3.0               
> [55] UCSC.utils_1.3.0            rmarkdown_2.29             
> [57] XVector_0.47.0              httr_1.4.7                 
> [59] matrixStats_1.4.1           evaluate_1.0.1             
> [61] knitr_1.49                  GenomicRanges_1.59.1       
> [63] IRanges_2.41.1              viridisLite_0.4.2          
> [65] markdown_1.13               rlang_1.1.4                
> [67] gridtext_0.1.5              Rcpp_1.0.13-1              
> [69] glue_1.8.0                  BiocManager_1.30.25        
> [71] xml2_1.3.6                  BiocGenerics_0.53.3        
> [73] jsonlite_1.8.9              R6_2.5.1                   
> [75] MatrixGenerics_1.19.0       zlibbioc_1.53.0

References

Berg, Robert A van den, Huub CJ Hoefsloot, Johan A Westerhuis, Age K Smilde, and Mariët J van der Werf. 2006. “Centering, Scaling, and Transformations: Improving the Biological Information Content of Metabolomics Data.” BMC Genomics 7 (1): 142.

Normalization Methods

29 November 2024

Contents