Abstract
A comparison and visualisation of epialleleR output values for various input files
The best possible explanation on VEF and lMHL values is given in help
files for generateCytosineReport
and
generateMhlReport
methods, respectively. Here we
try to show some simplified and real situations, i.e., different
methylation patterns that may exist, and provide a visual summary of
epialleleR
output.
The readers are welcome to try their own real and simulated data. If it might be of interest to others, please create an issue and these examples might get included in this vignette.
NB: the plotMetrics
function used below is a piece of
spaghetti code, hence hidden. If you still want to use it or see what it
does - browse a source
code of this vignette online.
out.bam <- tempfile(pattern="simulated", fileext=".bam")
set.seed(1)
# no epimutations
simulateBam(
output.bam.file=out.bam,
XM=c(
sapply(
lapply(1:1000, function (x) sample(c("Z",rep("z", 9)), 10)),
paste, collapse=""
)
),
XG="CT"
)
#> Writing sample BAM [0.143s]
#> [1] 1000
plotMetrics(out.bam, as("chrS:1-10", "GRanges"), 0, title="no epimutations")
# one complete epimutation
simulateBam(
output.bam.file=out.bam,
XM=c(
paste(rep("Z", 10), collapse=""),
sapply(
lapply(1:999, function (x) sample(c("Z",rep("z", 9)), 10)),
paste, collapse=""
)
),
XG="CT"
)
#> Writing sample BAM [0.214s]
#> [1] 1000
plotMetrics(out.bam, as("chrS:1-10", "GRanges"), title="one complete epimutation")
# one partial epimutation
simulateBam(
output.bam.file=out.bam,
XM=c(
paste(c(rep("Z", 4), "z", "z", rep("Z", 4)), collapse=""),
sapply(
lapply(1:999, function (x) sample(c("Z",rep("z", 9)), 10)),
paste, collapse=""
)
),
XG="CT"
)
#> Writing sample BAM [0.124s]
#> [1] 1000
plotMetrics(out.bam, as("chrS:1-10", "GRanges"), title="one partial epimutation")
# another partial epimutation
simulateBam(
output.bam.file=out.bam,
XM=c(
"zZZZZZZZzz",
sapply(
lapply(1:999, function (x) sample(c("Z",rep("z", 9)), 10)),
paste, collapse=""
)
),
XG="CT"
)
#> Writing sample BAM [0.116s]
#> [1] 1000
plotMetrics(out.bam, as("chrS:1-10", "GRanges"), title="another partial epimutation")
# several partial epimutations
simulateBam(
output.bam.file=out.bam,
XM=c(
sapply(
lapply(1:10, function (x) c(rep("Z", 6), rep("z", 4))),
paste, collapse=""
),
sapply(
lapply(1:999, function (x) sample(c("Z",rep("z", 9)), 10)),
paste, collapse=""
)
),
XG="CT"
)
#> Writing sample BAM [0.140s]
#> [1] 1009
plotMetrics(out.bam, as("chrS:1-10", "GRanges"), title="several partial epimutations")
# several short partial epimutations
simulateBam(
output.bam.file=out.bam,
XM=c(
sapply(
lapply(1:10, function (x) c(rep("Z", 4), rep("z", 6))),
paste, collapse=""
),
sapply(
lapply(1:999, function (x) sample(c("Z",rep("z", 9)), 10)),
paste, collapse=""
)
),
XG="CT"
)
#> Writing sample BAM [0.140s]
#> [1] 1009
plotMetrics(out.bam, as("chrS:1-10", "GRanges"), title="several short partial epimutations")
# several overlapping partial epimutations
simulateBam(
output.bam.file=out.bam,
pos=1:10,
XM=c(
"ZZZZZZZZZZ", "ZZZZZZZZZz", "ZZZZZZZZzz", "ZZZZZZZzzz", "ZZZZZZzzzz",
sapply(
lapply(1:15, function (x) sample(c("Z",rep("z", 9)), 10)),
paste, collapse=""
)
),
XG="CT"
)
#> Writing sample BAM [0.005s]
#> [1] 20
plotMetrics(out.bam, as("chrS:1-20", "GRanges"), title="several overlapping partial epimutations")
# amplicon 0%
plotMetrics(
system.file("extdata", "amplicon000meth.bam", package="epialleleR"),
as("chr17:43124861-43126026", "GRanges"), title="amplicon, 0%"
)
# amplicon 10%
plotMetrics(
system.file("extdata", "amplicon010meth.bam", package="epialleleR"),
as("chr17:43124861-43126026", "GRanges"), title="amplicon, 10%"
)
# sample capture, BMP7
plotMetrics(
system.file("extdata", "capture.bam", package="epialleleR"),
as("chr20:57266125-57268185:+", "GRanges"), title="sample capture, BMP7, + strand"
)
# sample capture, BMP7
plotMetrics(
system.file("extdata", "capture.bam", package="epialleleR"),
as("chr20:57266125-57268185:-", "GRanges"), title="sample capture, BMP7, - strand"
)
# sample capture, RAD51C
plotMetrics(
system.file("extdata", "capture.bam", package="epialleleR"),
as("chr17:58691673-58693108:+", "GRanges"), title="sample capture, RAD51C, + strand"
)
# sample capture, RAD51C
plotMetrics(
system.file("extdata", "capture.bam", package="epialleleR"),
as("chr17:58691673-58693108:-", "GRanges"), title="sample capture, RAD51C, - strand"
)
# long-read sequencing, low methylation
getXM <- function (p) {sample(x=c("z", "Z"), size=1, prob=c(p, 1-p))}
probs <- (sin(seq(-2*pi, +1*pi, by = pi/25))+2)/3
simulateBam(
output.bam.file=out.bam,
pos=1:10,
XM=sapply(1:10, function (i) {paste(sapply(probs, getXM), collapse="")}),
XG="CT"
)
#> Writing sample BAM [0.018s]
#> [1] 10
plotMetrics(out.bam, as("chrS:1-1000", "GRanges"), title="long-read sequencing, low methylation")
# long-read sequencing, high methylation
simulateBam(
output.bam.file=out.bam,
pos=1:10,
XM=sapply(1:10, function (i) {paste(sapply(1-probs, getXM), collapse="")}),
XG="CT"
)
#> Writing sample BAM [0.014s]
#> [1] 10
plotMetrics(out.bam, as("chrS:1-1000", "GRanges"), title="long-read sequencing, high methylation")
sessionInfo()
#> R Under development (unstable) (2024-10-21 r87258)
#> Platform: x86_64-pc-linux-gnu
#> Running under: Ubuntu 24.04.1 LTS
#>
#> Matrix products: default
#> BLAS: /home/biocbuild/bbs-3.21-bioc/R/lib/libRblas.so
#> LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.12.0
#>
#> locale:
#> [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C LC_TIME=en_GB
#> [4] LC_COLLATE=C LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
#> [7] LC_PAPER=en_US.UTF-8 LC_NAME=C LC_ADDRESS=C
#> [10] LC_TELEPHONE=C LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
#>
#> time zone: America/New_York
#> tzcode source: system (glibc)
#>
#> attached base packages:
#> [1] stats4 stats graphics grDevices utils datasets methods base
#>
#> other attached packages:
#> [1] GenomicRanges_1.59.0 GenomeInfoDb_1.43.0 IRanges_2.41.0 S4Vectors_0.45.0
#> [5] BiocGenerics_0.53.0 data.table_1.16.2 ggplot2_3.5.1 epialleleR_1.15.0
#>
#> loaded via a namespace (and not attached):
#> [1] tidyselect_1.2.1 dplyr_1.1.4 farver_2.1.2
#> [4] blob_1.2.4 Biostrings_2.75.0 bitops_1.0-9
#> [7] fastmap_1.2.0 RCurl_1.98-1.16 VariantAnnotation_1.53.0
#> [10] GenomicAlignments_1.43.0 XML_3.99-0.17 digest_0.6.37
#> [13] lifecycle_1.0.4 KEGGREST_1.47.0 RSQLite_2.3.7
#> [16] magrittr_2.0.3 compiler_4.5.0 rlang_1.1.4
#> [19] sass_0.4.9 tools_4.5.0 utf8_1.2.4
#> [22] yaml_2.3.10 rtracklayer_1.67.0 knitr_1.48
#> [25] S4Arrays_1.7.0 labeling_0.4.3 bit_4.5.0
#> [28] curl_5.2.3 DelayedArray_0.33.0 RColorBrewer_1.1-3
#> [31] abind_1.4-8 BiocParallel_1.41.0 withr_3.0.2
#> [34] grid_4.5.0 fansi_1.0.6 colorspace_2.1-1
#> [37] scales_1.3.0 SummarizedExperiment_1.37.0 cli_3.6.3
#> [40] rmarkdown_2.28 crayon_1.5.3 generics_0.1.3
#> [43] httr_1.4.7 rjson_0.2.23 DBI_1.2.3
#> [46] cachem_1.1.0 zlibbioc_1.53.0 parallel_4.5.0
#> [49] AnnotationDbi_1.69.0 XVector_0.47.0 restfulr_0.0.15
#> [52] matrixStats_1.4.1 vctrs_0.6.5 Matrix_1.7-1
#> [55] jsonlite_1.8.9 bit64_4.5.2 GenomicFeatures_1.59.0
#> [58] jquerylib_0.1.4 glue_1.8.0 codetools_0.2-20
#> [61] gtable_0.3.6 BiocIO_1.17.0 UCSC.utils_1.3.0
#> [64] munsell_0.5.1 tibble_3.2.1 pillar_1.9.0
#> [67] htmltools_0.5.8.1 GenomeInfoDbData_1.2.13 BSgenome_1.75.0
#> [70] R6_2.5.1 evaluate_1.0.1 lattice_0.22-6
#> [73] Biobase_2.67.0 highr_0.11 png_0.1-8
#> [76] Rsamtools_2.23.0 memoise_2.0.1 bslib_0.8.0
#> [79] Rcpp_1.0.13 SparseArray_1.7.0 xfun_0.48
#> [82] MatrixGenerics_1.19.0 pkgconfig_2.0.3