citation('HiContacts')
#> To cite package 'HiContacts' in publications use:
#>
#> Serizay J, Matthey-Doret C, Bignaud A, Baudry L, Koszul R (2024).
#> "Orchestrating chromosome conformation capture analysis with
#> Bioconductor." _Nature Communications_, *15*, 1-9.
#> doi:10.1038/s41467-024-44761-x
#> <https://doi.org/10.1038/s41467-024-44761-x>.
#>
#> A BibTeX entry for LaTeX users is
#>
#> @Article{,
#> author = {Jacques Serizay and Cyril Matthey-Doret and Amaury Bignaud and Lyam Baudry and Romain Koszul},
#> title = {Orchestrating chromosome conformation capture analysis with Bioconductor},
#> journal = {Nature Communications},
#> year = {2024},
#> volume = {15},
#> pages = {1--9},
#> doi = {10.1038/s41467-024-44761-x},
#> }
.(m)/cool
files as HiCExperiment
objectsThe HiCExperiment
package provides classes and methods to import an .(m)cool
file in R. The HiContactsData
package gives access to a range of toy
datasets stored by Bioconductor in the ExperimentHub
.
library(dplyr)
library(ggplot2)
library(HiCExperiment)
library(HiContacts)
library(HiContactsData)
library(rtracklayer)
#>
#> Attaching package: 'rtracklayer'
#> The following object is masked from 'package:AnnotationHub':
#>
#> hubUrl
library(InteractionSet)
#> Loading required package: SummarizedExperiment
#> Loading required package: MatrixGenerics
#> Loading required package: matrixStats
#>
#> Attaching package: 'matrixStats'
#> The following object is masked from 'package:dplyr':
#>
#> count
#>
#> Attaching package: 'MatrixGenerics'
#> The following objects are masked from 'package:matrixStats':
#>
#> colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
#> colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
#> colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
#> colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
#> colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
#> colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
#> colWeightedMeans, colWeightedMedians, colWeightedSds,
#> colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
#> rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
#> rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
#> rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
#> rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
#> rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
#> rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
#> rowWeightedSds, rowWeightedVars
#> Loading required package: Biobase
#> Welcome to Bioconductor
#>
#> Vignettes contain introductory material; view with
#> 'browseVignettes()'. To cite Bioconductor, see
#> 'citation("Biobase")', and for packages 'citation("pkgname")'.
#>
#> Attaching package: 'Biobase'
#> The following object is masked from 'package:MatrixGenerics':
#>
#> rowMedians
#> The following objects are masked from 'package:matrixStats':
#>
#> anyMissing, rowMedians
#> The following object is masked from 'package:ExperimentHub':
#>
#> cache
#> The following object is masked from 'package:AnnotationHub':
#>
#> cache
cool_file <- HiContactsData('yeast_wt', format = 'cool')
#> see ?HiContactsData and browseVignettes('HiContactsData') for documentation
#> loading from cache
hic <- import(cool_file, format = 'cool')
hic
#> `HiCExperiment` object with 8,757,906 contacts over 12,079 regions
#> -------
#> fileName: "/home/biocbuild/.cache/R/ExperimentHub/1780c614d0404c_7751"
#> focus: "whole genome"
#> resolutions(1): 1000
#> active resolution: 1000
#> interactions: 2945692
#> scores(2): count balanced
#> topologicalFeatures: compartments(0) borders(0) loops(0) viewpoints(0)
#> pairsFile: N/A
#> metadata(0):
The plotMatrix
function takes a HiCExperiment
object and plots it as a heatmap.
Use the use.scores
argument to specify which type of interaction scores to use
in the contact maps (e.g. count
, balanced
, …). By default, plotMatrix()
looks for balanced scores. If they are not stored in the original .(m)/cool
file,
plotMatrix()
simply takes the first scores available.
## Square matrix
plotMatrix(hic, use.scores = 'balanced', limits = c(-4, -1))
## Horizontal matrix
plotMatrix(
refocus(hic, 'II'),
use.scores = 'balanced', limits = c(-4, -1),
maxDistance = 200000
)
Loops can be plotted on top of Hi-C matrices by providing a GInteractions
object to the loops
argument.
Note:
Loops in .bedpe
format can be imported in R using the import()
function,
and converted into GInteractions
with the
InteractionSet::makeGInteractionsFromGRangesPairs()
function.
mcool_file <- HiContactsData('yeast_wt', format = 'mcool')
#> see ?HiContactsData and browseVignettes('HiContactsData') for documentation
#> loading from cache
loops <- system.file("extdata", 'S288C-loops.bedpe', package = 'HiCExperiment') |>
import() |>
makeGInteractionsFromGRangesPairs()
p <- import(mcool_file, format = 'mcool', focus = 'IV') |>
plotMatrix(loops = loops, limits = c(-4, -1), dpi = 120)
borders <- system.file("extdata", 'S288C-borders.bed', package = 'HiCExperiment') |>
import()
p <- import(mcool_file, format = 'mcool', focus = 'IV') |>
plotMatrix(loops = loops, borders = borders, limits = c(-4, -1), dpi = 120)
aggr_centros <- HiContacts::aggregate(
hic, targets = loops, BPPARAM = BiocParallel::SerialParam()
)
#> Going through preflight checklist...
#> Parsing the entire contact matrice as a sparse matrix...
#> Modeling distance decay...
#> Filtering for contacts within provided targets...
plotMatrix(
aggr_centros, use.scores = 'detrended', limits = c(-1, 1), scale = 'linear',
cmap = bgrColors()
)
hic <- import(mcool_file, format = 'mcool', focus = 'chr18:20000000-35000000', resolution = 40000)
detrended_hic <- detrend(hic)
patchwork::wrap_plots(
plotMatrix(detrended_hic, use.scores = 'expected', scale = 'log10', limits = c(-3, -1), dpi = 120),
plotMatrix(detrended_hic, use.scores = 'detrended', scale = 'linear', limits = c(-1, 1), dpi = 120)
)
mcool_file_1 <- HiContactsData('yeast_eco1', format = 'mcool')
#> see ?HiContactsData and browseVignettes('HiContactsData') for documentation
#> loading from cache
mcool_file_2 <- HiContactsData('yeast_wt', format = 'mcool')
#> see ?HiContactsData and browseVignettes('HiContactsData') for documentation
#> loading from cache
hic_1 <- import(mcool_file_1, format = 'mcool', focus = 'II:1-300000', resolution = 2000)
hic_2 <- import(mcool_file_2, format = 'mcool', focus = 'II:1-300000', resolution = 2000)
merged_hic <- merge(hic_1, hic_2)
hic_1
#> `HiCExperiment` object with 301,285 contacts over 150 regions
#> -------
#> fileName: "/home/biocbuild/.cache/R/ExperimentHub/2dc7284a9c7d2c_7754"
#> focus: "II:1-300,000"
#> resolutions(5): 1000 2000 4000 8000 16000
#> active resolution: 2000
#> interactions: 9607
#> scores(2): count balanced
#> topologicalFeatures: compartments(0) borders(0) loops(0) viewpoints(0)
#> pairsFile: N/A
#> metadata(0):
hic_2
#> `HiCExperiment` object with 146,812 contacts over 150 regions
#> -------
#> fileName: "/home/biocbuild/.cache/R/ExperimentHub/1a8be61e27f712_7752"
#> focus: "II:1-300,000"
#> resolutions(5): 1000 2000 4000 8000 16000
#> active resolution: 2000
#> interactions: 6933
#> scores(2): count balanced
#> topologicalFeatures: compartments(0) borders(0) loops(0) viewpoints(0)
#> pairsFile: N/A
#> metadata(0):
merged_hic
#> `HiCExperiment` object with 229,926 contacts over 150 regions
#> -------
#> fileName: "/home/biocbuild/.cache/R/ExperimentHub/2dc7284a9c7d2c_7754"
#> focus: "II:1-300,000"
#> resolutions(5): 1000 2000 4000 8000 16000
#> active resolution: 2000
#> interactions: 9748
#> scores(2): count balanced
#> topologicalFeatures: ()
#> pairsFile: N/A
#> metadata(2): hce_list operation
hic_1 <- import(mcool_file_1, format = 'mcool', focus = 'II', resolution = 2000)
hic_2 <- import(mcool_file_2, format = 'mcool', focus = 'II', resolution = 2000)
div_hic <- divide(hic_1, by = hic_2)
div_hic
#> `HiCExperiment` object with 996,154 contacts over 407 regions
#> -------
#> fileName: N/A
#> focus: "II"
#> resolutions(1): 2000
#> active resolution: 2000
#> interactions: 60894
#> scores(6): count.x balanced.x count.by balanced.by balanced.fc balanced.l2fc
#> topologicalFeatures: ()
#> pairsFile: N/A
#> metadata(2): hce_list operation
p <- patchwork::wrap_plots(
plotMatrix(hic_1, use.scores = 'balanced', scale = 'log10', limits = c(-4, -1)),
plotMatrix(hic_2, use.scores = 'balanced', scale = 'log10', limits = c(-4, -1)),
plotMatrix(div_hic, use.scores = 'balanced.fc', scale = 'log2', limits = c(-2, 2), cmap = bwrColors())
)
hic_1_despeckled <- despeckle(hic_1)
hic_1_despeckled5 <- despeckle(hic_1, focal.size = 5)
p <- patchwork::wrap_plots(
plotMatrix(hic_1, use.scores = 'balanced', scale = 'log10', limits = c(-4, -1)),
plotMatrix(hic_1_despeckled, use.scores = 'balanced.despeckled', scale = 'log10', limits = c(-4, -1)),
plotMatrix(hic_1_despeckled5, use.scores = 'balanced.despeckled', scale = 'log10', limits = c(-4, -1))
)
mcool_file <- HiContactsData('yeast_wt', format = 'mcool')
#> see ?HiContactsData and browseVignettes('HiContactsData') for documentation
#> loading from cache
hic <- import(mcool_file, format = 'mcool', resolution = 16000)
# - Get compartments
hic <- getCompartments(hic, chromosomes = c('XV', 'XVI'))
#> Going through preflight checklist...
#> Parsing intra-chromosomal contacts for each chromosome...
#> Computing eigenvectors for each chromosome...
hic
#> `HiCExperiment` object with 8,757,906 contacts over 763 regions
#> -------
#> fileName: "/home/biocbuild/.cache/R/ExperimentHub/1a8be61e27f712_7752"
#> focus: "whole genome"
#> resolutions(5): 1000 2000 4000 8000 16000
#> active resolution: 16000
#> interactions: 267709
#> scores(2): count balanced
#> topologicalFeatures: compartments(18) borders(0) loops(0) viewpoints(0)
#> pairsFile: N/A
#> metadata(1): eigens
# - Export compartments as bigwig and bed files
export(IRanges::coverage(metadata(hic)$eigens, weight = 'eigen'), 'compartments.bw')
export(
topologicalFeatures(hic, 'compartments')[topologicalFeatures(hic, 'compartments')$compartment == 'A'],
'A-compartments.bed'
)
export(
topologicalFeatures(hic, 'compartments')[topologicalFeatures(hic, 'compartments')$compartment == 'B'],
'B-compartments.bed'
)
# - Generate saddle plot
plotSaddle(hic)
# - Compute insulation score
hic <- refocus(hic, 'II:1-300000') |>
zoom(resolution = 1000) |>
getDiamondInsulation(window_size = 8000) |>
getBorders()
#> Going through preflight checklist...
#> Scan each window and compute diamond insulation score...
#> Annotating diamond score prominence for each window...
hic
#> `HiCExperiment` object with 146,812 contacts over 300 regions
#> -------
#> fileName: "/home/biocbuild/.cache/R/ExperimentHub/1a8be61e27f712_7752"
#> focus: "II:1-300,000"
#> resolutions(5): 1000 2000 4000 8000 16000
#> active resolution: 1000
#> interactions: 18286
#> scores(2): count balanced
#> topologicalFeatures: compartments(18) borders(17) loops(0) viewpoints(0)
#> pairsFile: N/A
#> metadata(2): eigens insulation
# - Export insulation as bigwig track and borders as bed file
export(IRanges::coverage(metadata(hic)$insulation, weight = 'insulation'), 'insulation.bw')
export(topologicalFeatures(hic, 'borders'), 'borders.bed')
mcool_file <- HiContactsData('mESCs', format = 'mcool')
#> see ?HiContactsData and browseVignettes('HiContactsData') for documentation
#> loading from cache
hic <- import(mcool_file, format = 'mcool', focus = 'chr18:20000000-35000000', resolution = 40000)
v4C <- virtual4C(hic, viewpoint = GRanges('chr18:31000000-31050000'))
plot4C(v4C, ggplot2::aes(x = center, y = score))
mcool_file <- HiContactsData('yeast_wt', format = 'mcool')
#> see ?HiContactsData and browseVignettes('HiContactsData') for documentation
#> loading from cache
hic <- import(mcool_file, format = 'mcool', resolution = 1000)
cisTransRatio(hic)
#> # A tibble: 16 × 6
#> # Groups: chr [16]
#> chr cis trans n_total cis_pct trans_pct
#> <fct> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 I 186326 96738 283064 0.658 0.342
#> 2 II 942728 273966 1216694 0.775 0.225
#> 3 III 303980 127087 431067 0.705 0.295
#> 4 IV 1858062 418218 2276280 0.816 0.184
#> 5 V 607090 220873 827963 0.733 0.267
#> 6 VI 280282 127771 408053 0.687 0.313
#> 7 VII 1228532 335909 1564441 0.785 0.215
#> 8 VIII 574086 205122 779208 0.737 0.263
#> 9 IX 474182 179280 653462 0.726 0.274
#> 10 X 834656 259240 1093896 0.763 0.237
#> 11 XI 775240 245899 1021139 0.759 0.241
#> 12 XII 1182742 278065 1460807 0.810 0.190
#> 13 XIII 1084810 296351 1381161 0.785 0.215
#> 14 XIV 852516 256639 1109155 0.769 0.231
#> 15 XV 1274070 351132 1625202 0.784 0.216
#> 16 XVI 1070700 313520 1384220 0.774 0.226
# Without a pairs file
mcool_file <- HiContactsData('yeast_wt', format = 'mcool')
#> see ?HiContactsData and browseVignettes('HiContactsData') for documentation
#> loading from cache
hic <- import(mcool_file, format = 'mcool', resolution = 1000)
ps <- distanceLaw(hic)
#> pairsFile not specified. The P(s) curve will be an approximation.
plotPs(ps, ggplot2::aes(x = binned_distance, y = norm_p))
#> Warning: Removed 18 rows containing missing values or values outside the scale range
#> (`geom_line()`).
# With a pairs file
pairsFile(hic) <- HiContactsData('yeast_wt', format = 'pairs.gz')
#> see ?HiContactsData and browseVignettes('HiContactsData') for documentation
#> loading from cache
ps <- distanceLaw(hic)
#> Importing pairs file /home/biocbuild/.cache/R/ExperimentHub/2dc6e223949313_7753 in memory. This may take a while...
plotPs(ps, ggplot2::aes(x = binned_distance, y = norm_p))
#> Warning: Removed 67 rows containing missing values or values outside the scale range
#> (`geom_line()`).
plotPsSlope(ps, ggplot2::aes(x = binned_distance, y = slope))
#> Warning: Removed 67 rows containing missing values or values outside the scale range
#> (`geom_line()`).
# Comparing P(s) curves
c1 <- import(
HiContactsData('yeast_wt', format = 'mcool'),
format = 'mcool',
resolution = 1000,
pairsFile = HiContactsData('yeast_wt', format = 'pairs.gz')
)
#> see ?HiContactsData and browseVignettes('HiContactsData') for documentation
#> loading from cache
#> see ?HiContactsData and browseVignettes('HiContactsData') for documentation
#> loading from cache
c2 <- import(
HiContactsData('yeast_eco1', format = 'mcool'),
format = 'mcool',
resolution = 1000,
pairsFile = HiContactsData('yeast_eco1', format = 'pairs.gz')
)
#> see ?HiContactsData and browseVignettes('HiContactsData') for documentation
#> loading from cache
#> see ?HiContactsData and browseVignettes('HiContactsData') for documentation
#> loading from cache
ps_1 <- distanceLaw(c1) |> mutate(sample = 'WT')
#> Importing pairs file /home/biocbuild/.cache/R/ExperimentHub/2dc6e223949313_7753 in memory. This may take a while...
ps_2 <- distanceLaw(c2) |> mutate(sample = 'Eco1-AID')
#> Importing pairs file /home/biocbuild/.cache/R/ExperimentHub/2dc72849e7f76f_7755 in memory. This may take a while...
ps <- rbind(ps_1, ps_2)
plotPs(ps, ggplot2::aes(x = binned_distance, y = norm_p, group = sample, color = sample))
#> Warning: Removed 134 rows containing missing values or values outside the scale range
#> (`geom_line()`).
plotPsSlope(ps, ggplot2::aes(x = binned_distance, y = slope, group = sample, color = sample))
#> Warning: Removed 135 rows containing missing values or values outside the scale range
#> (`geom_line()`).
sessionInfo()
#> R Under development (unstable) (2024-10-21 r87258)
#> Platform: x86_64-pc-linux-gnu
#> Running under: Ubuntu 24.04.1 LTS
#>
#> Matrix products: default
#> BLAS: /home/biocbuild/bbs-3.21-bioc/R/lib/libRblas.so
#> LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.12.0
#>
#> locale:
#> [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
#> [3] LC_TIME=en_GB LC_COLLATE=C
#> [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
#> [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
#> [9] LC_ADDRESS=C LC_TELEPHONE=C
#> [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
#>
#> time zone: America/New_York
#> tzcode source: system (glibc)
#>
#> attached base packages:
#> [1] stats4 stats graphics grDevices utils datasets methods
#> [8] base
#>
#> other attached packages:
#> [1] InteractionSet_1.35.0 SummarizedExperiment_1.37.0
#> [3] Biobase_2.67.0 MatrixGenerics_1.19.0
#> [5] matrixStats_1.4.1 rtracklayer_1.67.0
#> [7] HiContacts_1.9.0 HiContactsData_1.7.0
#> [9] ExperimentHub_2.15.0 AnnotationHub_3.15.0
#> [11] BiocFileCache_2.15.0 dbplyr_2.5.0
#> [13] HiCExperiment_1.7.0 GenomicRanges_1.59.0
#> [15] GenomeInfoDb_1.43.0 IRanges_2.41.0
#> [17] S4Vectors_0.45.0 BiocGenerics_0.53.0
#> [19] dplyr_1.1.4 ggplot2_3.5.1
#> [21] BiocStyle_2.35.0
#>
#> loaded via a namespace (and not attached):
#> [1] strawr_0.0.92 rstudioapi_0.17.1 jsonlite_1.8.9
#> [4] magrittr_2.0.3 ggbeeswarm_0.7.2 farver_2.1.2
#> [7] rmarkdown_2.28 BiocIO_1.17.0 zlibbioc_1.53.0
#> [10] vctrs_0.6.5 memoise_2.0.1 Cairo_1.6-2
#> [13] Rsamtools_2.23.0 RCurl_1.98-1.16 terra_1.7-83
#> [16] base64enc_0.1-3 htmltools_0.5.8.1 S4Arrays_1.7.0
#> [19] dynamicTreeCut_1.63-1 curl_5.2.3 Rhdf5lib_1.29.0
#> [22] Formula_1.2-5 SparseArray_1.7.0 rhdf5_2.51.0
#> [25] sass_0.4.9 bslib_0.8.0 htmlwidgets_1.6.4
#> [28] impute_1.81.0 cachem_1.1.0 GenomicAlignments_1.43.0
#> [31] mime_0.12 lifecycle_1.0.4 iterators_1.0.14
#> [34] pkgconfig_2.0.3 Matrix_1.7-1 R6_2.5.1
#> [37] fastmap_1.2.0 GenomeInfoDbData_1.2.13 digest_0.6.37
#> [40] colorspace_2.1-1 patchwork_1.3.0 AnnotationDbi_1.69.0
#> [43] RSpectra_0.16-2 Hmisc_5.2-0 RSQLite_2.3.7
#> [46] filelock_1.0.3 labeling_0.4.3 fansi_1.0.6
#> [49] httr_1.4.7 abind_1.4-8 compiler_4.5.0
#> [52] bit64_4.5.2 withr_3.0.2 doParallel_1.0.17
#> [55] backports_1.5.0 htmlTable_2.4.3 BiocParallel_1.41.0
#> [58] DBI_1.2.3 highr_0.11 rappdirs_0.3.3
#> [61] DelayedArray_0.33.0 rjson_0.2.23 tools_4.5.0
#> [64] foreign_0.8-87 vipor_0.4.7 beeswarm_0.4.0
#> [67] nnet_7.3-19 glue_1.8.0 restfulr_0.0.15
#> [70] rhdf5filters_1.19.0 grid_4.5.0 checkmate_2.3.2
#> [73] cluster_2.1.6 generics_0.1.3 gtable_0.3.6
#> [76] tzdb_0.4.0 preprocessCore_1.69.0 tidyr_1.3.1
#> [79] data.table_1.16.2 hms_1.1.3 WGCNA_1.73
#> [82] utf8_1.2.4 XVector_0.47.0 BiocVersion_3.21.1
#> [85] foreach_1.5.2 pillar_1.9.0 stringr_1.5.1
#> [88] vroom_1.6.5 splines_4.5.0 lattice_0.22-6
#> [91] survival_3.7-0 bit_4.5.0 tidyselect_1.2.1
#> [94] GO.db_3.20.0 Biostrings_2.75.0 knitr_1.48
#> [97] gridExtra_2.3 bookdown_0.41 xfun_0.48
#> [100] stringi_1.8.4 UCSC.utils_1.3.0 yaml_2.3.10
#> [103] evaluate_1.0.1 codetools_0.2-20 archive_1.1.9
#> [106] tibble_3.2.1 BiocManager_1.30.25 cli_3.6.3
#> [109] rpart_4.1.23 munsell_0.5.1 jquerylib_0.1.4
#> [112] Rcpp_1.0.13 png_0.1-8 XML_3.99-0.17
#> [115] fastcluster_1.2.6 ggrastr_1.0.2 parallel_4.5.0
#> [118] readr_2.1.5 blob_1.2.4 bitops_1.0-9
#> [121] scales_1.3.0 purrr_1.0.2 crayon_1.5.3
#> [124] rlang_1.1.4 KEGGREST_1.47.0