suppressPackageStartupMessages({
library(SingleCellExperiment)
library(scater)
library(BayesPrism)
library(pheatmap)
})
source("../params.R")Deconvolve bulk timecourse with epithelial atlas
Load packages
Define parameters
bulk <- "data/IVMC.raw_counts.20250403.tab"
sc <- "../scRNAseq_invivo_endometrium_epithelial_cell_atlas/invivo_epithelial_cell_atlas.rds"Session info
sessionInfo()R version 4.4.2 (2024-10-31)
Platform: x86_64-pc-linux-gnu
Running under: Rocky Linux 8.10 (Green Obsidian)
Matrix products: default
BLAS/LAPACK: FlexiBLAS OPENBLAS; LAPACK version 3.12.0
locale:
[1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
[3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
[5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
[7] LC_PAPER=en_US.UTF-8 LC_NAME=C
[9] LC_ADDRESS=C LC_TELEPHONE=C
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
time zone: Europe/Zurich
tzcode source: system (glibc)
attached base packages:
[1] stats4 stats graphics grDevices utils datasets methods
[8] base
other attached packages:
[1] pheatmap_1.0.12 BayesPrism_2.2.2
[3] NMF_0.28 bigmemory_4.6.4
[5] cluster_2.1.6 rngtools_1.5.2
[7] registry_0.5-1 snowfall_1.84-6.3
[9] snow_0.4-4 scater_1.34.1
[11] ggplot2_3.5.2 scuttle_1.16.0
[13] SingleCellExperiment_1.28.1 SummarizedExperiment_1.36.0
[15] Biobase_2.66.0 GenomicRanges_1.58.0
[17] GenomeInfoDb_1.42.3 IRanges_2.40.1
[19] S4Vectors_0.44.0 BiocGenerics_0.52.0
[21] MatrixGenerics_1.18.1 matrixStats_1.5.0
loaded via a namespace (and not attached):
[1] bitops_1.0-9 gridExtra_2.3 rlang_1.1.6
[4] magrittr_2.0.3 gridBase_0.4-7 compiler_4.4.2
[7] vctrs_0.6.5 reshape2_1.4.4 stringr_1.5.1
[10] shape_1.4.6.1 pkgconfig_2.0.3 crayon_1.5.3
[13] fastmap_1.2.0 XVector_0.46.0 caTools_1.18.3
[16] rmarkdown_2.29 UCSC.utils_1.2.0 ggbeeswarm_0.7.2
[19] xfun_0.52 bluster_1.16.0 zlibbioc_1.52.0
[22] beachmat_2.22.0 jsonlite_2.0.0 DelayedArray_0.32.0
[25] uuid_1.2-1 BiocParallel_1.40.2 irlba_2.3.5.1
[28] parallel_4.4.2 R6_2.6.1 stringi_1.8.7
[31] RColorBrewer_1.1-3 limma_3.62.2 Rcpp_1.0.14
[34] iterators_1.0.14 knitr_1.50 Matrix_1.7-1
[37] igraph_2.1.4 tidyselect_1.2.1 dichromat_2.0-0.1
[40] abind_1.4-8 yaml_2.3.10 viridis_0.6.5
[43] doParallel_1.0.17 gplots_3.2.0 codetools_0.2-20
[46] lattice_0.22-6 tibble_3.2.1 plyr_1.8.9
[49] withr_3.0.2 evaluate_1.0.3 circlize_0.4.16
[52] pillar_1.10.2 BiocManager_1.30.25 KernSmooth_2.23-24
[55] foreach_1.5.2 generics_0.1.3 scales_1.3.0.9000
[58] gtools_3.9.5 glue_1.8.0 metapod_1.14.0
[61] tools_4.4.2 BiocNeighbors_2.0.1 ScaledMatrix_1.14.0
[64] locfit_1.5-9.12 scran_1.34.0 grid_4.4.2
[67] edgeR_4.4.2 colorspace_2.1-1 GenomeInfoDbData_1.2.13
[70] beeswarm_0.4.0 BiocSingular_1.22.0 vipor_0.4.7
[73] cli_3.6.4 rsvd_1.0.5 bigmemory.sri_0.1.8
[76] S4Arrays_1.6.0 viridisLite_0.4.2 dplyr_1.1.4
[79] gtable_0.3.6 digest_0.6.37 dqrng_0.4.1
[82] SparseArray_1.6.2 ggrepel_0.9.6 htmlwidgets_1.6.4
[85] farver_2.1.2 htmltools_0.5.8.1 lifecycle_1.0.4
[88] httr_1.4.7 GlobalOptions_0.1.2 statmod_1.5.0
Read data
bulk <- read.delim(bulk, header = TRUE)
sc <- readRDS(sc)
## Add/modify annotations for single-cell data
table(sc$donorCol, useNA = "ifany")
A13 A30 E1 E3 GSM4577306 GSM4577307 GSM4577308
808 4738 90 761 789 2124 4805
GSM4577309 GSM4577310 GSM4577311 GSM4577312 GSM4577313 GSM4577314 GSM4577315
14065 2492 2171 2581 172 1218 1805
GSM6605437 GSM6605438 GSM6605439 GSM6605440 GSM7277296 GSM7277297 GSM7277298
2730 710 1604 1434 1876 657 1127
TB13020851 TB13021372
112 53
sc$annotated_celltype <- as.character(sc$leiden_res0.4_annot)
(tbl <- table(sc$annotated_celltype))
ciliated cycling hormone responsive early
3595 6035 4874
hormone responsive late KRT5+ luminal early
3175 81 1274
luminal late MUC5B+ pre-ciliated
10567 152 178
remodeling secretory early secretory late
734 4299 1265
secretory mid transcriptionally active
11291 1402
(celltypes_to_keep <- names(tbl)[tbl > 10]) [1] "ciliated" "cycling"
[3] "hormone responsive early" "hormone responsive late"
[5] "KRT5+" "luminal early"
[7] "luminal late" "MUC5B+"
[9] "pre-ciliated" "remodeling"
[11] "secretory early" "secretory late"
[13] "secretory mid" "transcriptionally active"
sc <- sc[, sc$annotated_celltype %in% celltypes_to_keep]scater::plotReducedDim(
sc, dimred = "UMAP_fastMNNcorr_sketch",
colour_by = "annotated_celltype"
) + scale_color_manual(values = tissue_leiden_res0.4_annot_colors, name = "")Scale for colour is already present.
Adding another scale for colour, which will replace the existing scale.
Look at distribution of cell types across single-cell samples
tbl <- unclass(table(sc$annotated_celltype, sc$donorCol))
pheatmap::pheatmap(tbl)pheatmap::pheatmap(sweep(tbl, MARGIN = 2, STATS = colSums(tbl), FUN = "/"))Predict cell type composition
bulkmat <- t(as.matrix(bulk))
scmat <- t(as.matrix(assay(sc, "counts")))Warning in asMethod(object): sparse->dense coercion: allocating vector of size
7.4 GiB
## Make sure gene names are the same
ints <- intersect(colnames(bulkmat), colnames(scmat))
length(ints)[1] 20149
bulkmat <- bulkmat[, ints]
scmat <- scmat[, ints]
stopifnot(all(colnames(bulkmat) == colnames(scmat)))
dim(bulkmat)[1] 114 20149
dim(scmat)[1] 48922 20149
sc.stat <- plot.scRNA.outlier(
input = scmat,
cell.type.labels = sc$annotated_celltype,
species = "hs",
return.raw = TRUE
)Gene symbols detected. Recommend to use EMSEMBLE IDs for more unique mapping.
bulk.stat <- plot.bulk.outlier(
bulk.input = bulkmat,
sc.input = scmat,
cell.type.labels = sc$annotated_celltype,
species = "hs",
return.raw = TRUE
)Gene symbols detected. Recommend to use EMSEMBLE IDs for more unique mapping.
scmat.filt <- cleanup.genes(
input = scmat,
input.type = "count.matrix",
species = "hs",
gene.group = c("Rb", "Mrp", "other_Rb", "chrM",
"MALAT1"),
exp.cells = 5
)Gene symbols detected. Recommend to use EMSEMBLE IDs for more unique mapping.
number of genes filtered in each category:
Rb Mrp other_Rb chrM MALAT1
0 12 6 37 0
A total of 54 genes from Rb Mrp other_Rb chrM MALAT1 have been excluded
A total of 2576 gene expressed in fewer than 5 cells have been excluded
plot.bulk.vs.sc(
sc.input = scmat.filt,
bulk.input = bulkmat
)Gene symbols detected. Recommend to use EMSEMBLE IDs for more unique mapping.
scmat.filt.pc <- select.gene.type(
scmat.filt,
gene.type = "protein_coding"
)Gene symbols detected. Recommend to use EMSEMBLE IDs for more unique mapping.
number of genes retained in each category:
protein_coding
16278
plot.cor.phi(
input = scmat.filt.pc,
input.labels = sc$annotated_celltype,
title = "cell type correlation",
cexRow = 0.8, cexCol = 0.8,
margins = c(5, 5)
)diff.exp.stat <- get.exp.stat(
sc.dat = scmat[, colSums(scmat > 0) > 3],
cell.type.labels = sc$annotated_celltype,
cell.state.labels = sc$annotated_celltype,
pseudo.count = 0.1,
cell.count.cutoff = 50,
n.cores = 8L
)
scmat.filt.pc.sig <- select.marker(
sc.dat = scmat.filt.pc,
stat = diff.exp.stat,
pval.max = 0.01,
lfc.min = 0.1
)number of markers selected for each cell type:
cycling : 436
luminal late : 137
ciliated : 213
hormone responsive early : 308
hormone responsive late : 763
secretory mid : 250
secretory early : 81
KRT5+ : 182
transcriptionally active : 33
luminal early : 163
secretory late : 158
remodeling : 0
pre-ciliated : 1857
MUC5B+ : 311
sc$annotated_celltype_grouped <- as.character(sc$annotated_celltype)
## Marker genes
stopifnot(all(rownames(scmat.filt.pc.sig) == colnames(sc)))
myPrism2 <- new.prism(
reference = scmat.filt.pc.sig,
mixture = bulkmat,
input.type = "count.matrix",
cell.type.labels = sc$annotated_celltype_grouped,
cell.state.labels = sc$annotated_celltype_grouped,
key = NULL,
outlier.cut = 0.01,
outlier.fraction = 0.1
)number of cells in each cell state
cell.state.labels
KRT5+ MUC5B+ pre-ciliated
81 152 178
remodeling secretory late luminal early
734 1265 1274
transcriptionally active hormone responsive late ciliated
1402 3175 3595
secretory early hormone responsive early cycling
4299 4874 6035
luminal late secretory mid
10567 11291
No tumor reference is speficied. Reference cell types are treated equally.
Number of outlier genes filtered from mixture = 12
Aligning reference and mixture...
Normalizing reference...
bpres_markers <- run.prism(
prism = myPrism2,
n.cores = 10
)Run Gibbs sampling...
Current time: 2025-07-01 21:05:44.34839
Estimated time to complete: 42mins
Estimated finishing time: 2025-07-01 21:47:13.62839
Start run...
Warning in searchCommandline(parallel, cpus = cpus, type = type, socketHosts =
socketHosts, : Unknown option on commandline: --file
R Version: R version 4.4.2 (2024-10-31)
snowfall 1.84-6.3 initialized (using snow 0.4-4): parallel execution on 10 CPUs.
Stopping cluster
Update the reference matrix ...
Warning in searchCommandline(parallel, cpus = cpus, type = type, socketHosts =
socketHosts, : Unknown option on commandline: --file
snowfall 1.84-6.3 initialized (using snow 0.4-4): parallel execution on 10 CPUs.
Stopping cluster
Run Gibbs sampling using updated reference ...
Current time: 2025-07-01 21:13:40.75125
Estimated time to complete: 16mins
Estimated finishing time: 2025-07-01 21:29:04.27125
Start run...
Warning in searchCommandline(parallel, cpus = cpus, type = type, socketHosts =
socketHosts, : Unknown option on commandline: --file
snowfall 1.84-6.3 initialized (using snow 0.4-4): parallel execution on 10 CPUs.
Stopping cluster
saveRDS(
bpres_markers,
file = "bayesprism_predicted_celltypes_marker_genes.rds"
)