suppressPackageStartupMessages({
library(dplyr)
library(ggplot2)
library(tidyr)
library(cowplot)
library(BayesPrism)
library(tibble)
library(ggnewscale)
library(ggh4x)
library(grid)
library(ggtext)
})
source("../params.R")
source("../helpers.R")
if (!file.exists("paper_figures_deconvolution")) {
dir.create("paper_figures_deconvolution", showWarnings = FALSE, recursive = FALSE)
}Generating plots for paper (deconvolution)
Load packages
Load data
bayesprism <- readRDS("bayesprism_predicted_celltypes_marker_genes.rds")
counts <- read.delim("data/IVMC.raw_counts.20250403.tab", header = TRUE)
annot <- data.frame(names = colnames(counts)) |>
separate_wider_delim(cols = names, delim = "_",
names = c("donor", "timepoint", "breakdown", "hormones"),
cols_remove = FALSE) |>
as.data.frame()
rownames(annot) <- annot$names
annot$protocol <- case_when(
annot$hormones == "H" & annot$timepoint %in% c("tp1", "tp2", "tp3") ~ "IVMC",
annot$hormones == "H" & annot$breakdown == "B" &
annot$timepoint %in% c("tp4", "tp5", "tp6") ~ "IVMC",
annot$timepoint == "tp0" ~ "IVMC",
annot$hormones == "NH" & annot$timepoint %in% c("tp1", "tp2", "tp3") ~ "B_NH",
annot$hormones == "NH" & annot$breakdown == "B" &
annot$timepoint %in% c("tp4", "tp5", "tp6") ~ "B_NH",
.default = NA_character_
)
annot$timepointexp <- bulk_timepoint_map_mll[annot$timepoint]
annot$timepointexp <- factor(annot$timepointexp, levels = unname(bulk_timepoint_map_mll))Reformat data
mprop <- get.fraction(bp = bayesprism,
which.theta = "final",
state.or.type = "type")
cdm <- as.data.frame(annot[rownames(mprop), , drop = FALSE])
mprop <- as.data.frame(mprop) |>
rownames_to_column("names") |>
pivot_longer(names_to = "celltype", values_to = "proportion",
-names) |>
left_join(cdm) |>
group_by(celltype) |>
mutate(ndet = sum(proportion > 0.01)) |>
filter(!is.na(protocol)) Joining with `by = join_by(names)`
# Replicate the pre-diff timepoint also for the B_NH protocol
prediff <- mprop |>
filter(timepointexp == "Pre-diff")
prediff_bnh <- prediff |>
mutate(protocol = "B_NH")
mprop <- bind_rows(mprop, prediff_bnh)Figure 2D, Extended Data 8A (deconvolution)
Keep only cell types for which the proportion in at least two samples is above 1%. Put together t0-t3 (no breakdown) and t4-t6 (breakdown)
(p_area_all <- ggplot(mprop |> filter(ndet >= 2),
aes(x = timepointexp, y = proportion,
group = celltype, fill = celltype)) +
geom_area() +
scale_fill_manual(values = tissue_leiden_res0.4_annot_colors,
labels = tissue_leiden_res0.4_annot_labels,
name = "") +
guides(fill = guide_legend(nrow = 5)) +
new_scale_fill() +
geom_rect(data = data.frame(
donor = "B080", protocol = rep(c("IVMC", "B_NH"), c(7, 7)),
timepointexp = c(levels(mprop$timepointexp),
levels(mprop$timepointexp)),
xmin = c(1:7, 1:7) - 0.45, xmax = c(1:7, 1:7) + 0.45,
ymin = rep(0, 14), ymax = rep(-0.85, 14)),
aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax,
fill = timepointexp), inherit.aes = FALSE,
show.legend = FALSE) +
scale_fill_manual(values = alpha(bulk_timepoint_colors, 0.5)) +
theme_cowplot() +
labs(x = "", y = "Proportion") +
scale_y_continuous(expand = expansion(mult = c(0, 0), add = c(0, 0))) +
coord_cartesian(clip = "off", ylim = c(0, 1.05)) +
theme(legend.position = "bottom",
axis.text.x = element_text(size = 16, angle = 90, hjust = 0.5, vjust = 0.5),
axis.title = element_text(size = 20),
legend.text = element_markdown(size = 16),
strip.text = element_text(size = 16),
panel.border = element_rect(fill = NA, colour = "grey20"),
strip.background = element_rect(color = "black", fill = "white")) +
facet_grid(donor ~ protocol, scales = "free_x", space = "free_x"))pdf("paper_figures_deconvolution/areaplot_allclusters.pdf", height = 18, width = 9)
p_area_all
dev.off()png
2
# now do the same with average values for all lines
table(paste(mprop$timepointexp, mprop$breakdown, mprop$hormones, sep = "_"),
mprop$protocol, useNA = "ifany")
B_NH IVMC
E2-diff\n24h_B_H 0 84
E2-diff\n24h_B_NH 84 0
E2-diff\n48h_B_H 0 84
E2-diff\n48h_B_NH 84 0
Horm\nWithdr 24h_NB_H 0 84
Horm\nWithdr 24h_NB_NH 84 0
Horm\nWithdr 48h_NB_H 0 84
Horm\nWithdr 48h_NB_NH 84 0
P4-diff_NB_H 0 84
P4-diff_NB_NH 84 0
Post-breakdown\n24h_B_H 0 84
Post-breakdown\n24h_B_NH 84 0
Pre-diff_NB_NH 84 84
(p_area_avg <- ggplot(mprop |>
filter(ndet >= 2) |>
group_by(celltype, timepointexp, protocol) |>
summarize(proportion = mean(proportion), .groups = "drop") |>
filter(protocol == "IVMC"),
aes(x = timepointexp, y = proportion,
group = celltype, fill = celltype)) +
geom_area() +
scale_fill_manual(values = tissue_leiden_res0.4_annot_colors, name = "",
labels = tissue_leiden_res0.4_annot_labels) +
new_scale_fill() +
geom_rect(data = data.frame(
timepointexp = levels(mprop$timepointexp),
xmin = (1:7) - 0.35, xmax = (1:7) + 0.35,
ymin = rep(0, 7), ymax = rep(-0.95, 7)),
aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax,
fill = timepointexp), inherit.aes = FALSE,
show.legend = FALSE) +
scale_fill_manual(values = alpha(bulk_timepoint_colors, 0.5)) +
theme_cowplot() +
labs(x = "", y = "Proportion") +
scale_y_continuous(expand = expansion(mult = c(0, 0), add = c(0, 0))) +
coord_cartesian(clip = "off", ylim = c(0, 1.05)) +
theme(legend.position = "bottom",
legend.text = element_markdown(),
axis.text.y = element_text(size = 16),
axis.title.y = element_text(size = 20),
axis.text.x = element_text(size = 20, angle = 90, hjust = 0.5, vjust = 0.5)))pdf("paper_figures_deconvolution/areaplot_allclusters_avg_donors.pdf", height = 5, width = 10)
p_area_avg + theme(legend.position = "none")
dev.off()png
2
png("paper_figures_deconvolution/areaplot_allclusters_avg_donors.png", height = 5, width = 10,
unit = "in", res = 400)
p_area_avg + theme(legend.position = "none")
dev.off()png
2
pdf("paper_figures_deconvolution/areaplot_allclusters_avg_donors_legend.pdf",
height = 1.75, width = 11)
grid.newpage()
grid.draw(get_legend2(p_area_avg + theme(legend.position = "bottom",
legend.text = element_markdown(size = 12))))
dev.off()png
2
Figure 2E, Extended Data 8B (deconvolution box plots)
mprop <- mprop |>
mutate(celltypenew = tissue_leiden_res0.4_annot_labels[celltype])
(p_box <- ggplot(
mprop |>
filter(ndet >= 2) |>
mutate(celltypenew = replace(celltypenew, celltypenew == "hormone responsive PGR<sup>high</sup>",
"hormone<br>responsive<br>PGR<sup>high</high>")) |>
mutate(celltypenew = replace(celltypenew, celltypenew == "hormone responsive PGR<sup>low</sup>",
"hormone<br>responsive<br>PGR<sup>low</sup>")) |>
mutate(celltypenew = replace(celltypenew, celltypenew == "secretory late",
"secretory<br>late")) |>
mutate(celltypenew = replace(celltypenew, celltypenew == "secretory mid",
"secretory<br>mid")) |>
mutate(celltypenew = replace(celltypenew, celltypenew == "secretory early",
"secretory<br>early")) |>
mutate(celltypenew = replace(celltypenew, celltypenew == "receptivity associated luminal",
"receptivity<br>associated<br>luminal")) |>
mutate(celltypenew = replace(celltypenew, celltypenew == "regeneration associated luminal",
"regeneration<br>associated<br>luminal")) |>
mutate(celltypenew = replace(celltypenew, celltypenew == "transcriptionally active",
"trans-<br>criptionally<br>active")),
aes(x = timepointexp, y = proportion)) +
geom_boxplot(position = position_dodge2(0.75, preserve = "single"),
outlier.size = -1, alpha = 0.5) +
geom_jitter(position = position_dodge2(0.75, preserve = "single"),
size = 2, aes(color = donor)) +
geom_rect(data = data.frame(
celltypenew = "VIM<sup>+</sup>",
protocol = rep(c("IVMC", "B_NH"), c(7, 7)),
timepointexp = c(levels(mprop$timepointexp),
levels(mprop$timepointexp)),
xmin = c(1:7, 1:7) - 0.45, xmax = c(1:7, 1:7) + 0.45,
ymin = rep(-0.05, 14), ymax = rep(-0.9, 14)),
aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax,
fill = timepointexp), inherit.aes = FALSE,
show.legend = FALSE) +
scale_fill_manual(values = alpha(bulk_timepoint_colors, 0.5)) +
guides(color = guide_legend(override.aes = list(size = 4))) +
facet_grid(celltypenew ~ protocol,
scales = "free") +
labs(x = "", y = "Proportion") +
scale_y_continuous(expand = expansion(mult = c(0, 0), add = c(0.05, 0.05))) +
coord_cartesian(clip = "off", ylim = c(0, NA)) +
theme_cowplot() +
theme(axis.text.y = element_text(size = 12),
axis.text.x = element_text(size = 16, angle = 90, hjust = 0.5, vjust = 0.5),
axis.title = element_text(size = 20),
strip.text.y = element_markdown(size = 14),
strip.text.x = element_markdown(size = 14),
panel.border = element_rect(fill = NA, colour = "grey20"),
strip.background = element_rect(fill = "white", color = "black")) +
scale_color_manual(values = donor_colors, name = "Donor"))pdf("paper_figures_deconvolution/boxplot_allclusters_proportions.pdf", height = 18, width = 9)
p_box + theme(legend.position = "none")
dev.off()png
2
pdf("paper_figures_deconvolution/boxplot_allclusters_proportions_legend.pdf", height = 3, width = 2)
grid.newpage()
grid.draw(get_legend2(p_box + theme(legend.position = "right",
legend.text = element_text(size = 14),
legend.title = element_text(size = 16))))
dev.off()png
2
mpropsub <- mprop |>
filter(celltypenew %in% c("cycling", "regeneration associated luminal"),
protocol == "IVMC") |>
mutate(celltypenew = factor(celltypenew,
levels = c("regeneration associated luminal",
"cycling")),
celltype = factor(celltype,
levels = c("luminal early", "cycling")))
sz <- mpropsub |>
group_by(celltype, celltypenew) |>
summarize(rng = 0.1 + 1.0 * (max(proportion) - min(proportion)),
.groups = "drop")
(p_box_sub <- ggplot(mpropsub,
aes(x = timepointexp, y = proportion)) +
geom_boxplot(position = position_dodge2(0.75, preserve = "single"),
outlier.size = -1, alpha = 0.5) +
geom_jitter(position = position_dodge2(0.75, preserve = "single"),
size = 2, aes(color = donor)) +
geom_rect(data = data.frame(
timepointexp = rep(levels(mpropsub$timepointexp), nlevels(mpropsub$celltypenew)),
celltypenew = factor(rep(levels(mpropsub$celltypenew), each = nlevels(mpropsub$timepointexp)),
c("regeneration associated luminal", "cycling")),
xmin = rep((1:nlevels(mpropsub$timepointexp)) - 0.35, nlevels(mpropsub$celltypenew)),
xmax = rep((1:nlevels(mpropsub$timepointexp)) + 0.35, nlevels(mpropsub$celltypenew)),
ymin = rep(rep(-0.05, nlevels(mpropsub$timepointexp)), nlevels(mpropsub$celltypenew)),
ymax = rep(rep(-0.9, nlevels(mpropsub$timepointexp)), nlevels(mpropsub$celltypenew))) |>
left_join(sz) |> mutate(ymax = ymax * rng / rng[celltypenew == "cycling"]),
aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax,
fill = timepointexp), inherit.aes = FALSE,
show.legend = FALSE) +
scale_fill_manual(values = alpha(bulk_timepoint_colors, 0.5)) +
facet_wrap2(
~ celltypenew, nrow = 1,
strip = strip_themed(clip = "off", background_x = elem_list_rect(
fill = alpha(tissue_leiden_res0.4_annot_colors[levels(mpropsub$celltype)], 1))),
scales = "free_y") +
labs(x = "", y = "Proportion") +
scale_y_continuous(expand = expansion(mult = c(0, 0), add = c(0.05, 0.05))) +
coord_cartesian(clip = "off", ylim = c(0, NA)) +
theme_cowplot() +
theme(strip.text = element_text(size = 20, hjust = 0.5,
margin = margin(0.15, 0, 0.15, 0, "cm")),
strip.background = element_rect(fill = "white", color = "black"),
axis.text.x = element_text(size = 20, angle = 90, hjust = 0.5, vjust = 0.5),
axis.text.y = element_text(size = 16),
legend.title = element_text(size = 20),
legend.text = element_text(size = 20),
panel.border = element_rect(fill = NA, colour = "grey20"),
axis.title = element_text(size = 20)) +
scale_color_manual(values = donor_colors, name = "Donor"))Joining with `by = join_by(celltypenew)`
pdf("paper_figures_deconvolution/boxplot_sub_proportions.pdf", height = 5.5, width = 15)
p_box_sub
dev.off()png
2
for (ct in c("cycling", "regeneration associated luminal")) {
tmp <- mpropsub |> filter(celltypenew == ct) |> droplevels()
(p_box_sub <- ggplot(tmp, aes(x = timepointexp, y = proportion)) +
geom_boxplot(position = position_dodge2(0.75, preserve = "single"),
outlier.size = -1, alpha = 0.5) +
geom_jitter(position = position_dodge2(0.75, preserve = "single"),
size = 2, aes(color = donor)) +
geom_rect(data = data.frame(
timepointexp = rep(levels(tmp$timepointexp), 1),
celltypenew = rep(ct, each = 7),
xmin = rep((1:7) - 0.35, 1), xmax = rep((1:7) + 0.35, 1),
ymin = rep(rep(-0.05, 7), 1), ymax = rep(rep(-0.9, 7), 1)) |>
left_join(sz) |> mutate(ymax = ymax * rng / sz$rng[sz$celltype == "cycling"]),
aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax,
fill = timepointexp), inherit.aes = FALSE,
show.legend = FALSE) +
scale_fill_manual(values = alpha(bulk_timepoint_colors, 0.5)) +
facet_wrap2(
~ celltypenew, nrow = 1,
strip = strip_themed(clip = "off", background_x = elem_list_rect(
fill = alpha(tissue_leiden_res0.4_annot_colors[levels(tmp$celltype)], 1))),
scales = "free_y") +
labs(x = "", y = "Proportion") +
scale_y_continuous(expand = expansion(mult = c(0, 0), add = c(0.05, 0.05))) +
coord_cartesian(clip = "off", ylim = c(0, NA)) +
theme_cowplot() +
theme(strip.text = element_text(size = 20, hjust = 0.5,
margin = margin(0.15, 0, 0.15, 0, "cm")),
strip.background = element_rect(fill = "white", color = "black"),
axis.text.x = element_text(size = 20, angle = 90, hjust = 0.5, vjust = 0.5),
axis.text.y = element_text(size = 16),
legend.title = element_text(size = 20),
legend.text = element_text(size = 20),
panel.border = element_rect(fill = NA, colour = "grey20"),
axis.title = element_text(size = 20)) +
scale_color_manual(values = donor_colors, name = "Donor"))
pdf(paste0("paper_figures_deconvolution/boxplot_sub_proportions_", ct, ".pdf"),
height = 5.75, width = 9)
print(p_box_sub)
dev.off()
}Joining with `by = join_by(celltypenew)`
Joining with `by = join_by(celltypenew)`
Session info
sessionInfo()R version 4.4.2 (2024-10-31)
Platform: x86_64-pc-linux-gnu
Running under: Rocky Linux 8.10 (Green Obsidian)
Matrix products: default
BLAS/LAPACK: FlexiBLAS OPENBLAS; LAPACK version 3.12.0
locale:
[1] LC_CTYPE=C.UTF-8 LC_NUMERIC=C LC_TIME=C.UTF-8
[4] LC_COLLATE=C.UTF-8 LC_MONETARY=C.UTF-8 LC_MESSAGES=C.UTF-8
[7] LC_PAPER=C.UTF-8 LC_NAME=C LC_ADDRESS=C
[10] LC_TELEPHONE=C LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C
time zone: Europe/Zurich
tzcode source: system (glibc)
attached base packages:
[1] grid stats graphics grDevices utils datasets methods
[8] base
other attached packages:
[1] ggtext_0.1.2 ggh4x_0.3.0 ggnewscale_0.5.1
[4] tibble_3.2.1 BayesPrism_2.2.2 NMF_0.28
[7] bigmemory_4.6.4 Biobase_2.66.0 BiocGenerics_0.52.0
[10] cluster_2.1.6 rngtools_1.5.2 registry_0.5-1
[13] snowfall_1.84-6.3 snow_0.4-4 cowplot_1.1.3
[16] tidyr_1.3.1 ggplot2_3.5.2 dplyr_1.1.4
loaded via a namespace (and not attached):
[1] bitops_1.0-9 rlang_1.1.6
[3] magrittr_2.0.3 gridBase_0.4-7
[5] matrixStats_1.5.0 compiler_4.4.2
[7] vctrs_0.6.5 reshape2_1.4.4
[9] stringr_1.5.1 shape_1.4.6.1
[11] pkgconfig_2.0.3 crayon_1.5.3
[13] fastmap_1.2.0 XVector_0.46.0
[15] labeling_0.4.3 scuttle_1.16.0
[17] caTools_1.18.3 rmarkdown_2.29
[19] markdown_2.0 UCSC.utils_1.2.0
[21] purrr_1.0.4 xfun_0.52
[23] bluster_1.16.0 zlibbioc_1.52.0
[25] beachmat_2.22.0 litedown_0.7
[27] GenomeInfoDb_1.42.3 jsonlite_2.0.0
[29] DelayedArray_0.32.0 uuid_1.2-1
[31] BiocParallel_1.40.2 irlba_2.3.5.1
[33] parallel_4.4.2 R6_2.6.1
[35] stringi_1.8.7 RColorBrewer_1.1-3
[37] limma_3.62.2 GenomicRanges_1.58.0
[39] Rcpp_1.0.14 SummarizedExperiment_1.36.0
[41] iterators_1.0.14 knitr_1.50
[43] IRanges_2.40.1 Matrix_1.7-1
[45] igraph_2.1.4 tidyselect_1.2.1
[47] dichromat_2.0-0.1 abind_1.4-8
[49] yaml_2.3.10 doParallel_1.0.17
[51] gplots_3.2.0 codetools_0.2-20
[53] lattice_0.22-6 plyr_1.8.9
[55] withr_3.0.2 evaluate_1.0.3
[57] xml2_1.3.8 circlize_0.4.16
[59] pillar_1.10.2 BiocManager_1.30.25
[61] MatrixGenerics_1.18.1 KernSmooth_2.23-24
[63] foreach_1.5.2 stats4_4.4.2
[65] generics_0.1.3 S4Vectors_0.44.0
[67] commonmark_1.9.5 scales_1.3.0.9000
[69] gtools_3.9.5 glue_1.8.0
[71] metapod_1.14.0 tools_4.4.2
[73] BiocNeighbors_2.0.1 ScaledMatrix_1.14.0
[75] locfit_1.5-9.12 scran_1.34.0
[77] edgeR_4.4.2 colorspace_2.1-1
[79] SingleCellExperiment_1.28.1 GenomeInfoDbData_1.2.13
[81] BiocSingular_1.22.0 cli_3.6.4
[83] rsvd_1.0.5 bigmemory.sri_0.1.8
[85] S4Arrays_1.6.0 gtable_0.3.6
[87] digest_0.6.37 SparseArray_1.6.2
[89] dqrng_0.4.1 htmlwidgets_1.6.4
[91] farver_2.1.2 htmltools_0.5.8.1
[93] lifecycle_1.0.4 httr_1.4.7
[95] GlobalOptions_0.1.2 statmod_1.5.0
[97] gridtext_0.1.5