suppressPackageStartupMessages({
library(dplyr)
library(ggplot2)
library(tidyr)
library(cowplot)
library(BayesPrism)
library(tibble)
library(ggnewscale)
library(ggh4x)
library(grid)
})
source("../params.R")
source("../helpers.R")
if (!file.exists("paper_figures_deconvolution")) {
dir.create("paper_figures_deconvolution", showWarnings = FALSE, recursive = FALSE)
}Generating plots for paper (deconvolution)
Load packages
Load data
bayesprism <- readRDS("bayesprism_predicted_celltypes_marker_genes.rds")
counts <- read.delim("data/IVMC.raw_counts.20250403.tab", header = TRUE)
annot <- data.frame(names = colnames(counts)) |>
separate_wider_delim(cols = names, delim = "_",
names = c("donor", "timepoint", "breakdown", "hormones"),
cols_remove = FALSE) |>
as.data.frame()
rownames(annot) <- annot$names
annot$protocol <- case_when(
annot$hormones == "H" & annot$timepoint %in% c("tp1", "tp2", "tp3") ~ "IVMC",
annot$hormones == "H" & annot$breakdown == "B" &
annot$timepoint %in% c("tp4", "tp5", "tp6") ~ "IVMC",
annot$timepoint == "tp0" ~ "IVMC",
annot$hormones == "NH" & annot$timepoint %in% c("tp1", "tp2", "tp3") ~ "B_NH",
annot$hormones == "NH" & annot$breakdown == "B" &
annot$timepoint %in% c("tp4", "tp5", "tp6") ~ "B_NH",
.default = NA_character_
)
annot$timepointexp <- bulk_timepoint_map_mll[annot$timepoint]
annot$timepointexp <- factor(annot$timepointexp, levels = unname(bulk_timepoint_map_mll))Reformat data
mprop <- get.fraction(bp = bayesprism,
which.theta = "final",
state.or.type = "type")
cdm <- as.data.frame(annot[rownames(mprop), , drop = FALSE])
mprop <- as.data.frame(mprop) |>
rownames_to_column("names") |>
pivot_longer(names_to = "celltype", values_to = "proportion",
-names) |>
left_join(cdm) |>
group_by(celltype) |>
mutate(ndet = sum(proportion > 0.01)) |>
filter(!is.na(protocol)) Joining with `by = join_by(names)`
# Replicate the pre-diff timepoint also for the B_NH protocol
prediff <- mprop |>
filter(timepointexp == "Pre-diff")
prediff_bnh <- prediff |>
mutate(protocol = "B_NH")
mprop <- bind_rows(mprop, prediff_bnh)Figure 2D, Extended Data 8A (deconvolution)
Keep only cell types for which the proportion in at least two samples is above 1%. Put together t0-t3 (no breakdown) and t4-t6 (breakdown)
(p_area_all <- ggplot(mprop |> filter(ndet >= 2),
aes(x = timepointexp, y = proportion,
group = celltype, fill = celltype)) +
geom_area() +
scale_fill_manual(values = tissue_leiden_res0.4_annot_colors,
name = "") +
guides(fill = guide_legend(nrow = 5)) +
new_scale_fill() +
geom_rect(data = data.frame(
donor = "B080", protocol = rep(c("IVMC", "B_NH"), c(7, 7)),
timepointexp = c(levels(mprop$timepointexp),
levels(mprop$timepointexp)),
xmin = c(1:7, 1:7) - 0.45, xmax = c(1:7, 1:7) + 0.45,
ymin = rep(0, 14), ymax = rep(-0.85, 14)),
aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax,
fill = timepointexp), inherit.aes = FALSE,
show.legend = FALSE) +
scale_fill_manual(values = alpha(bulk_timepoint_colors, 0.5)) +
theme_cowplot() +
labs(x = "", y = "Proportion") +
scale_y_continuous(expand = expansion(mult = c(0, 0), add = c(0, 0))) +
coord_cartesian(clip = "off", ylim = c(0, 1.05)) +
theme(legend.position = "bottom",
axis.text.x = element_text(size = 16, angle = 90, hjust = 0.5, vjust = 0.5),
axis.title = element_text(size = 20),
legend.text = element_text(size = 16),
strip.text = element_text(size = 16),
panel.border = element_rect(fill = NA, colour = "grey20"),
strip.background = element_rect(color = "black", fill = "white")) +
facet_grid(donor ~ protocol, scales = "free_x", space = "free_x"))pdf("paper_figures_deconvolution/areaplot_allclusters.pdf", height = 18, width = 9)
p_area_all
dev.off()png
2
# now do the same with average values for all lines
table(paste(mprop$timepointexp, mprop$breakdown, mprop$hormones, sep = "_"),
mprop$protocol, useNA = "ifany")
B_NH IVMC
E2-diff\n24h_B_H 0 84
E2-diff\n24h_B_NH 84 0
E2-diff\n48h_B_H 0 84
E2-diff\n48h_B_NH 84 0
Horm\nWithdr 24h_NB_H 0 84
Horm\nWithdr 24h_NB_NH 84 0
Horm\nWithdr 48h_NB_H 0 84
Horm\nWithdr 48h_NB_NH 84 0
P4-diff_NB_H 0 84
P4-diff_NB_NH 84 0
Post-breakdown\n24h_B_H 0 84
Post-breakdown\n24h_B_NH 84 0
Pre-diff_NB_NH 84 84
(p_area_avg <- ggplot(mprop |>
filter(ndet >= 2) |>
group_by(celltype, timepointexp, protocol) |>
summarize(proportion = mean(proportion), .groups = "drop") |>
filter(protocol == "IVMC"),
aes(x = timepointexp, y = proportion,
group = celltype, fill = celltype)) +
geom_area() +
scale_fill_manual(values = tissue_leiden_res0.4_annot_colors, name = "") +
new_scale_fill() +
geom_rect(data = data.frame(
timepointexp = levels(mprop$timepointexp),
xmin = (1:7) - 0.35, xmax = (1:7) + 0.35,
ymin = rep(0, 7), ymax = rep(-0.95, 7)),
aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax,
fill = timepointexp), inherit.aes = FALSE,
show.legend = FALSE) +
scale_fill_manual(values = alpha(bulk_timepoint_colors, 0.5)) +
theme_cowplot() +
labs(x = "", y = "Proportion") +
scale_y_continuous(expand = expansion(mult = c(0, 0), add = c(0, 0))) +
coord_cartesian(clip = "off", ylim = c(0, 1.05)) +
theme(legend.position = "bottom",
axis.text.y = element_text(size = 16),
axis.title.y = element_text(size = 20),
axis.text.x = element_text(size = 20, angle = 90, hjust = 0.5, vjust = 0.5)))pdf("paper_figures_deconvolution/areaplot_allclusters_avg_donors.pdf", height = 5, width = 10)
p_area_avg + theme(legend.position = "none")
dev.off()png
2
png("paper_figures_deconvolution/areaplot_allclusters_avg_donors.png", height = 5, width = 10,
unit = "in", res = 400)
p_area_avg + theme(legend.position = "none")
dev.off()png
2
pdf("paper_figures_deconvolution/areaplot_allclusters_avg_donors_legend.pdf",
height = 1.75, width = 11)
grid.newpage()
grid.draw(get_legend2(p_area_avg + theme(legend.position = "bottom",
legend.text = element_text(size = 12))))
dev.off()png
2
Figure 2E, Extended Data 8B (deconvolution box plots)
(p_box <- ggplot(
mprop |>
filter(ndet >= 2) |>
mutate(celltype = replace(celltype, celltype == "hormone responsive late",
"hormone\nresponsive\nlate")) |>
mutate(celltype = replace(celltype, celltype == "hormone responsive early",
"hormone\nresponsive\nearly")) |>
mutate(celltype = replace(celltype, celltype == "secretory late",
"secretory\nlate")) |>
mutate(celltype = replace(celltype, celltype == "secretory mid",
"secretory\nmid")) |>
mutate(celltype = replace(celltype, celltype == "secretory early",
"secretory\nearly")) |>
mutate(celltype = replace(celltype, celltype == "luminal late",
"luminal\nlate")) |>
mutate(celltype = replace(celltype, celltype == "luminal early",
"luminal\nearly")) |>
mutate(celltype = replace(celltype, celltype == "transcriptionally active",
"trans-\ncriptionally\nactive")),
aes(x = timepointexp, y = proportion)) +
geom_boxplot(position = position_dodge2(0.75, preserve = "single"),
outlier.size = -1, alpha = 0.5) +
geom_jitter(position = position_dodge2(0.75, preserve = "single"),
size = 2, aes(color = donor)) +
geom_rect(data = data.frame(
celltype = "trans-\ncriptionally\nactive",
protocol = rep(c("IVMC", "B_NH"), c(7, 7)),
timepointexp = c(levels(mprop$timepointexp),
levels(mprop$timepointexp)),
xmin = c(1:7, 1:7) - 0.45, xmax = c(1:7, 1:7) + 0.45,
ymin = rep(-0.05, 14), ymax = rep(-0.9, 14)),
aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax,
fill = timepointexp), inherit.aes = FALSE,
show.legend = FALSE) +
scale_fill_manual(values = alpha(bulk_timepoint_colors, 0.5)) +
guides(color = guide_legend(override.aes = list(size = 4))) +
facet_grid(celltype ~ protocol,
scales = "free") +
labs(x = "", y = "Proportion") +
scale_y_continuous(expand = expansion(mult = c(0, 0), add = c(0.05, 0.05))) +
coord_cartesian(clip = "off", ylim = c(0, NA)) +
theme_cowplot() +
theme(axis.text.y = element_text(size = 12),
axis.text.x = element_text(size = 16, angle = 90, hjust = 0.5, vjust = 0.5),
axis.title = element_text(size = 20),
strip.text = element_text(size = 14),
panel.border = element_rect(fill = NA, colour = "grey20"),
strip.background = element_rect(fill = "white", color = "black")) +
scale_color_manual(values = donor_colors, name = "Donor"))pdf("paper_figures_deconvolution/boxplot_allclusters_proportions.pdf", height = 18, width = 9)
p_box + theme(legend.position = "none")
dev.off()png
2
pdf("paper_figures_deconvolution/boxplot_allclusters_proportions_legend.pdf", height = 3, width = 2)
grid.newpage()
grid.draw(get_legend2(p_box + theme(legend.position = "right",
legend.text = element_text(size = 14),
legend.title = element_text(size = 16))))
dev.off()png
2
mpropsub <- mprop |>
filter(celltype %in% c("cycling", "luminal early"),
protocol == "IVMC") |>
mutate(celltype = factor(celltype,
levels = c("luminal early",
"cycling")))
sz <- mpropsub |>
group_by(celltype) |>
summarize(rng = 0.1 + 1.0 * (max(proportion) - min(proportion)))
(p_box_sub <- ggplot(mpropsub,
aes(x = timepointexp, y = proportion)) +
geom_boxplot(position = position_dodge2(0.75, preserve = "single"),
outlier.size = -1, alpha = 0.5) +
geom_jitter(position = position_dodge2(0.75, preserve = "single"),
size = 2, aes(color = donor)) +
geom_rect(data = data.frame(
timepointexp = rep(levels(mpropsub$timepointexp), nlevels(mpropsub$celltype)),
celltype = factor(rep(levels(mpropsub$celltype), each = nlevels(mpropsub$timepointexp)),
c("luminal early", "cycling")),
xmin = rep((1:nlevels(mpropsub$timepointexp)) - 0.35, nlevels(mpropsub$celltype)),
xmax = rep((1:nlevels(mpropsub$timepointexp)) + 0.35, nlevels(mpropsub$celltype)),
ymin = rep(rep(-0.05, nlevels(mpropsub$timepointexp)), nlevels(mpropsub$celltype)),
ymax = rep(rep(-0.9, nlevels(mpropsub$timepointexp)), nlevels(mpropsub$celltype))) |>
left_join(sz) |> mutate(ymax = ymax * rng / rng[celltype == "cycling"]),
aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax,
fill = timepointexp), inherit.aes = FALSE,
show.legend = FALSE) +
scale_fill_manual(values = alpha(bulk_timepoint_colors, 0.5)) +
facet_wrap2(
~ celltype, nrow = 1,
strip = strip_themed(clip = "off", background_x = elem_list_rect(
fill = alpha(tissue_leiden_res0.4_annot_colors[levels(mpropsub$celltype)], 1))),
scales = "free_y") +
labs(x = "", y = "Proportion") +
scale_y_continuous(expand = expansion(mult = c(0, 0), add = c(0.05, 0.05))) +
coord_cartesian(clip = "off", ylim = c(0, NA)) +
theme_cowplot() +
theme(strip.text = element_text(size = 20, hjust = 0.5,
margin = margin(0.15, 0, 0.15, 0, "cm")),
strip.background = element_rect(fill = "white", color = "black"),
axis.text.x = element_text(size = 20, angle = 90, hjust = 0.5, vjust = 0.5),
axis.text.y = element_text(size = 16),
legend.title = element_text(size = 20),
legend.text = element_text(size = 20),
panel.border = element_rect(fill = NA, colour = "grey20"),
axis.title = element_text(size = 20)) +
scale_color_manual(values = donor_colors, name = "Donor"))Joining with `by = join_by(celltype)`
pdf("paper_figures_deconvolution/boxplot_sub_proportions.pdf", height = 5.5, width = 15)
p_box_sub
dev.off()png
2
for (ct in c("cycling", "luminal early")) {
tmp <- mpropsub |> filter(celltype == ct) |> droplevels()
(p_box_sub <- ggplot(tmp, aes(x = timepointexp, y = proportion)) +
geom_boxplot(position = position_dodge2(0.75, preserve = "single"),
outlier.size = -1, alpha = 0.5) +
geom_jitter(position = position_dodge2(0.75, preserve = "single"),
size = 2, aes(color = donor)) +
geom_rect(data = data.frame(
timepointexp = rep(levels(tmp$timepointexp), 1),
celltype = rep(ct, each = 7),
xmin = rep((1:7) - 0.35, 1), xmax = rep((1:7) + 0.35, 1),
ymin = rep(rep(-0.05, 7), 1), ymax = rep(rep(-0.9, 7), 1)) |>
left_join(sz) |> mutate(ymax = ymax * rng / sz$rng[sz$celltype == "cycling"]),
aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax,
fill = timepointexp), inherit.aes = FALSE,
show.legend = FALSE) +
scale_fill_manual(values = alpha(bulk_timepoint_colors, 0.5)) +
facet_wrap2(
~ celltype, nrow = 1,
strip = strip_themed(clip = "off", background_x = elem_list_rect(
fill = alpha(tissue_leiden_res0.4_annot_colors[levels(tmp$celltype)], 1))),
scales = "free_y") +
labs(x = "", y = "Proportion") +
scale_y_continuous(expand = expansion(mult = c(0, 0), add = c(0.05, 0.05))) +
coord_cartesian(clip = "off", ylim = c(0, NA)) +
theme_cowplot() +
theme(strip.text = element_text(size = 20, hjust = 0.5,
margin = margin(0.15, 0, 0.15, 0, "cm")),
strip.background = element_rect(fill = "white", color = "black"),
axis.text.x = element_text(size = 20, angle = 90, hjust = 0.5, vjust = 0.5),
axis.text.y = element_text(size = 16),
legend.title = element_text(size = 20),
legend.text = element_text(size = 20),
panel.border = element_rect(fill = NA, colour = "grey20"),
axis.title = element_text(size = 20)) +
scale_color_manual(values = donor_colors, name = "Donor"))
pdf(paste0("paper_figures_deconvolution/boxplot_sub_proportions_", ct, ".pdf"),
height = 5.75, width = 9)
print(p_box_sub)
dev.off()
}Joining with `by = join_by(celltype)`
Joining with `by = join_by(celltype)`
Session info
sessionInfo()R version 4.4.2 (2024-10-31)
Platform: x86_64-pc-linux-gnu
Running under: Rocky Linux 8.10 (Green Obsidian)
Matrix products: default
BLAS/LAPACK: FlexiBLAS OPENBLAS; LAPACK version 3.12.0
locale:
[1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
[3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
[5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
[7] LC_PAPER=en_US.UTF-8 LC_NAME=C
[9] LC_ADDRESS=C LC_TELEPHONE=C
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
time zone: Europe/Zurich
tzcode source: system (glibc)
attached base packages:
[1] grid stats graphics grDevices utils datasets methods
[8] base
other attached packages:
[1] ggh4x_0.3.0 ggnewscale_0.5.1 tibble_3.2.1
[4] BayesPrism_2.2.2 NMF_0.28 bigmemory_4.6.4
[7] Biobase_2.66.0 BiocGenerics_0.52.0 cluster_2.1.6
[10] rngtools_1.5.2 registry_0.5-1 snowfall_1.84-6.3
[13] snow_0.4-4 cowplot_1.1.3 tidyr_1.3.1
[16] ggplot2_3.5.2 dplyr_1.1.4
loaded via a namespace (and not attached):
[1] bitops_1.0-9 rlang_1.1.6
[3] magrittr_2.0.3 gridBase_0.4-7
[5] matrixStats_1.5.0 compiler_4.4.2
[7] vctrs_0.6.5 reshape2_1.4.4
[9] stringr_1.5.1 shape_1.4.6.1
[11] pkgconfig_2.0.3 crayon_1.5.3
[13] fastmap_1.2.0 XVector_0.46.0
[15] labeling_0.4.3 scuttle_1.16.0
[17] caTools_1.18.3 rmarkdown_2.29
[19] UCSC.utils_1.2.0 purrr_1.0.4
[21] xfun_0.52 bluster_1.16.0
[23] zlibbioc_1.52.0 beachmat_2.22.0
[25] GenomeInfoDb_1.42.3 jsonlite_2.0.0
[27] DelayedArray_0.32.0 uuid_1.2-1
[29] BiocParallel_1.40.2 irlba_2.3.5.1
[31] parallel_4.4.2 R6_2.6.1
[33] stringi_1.8.7 RColorBrewer_1.1-3
[35] limma_3.62.2 GenomicRanges_1.58.0
[37] Rcpp_1.0.14 SummarizedExperiment_1.36.0
[39] iterators_1.0.14 knitr_1.50
[41] IRanges_2.40.1 Matrix_1.7-1
[43] igraph_2.1.4 tidyselect_1.2.1
[45] dichromat_2.0-0.1 abind_1.4-8
[47] yaml_2.3.10 doParallel_1.0.17
[49] gplots_3.2.0 codetools_0.2-20
[51] lattice_0.22-6 plyr_1.8.9
[53] withr_3.0.2 evaluate_1.0.3
[55] circlize_0.4.16 pillar_1.10.2
[57] BiocManager_1.30.25 MatrixGenerics_1.18.1
[59] KernSmooth_2.23-24 foreach_1.5.2
[61] stats4_4.4.2 generics_0.1.3
[63] S4Vectors_0.44.0 scales_1.3.0.9000
[65] gtools_3.9.5 glue_1.8.0
[67] metapod_1.14.0 tools_4.4.2
[69] BiocNeighbors_2.0.1 ScaledMatrix_1.14.0
[71] locfit_1.5-9.12 scran_1.34.0
[73] edgeR_4.4.2 colorspace_2.1-1
[75] SingleCellExperiment_1.28.1 GenomeInfoDbData_1.2.13
[77] BiocSingular_1.22.0 cli_3.6.4
[79] rsvd_1.0.5 bigmemory.sri_0.1.8
[81] S4Arrays_1.6.0 gtable_0.3.6
[83] digest_0.6.37 SparseArray_1.6.2
[85] dqrng_0.4.1 htmlwidgets_1.6.4
[87] farver_2.1.2 htmltools_0.5.8.1
[89] lifecycle_1.0.4 httr_1.4.7
[91] GlobalOptions_0.1.2 statmod_1.5.0