Generating plots for paper (deconvolution)

Published

July 3, 2025

Load packages

suppressPackageStartupMessages({
    library(dplyr)
    library(ggplot2)
    library(tidyr)
    library(cowplot)
    library(BayesPrism)
    library(tibble)
    library(ggnewscale)
    library(ggh4x)
    library(grid)
})

source("../params.R")
source("../helpers.R")

if (!file.exists("paper_figures_deconvolution")) {
    dir.create("paper_figures_deconvolution", showWarnings = FALSE, recursive = FALSE)
}

Load data

bayesprism <- readRDS("bayesprism_predicted_celltypes_marker_genes.rds")
counts <- read.delim("data/IVMC.raw_counts.20250403.tab", header = TRUE)

annot <- data.frame(names = colnames(counts)) |>
    separate_wider_delim(cols = names, delim = "_", 
                         names = c("donor", "timepoint", "breakdown", "hormones"),
                         cols_remove = FALSE) |>
    as.data.frame()
rownames(annot) <- annot$names

annot$protocol <- case_when(
    annot$hormones == "H" & annot$timepoint %in% c("tp1", "tp2", "tp3") ~ "IVMC",
    annot$hormones == "H" & annot$breakdown == "B" & 
        annot$timepoint %in% c("tp4", "tp5", "tp6") ~ "IVMC",
    annot$timepoint == "tp0" ~ "IVMC",
    annot$hormones == "NH" & annot$timepoint %in% c("tp1", "tp2", "tp3") ~ "B_NH",
    annot$hormones == "NH" & annot$breakdown == "B" & 
        annot$timepoint %in% c("tp4", "tp5", "tp6") ~ "B_NH",
    .default = NA_character_
)

annot$timepointexp <- bulk_timepoint_map_mll[annot$timepoint]
annot$timepointexp <- factor(annot$timepointexp, levels = unname(bulk_timepoint_map_mll))

Reformat data

mprop <- get.fraction(bp = bayesprism,
                      which.theta = "final",
                      state.or.type = "type")
cdm <- as.data.frame(annot[rownames(mprop), , drop = FALSE])

mprop <- as.data.frame(mprop) |> 
    rownames_to_column("names") |>
    pivot_longer(names_to = "celltype", values_to = "proportion", 
                 -names) |>
    left_join(cdm) |>
    group_by(celltype) |>
    mutate(ndet = sum(proportion > 0.01)) |>
    filter(!is.na(protocol)) 
Joining with `by = join_by(names)`
# Replicate the pre-diff timepoint also for the B_NH protocol
prediff <- mprop |>
    filter(timepointexp == "Pre-diff")
prediff_bnh <- prediff |>
    mutate(protocol = "B_NH")
mprop <- bind_rows(mprop, prediff_bnh)

Figure 2D, Extended Data 8A (deconvolution)

Keep only cell types for which the proportion in at least two samples is above 1%. Put together t0-t3 (no breakdown) and t4-t6 (breakdown)

(p_area_all <- ggplot(mprop |> filter(ndet >= 2), 
                      aes(x = timepointexp, y = proportion,
                          group = celltype, fill = celltype)) + 
        geom_area() + 
        scale_fill_manual(values = tissue_leiden_res0.4_annot_colors, 
                          name = "") + 
        guides(fill = guide_legend(nrow = 5)) + 
        new_scale_fill() + 
        geom_rect(data = data.frame(
            donor = "B080", protocol = rep(c("IVMC", "B_NH"), c(7, 7)),
            timepointexp = c(levels(mprop$timepointexp),
                             levels(mprop$timepointexp)),
            xmin = c(1:7, 1:7) - 0.45, xmax = c(1:7, 1:7) + 0.45,
            ymin = rep(0, 14), ymax = rep(-0.85, 14)),
            aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax,
                fill = timepointexp), inherit.aes = FALSE, 
            show.legend = FALSE) +
        scale_fill_manual(values = alpha(bulk_timepoint_colors, 0.5)) + 
        theme_cowplot() + 
        labs(x = "", y = "Proportion") + 
        scale_y_continuous(expand = expansion(mult = c(0, 0), add = c(0, 0))) +
        coord_cartesian(clip = "off", ylim = c(0, 1.05)) + 
        theme(legend.position = "bottom", 
              axis.text.x = element_text(size = 16, angle = 90, hjust = 0.5, vjust = 0.5),
              axis.title = element_text(size = 20),
              legend.text = element_text(size = 16),
              strip.text = element_text(size = 16),
              panel.border = element_rect(fill = NA, colour = "grey20"),
              strip.background = element_rect(color = "black", fill = "white")) + 
        facet_grid(donor ~ protocol, scales = "free_x", space = "free_x"))

pdf("paper_figures_deconvolution/areaplot_allclusters.pdf", height = 18, width = 9)
p_area_all
dev.off()
png 
  2 
# now do the same with average values for all lines 
table(paste(mprop$timepointexp, mprop$breakdown, mprop$hormones, sep = "_"), 
      mprop$protocol, useNA = "ifany")
                          
                           B_NH IVMC
  E2-diff\n24h_B_H            0   84
  E2-diff\n24h_B_NH          84    0
  E2-diff\n48h_B_H            0   84
  E2-diff\n48h_B_NH          84    0
  Horm\nWithdr 24h_NB_H       0   84
  Horm\nWithdr 24h_NB_NH     84    0
  Horm\nWithdr 48h_NB_H       0   84
  Horm\nWithdr 48h_NB_NH     84    0
  P4-diff_NB_H                0   84
  P4-diff_NB_NH              84    0
  Post-breakdown\n24h_B_H     0   84
  Post-breakdown\n24h_B_NH   84    0
  Pre-diff_NB_NH             84   84
(p_area_avg <- ggplot(mprop |> 
                          filter(ndet >= 2) |>
                          group_by(celltype, timepointexp, protocol) |> 
                          summarize(proportion = mean(proportion), .groups = "drop") |>
                          filter(protocol == "IVMC"), 
                      aes(x = timepointexp, y = proportion,
                          group = celltype, fill = celltype)) + 
        geom_area() + 
        scale_fill_manual(values = tissue_leiden_res0.4_annot_colors, name = "") + 
        new_scale_fill() + 
        geom_rect(data = data.frame(
            timepointexp = levels(mprop$timepointexp),
            xmin = (1:7) - 0.35, xmax = (1:7) + 0.35,
            ymin = rep(0, 7), ymax = rep(-0.95, 7)),
            aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax,
                fill = timepointexp), inherit.aes = FALSE, 
            show.legend = FALSE) +
        scale_fill_manual(values = alpha(bulk_timepoint_colors, 0.5)) + 
        theme_cowplot() + 
        labs(x = "", y = "Proportion") + 
        scale_y_continuous(expand = expansion(mult = c(0, 0), add = c(0, 0))) +
        coord_cartesian(clip = "off", ylim = c(0, 1.05)) + 
        theme(legend.position = "bottom", 
              axis.text.y = element_text(size = 16),
              axis.title.y = element_text(size = 20),
              axis.text.x = element_text(size = 20, angle = 90, hjust = 0.5, vjust = 0.5)))

pdf("paper_figures_deconvolution/areaplot_allclusters_avg_donors.pdf", height = 5, width = 10)
p_area_avg + theme(legend.position = "none")
dev.off()
png 
  2 
png("paper_figures_deconvolution/areaplot_allclusters_avg_donors.png", height = 5, width = 10, 
    unit = "in", res = 400)
p_area_avg + theme(legend.position = "none")
dev.off()
png 
  2 
pdf("paper_figures_deconvolution/areaplot_allclusters_avg_donors_legend.pdf", 
    height = 1.75, width = 11)
grid.newpage()
grid.draw(get_legend2(p_area_avg + theme(legend.position = "bottom",
                                         legend.text = element_text(size = 12))))
dev.off()
png 
  2 

Figure 2E, Extended Data 8B (deconvolution box plots)

(p_box <- ggplot(
    mprop |> 
        filter(ndet >= 2) |>
        mutate(celltype = replace(celltype, celltype == "hormone responsive late",
                                  "hormone\nresponsive\nlate")) |>
        mutate(celltype = replace(celltype, celltype == "hormone responsive early",
                                  "hormone\nresponsive\nearly")) |>
        mutate(celltype = replace(celltype, celltype == "secretory late",
                                  "secretory\nlate")) |>
        mutate(celltype = replace(celltype, celltype == "secretory mid",
                                  "secretory\nmid")) |>
        mutate(celltype = replace(celltype, celltype == "secretory early",
                                  "secretory\nearly")) |>
        mutate(celltype = replace(celltype, celltype == "luminal late",
                                  "luminal\nlate")) |>
        mutate(celltype = replace(celltype, celltype == "luminal early",
                                  "luminal\nearly")) |>
        mutate(celltype = replace(celltype, celltype == "transcriptionally active",
                                  "trans-\ncriptionally\nactive")), 
    aes(x = timepointexp, y = proportion)) + 
        geom_boxplot(position = position_dodge2(0.75, preserve = "single"), 
                     outlier.size = -1, alpha = 0.5) + 
        geom_jitter(position = position_dodge2(0.75, preserve = "single"), 
                    size = 2, aes(color = donor)) + 
        geom_rect(data = data.frame(
            celltype = "trans-\ncriptionally\nactive", 
            protocol = rep(c("IVMC", "B_NH"), c(7, 7)),
            timepointexp = c(levels(mprop$timepointexp),
                             levels(mprop$timepointexp)),
            xmin = c(1:7, 1:7) - 0.45, xmax = c(1:7, 1:7) + 0.45,
            ymin = rep(-0.05, 14), ymax = rep(-0.9, 14)),
            aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax,
                fill = timepointexp), inherit.aes = FALSE, 
            show.legend = FALSE) +
        scale_fill_manual(values = alpha(bulk_timepoint_colors, 0.5)) + 
        guides(color = guide_legend(override.aes = list(size = 4))) + 
        facet_grid(celltype ~ protocol,
                   scales = "free") + 
        labs(x = "", y = "Proportion") + 
        scale_y_continuous(expand = expansion(mult = c(0, 0), add = c(0.05, 0.05))) +
        coord_cartesian(clip = "off", ylim = c(0, NA)) + 
        theme_cowplot() + 
        theme(axis.text.y = element_text(size = 12),
              axis.text.x = element_text(size = 16, angle = 90, hjust = 0.5, vjust = 0.5),
              axis.title = element_text(size = 20),
              strip.text = element_text(size = 14),
              panel.border = element_rect(fill = NA, colour = "grey20"),
              strip.background = element_rect(fill = "white", color = "black")) + 
        scale_color_manual(values = donor_colors, name = "Donor"))

pdf("paper_figures_deconvolution/boxplot_allclusters_proportions.pdf", height = 18, width = 9)
p_box + theme(legend.position = "none")
dev.off()
png 
  2 
pdf("paper_figures_deconvolution/boxplot_allclusters_proportions_legend.pdf", height = 3, width = 2)
grid.newpage()
grid.draw(get_legend2(p_box + theme(legend.position = "right",
                                    legend.text = element_text(size = 14),
                                    legend.title = element_text(size = 16))))
dev.off()
png 
  2 
mpropsub <- mprop |> 
    filter(celltype %in% c("cycling", "luminal early"),
           protocol == "IVMC") |>
    mutate(celltype = factor(celltype, 
                             levels = c("luminal early", 
                                        "cycling")))
sz <- mpropsub |> 
    group_by(celltype) |>
    summarize(rng = 0.1 + 1.0 * (max(proportion) - min(proportion)))
(p_box_sub <- ggplot(mpropsub, 
                     aes(x = timepointexp, y = proportion)) + 
        geom_boxplot(position = position_dodge2(0.75, preserve = "single"), 
                     outlier.size = -1, alpha = 0.5) + 
        geom_jitter(position = position_dodge2(0.75, preserve = "single"), 
                    size = 2, aes(color = donor)) + 
        geom_rect(data = data.frame(
            timepointexp = rep(levels(mpropsub$timepointexp), nlevels(mpropsub$celltype)),
            celltype = factor(rep(levels(mpropsub$celltype), each = nlevels(mpropsub$timepointexp)),
                              c("luminal early", "cycling")),
            xmin = rep((1:nlevels(mpropsub$timepointexp)) - 0.35, nlevels(mpropsub$celltype)), 
            xmax = rep((1:nlevels(mpropsub$timepointexp)) + 0.35, nlevels(mpropsub$celltype)),
            ymin = rep(rep(-0.05, nlevels(mpropsub$timepointexp)), nlevels(mpropsub$celltype)), 
            ymax = rep(rep(-0.9, nlevels(mpropsub$timepointexp)), nlevels(mpropsub$celltype))) |>
                left_join(sz) |> mutate(ymax = ymax * rng / rng[celltype == "cycling"]),
            aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax,
                fill = timepointexp), inherit.aes = FALSE, 
            show.legend = FALSE) +
        scale_fill_manual(values = alpha(bulk_timepoint_colors, 0.5)) + 
        facet_wrap2(
            ~ celltype, nrow = 1, 
            strip = strip_themed(clip = "off", background_x = elem_list_rect(
                fill = alpha(tissue_leiden_res0.4_annot_colors[levels(mpropsub$celltype)], 1))), 
            scales = "free_y") + 
        labs(x = "", y = "Proportion") + 
        scale_y_continuous(expand = expansion(mult = c(0, 0), add = c(0.05, 0.05))) +
        coord_cartesian(clip = "off", ylim = c(0, NA)) + 
        theme_cowplot() + 
        theme(strip.text = element_text(size = 20, hjust = 0.5, 
                                        margin = margin(0.15, 0, 0.15, 0, "cm")),
              strip.background = element_rect(fill = "white", color = "black"),
              axis.text.x = element_text(size = 20, angle = 90, hjust = 0.5, vjust = 0.5),
              axis.text.y = element_text(size = 16),
              legend.title = element_text(size = 20),
              legend.text = element_text(size = 20),
              panel.border = element_rect(fill = NA, colour = "grey20"),
              axis.title = element_text(size = 20)) + 
        scale_color_manual(values = donor_colors, name = "Donor"))
Joining with `by = join_by(celltype)`

pdf("paper_figures_deconvolution/boxplot_sub_proportions.pdf", height = 5.5, width = 15)
p_box_sub
dev.off()
png 
  2 
for (ct in c("cycling", "luminal early")) {
    tmp <- mpropsub |> filter(celltype == ct) |> droplevels()
    (p_box_sub <- ggplot(tmp, aes(x = timepointexp, y = proportion)) + 
            geom_boxplot(position = position_dodge2(0.75, preserve = "single"), 
                         outlier.size = -1, alpha = 0.5) + 
            geom_jitter(position = position_dodge2(0.75, preserve = "single"), 
                        size = 2, aes(color = donor)) + 
            geom_rect(data = data.frame(
                timepointexp = rep(levels(tmp$timepointexp), 1),
                celltype = rep(ct, each = 7),
                xmin = rep((1:7) - 0.35, 1), xmax = rep((1:7) + 0.35, 1),
                ymin = rep(rep(-0.05, 7), 1), ymax = rep(rep(-0.9, 7), 1)) |>
                    left_join(sz) |> mutate(ymax = ymax * rng / sz$rng[sz$celltype == "cycling"]),
                aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax,
                    fill = timepointexp), inherit.aes = FALSE, 
                show.legend = FALSE) +
            scale_fill_manual(values = alpha(bulk_timepoint_colors, 0.5)) + 
            facet_wrap2(
                ~ celltype, nrow = 1, 
                strip = strip_themed(clip = "off", background_x = elem_list_rect(
                    fill = alpha(tissue_leiden_res0.4_annot_colors[levels(tmp$celltype)], 1))), 
                scales = "free_y") + 
            labs(x = "", y = "Proportion") + 
            scale_y_continuous(expand = expansion(mult = c(0, 0), add = c(0.05, 0.05))) +
            coord_cartesian(clip = "off", ylim = c(0, NA)) + 
            theme_cowplot() + 
            theme(strip.text = element_text(size = 20, hjust = 0.5, 
                                            margin = margin(0.15, 0, 0.15, 0, "cm")),
                  strip.background = element_rect(fill = "white", color = "black"),
                  axis.text.x = element_text(size = 20, angle = 90, hjust = 0.5, vjust = 0.5),
                  axis.text.y = element_text(size = 16),
                  legend.title = element_text(size = 20),
                  legend.text = element_text(size = 20),
                  panel.border = element_rect(fill = NA, colour = "grey20"),
                  axis.title = element_text(size = 20)) + 
            scale_color_manual(values = donor_colors, name = "Donor"))
    
    pdf(paste0("paper_figures_deconvolution/boxplot_sub_proportions_", ct, ".pdf"), 
        height = 5.75, width = 9)
    print(p_box_sub)
    dev.off()
}
Joining with `by = join_by(celltype)`
Joining with `by = join_by(celltype)`

Session info

sessionInfo()
R version 4.4.2 (2024-10-31)
Platform: x86_64-pc-linux-gnu
Running under: Rocky Linux 8.10 (Green Obsidian)

Matrix products: default
BLAS/LAPACK: FlexiBLAS OPENBLAS;  LAPACK version 3.12.0

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       

time zone: Europe/Zurich
tzcode source: system (glibc)

attached base packages:
[1] grid      stats     graphics  grDevices utils     datasets  methods  
[8] base     

other attached packages:
 [1] ggh4x_0.3.0         ggnewscale_0.5.1    tibble_3.2.1       
 [4] BayesPrism_2.2.2    NMF_0.28            bigmemory_4.6.4    
 [7] Biobase_2.66.0      BiocGenerics_0.52.0 cluster_2.1.6      
[10] rngtools_1.5.2      registry_0.5-1      snowfall_1.84-6.3  
[13] snow_0.4-4          cowplot_1.1.3       tidyr_1.3.1        
[16] ggplot2_3.5.2       dplyr_1.1.4        

loaded via a namespace (and not attached):
 [1] bitops_1.0-9                rlang_1.1.6                
 [3] magrittr_2.0.3              gridBase_0.4-7             
 [5] matrixStats_1.5.0           compiler_4.4.2             
 [7] vctrs_0.6.5                 reshape2_1.4.4             
 [9] stringr_1.5.1               shape_1.4.6.1              
[11] pkgconfig_2.0.3             crayon_1.5.3               
[13] fastmap_1.2.0               XVector_0.46.0             
[15] labeling_0.4.3              scuttle_1.16.0             
[17] caTools_1.18.3              rmarkdown_2.29             
[19] UCSC.utils_1.2.0            purrr_1.0.4                
[21] xfun_0.52                   bluster_1.16.0             
[23] zlibbioc_1.52.0             beachmat_2.22.0            
[25] GenomeInfoDb_1.42.3         jsonlite_2.0.0             
[27] DelayedArray_0.32.0         uuid_1.2-1                 
[29] BiocParallel_1.40.2         irlba_2.3.5.1              
[31] parallel_4.4.2              R6_2.6.1                   
[33] stringi_1.8.7               RColorBrewer_1.1-3         
[35] limma_3.62.2                GenomicRanges_1.58.0       
[37] Rcpp_1.0.14                 SummarizedExperiment_1.36.0
[39] iterators_1.0.14            knitr_1.50                 
[41] IRanges_2.40.1              Matrix_1.7-1               
[43] igraph_2.1.4                tidyselect_1.2.1           
[45] dichromat_2.0-0.1           abind_1.4-8                
[47] yaml_2.3.10                 doParallel_1.0.17          
[49] gplots_3.2.0                codetools_0.2-20           
[51] lattice_0.22-6              plyr_1.8.9                 
[53] withr_3.0.2                 evaluate_1.0.3             
[55] circlize_0.4.16             pillar_1.10.2              
[57] BiocManager_1.30.25         MatrixGenerics_1.18.1      
[59] KernSmooth_2.23-24          foreach_1.5.2              
[61] stats4_4.4.2                generics_0.1.3             
[63] S4Vectors_0.44.0            scales_1.3.0.9000          
[65] gtools_3.9.5                glue_1.8.0                 
[67] metapod_1.14.0              tools_4.4.2                
[69] BiocNeighbors_2.0.1         ScaledMatrix_1.14.0        
[71] locfit_1.5-9.12             scran_1.34.0               
[73] edgeR_4.4.2                 colorspace_2.1-1           
[75] SingleCellExperiment_1.28.1 GenomeInfoDbData_1.2.13    
[77] BiocSingular_1.22.0         cli_3.6.4                  
[79] rsvd_1.0.5                  bigmemory.sri_0.1.8        
[81] S4Arrays_1.6.0              gtable_0.3.6               
[83] digest_0.6.37               SparseArray_1.6.2          
[85] dqrng_0.4.1                 htmlwidgets_1.6.4          
[87] farver_2.1.2                htmltools_0.5.8.1          
[89] lifecycle_1.0.4             httr_1.4.7                 
[91] GlobalOptions_0.1.2         statmod_1.5.0