Generating plots for paper (scRNA-seq time course)

Published

July 2, 2025

Load packages

suppressPackageStartupMessages({
    library(SingleCellExperiment)
    library(scuttle)
    library(scater)
    library(dplyr)
    library(tidyr)
    library(ggplot2)
    library(cowplot)
    library(ComplexHeatmap)
    library(circlize)
    library(ggtext)
    library(scales)
    library(ggh4x)
    library(swissknife)
    library(BiocParallel)
    library(Matrix)
    library(gprofiler2)
    library(forcats)
})

source("../params.R")
source("../helpers.R")

if (!file.exists("paper_figures")) {
    dir.create("paper_figures", showWarnings = FALSE, recursive = FALSE)
}
if (!file.exists("gprofiler")) {
    dir.create("gprofiler", showWarnings = FALSE, recursive = FALSE)
}

Load data

organ <- readRDS("organoid_scrnaseq_timecourse.rds")

# check for consistency
stopifnot(all(organ$leiden_res0.125_annot %in% sc_leiden_res0.125_annot_order))
stopifnot(all(organ$pred_from_invivo %in% tissue_leiden_res0.4_annot_order))

organ$leiden_res0.125_annot <- factor(organ$leiden_res0.125_annot,
                                      levels = sc_leiden_res0.125_annot_order)
organ$pred_from_invivo <- factor(organ$pred_from_invivo, 
                                 levels = tissue_leiden_res0.4_annot_order)

stopifnot(all(organ$timepoint %in% sc_timepoint_order))
organ$timepoint <- factor(organ$timepoint, 
                          levels = sc_timepoint_order)

Numeric summaries

# number of genes and cells
dim(organ)
[1] 20231 85628

Figure 4B (UMAPs for time points)

x <- makePerCellDF(organ, use.coldata = TRUE, use.dimred = TRUE)

(umap_facet <- ggplot(x, aes(x = UMAP_sketch.1, y = UMAP_sketch.2)) + 
        geom_point(data = transform(x, timepoint = NULL), 
                   color = "grey95", alpha = 1, size = 0.5) + 
        geom_point(aes(color = timepoint), alpha = 0.5, size = 0.5) + 
        facet_wrap2(~ timepoint, nrow = 1, 
                    strip = strip_themed(clip = "off", background_x = elem_list_rect(
                        fill = alpha(sc_timepoint_colors[levels(x$timepoint)], 0.5)))) + 
        scale_color_manual(values = sc_timepoint_colors) + 
        theme_void() + 
        theme(axis.text = element_blank(),
              axis.title = element_blank(),
              strip.text = element_text(size = 20, hjust = 0.5, 
                                        margin = margin(0.15, 0, 0.15, 0, "cm")),
              legend.position = "none",
              panel.background = element_rect(fill = "white", color = NA),
              panel.grid.major = element_blank(),
              panel.grid.minor = element_blank(),
              axis.line = element_line(color = "black"),
              strip.background = element_rect(fill = "white", color = "white")
        ))

png("paper_figures/UMAP_timepoints_separate.png", 
    height = 4, width = 22, units = "in", res = 500)
umap_facet
dev.off()
png 
  2 

Figure 4C (UMAP with clusters)

(umap_clusters <- plotReducedDim(
    organ, dimred = "UMAP_sketch", 
    colour_by = "leiden_res0.125_annot", point_size = 0.5) + 
        scale_color_manual(values = sc_leiden_res0.125_annot_colors, name = "") + 
        guides(color = guide_legend(override.aes = list(size = 5, alpha = 1, 
                                                        shape = 15), 
                                    ncol = 2)) + 
        theme_void() + 
        theme(legend.text = element_text(size = 12)))
Scale for colour is already present.
Adding another scale for colour, which will replace the existing scale.

png("paper_figures/UMAP_organoids_clusters.png", 
    height = 4, width = 4, units = "in", res = 500)
umap_clusters + theme(legend.position = "none")
dev.off()
png 
  2 
pdf("paper_figures/UMAP_organoids_clusters_legend.pdf", 
    height = 3, width = 5)
grid.newpage()
grid.draw(get_legend2(umap_clusters + 
                          theme(legend.position = "bottom",
                                legend.text = element_text(size = 12))))
dev.off()
png 
  2 
pdf("paper_figures/UMAP_organoids_clusters.pdf", height = 4, width = 4)
umap_clusters + theme(legend.position = "none")
dev.off()
png 
  2 

Figure 4H (predictions from atlas)

(umap_preds <- plotReducedDim(
    organ, "UMAP_sketch", colour_by = "pred_from_invivo", point_size = 0.5) + 
        scale_color_manual(values = tissue_leiden_res0.4_annot_colors, name = "") +  
        theme_void() + 
        theme(legend.text = element_text(size = 12),       
              legend.title = element_text(size = 12),      
              legend.key.size = unit(1.5, "lines")) + 
        guides(color = guide_legend(override.aes = list(size = 4), title = NULL))
)
Scale for colour is already present.
Adding another scale for colour, which will replace the existing scale.

png("paper_figures/UMAP_organoids_pred_from_invivo.png", 
    height = 4, width = 4, units = "in", res = 500)
umap_preds + theme(legend.position = "none")
dev.off()
png 
  2 
pdf("paper_figures/UMAP_organoids_pred_from_invivo_legend.pdf", 
    height = 1.75, width = 9)
grid.newpage()
grid.draw(get_legend2(umap_preds + theme(legend.position = "bottom",
                                         legend.text = element_text(size = 12))))
dev.off()
png 
  2 

Figure 4E (dot plot)

markers <- intersect(sc_timecourse_markers, rownames(organ))

(dotplot_norm <- .makeDotPlot(
    dat = organ, markers = markers, 
    cluster_column = "leiden_res0.125_annot", 
    cluster_order = sc_leiden_res0.125_annot_order, 
    cluster_colors = sc_leiden_res0.125_annot_colors, 
    transpose = TRUE, 
    split_by_ciliated = TRUE))
Warning: Removed 447 rows containing missing values or values outside the scale range
(`geom_point()`).

pdf("paper_figures/dotplot_organoids_normalized.pdf", height = 6, width = 20)
dotplot_norm
Warning: Removed 447 rows containing missing values or values outside the scale range
(`geom_point()`).
dev.off()
png 
  2 
png("paper_figures/dotplot_organoids_normalized.png", height = 6, width = 20, 
    unit = "in", res = 500)
dotplot_norm
Warning: Removed 447 rows containing missing values or values outside the scale range
(`geom_point()`).
dev.off()
png 
  2 

Dot plot of WNT ligands and FZD receptors

fzd <- paste0("FZD", 1:10)

(dotplot_norm_fzd <- .makeDotPlot(
    dat = organ, markers = fzd, 
    cluster_column = "leiden_res0.125_annot", 
    cluster_order = sc_leiden_res0.125_annot_order, 
    cluster_colors = sc_leiden_res0.125_annot_colors, 
    transpose = TRUE, 
    split_by_ciliated = FALSE))
Warning: Removed 118 rows containing missing values or values outside the scale range
(`geom_point()`).

pdf("paper_figures/dotplot_organoids_fzd_normalized.pdf", height = 6, width = 7)
dotplot_norm_fzd
Warning: Removed 118 rows containing missing values or values outside the scale range
(`geom_point()`).
dev.off()
png 
  2 
png("paper_figures/dotplot_organoids_fzd_normalized.png", height = 6, width = 7, 
    unit = "in", res = 500)
dotplot_norm_fzd
Warning: Removed 118 rows containing missing values or values outside the scale range
(`geom_point()`).
dev.off()
png 
  2 

Figures 4D + 4G + 4I, Extended Data 9G (relative abundance over time)

Time course clusters

df <- data.frame(colData(organ)[, c("timepoint", "leiden_res0.125_annot")]) |>
    group_by(timepoint, leiden_res0.125_annot) |>
    tally() |>
    group_by(timepoint) |>
    mutate(perc = n / sum(n) * 100) |>
    ungroup() |>
    complete(timepoint, leiden_res0.125_annot, fill = list(n = 0, perc = 0)) |>
    mutate(cluster_group = ifelse(grepl("ciliated", leiden_res0.125_annot), 
                                  "ciliated", "non-ciliated"))

dfdonor <- data.frame(colData(organ)[, c("timepoint", "donor", "leiden_res0.125_annot")]) |>
    group_by(timepoint, donor, leiden_res0.125_annot) |>
    tally() |>
    group_by(timepoint, donor) |>
    mutate(perc = n / sum(n) * 100) |>
    ungroup() |>
    complete(timepoint, donor, leiden_res0.125_annot, fill = list(n = 0, perc = 0))

# timepoint vs. relative abundance
# ... ciliated
(plot_relab_cil <- ggplot(df |> dplyr::filter(cluster_group == "ciliated") |>
                              droplevels(), 
                          aes(x = timepoint, y = perc, color = leiden_res0.125_annot)) + 
        geom_point() + 
        geom_line(aes(x = as.numeric(timepoint)), linewidth = 1) + 
        theme_cowplot() + 
        labs(x = "", y = "Relative\nabundance (%)",
             title = "") + 
        theme(legend.position = "bottom",
              axis.text.x = element_text(angle = 45, hjust = 1, size = 16),
              axis.text.y = element_text(size = 16),
              axis.title = element_text(size = 16),
              strip.text = element_text(size = 16, margin = margin(0.2, 0, 0.2, 0, "cm")),
              strip.background = element_rect(fill = "white", color = "white")) + 
        facet_wrap(~ cluster_group, ncol = 1) + 
        scale_color_manual(values = sc_leiden_res0.125_annot_colors, name = ""))

pdf(paste0("paper_figures/relative_abundance_leiden_res0.125_annot_ciliated.pdf"), 
    height = 4.5, width = 6)
print(plot_relab_cil + theme(legend.position = "none"))
dev.off()
png 
  2 
pdf("paper_figures/relative_abundance_leiden_res0.125_annot_ciliated_legend.pdf", 
    height = 3, width = 4)
grid.newpage()
grid.draw(get_legend2(plot_relab_cil + 
                          theme(legend.position = "bottom",
                                legend.text = element_text(size = 12)) + 
                          guides(color = guide_legend(ncol = 1))))
dev.off()
png 
  2 
# ... non-ciliated
(plot_relab_noncil <- ggplot(df |> dplyr::filter(cluster_group == "non-ciliated") |>
                                 droplevels(), 
                             aes(x = timepoint, y = perc, color = leiden_res0.125_annot)) + 
        geom_point() + 
        geom_line(aes(x = as.numeric(timepoint)), linewidth = 1) + 
        theme_cowplot() + 
        labs(x = "", y = "Relative\nabundance (%)",
             title = "") + 
        theme(legend.position = "bottom",
              axis.text.x = element_text(angle = 45, hjust = 1, size = 16),
              axis.text.y = element_text(size = 16),
              axis.title = element_text(size = 16),
              strip.text = element_text(size = 16, margin = margin(0.2, 0, 0.2, 0, "cm")),
              strip.background = element_rect(fill = "white", color = "white")) + 
        facet_wrap(~ cluster_group, ncol = 1) + 
        scale_color_manual(values = sc_leiden_res0.125_annot_colors, name = ""))

pdf(paste0("paper_figures/relative_abundance_leiden_res0.125_annot_nonciliated.pdf"), 
    height = 4.5, width = 6)
print(plot_relab_noncil + theme(legend.position = "none"))
dev.off()
png 
  2 
pdf("paper_figures/relative_abundance_leiden_res0.125_annot_nonciliated_legend.pdf", 
    height = 2, width = 4.5)
grid.newpage()
grid.draw(get_legend2(plot_relab_noncil + 
                          theme(legend.position = "bottom",
                                legend.text = element_text(size = 12)) + 
                          guides(color = guide_legend(ncol = 2))))
dev.off()
png 
  2 
# one panel per cluster
(plot_relab_facet <- ggplot(
    bind_rows(df |> mutate(donor = "overall"), dfdonor), 
    aes(x = timepoint, y = perc, group = donor, 
        alpha = donor == "overall", 
        linewidth = donor == "overall", color = leiden_res0.125_annot)) + 
        geom_point() + 
        geom_line(aes(x = as.numeric(timepoint))) + 
        scale_alpha_manual(values = c(`TRUE` = 1, `FALSE` = 0.25)) + 
        scale_linewidth_manual(values = c(`TRUE` = 2, `FALSE` = 1)) + 
        theme_cowplot() + 
        labs(x = "", y = "Relative abundance (%)",
             title = "") + 
        scale_color_manual(values = sc_leiden_res0.125_annot_colors, name = "") + 
        facet_wrap(~ leiden_res0.125_annot, scales = "free_y") + 
        guides(alpha = "none", linewidth = "none") + 
        theme(legend.position = "none",
              axis.text.x = element_text(angle = 45, hjust = 1),
              strip.text = element_text(size = 16),
              strip.background = element_rect(fill = "white", color = "white")))

pdf(paste0("paper_figures/relative_abundance_cluster_leiden_res0.125_annot.pdf"), 
    height = 9, width = 14)
print(plot_relab_facet)
dev.off()
png 
  2 
# bar plot
(plotbar <- ggplot(df, aes(x = fct_rev(timepoint), y = perc, fill = leiden_res0.125_annot)) + 
        geom_col() + 
        theme_cowplot() + 
        labs(x = "", y = "Relative abundance (%)", title = "") +
        coord_flip() + 
        theme(legend.position = "bottom",
              axis.text = element_text(size = 20),
              axis.title = element_text(size = 20)) + 
        scale_fill_manual(values = sc_leiden_res0.125_annot_colors, name = ""))

pdf("paper_figures/relative_abundance_barplot_leiden_res0.125_annot.pdf", 
    height = 5, width = 8)
plotbar + theme(legend.position = "none")
dev.off()
png 
  2 
png("paper_figures/relative_abundance_barplot_leiden_res0.125_annot.png", 
    height = 5, width = 8, unit = "in", res = 400)
plotbar + theme(legend.position = "none")
dev.off()
png 
  2 
pdf("paper_figures/relative_abundance_barplot_leiden_res0.125_annot_legend.pdf", 
    height = 1.75, width = 9)
grid.newpage()
grid.draw(get_legend2(plotbar + theme(legend.position = "bottom",
                                      legend.text = element_text(size = 12))))
dev.off()
png 
  2 

Predicted labels from atlas

df <- data.frame(colData(organ)[, c("timepoint", "pred_from_invivo")]) |>
    group_by(timepoint, pred_from_invivo) |>
    tally() |>
    group_by(timepoint) |>
    mutate(perc = n / sum(n) * 100) |>
    ungroup() |>
    complete(timepoint, pred_from_invivo, fill = list(n = 0, perc = 0))

dfdonor <- data.frame(colData(organ)[, c("timepoint", "donor", "pred_from_invivo")]) |>
    group_by(timepoint, donor, pred_from_invivo) |>
    tally() |>
    group_by(timepoint, donor) |>
    mutate(perc = n / sum(n) * 100) |>
    ungroup() |>
    complete(timepoint, donor, pred_from_invivo, fill = list(n = 0, perc = 0))

# timepoint vs. relative abundance
(plot_relab <- ggplot(df, aes(x = timepoint, y = perc, color = pred_from_invivo)) + 
        geom_point() + 
        geom_line(aes(x = as.numeric(timepoint)), linewidth = 1) + 
        theme_cowplot() + 
        labs(x = "", y = "Relative\nabundance (%)",
             title = "") + 
        theme(legend.position = "bottom",
              axis.text.x = element_text(angle = 45, hjust = 1, size = 16),
              axis.text.y = element_text(size = 16),
              axis.title = element_text(size = 16),
              strip.background = element_rect(fill = "white", color = "black")) + 
        scale_color_manual(values = tissue_leiden_res0.4_annot_colors, name = ""))

pdf(paste0("paper_figures/relative_abundance_pred_from_invivo.pdf"), 
    height = 4, width = 6)
print(plot_relab + theme(legend.position = "none"))
dev.off()
png 
  2 
pdf("paper_figures/relative_abundance_pred_from_invivo_legend.pdf", 
    height = 1.75, width = 11)
grid.newpage()
grid.draw(get_legend2(plot_relab + theme(legend.position = "bottom",
                                         legend.text = element_text(size = 12))))
dev.off()
png 
  2 
# one panel per cluster
(plot_relab_facet <- ggplot(
    bind_rows(df |> mutate(donor = "overall"), dfdonor), 
    aes(x = timepoint, y = perc, group = donor, 
        alpha = donor == "overall", 
        linewidth = donor == "overall", color = pred_from_invivo)) + 
        geom_point() + 
        geom_line(aes(x = as.numeric(timepoint))) + 
        scale_alpha_manual(values = c(`TRUE` = 1, `FALSE` = 0.25)) + 
        scale_linewidth_manual(values = c(`TRUE` = 2, `FALSE` = 1)) + 
        theme_cowplot() + 
        labs(x = "", y = "Relative abundance (%)",
             title = "") + 
        theme(legend.position = "none",
              axis.text.x = element_text(angle = 45, hjust = 1)) + 
        scale_color_manual(values = tissue_leiden_res0.4_annot_colors, name = "") + 
        facet_wrap(~ pred_from_invivo, scales = "free_y") + 
        guides(alpha = "none", linewidth = "none") + 
        theme(strip.text = element_text(size = 16),
              strip.background = element_rect(fill = "white", color = "white")))

pdf(paste0("paper_figures/relative_abundance_cluster_pred_from_invivo.pdf"), 
    height = 9, width = 14)
print(plot_relab_facet)
dev.off()
png 
  2 
# bar plot
(plotbar <- ggplot(df, aes(x = timepoint, y = perc, fill = pred_from_invivo)) + 
        geom_col() + 
        theme_cowplot() + 
        labs(x = "", y = "Relative abundance (%)", title = "") +
        coord_flip() + 
        theme(legend.position = "bottom",
              axis.text = element_text(size = 20),
              axis.title = element_text(size = 20)) + 
        scale_fill_manual(values = tissue_leiden_res0.4_annot_colors, name = ""))

pdf("paper_figures/relative_abundance_barplot_pred_from_invivo.pdf", 
    height = 5, width = 8)
plotbar + theme(legend.position = "none")
dev.off()
png 
  2 
pdf("paper_figures/relative_abundance_barplot_pred_from_invivo_legend.pdf", 
    height = 1.75, width = 11)
grid.newpage()
grid.draw(get_legend2(plotbar + theme(legend.position = "bottom",
                                      legend.text = element_text(size = 12))))
dev.off()
png 
  2 

Extended Data 9 (UMAPs)

(umap_donor <- plotReducedDim(
    organ, "UMAP_sketch", colour_by = "donorCol",
    point_size = 0.5) +
        scale_color_manual(values = donor_colors) + 
        guides(color = guide_legend(override.aes = list(size = 4),
                                    title = "Donor")) + 
        theme_void() + 
        theme(axis.title = element_blank(),
              axis.text = element_blank(),
              legend.text = element_text(size = 20),      
              legend.title = element_text(size = 20),      
              legend.key.size = unit(1.5, "lines")))
Scale for colour is already present.
Adding another scale for colour, which will replace the existing scale.

png("paper_figures/UMAP_organoids_donorCol.png", 
    height = 4, width = 7, units = "in", res = 500)
plot_grid(
    umap_donor + theme(legend.position = "none"),
    get_legend2(umap_donor),
    nrow = 1, rel_widths = c(4, 3)
)
dev.off()
png 
  2 
(umap_cc <- plotReducedDim(
    organ, "UMAP_sketch", colour_by = "SeuratCC",
    point_size = 0.5) +
        scale_color_manual(values = cc_colors) + 
        guides(color = guide_legend(override.aes = list(size = 4),
                                    title = "Cell cycle")) + 
        theme_void() + 
        theme(axis.title = element_blank(),
              axis.text = element_blank(),
              legend.text = element_text(size = 20),      
              legend.title = element_text(size = 20),      
              legend.key.size = unit(1.5, "lines")))
Scale for colour is already present.
Adding another scale for colour, which will replace the existing scale.

png("paper_figures/UMAP_organoids_seuratCC.png", 
    height = 4, width = 7, units = "in", res = 500)
plot_grid(
    umap_cc + theme(legend.position = "none"),
    get_legend2(umap_cc),
    nrow = 1, rel_widths = c(4, 3)
)
dev.off()
png 
  2 
(umap_tricycle <- plotReducedDim(
    organ, "UMAP_sketch", colour_by = "tricyclePosition",
    point_size = 0.5) +   
        guides(color = guide_colorbar(title = "tricycle\nposition")) + 
        theme_void() + 
        theme(axis.title = element_blank(),
              axis.text = element_blank(),
              legend.text = element_text(size = 20),      
              legend.title = element_text(size = 20),      
              legend.key.size = unit(1.5, "lines")))

png("paper_figures/UMAP_organoids_tricyclePosition.png", 
    height = 4, width = 7, units = "in", res = 500)
plot_grid(
    umap_tricycle + theme(legend.position = "none"),
    get_legend2(umap_tricycle),
    nrow = 1, rel_widths = c(4, 3)
)
dev.off()
png 
  2 
(umap_intronic <- plotReducedDim(
    organ, "UMAP_sketch", colour_by = "intronic_fraction",
    point_size = 0.5) +   
        guides(color = guide_colorbar(title = "intronic\nfraction")) + 
        theme_void() + 
        theme(axis.title = element_blank(),
              axis.text = element_blank(),
              legend.text = element_text(size = 20),      
              legend.title = element_text(size = 20),      
              legend.key.size = unit(1.5, "lines")))

png("paper_figures/UMAP_organoids_intronic_fraction.png", 
    height = 4, width = 7, units = "in", res = 500)
plot_grid(
    umap_intronic + theme(legend.position = "none"),
    get_legend2(umap_intronic),
    nrow = 1, rel_widths = c(4, 3)
)
dev.off()
png 
  2 
(umap_mt <- plotReducedDim(
    organ, "UMAP_sketch", colour_by = "subsets_MT_percent",
    point_size = 0.5) +   
        guides(color = guide_colorbar(title = "% MT")) + 
        theme_void() + 
        theme(axis.title = element_blank(),
              axis.text = element_blank(),
              legend.text = element_text(size = 20),      
              legend.title = element_text(size = 20),      
              legend.key.size = unit(1.5, "lines")))

png("paper_figures/UMAP_organoids_subsets_MT_percent.png", 
    height = 4, width = 7, units = "in", res = 500)
plot_grid(
    umap_mt + theme(legend.position = "none"),
    get_legend2(umap_mt),
    nrow = 1, rel_widths = c(4, 3)
)
dev.off()
png 
  2 

Figure 4F (UMAPs)

for (gn in c("WNT7A", "E2F7")) {
    umap_gn <- plotReducedDim(
        organ, "UMAP_sketch", colour_by = gn,
        point_size = 1) + 
        scale_color_gradientn(colors = c("#d9d9d9", "#f4a582", "#d6604d","#8b0000", '#5c0000')) +
        labs(color = gn) + 
        theme_void() + 
        theme(axis.title = element_blank(),
              axis.text = element_blank(),
              legend.text = element_text(size = 20),      
              legend.title = element_text(size = 20),      
              legend.key.size = unit(1.5, "lines"))
    print(umap_gn)
    
    png(paste0("paper_figures/UMAP_organoids_", gn, ".png"), 
        height = 4, width = 6, units = "in", res = 500)
    print(plot_grid(
        umap_gn + theme(legend.position = "none"),
        get_legend2(umap_gn),
        nrow = 1, rel_widths = c(4, 2)
    ))
    dev.off()
}
Scale for colour is already present.
Adding another scale for colour, which will replace the existing scale.

Scale for colour is already present.
Adding another scale for colour, which will replace the existing scale.

Luminal early signature over time

Use the top 100 marker genes for the luminal early cluster to calculate a gene set score for each cell.

luminal_early_markers <- read.delim("../scRNAseq_invivo_endometrium_epithelial_cell_atlas/marker_genes_all_leiden_res0.4_annot/leiden_res0.4_annot_cluster_luminal early_markers.txt") |>
    arrange(p.value) |>
    slice_head(n = 100) |>
    pull(gene)

organ$lum_early_score <- swissknife::normGenesetExpression(
    organ, genes = luminal_early_markers, 
    BPPARAM = MulticoreParam(workers = 8L))
Warning in asMethod(object): sparse->dense coercion: allocating vector of size
12.9 GiB
(ggrep <- ggplot(as.data.frame(colData(organ)), 
                 aes(x = timepoint, y = lum_early_score,
                     fill = timepoint)) + 
        geom_violin(draw_quantiles = c(0.25, 0.5, 0.75)) + 
        labs(x = "", y = "Luminal early gene set score") + 
        scale_fill_manual(values = sc_timepoint_colors) + 
        theme_bw(base_size = 14) + 
        theme(legend.position = "none",
              axis.text.x = element_text(size = 16, angle = 45, hjust = 1, vjust = 1),
              axis.text.y = element_text(size = 16),
              axis.title = element_text(size = 16)))

pdf("paper_figures/luminal_early_signature.pdf", width = 7, height = 5)
ggrep
dev.off()
png 
  2 
Figure 1

Instead of the top 100 marker genes for the luminal early cluster, we here use the top 100 upregulated genes in the luminal early cluster when compared to the luminal late cluster.

luminal_early_vs_late_markers <- read.delim("../scRNAseq_invivo_endometrium_epithelial_cell_atlas/pairwise_ttests_leiden_res0.4_annot/leiden_res0.4_annot_cluster_luminal early_vs_luminal late.txt") |>
    arrange(p.value) |>
    slice_head(n = 100) |>
    pull(gene)

organ$lum_early_vs_late_score <- swissknife::normGenesetExpression(
    organ, genes = luminal_early_vs_late_markers, 
    BPPARAM = MulticoreParam(workers = 8L))
Warning in asMethod(object): sparse->dense coercion: allocating vector of size
12.9 GiB
(ggrep3 <- ggplot(as.data.frame(colData(organ)), 
                 aes(x = timepoint, y = lum_early_vs_late_score,
                     fill = timepoint)) + 
        geom_violin(draw_quantiles = c(0.25, 0.5, 0.75)) + 
        labs(x = "", y = "Luminal early vs late gene set score") + 
        scale_fill_manual(values = sc_timepoint_colors) + 
        theme_bw(base_size = 14) + 
        theme(legend.position = "none",
              axis.text.x = element_text(size = 16, angle = 45, hjust = 1, vjust = 1),
              axis.text.y = element_text(size = 16),
              axis.title = element_text(size = 16)))

pdf("paper_figures/luminal_early_vs_late_signature.pdf", width = 7, height = 5)
ggrep3
dev.off()
png 
  2 
Figure 2

Luminal late signature over time

luminal_late_markers <- read.delim("../scRNAseq_invivo_endometrium_epithelial_cell_atlas/marker_genes_all_leiden_res0.4_annot/leiden_res0.4_annot_cluster_luminal late_markers.txt") |>
    arrange(p.value) |>
    slice_head(n = 100) |>
    pull(gene)

organ$lum_late_score <- swissknife::normGenesetExpression(
    organ, genes = luminal_late_markers, 
    BPPARAM = MulticoreParam(workers = 8L))
Warning in asMethod(object): sparse->dense coercion: allocating vector of size
12.9 GiB
(ggrep2 <- ggplot(as.data.frame(colData(organ)), 
                  aes(x = timepoint, y = lum_late_score,
                      fill = timepoint)) + 
        geom_violin(draw_quantiles = c(0.25, 0.5, 0.75)) + 
        labs(x = "", y = "Luminal late gene set score") + 
        scale_fill_manual(values = sc_timepoint_colors) + 
        theme_bw(base_size = 14) + 
        theme(legend.position = "none",
              axis.text.x = element_text(size = 16, angle = 45, hjust = 1, vjust = 1),
              axis.text.y = element_text(size = 16),
              axis.title = element_text(size = 16)))

pdf("paper_figures/luminal_late_signature.pdf", width = 7, height = 5)
ggrep2
dev.off()
png 
  2 
Figure 3

Overrepresentation analysis

# tested genes
timecourse_gene_bg <- read.delim("organoid_timecourse_gene_universe.txt", 
                                 header = FALSE)[, 1]

clusters_to_consider <- c("stress responsive",
                          "wound responsive 12h",
                          "wound responsive 24h",
                          "immunomodulatory",
                          "OxPhos high", 
                          "cycling",
                          "hormone responsive")


gprofiler2::set_base_url("https://biit.cs.ut.ee/gprofiler_archive3/e112_eg59_p19")
for (cl in clusters_to_consider) {
    mgenes <- read.delim(paste0(
        "marker_genes_within_lineage_all_leiden_res0.125_annot/leiden_res0.125_annot_cluster_", 
        cl, "_markers_within_lineage.txt")) |>
        arrange(p.value) |>
        slice_head(n = 100) |>
        pull(gene)
    
    gostres <- gost(
        query = mgenes, 
        organism = "hsapiens",
        ordered_query = FALSE, 
        multi_query = FALSE, 
        significant = TRUE, 
        exclude_iea = TRUE, 
        measure_underrepresentation = FALSE, 
        evcodes = FALSE, 
        user_threshold = 0.05, 
        correction_method = "fdr",
        domain_scope = "custom",
        custom_bg = timecourse_gene_bg, 
        numeric_ns = "", 
        sources = c("GO:BP"),
        as_short_link = FALSE, 
        highlight = TRUE
    )
    print(head(gostres$result |>
                   dplyr::select(term_name, p_value, term_size, query_size, intersection_size),
               n = 15) |>
              mutate(p_value = signif(p_value, digits = 3)))
    write.table(gostres$result |>
                    dplyr::select(term_name, p_value, term_size, query_size, intersection_size),
                file = paste0("gprofiler/", sub(" ", "_", cl), "_markers_gobp.tsv"),
                row.names = FALSE, quote = FALSE, col.names = TRUE, sep = "\t")
}
                                        term_name  p_value term_size query_size
1                    response to unfolded protein 8.90e-17       101         99
2     response to topologically incorrect protein 5.28e-16       116         99
3                                 protein folding 5.60e-15       163         99
4                               protein refolding 3.07e-14        23         99
5                                response to heat 9.56e-14        68         99
6                response to temperature stimulus 6.70e-12        94         99
7                 regulation of apoptotic process 7.14e-11       931         99
8             regulation of programmed cell death 1.42e-10       962         99
9                               apoptotic process 6.18e-10      1181         99
10                          programmed cell death 1.60e-09      1230         99
11                                     cell death 1.61e-09      1235         99
12                      cellular response to heat 2.80e-09        49         99
13                             response to stress 1.37e-08      2636         99
14                             protein maturation 1.94e-08       426         99
15 chaperone cofactor-dependent protein refolding 2.36e-08        26         99
   intersection_size
1                 17
2                 17
3                 18
4                 10
5                 13
6                 13
7                 29
8                 29
9                 31
10                31
11                31
12                 9
13                44
14                18
15                 7
                                                                                  term_name
1                                                                       ribosome biogenesis
2                                                      ribonucleoprotein complex biogenesis
3                                                                    rRNA metabolic process
4                                                                           rRNA processing
5                                                        ribosomal small subunit biogenesis
6                                                              endothelial cell development
7                                                        ribosomal large subunit biogenesis
8                                                                            RNA processing
9                                                             cellular component biogenesis
10                                                     establishment of endothelial barrier
11                                                                   maturation of SSU-rRNA
12                                                         endothelial cell differentiation
13 maturation of SSU-rRNA from tricistronic rRNA transcript (SSU-rRNA, 5.8S rRNA, LSU-rRNA)
14                                                                  endothelium development
15                                                              epithelial cell development
    p_value term_size query_size intersection_size
1  9.68e-13       208         98                18
2  2.32e-10       342         98                19
3  3.30e-09       181         98                14
4  4.00e-08       145         98                12
5  3.39e-06        69         98                 8
6  5.14e-05        41         98                 6
7  1.32e-04        49         98                 6
8  2.16e-04       768         98                18
9  3.14e-04      2533         98                35
10 4.47e-04        36         98                 5
11 6.96e-04        40         98                 5
12 1.05e-03        76         98                 6
13 1.72e-03        24         98                 4
14 2.37e-03        91         98                 6
15 2.37e-03        91         98                 6
                                       term_name  p_value term_size query_size
1                actin cytoskeleton organization 0.000116       572        100
2                      cytoskeleton organization 0.000116      1206        100
3                   actin filament-based process 0.000316       632        100
4                                 cell migration 0.003460      1033        100
5                                  cell adhesion 0.007150      1008        100
6                                     locomotion 0.008380       850        100
7                                  cell motility 0.010300      1168        100
8          post-Golgi vesicle-mediated transport 0.010300        98        100
9                           cell-matrix adhesion 0.017300       168        100
10                    vesicle-mediated transport 0.017300      1128        100
11                  regulation of cell migration 0.017300       675        100
12               actin filament depolymerization 0.017300        38        100
13     positive regulation of biological process 0.018500      4486        100
14 regulation of actin cytoskeleton organization 0.018500       248        100
15                   regulation of cell motility 0.018500       718        100
   intersection_size
1                 17
2                 25
3                 17
4                 20
5                 19
6                 17
7                 20
8                  6
9                  7
10                19
11                14
12                 4
13                46
14                 8
15                14
                                                                   term_name
1                                               defense response to symbiont
2                                         defense response to other organism
3                                                            immune response
4                                                response to biotic stimulus
5                                       response to external biotic stimulus
6                                                 response to other organism
7  biological process involved in interspecies interaction between organisms
8                                                     innate immune response
9                                                           defense response
10                                                     immune system process
11                                                       response to peptide
12                                                      response to cytokine
13                                                 defense response to virus
14                                             response to external stimulus
15                peptide antigen assembly with MHC class II protein complex
    p_value term_size query_size intersection_size
1  7.20e-12       645         97                26
2  7.20e-12       699         97                27
3  7.20e-12      1043         97                32
4  1.22e-11       863         97                29
5  2.40e-11       831         97                28
6  2.40e-11       831         97                28
7  8.05e-11       950         97                29
8  4.29e-10       545         97                22
9  4.85e-10      1031         97                29
10 7.81e-10      1561         97                35
11 5.05e-09       506         97                20
12 5.05e-09       506         97                20
13 5.65e-09       207         97                14
14 6.25e-09      1335         97                31
15 1.11e-08        13         97                 6
                                                 term_name  p_value term_size
1                                oxidative phosphorylation 5.41e-20       125
2                                      aerobic respiration 1.51e-19       160
3                                     cellular respiration 2.04e-18       184
4      energy derivation by oxidation of organic compounds 3.82e-16       239
5           generation of precursor metabolites and energy 3.47e-15       349
6                                 ATP biosynthetic process 5.47e-14        88
7  purine ribonucleoside triphosphate biosynthetic process 5.71e-14        90
8      purine nucleoside triphosphate biosynthetic process 5.71e-14        90
9                 proton motive force-driven ATP synthesis 8.18e-14        72
10        ribonucleoside triphosphate biosynthetic process 1.01e-13        95
11              purine ribonucleotide biosynthetic process 1.63e-13       124
12            nucleoside triphosphate biosynthetic process 2.05e-13       101
13                  purine nucleotide biosynthetic process 2.27e-13       157
14                     ribonucleotide biosynthetic process 3.74e-13       133
15                   ribose phosphate biosynthetic process 6.11e-13       138
   query_size intersection_size
1          96                20
2          96                21
3          96                21
4          96                21
5          96                23
6          96                14
7          96                14
8          96                14
9          96                13
10         96                14
11         96                15
12         96                14
13         96                16
14         96                15
15         96                15
                                   term_name  p_value term_size query_size
1                         mitotic cell cycle 7.80e-38       754         99
2                         cell cycle process 2.32e-37      1042         99
3                 mitotic cell cycle process 1.18e-36       630         99
4                    chromosome organization 8.03e-35       525         99
5                                 cell cycle 1.30e-34      1319         99
6                     chromosome segregation 8.04e-31       346         99
7                   mitotic nuclear division 1.78e-28       246         99
8                           nuclear division 5.37e-28       349         99
9           regulation of cell cycle process 5.83e-28       586         99
10            nuclear chromosome segregation 9.72e-28       263         99
11              sister chromatid segregation 1.81e-27       214         99
12      mitotic sister chromatid segregation 9.74e-27       177         99
13                         organelle fission 1.66e-26       393         99
14      regulation of chromosome segregation 3.16e-26       121         99
15 positive regulation of cell cycle process 3.46e-26       187         99
   intersection_size
1                 51
2                 56
3                 47
4                 43
5                 58
6                 35
7                 30
8                 33
9                 39
10                30
11                28
12                26
13                33
14                23
15                26
                                             term_name  p_value term_size
1                          macromolecule glycosylation 0.000162       173
2                                protein glycosylation 0.000162       173
3                                        glycosylation 0.000213       192
4                       glycoprotein metabolic process 0.000213       289
5                    glycoprotein biosynthetic process 0.001560       245
6                     commissural neuron axon guidance 0.006380         9
7                       protein O-linked glycosylation 0.006380        88
8         carbohydrate derivative biosynthetic process 0.014900       482
9     branching involved in blood vessel morphogenesis 0.021000        14
10 positive regulation of smooth muscle cell migration 0.023500        15
11                                endoderm development 0.031200        47
12           carbohydrate derivative metabolic process 0.031200       747
13            L-arginine import across plasma membrane 0.031200         4
14                  central nervous system development 0.031200       484
15                positive regulation of cell adhesion 0.031200       324
   query_size intersection_size
1         100                10
2         100                10
3         100                10
4         100                12
5         100                10
6         100                 3
7         100                 6
8         100                12
9         100                 3
10        100                 3
11        100                 4
12        100                14
13        100                 2
14        100                11
15        100                 9
plotdf <- do.call(bind_rows, lapply(clusters_to_consider, function(cl) {
    read.delim(paste0("gprofiler/", sub(" ", "_", cl), "_markers_gobp.tsv")) |>
        slice_head(n = 5) |>
        mutate(cluster = cl) |>
        mutate(term_size = as.numeric(term_size),
               p_value = as.numeric(p_value))
})) |>
    mutate(cluster2 = sub("immunomodulatory", "immuno-\nmodulatory", 
                          gsub(" ", "\n", cluster))) |>
    mutate(cluster = factor(cluster, levels = rev(clusters_to_consider))) |>
    mutate(cluster2 = factor(cluster2, levels = sub("immunomodulatory", "immuno-\nmodulatory",
                                                    gsub(" ", "\n", levels(cluster)))))
(ggprof <- ggplot(plotdf, aes(x = -log10(p_value), y = fct_reorder(term_name, -p_value))) + 
    geom_point(size = 4, aes(color = cluster)) + 
    scale_color_manual(values = sc_leiden_res0.125_annot_colors) + 
    facet_grid2(cluster2 ~ .,
                strip = strip_themed(clip = "off", background_y = elem_list_rect(
                    fill = alpha(sc_leiden_res0.125_annot_colors[levels(plotdf$cluster)], 0.5))), 
                scales = "free_y") + 
    labs(x = "-log10 FDR", y = "") + 
    theme_cowplot() + 
    theme(legend.position = "none"))

pdf("paper_figures/gprofiler_selclusters.pdf", width = 7, height = 8)
ggprof
dev.off()
png 
  2 

Save count matrix and cell type annotations

if (!file.exists("upload_data")) {
    dir.create("upload_data", showWarnings = FALSE, recursive = FALSE)
}
writeMM(assay(organ, "counts"), 
        file = "upload_data/ivmc_scrnaseq_counts.mtx")
NULL
R.utils::gzip(filename = "upload_data/ivmc_scrnaseq_counts.mtx", 
              compression = 9, overwrite = TRUE, remove = TRUE)
write.table(rownames(organ), 
            file = "upload_data/ivmc_scrnaseq_genes.tsv", 
            row.names = FALSE, col.names = FALSE, sep = "\t", quote = FALSE)
R.utils::gzip(filename = "upload_data/ivmc_scrnaseq_genes.tsv", 
              compression = 9, overwrite = TRUE, remove = TRUE)
write.table(colnames(organ), 
            file = "upload_data/ivmc_scrnaseq_cells.tsv", 
            row.names = FALSE, col.names = FALSE, sep = "\t", quote = FALSE)
R.utils::gzip(filename = "upload_data/ivmc_scrnaseq_cells.tsv", 
              compression = 9, overwrite = TRUE, remove = TRUE)

write.table(
    cbind(colData(organ)[, c("cell", "leiden_res0.125_annot", 
                             "pred_from_invivo", "timepoint")], 
          reducedDim(organ, "UMAP_sketch")[, 1:2]),
    file = "upload_data/ivmc_scrnaseq_annot.tsv", 
    row.names = FALSE, col.names = TRUE, sep = "\t")
R.utils::gzip(filename = "upload_data/ivmc_scrnaseq_annot.tsv", 
              compression = 9, overwrite = TRUE, remove = TRUE)

Session info

# gProfiler/GO version
get_version_info(organism = "hsapiens")$gprofiler_version
[1] "e112_eg59_p19_25aa4782"
get_version_info(organism = "hsapiens")$sources$`GO:MF`
$name
[1] "molecular function"

$version
[1] "annotations: BioMart\nclasses: releases/2024-10-27"
sessionInfo()
R version 4.4.2 (2024-10-31)
Platform: x86_64-pc-linux-gnu
Running under: Rocky Linux 8.10 (Green Obsidian)

Matrix products: default
BLAS/LAPACK: FlexiBLAS OPENBLAS;  LAPACK version 3.12.0

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       

time zone: Europe/Zurich
tzcode source: system (glibc)

attached base packages:
[1] grid      stats4    stats     graphics  grDevices utils     datasets 
[8] methods   base     

other attached packages:
 [1] forcats_1.0.0               gprofiler2_0.2.3           
 [3] Matrix_1.7-1                BiocParallel_1.40.2        
 [5] swissknife_0.43             ggh4x_0.3.0                
 [7] scales_1.3.0.9000           ggtext_0.1.2               
 [9] circlize_0.4.16             ComplexHeatmap_2.22.0      
[11] cowplot_1.1.3               tidyr_1.3.1                
[13] dplyr_1.1.4                 scater_1.34.1              
[15] ggplot2_3.5.2               scuttle_1.16.0             
[17] SingleCellExperiment_1.28.1 SummarizedExperiment_1.36.0
[19] Biobase_2.66.0              GenomicRanges_1.58.0       
[21] GenomeInfoDb_1.42.3         IRanges_2.40.1             
[23] S4Vectors_0.44.0            BiocGenerics_0.52.0        
[25] MatrixGenerics_1.18.1       matrixStats_1.5.0          

loaded via a namespace (and not attached):
 [1] bitops_1.0-9            gridExtra_2.3           rlang_1.1.6            
 [4] magrittr_2.0.3          clue_0.3-66             GetoptLong_1.0.5       
 [7] compiler_4.4.2          png_0.1-8               vctrs_0.6.5            
[10] stringr_1.5.1           pkgconfig_2.0.3         shape_1.4.6.1          
[13] crayon_1.5.3            fastmap_1.2.0           XVector_0.46.0         
[16] labeling_0.4.3          rmarkdown_2.29          markdown_2.0           
[19] UCSC.utils_1.2.0        ggbeeswarm_0.7.2        purrr_1.0.4            
[22] bluster_1.16.0          xfun_0.52               zlibbioc_1.52.0        
[25] beachmat_2.22.0         litedown_0.7            jsonlite_2.0.0         
[28] DelayedArray_0.32.0     irlba_2.3.5.1           parallel_4.4.2         
[31] cluster_2.1.6           R6_2.6.1                stringi_1.8.7          
[34] RColorBrewer_1.1-3      limma_3.62.2            Rcpp_1.0.14            
[37] iterators_1.0.14        knitr_1.50              usethis_3.1.0          
[40] R.utils_2.13.0          igraph_2.1.4            tidyselect_1.2.1       
[43] dichromat_2.0-0.1       abind_1.4-8             yaml_2.3.10            
[46] viridis_0.6.5           doParallel_1.0.17       codetools_0.2-20       
[49] lattice_0.22-6          tibble_3.2.1            withr_3.0.2            
[52] evaluate_1.0.3          xml2_1.3.8              pillar_1.10.2          
[55] KernSmooth_2.23-24      foreach_1.5.2           plotly_4.10.4          
[58] generics_0.1.3          RCurl_1.98-1.17         commonmark_1.9.5       
[61] glue_1.8.0              metapod_1.14.0          lazyeval_0.2.2         
[64] tools_4.4.2             BiocNeighbors_2.0.1     data.table_1.17.0      
[67] ScaledMatrix_1.14.0     locfit_1.5-9.12         scran_1.34.0           
[70] fs_1.6.6                edgeR_4.4.2             colorspace_2.1-1       
[73] GenomeInfoDbData_1.2.13 beeswarm_0.4.0          BiocSingular_1.22.0    
[76] vipor_0.4.7             cli_3.6.4               rsvd_1.0.5             
[79] S4Arrays_1.6.0          viridisLite_0.4.2       gtable_0.3.6           
[82] R.methodsS3_1.8.2       digest_0.6.37           dqrng_0.4.1            
[85] SparseArray_1.6.2       ggrepel_0.9.6           rjson_0.2.23           
[88] htmlwidgets_1.6.4       farver_2.1.2            R.oo_1.27.0            
[91] htmltools_0.5.8.1       lifecycle_1.0.4         httr_1.4.7             
[94] statmod_1.5.0           GlobalOptions_0.1.2     gridtext_0.1.5