suppressPackageStartupMessages({
library(SingleCellExperiment)
library(scuttle)
library(scater)
library(dplyr)
library(tidyr)
library(ggplot2)
library(cowplot)
library(ComplexHeatmap)
library(circlize)
library(ggtext)
library(scales)
library(ggh4x)
library(swissknife)
library(BiocParallel)
library(Matrix)
library(gprofiler2)
library(forcats)
})
source("../params.R")
source("../helpers.R")
if (!file.exists("paper_figures")) {
dir.create("paper_figures", showWarnings = FALSE, recursive = FALSE)
}
if (!file.exists("gprofiler")) {
dir.create("gprofiler", showWarnings = FALSE, recursive = FALSE)
}Generating plots for paper (scRNA-seq time course)
Load packages
Load data
organ <- readRDS("organoid_scrnaseq_timecourse.rds")
# check for consistency
stopifnot(all(organ$leiden_res0.125_annot %in% sc_leiden_res0.125_annot_order))
stopifnot(all(organ$pred_from_invivo %in% tissue_leiden_res0.4_annot_order))
organ$leiden_res0.125_annot <- factor(organ$leiden_res0.125_annot,
levels = sc_leiden_res0.125_annot_order)
organ$pred_from_invivo <- factor(organ$pred_from_invivo,
levels = tissue_leiden_res0.4_annot_order)
stopifnot(all(organ$timepoint %in% sc_timepoint_order))
organ$timepoint <- factor(organ$timepoint,
levels = sc_timepoint_order)Numeric summaries
# number of genes and cells
dim(organ)[1] 20231 85628
Figure 4B (UMAPs for time points)
x <- makePerCellDF(organ, use.coldata = TRUE, use.dimred = TRUE)
(umap_facet <- ggplot(x, aes(x = UMAP_sketch.1, y = UMAP_sketch.2)) +
geom_point(data = transform(x, timepoint = NULL),
color = "grey95", alpha = 1, size = 0.5) +
geom_point(aes(color = timepoint), alpha = 0.5, size = 0.5) +
facet_wrap2(~ timepoint, nrow = 1,
strip = strip_themed(clip = "off", background_x = elem_list_rect(
fill = alpha(sc_timepoint_colors[levels(x$timepoint)], 0.5)))) +
scale_color_manual(values = sc_timepoint_colors) +
theme_void() +
theme(axis.text = element_blank(),
axis.title = element_blank(),
strip.text = element_text(size = 20, hjust = 0.5,
margin = margin(0.15, 0, 0.15, 0, "cm")),
legend.position = "none",
panel.background = element_rect(fill = "white", color = NA),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.line = element_line(color = "black"),
strip.background = element_rect(fill = "white", color = "white")
))png("paper_figures/UMAP_timepoints_separate.png",
height = 4, width = 22, units = "in", res = 500)
umap_facet
dev.off()png
2
Figure 4C (UMAP with clusters)
(umap_clusters <- plotReducedDim(
organ, dimred = "UMAP_sketch",
colour_by = "leiden_res0.125_annot", point_size = 0.5) +
scale_color_manual(values = sc_leiden_res0.125_annot_colors, name = "") +
guides(color = guide_legend(override.aes = list(size = 5, alpha = 1,
shape = 15),
ncol = 2)) +
theme_void() +
theme(legend.text = element_text(size = 12)))Scale for colour is already present.
Adding another scale for colour, which will replace the existing scale.
png("paper_figures/UMAP_organoids_clusters.png",
height = 4, width = 4, units = "in", res = 500)
umap_clusters + theme(legend.position = "none")
dev.off()png
2
pdf("paper_figures/UMAP_organoids_clusters_legend.pdf",
height = 3, width = 5)
grid.newpage()
grid.draw(get_legend2(umap_clusters +
theme(legend.position = "bottom",
legend.text = element_text(size = 12))))
dev.off()png
2
pdf("paper_figures/UMAP_organoids_clusters.pdf", height = 4, width = 4)
umap_clusters + theme(legend.position = "none")
dev.off()png
2
Figure 4H (predictions from atlas)
(umap_preds <- plotReducedDim(
organ, "UMAP_sketch", colour_by = "pred_from_invivo", point_size = 0.5) +
scale_color_manual(values = tissue_leiden_res0.4_annot_colors, name = "") +
theme_void() +
theme(legend.text = element_text(size = 12),
legend.title = element_text(size = 12),
legend.key.size = unit(1.5, "lines")) +
guides(color = guide_legend(override.aes = list(size = 4), title = NULL))
)Scale for colour is already present.
Adding another scale for colour, which will replace the existing scale.
png("paper_figures/UMAP_organoids_pred_from_invivo.png",
height = 4, width = 4, units = "in", res = 500)
umap_preds + theme(legend.position = "none")
dev.off()png
2
pdf("paper_figures/UMAP_organoids_pred_from_invivo_legend.pdf",
height = 1.75, width = 9)
grid.newpage()
grid.draw(get_legend2(umap_preds + theme(legend.position = "bottom",
legend.text = element_text(size = 12))))
dev.off()png
2
Figure 4E (dot plot)
markers <- intersect(sc_timecourse_markers, rownames(organ))
(dotplot_norm <- .makeDotPlot(
dat = organ, markers = markers,
cluster_column = "leiden_res0.125_annot",
cluster_order = sc_leiden_res0.125_annot_order,
cluster_colors = sc_leiden_res0.125_annot_colors,
transpose = TRUE,
split_by_ciliated = TRUE))Warning: Removed 447 rows containing missing values or values outside the scale range
(`geom_point()`).
pdf("paper_figures/dotplot_organoids_normalized.pdf", height = 6, width = 20)
dotplot_normWarning: Removed 447 rows containing missing values or values outside the scale range
(`geom_point()`).
dev.off()png
2
png("paper_figures/dotplot_organoids_normalized.png", height = 6, width = 20,
unit = "in", res = 500)
dotplot_normWarning: Removed 447 rows containing missing values or values outside the scale range
(`geom_point()`).
dev.off()png
2
Dot plot of WNT ligands and FZD receptors
fzd <- paste0("FZD", 1:10)
(dotplot_norm_fzd <- .makeDotPlot(
dat = organ, markers = fzd,
cluster_column = "leiden_res0.125_annot",
cluster_order = sc_leiden_res0.125_annot_order,
cluster_colors = sc_leiden_res0.125_annot_colors,
transpose = TRUE,
split_by_ciliated = FALSE))Warning: Removed 118 rows containing missing values or values outside the scale range
(`geom_point()`).
pdf("paper_figures/dotplot_organoids_fzd_normalized.pdf", height = 6, width = 7)
dotplot_norm_fzdWarning: Removed 118 rows containing missing values or values outside the scale range
(`geom_point()`).
dev.off()png
2
png("paper_figures/dotplot_organoids_fzd_normalized.png", height = 6, width = 7,
unit = "in", res = 500)
dotplot_norm_fzdWarning: Removed 118 rows containing missing values or values outside the scale range
(`geom_point()`).
dev.off()png
2
Figures 4D + 4G + 4I, Extended Data 9G (relative abundance over time)
Time course clusters
df <- data.frame(colData(organ)[, c("timepoint", "leiden_res0.125_annot")]) |>
group_by(timepoint, leiden_res0.125_annot) |>
tally() |>
group_by(timepoint) |>
mutate(perc = n / sum(n) * 100) |>
ungroup() |>
complete(timepoint, leiden_res0.125_annot, fill = list(n = 0, perc = 0)) |>
mutate(cluster_group = ifelse(grepl("ciliated", leiden_res0.125_annot),
"ciliated", "non-ciliated"))
dfdonor <- data.frame(colData(organ)[, c("timepoint", "donor", "leiden_res0.125_annot")]) |>
group_by(timepoint, donor, leiden_res0.125_annot) |>
tally() |>
group_by(timepoint, donor) |>
mutate(perc = n / sum(n) * 100) |>
ungroup() |>
complete(timepoint, donor, leiden_res0.125_annot, fill = list(n = 0, perc = 0))
# timepoint vs. relative abundance
# ... ciliated
(plot_relab_cil <- ggplot(df |> dplyr::filter(cluster_group == "ciliated") |>
droplevels(),
aes(x = timepoint, y = perc, color = leiden_res0.125_annot)) +
geom_point() +
geom_line(aes(x = as.numeric(timepoint)), linewidth = 1) +
theme_cowplot() +
labs(x = "", y = "Relative\nabundance (%)",
title = "") +
theme(legend.position = "bottom",
axis.text.x = element_text(angle = 45, hjust = 1, size = 16),
axis.text.y = element_text(size = 16),
axis.title = element_text(size = 16),
strip.text = element_text(size = 16, margin = margin(0.2, 0, 0.2, 0, "cm")),
strip.background = element_rect(fill = "white", color = "white")) +
facet_wrap(~ cluster_group, ncol = 1) +
scale_color_manual(values = sc_leiden_res0.125_annot_colors, name = ""))pdf(paste0("paper_figures/relative_abundance_leiden_res0.125_annot_ciliated.pdf"),
height = 4.5, width = 6)
print(plot_relab_cil + theme(legend.position = "none"))
dev.off()png
2
pdf("paper_figures/relative_abundance_leiden_res0.125_annot_ciliated_legend.pdf",
height = 3, width = 4)
grid.newpage()
grid.draw(get_legend2(plot_relab_cil +
theme(legend.position = "bottom",
legend.text = element_text(size = 12)) +
guides(color = guide_legend(ncol = 1))))
dev.off()png
2
# ... non-ciliated
(plot_relab_noncil <- ggplot(df |> dplyr::filter(cluster_group == "non-ciliated") |>
droplevels(),
aes(x = timepoint, y = perc, color = leiden_res0.125_annot)) +
geom_point() +
geom_line(aes(x = as.numeric(timepoint)), linewidth = 1) +
theme_cowplot() +
labs(x = "", y = "Relative\nabundance (%)",
title = "") +
theme(legend.position = "bottom",
axis.text.x = element_text(angle = 45, hjust = 1, size = 16),
axis.text.y = element_text(size = 16),
axis.title = element_text(size = 16),
strip.text = element_text(size = 16, margin = margin(0.2, 0, 0.2, 0, "cm")),
strip.background = element_rect(fill = "white", color = "white")) +
facet_wrap(~ cluster_group, ncol = 1) +
scale_color_manual(values = sc_leiden_res0.125_annot_colors, name = ""))pdf(paste0("paper_figures/relative_abundance_leiden_res0.125_annot_nonciliated.pdf"),
height = 4.5, width = 6)
print(plot_relab_noncil + theme(legend.position = "none"))
dev.off()png
2
pdf("paper_figures/relative_abundance_leiden_res0.125_annot_nonciliated_legend.pdf",
height = 2, width = 4.5)
grid.newpage()
grid.draw(get_legend2(plot_relab_noncil +
theme(legend.position = "bottom",
legend.text = element_text(size = 12)) +
guides(color = guide_legend(ncol = 2))))
dev.off()png
2
# one panel per cluster
(plot_relab_facet <- ggplot(
bind_rows(df |> mutate(donor = "overall"), dfdonor),
aes(x = timepoint, y = perc, group = donor,
alpha = donor == "overall",
linewidth = donor == "overall", color = leiden_res0.125_annot)) +
geom_point() +
geom_line(aes(x = as.numeric(timepoint))) +
scale_alpha_manual(values = c(`TRUE` = 1, `FALSE` = 0.25)) +
scale_linewidth_manual(values = c(`TRUE` = 2, `FALSE` = 1)) +
theme_cowplot() +
labs(x = "", y = "Relative abundance (%)",
title = "") +
scale_color_manual(values = sc_leiden_res0.125_annot_colors, name = "") +
facet_wrap(~ leiden_res0.125_annot, scales = "free_y") +
guides(alpha = "none", linewidth = "none") +
theme(legend.position = "none",
axis.text.x = element_text(angle = 45, hjust = 1),
strip.text = element_text(size = 16),
strip.background = element_rect(fill = "white", color = "white")))pdf(paste0("paper_figures/relative_abundance_cluster_leiden_res0.125_annot.pdf"),
height = 9, width = 14)
print(plot_relab_facet)
dev.off()png
2
# bar plot
(plotbar <- ggplot(df, aes(x = fct_rev(timepoint), y = perc, fill = leiden_res0.125_annot)) +
geom_col() +
theme_cowplot() +
labs(x = "", y = "Relative abundance (%)", title = "") +
coord_flip() +
theme(legend.position = "bottom",
axis.text = element_text(size = 20),
axis.title = element_text(size = 20)) +
scale_fill_manual(values = sc_leiden_res0.125_annot_colors, name = ""))pdf("paper_figures/relative_abundance_barplot_leiden_res0.125_annot.pdf",
height = 5, width = 8)
plotbar + theme(legend.position = "none")
dev.off()png
2
png("paper_figures/relative_abundance_barplot_leiden_res0.125_annot.png",
height = 5, width = 8, unit = "in", res = 400)
plotbar + theme(legend.position = "none")
dev.off()png
2
pdf("paper_figures/relative_abundance_barplot_leiden_res0.125_annot_legend.pdf",
height = 1.75, width = 9)
grid.newpage()
grid.draw(get_legend2(plotbar + theme(legend.position = "bottom",
legend.text = element_text(size = 12))))
dev.off()png
2
Predicted labels from atlas
df <- data.frame(colData(organ)[, c("timepoint", "pred_from_invivo")]) |>
group_by(timepoint, pred_from_invivo) |>
tally() |>
group_by(timepoint) |>
mutate(perc = n / sum(n) * 100) |>
ungroup() |>
complete(timepoint, pred_from_invivo, fill = list(n = 0, perc = 0))
dfdonor <- data.frame(colData(organ)[, c("timepoint", "donor", "pred_from_invivo")]) |>
group_by(timepoint, donor, pred_from_invivo) |>
tally() |>
group_by(timepoint, donor) |>
mutate(perc = n / sum(n) * 100) |>
ungroup() |>
complete(timepoint, donor, pred_from_invivo, fill = list(n = 0, perc = 0))
# timepoint vs. relative abundance
(plot_relab <- ggplot(df, aes(x = timepoint, y = perc, color = pred_from_invivo)) +
geom_point() +
geom_line(aes(x = as.numeric(timepoint)), linewidth = 1) +
theme_cowplot() +
labs(x = "", y = "Relative\nabundance (%)",
title = "") +
theme(legend.position = "bottom",
axis.text.x = element_text(angle = 45, hjust = 1, size = 16),
axis.text.y = element_text(size = 16),
axis.title = element_text(size = 16),
strip.background = element_rect(fill = "white", color = "black")) +
scale_color_manual(values = tissue_leiden_res0.4_annot_colors, name = ""))pdf(paste0("paper_figures/relative_abundance_pred_from_invivo.pdf"),
height = 4, width = 6)
print(plot_relab + theme(legend.position = "none"))
dev.off()png
2
pdf("paper_figures/relative_abundance_pred_from_invivo_legend.pdf",
height = 1.75, width = 11)
grid.newpage()
grid.draw(get_legend2(plot_relab + theme(legend.position = "bottom",
legend.text = element_text(size = 12))))
dev.off()png
2
# one panel per cluster
(plot_relab_facet <- ggplot(
bind_rows(df |> mutate(donor = "overall"), dfdonor),
aes(x = timepoint, y = perc, group = donor,
alpha = donor == "overall",
linewidth = donor == "overall", color = pred_from_invivo)) +
geom_point() +
geom_line(aes(x = as.numeric(timepoint))) +
scale_alpha_manual(values = c(`TRUE` = 1, `FALSE` = 0.25)) +
scale_linewidth_manual(values = c(`TRUE` = 2, `FALSE` = 1)) +
theme_cowplot() +
labs(x = "", y = "Relative abundance (%)",
title = "") +
theme(legend.position = "none",
axis.text.x = element_text(angle = 45, hjust = 1)) +
scale_color_manual(values = tissue_leiden_res0.4_annot_colors, name = "") +
facet_wrap(~ pred_from_invivo, scales = "free_y") +
guides(alpha = "none", linewidth = "none") +
theme(strip.text = element_text(size = 16),
strip.background = element_rect(fill = "white", color = "white")))pdf(paste0("paper_figures/relative_abundance_cluster_pred_from_invivo.pdf"),
height = 9, width = 14)
print(plot_relab_facet)
dev.off()png
2
# bar plot
(plotbar <- ggplot(df, aes(x = timepoint, y = perc, fill = pred_from_invivo)) +
geom_col() +
theme_cowplot() +
labs(x = "", y = "Relative abundance (%)", title = "") +
coord_flip() +
theme(legend.position = "bottom",
axis.text = element_text(size = 20),
axis.title = element_text(size = 20)) +
scale_fill_manual(values = tissue_leiden_res0.4_annot_colors, name = ""))pdf("paper_figures/relative_abundance_barplot_pred_from_invivo.pdf",
height = 5, width = 8)
plotbar + theme(legend.position = "none")
dev.off()png
2
pdf("paper_figures/relative_abundance_barplot_pred_from_invivo_legend.pdf",
height = 1.75, width = 11)
grid.newpage()
grid.draw(get_legend2(plotbar + theme(legend.position = "bottom",
legend.text = element_text(size = 12))))
dev.off()png
2
Extended Data 9 (UMAPs)
(umap_donor <- plotReducedDim(
organ, "UMAP_sketch", colour_by = "donorCol",
point_size = 0.5) +
scale_color_manual(values = donor_colors) +
guides(color = guide_legend(override.aes = list(size = 4),
title = "Donor")) +
theme_void() +
theme(axis.title = element_blank(),
axis.text = element_blank(),
legend.text = element_text(size = 20),
legend.title = element_text(size = 20),
legend.key.size = unit(1.5, "lines")))Scale for colour is already present.
Adding another scale for colour, which will replace the existing scale.
png("paper_figures/UMAP_organoids_donorCol.png",
height = 4, width = 7, units = "in", res = 500)
plot_grid(
umap_donor + theme(legend.position = "none"),
get_legend2(umap_donor),
nrow = 1, rel_widths = c(4, 3)
)
dev.off()png
2
(umap_cc <- plotReducedDim(
organ, "UMAP_sketch", colour_by = "SeuratCC",
point_size = 0.5) +
scale_color_manual(values = cc_colors) +
guides(color = guide_legend(override.aes = list(size = 4),
title = "Cell cycle")) +
theme_void() +
theme(axis.title = element_blank(),
axis.text = element_blank(),
legend.text = element_text(size = 20),
legend.title = element_text(size = 20),
legend.key.size = unit(1.5, "lines")))Scale for colour is already present.
Adding another scale for colour, which will replace the existing scale.
png("paper_figures/UMAP_organoids_seuratCC.png",
height = 4, width = 7, units = "in", res = 500)
plot_grid(
umap_cc + theme(legend.position = "none"),
get_legend2(umap_cc),
nrow = 1, rel_widths = c(4, 3)
)
dev.off()png
2
(umap_tricycle <- plotReducedDim(
organ, "UMAP_sketch", colour_by = "tricyclePosition",
point_size = 0.5) +
guides(color = guide_colorbar(title = "tricycle\nposition")) +
theme_void() +
theme(axis.title = element_blank(),
axis.text = element_blank(),
legend.text = element_text(size = 20),
legend.title = element_text(size = 20),
legend.key.size = unit(1.5, "lines")))png("paper_figures/UMAP_organoids_tricyclePosition.png",
height = 4, width = 7, units = "in", res = 500)
plot_grid(
umap_tricycle + theme(legend.position = "none"),
get_legend2(umap_tricycle),
nrow = 1, rel_widths = c(4, 3)
)
dev.off()png
2
(umap_intronic <- plotReducedDim(
organ, "UMAP_sketch", colour_by = "intronic_fraction",
point_size = 0.5) +
guides(color = guide_colorbar(title = "intronic\nfraction")) +
theme_void() +
theme(axis.title = element_blank(),
axis.text = element_blank(),
legend.text = element_text(size = 20),
legend.title = element_text(size = 20),
legend.key.size = unit(1.5, "lines")))png("paper_figures/UMAP_organoids_intronic_fraction.png",
height = 4, width = 7, units = "in", res = 500)
plot_grid(
umap_intronic + theme(legend.position = "none"),
get_legend2(umap_intronic),
nrow = 1, rel_widths = c(4, 3)
)
dev.off()png
2
(umap_mt <- plotReducedDim(
organ, "UMAP_sketch", colour_by = "subsets_MT_percent",
point_size = 0.5) +
guides(color = guide_colorbar(title = "% MT")) +
theme_void() +
theme(axis.title = element_blank(),
axis.text = element_blank(),
legend.text = element_text(size = 20),
legend.title = element_text(size = 20),
legend.key.size = unit(1.5, "lines")))png("paper_figures/UMAP_organoids_subsets_MT_percent.png",
height = 4, width = 7, units = "in", res = 500)
plot_grid(
umap_mt + theme(legend.position = "none"),
get_legend2(umap_mt),
nrow = 1, rel_widths = c(4, 3)
)
dev.off()png
2
Figure 4F (UMAPs)
for (gn in c("WNT7A", "E2F7")) {
umap_gn <- plotReducedDim(
organ, "UMAP_sketch", colour_by = gn,
point_size = 1) +
scale_color_gradientn(colors = c("#d9d9d9", "#f4a582", "#d6604d","#8b0000", '#5c0000')) +
labs(color = gn) +
theme_void() +
theme(axis.title = element_blank(),
axis.text = element_blank(),
legend.text = element_text(size = 20),
legend.title = element_text(size = 20),
legend.key.size = unit(1.5, "lines"))
print(umap_gn)
png(paste0("paper_figures/UMAP_organoids_", gn, ".png"),
height = 4, width = 6, units = "in", res = 500)
print(plot_grid(
umap_gn + theme(legend.position = "none"),
get_legend2(umap_gn),
nrow = 1, rel_widths = c(4, 2)
))
dev.off()
}Scale for colour is already present.
Adding another scale for colour, which will replace the existing scale.
Scale for colour is already present.
Adding another scale for colour, which will replace the existing scale.
Luminal early signature over time
Use the top 100 marker genes for the luminal early cluster to calculate a gene set score for each cell.
luminal_early_markers <- read.delim("../scRNAseq_invivo_endometrium_epithelial_cell_atlas/marker_genes_all_leiden_res0.4_annot/leiden_res0.4_annot_cluster_luminal early_markers.txt") |>
arrange(p.value) |>
slice_head(n = 100) |>
pull(gene)
organ$lum_early_score <- swissknife::normGenesetExpression(
organ, genes = luminal_early_markers,
BPPARAM = MulticoreParam(workers = 8L))Warning in asMethod(object): sparse->dense coercion: allocating vector of size
12.9 GiB
(ggrep <- ggplot(as.data.frame(colData(organ)),
aes(x = timepoint, y = lum_early_score,
fill = timepoint)) +
geom_violin(draw_quantiles = c(0.25, 0.5, 0.75)) +
labs(x = "", y = "Luminal early gene set score") +
scale_fill_manual(values = sc_timepoint_colors) +
theme_bw(base_size = 14) +
theme(legend.position = "none",
axis.text.x = element_text(size = 16, angle = 45, hjust = 1, vjust = 1),
axis.text.y = element_text(size = 16),
axis.title = element_text(size = 16)))
pdf("paper_figures/luminal_early_signature.pdf", width = 7, height = 5)
ggrep
dev.off()png
2
Instead of the top 100 marker genes for the luminal early cluster, we here use the top 100 upregulated genes in the luminal early cluster when compared to the luminal late cluster.
luminal_early_vs_late_markers <- read.delim("../scRNAseq_invivo_endometrium_epithelial_cell_atlas/pairwise_ttests_leiden_res0.4_annot/leiden_res0.4_annot_cluster_luminal early_vs_luminal late.txt") |>
arrange(p.value) |>
slice_head(n = 100) |>
pull(gene)
organ$lum_early_vs_late_score <- swissknife::normGenesetExpression(
organ, genes = luminal_early_vs_late_markers,
BPPARAM = MulticoreParam(workers = 8L))Warning in asMethod(object): sparse->dense coercion: allocating vector of size
12.9 GiB
(ggrep3 <- ggplot(as.data.frame(colData(organ)),
aes(x = timepoint, y = lum_early_vs_late_score,
fill = timepoint)) +
geom_violin(draw_quantiles = c(0.25, 0.5, 0.75)) +
labs(x = "", y = "Luminal early vs late gene set score") +
scale_fill_manual(values = sc_timepoint_colors) +
theme_bw(base_size = 14) +
theme(legend.position = "none",
axis.text.x = element_text(size = 16, angle = 45, hjust = 1, vjust = 1),
axis.text.y = element_text(size = 16),
axis.title = element_text(size = 16)))
pdf("paper_figures/luminal_early_vs_late_signature.pdf", width = 7, height = 5)
ggrep3
dev.off()png
2
Luminal late signature over time
luminal_late_markers <- read.delim("../scRNAseq_invivo_endometrium_epithelial_cell_atlas/marker_genes_all_leiden_res0.4_annot/leiden_res0.4_annot_cluster_luminal late_markers.txt") |>
arrange(p.value) |>
slice_head(n = 100) |>
pull(gene)
organ$lum_late_score <- swissknife::normGenesetExpression(
organ, genes = luminal_late_markers,
BPPARAM = MulticoreParam(workers = 8L))Warning in asMethod(object): sparse->dense coercion: allocating vector of size
12.9 GiB
(ggrep2 <- ggplot(as.data.frame(colData(organ)),
aes(x = timepoint, y = lum_late_score,
fill = timepoint)) +
geom_violin(draw_quantiles = c(0.25, 0.5, 0.75)) +
labs(x = "", y = "Luminal late gene set score") +
scale_fill_manual(values = sc_timepoint_colors) +
theme_bw(base_size = 14) +
theme(legend.position = "none",
axis.text.x = element_text(size = 16, angle = 45, hjust = 1, vjust = 1),
axis.text.y = element_text(size = 16),
axis.title = element_text(size = 16)))
pdf("paper_figures/luminal_late_signature.pdf", width = 7, height = 5)
ggrep2
dev.off()png
2
Overrepresentation analysis
# tested genes
timecourse_gene_bg <- read.delim("organoid_timecourse_gene_universe.txt",
header = FALSE)[, 1]
clusters_to_consider <- c("stress responsive",
"wound responsive 12h",
"wound responsive 24h",
"immunomodulatory",
"OxPhos high",
"cycling",
"hormone responsive")
gprofiler2::set_base_url("https://biit.cs.ut.ee/gprofiler_archive3/e112_eg59_p19")
for (cl in clusters_to_consider) {
mgenes <- read.delim(paste0(
"marker_genes_within_lineage_all_leiden_res0.125_annot/leiden_res0.125_annot_cluster_",
cl, "_markers_within_lineage.txt")) |>
arrange(p.value) |>
slice_head(n = 100) |>
pull(gene)
gostres <- gost(
query = mgenes,
organism = "hsapiens",
ordered_query = FALSE,
multi_query = FALSE,
significant = TRUE,
exclude_iea = TRUE,
measure_underrepresentation = FALSE,
evcodes = FALSE,
user_threshold = 0.05,
correction_method = "fdr",
domain_scope = "custom",
custom_bg = timecourse_gene_bg,
numeric_ns = "",
sources = c("GO:BP"),
as_short_link = FALSE,
highlight = TRUE
)
print(head(gostres$result |>
dplyr::select(term_name, p_value, term_size, query_size, intersection_size),
n = 15) |>
mutate(p_value = signif(p_value, digits = 3)))
write.table(gostres$result |>
dplyr::select(term_name, p_value, term_size, query_size, intersection_size),
file = paste0("gprofiler/", sub(" ", "_", cl), "_markers_gobp.tsv"),
row.names = FALSE, quote = FALSE, col.names = TRUE, sep = "\t")
} term_name p_value term_size query_size
1 response to unfolded protein 8.90e-17 101 99
2 response to topologically incorrect protein 5.28e-16 116 99
3 protein folding 5.60e-15 163 99
4 protein refolding 3.07e-14 23 99
5 response to heat 9.56e-14 68 99
6 response to temperature stimulus 6.70e-12 94 99
7 regulation of apoptotic process 7.14e-11 931 99
8 regulation of programmed cell death 1.42e-10 962 99
9 apoptotic process 6.18e-10 1181 99
10 programmed cell death 1.60e-09 1230 99
11 cell death 1.61e-09 1235 99
12 cellular response to heat 2.80e-09 49 99
13 response to stress 1.37e-08 2636 99
14 protein maturation 1.94e-08 426 99
15 chaperone cofactor-dependent protein refolding 2.36e-08 26 99
intersection_size
1 17
2 17
3 18
4 10
5 13
6 13
7 29
8 29
9 31
10 31
11 31
12 9
13 44
14 18
15 7
term_name
1 ribosome biogenesis
2 ribonucleoprotein complex biogenesis
3 rRNA metabolic process
4 rRNA processing
5 ribosomal small subunit biogenesis
6 endothelial cell development
7 ribosomal large subunit biogenesis
8 RNA processing
9 cellular component biogenesis
10 establishment of endothelial barrier
11 maturation of SSU-rRNA
12 endothelial cell differentiation
13 maturation of SSU-rRNA from tricistronic rRNA transcript (SSU-rRNA, 5.8S rRNA, LSU-rRNA)
14 endothelium development
15 epithelial cell development
p_value term_size query_size intersection_size
1 9.68e-13 208 98 18
2 2.32e-10 342 98 19
3 3.30e-09 181 98 14
4 4.00e-08 145 98 12
5 3.39e-06 69 98 8
6 5.14e-05 41 98 6
7 1.32e-04 49 98 6
8 2.16e-04 768 98 18
9 3.14e-04 2533 98 35
10 4.47e-04 36 98 5
11 6.96e-04 40 98 5
12 1.05e-03 76 98 6
13 1.72e-03 24 98 4
14 2.37e-03 91 98 6
15 2.37e-03 91 98 6
term_name p_value term_size query_size
1 actin cytoskeleton organization 0.000116 572 100
2 cytoskeleton organization 0.000116 1206 100
3 actin filament-based process 0.000316 632 100
4 cell migration 0.003460 1033 100
5 cell adhesion 0.007150 1008 100
6 locomotion 0.008380 850 100
7 cell motility 0.010300 1168 100
8 post-Golgi vesicle-mediated transport 0.010300 98 100
9 cell-matrix adhesion 0.017300 168 100
10 vesicle-mediated transport 0.017300 1128 100
11 regulation of cell migration 0.017300 675 100
12 actin filament depolymerization 0.017300 38 100
13 positive regulation of biological process 0.018500 4486 100
14 regulation of actin cytoskeleton organization 0.018500 248 100
15 regulation of cell motility 0.018500 718 100
intersection_size
1 17
2 25
3 17
4 20
5 19
6 17
7 20
8 6
9 7
10 19
11 14
12 4
13 46
14 8
15 14
term_name
1 defense response to symbiont
2 defense response to other organism
3 immune response
4 response to biotic stimulus
5 response to external biotic stimulus
6 response to other organism
7 biological process involved in interspecies interaction between organisms
8 innate immune response
9 defense response
10 immune system process
11 response to peptide
12 response to cytokine
13 defense response to virus
14 response to external stimulus
15 peptide antigen assembly with MHC class II protein complex
p_value term_size query_size intersection_size
1 7.20e-12 645 97 26
2 7.20e-12 699 97 27
3 7.20e-12 1043 97 32
4 1.22e-11 863 97 29
5 2.40e-11 831 97 28
6 2.40e-11 831 97 28
7 8.05e-11 950 97 29
8 4.29e-10 545 97 22
9 4.85e-10 1031 97 29
10 7.81e-10 1561 97 35
11 5.05e-09 506 97 20
12 5.05e-09 506 97 20
13 5.65e-09 207 97 14
14 6.25e-09 1335 97 31
15 1.11e-08 13 97 6
term_name p_value term_size
1 oxidative phosphorylation 5.41e-20 125
2 aerobic respiration 1.51e-19 160
3 cellular respiration 2.04e-18 184
4 energy derivation by oxidation of organic compounds 3.82e-16 239
5 generation of precursor metabolites and energy 3.47e-15 349
6 ATP biosynthetic process 5.47e-14 88
7 purine ribonucleoside triphosphate biosynthetic process 5.71e-14 90
8 purine nucleoside triphosphate biosynthetic process 5.71e-14 90
9 proton motive force-driven ATP synthesis 8.18e-14 72
10 ribonucleoside triphosphate biosynthetic process 1.01e-13 95
11 purine ribonucleotide biosynthetic process 1.63e-13 124
12 nucleoside triphosphate biosynthetic process 2.05e-13 101
13 purine nucleotide biosynthetic process 2.27e-13 157
14 ribonucleotide biosynthetic process 3.74e-13 133
15 ribose phosphate biosynthetic process 6.11e-13 138
query_size intersection_size
1 96 20
2 96 21
3 96 21
4 96 21
5 96 23
6 96 14
7 96 14
8 96 14
9 96 13
10 96 14
11 96 15
12 96 14
13 96 16
14 96 15
15 96 15
term_name p_value term_size query_size
1 mitotic cell cycle 7.80e-38 754 99
2 cell cycle process 2.32e-37 1042 99
3 mitotic cell cycle process 1.18e-36 630 99
4 chromosome organization 8.03e-35 525 99
5 cell cycle 1.30e-34 1319 99
6 chromosome segregation 8.04e-31 346 99
7 mitotic nuclear division 1.78e-28 246 99
8 nuclear division 5.37e-28 349 99
9 regulation of cell cycle process 5.83e-28 586 99
10 nuclear chromosome segregation 9.72e-28 263 99
11 sister chromatid segregation 1.81e-27 214 99
12 mitotic sister chromatid segregation 9.74e-27 177 99
13 organelle fission 1.66e-26 393 99
14 regulation of chromosome segregation 3.16e-26 121 99
15 positive regulation of cell cycle process 3.46e-26 187 99
intersection_size
1 51
2 56
3 47
4 43
5 58
6 35
7 30
8 33
9 39
10 30
11 28
12 26
13 33
14 23
15 26
term_name p_value term_size
1 macromolecule glycosylation 0.000162 173
2 protein glycosylation 0.000162 173
3 glycosylation 0.000213 192
4 glycoprotein metabolic process 0.000213 289
5 glycoprotein biosynthetic process 0.001560 245
6 commissural neuron axon guidance 0.006380 9
7 protein O-linked glycosylation 0.006380 88
8 carbohydrate derivative biosynthetic process 0.014900 482
9 branching involved in blood vessel morphogenesis 0.021000 14
10 positive regulation of smooth muscle cell migration 0.023500 15
11 endoderm development 0.031200 47
12 carbohydrate derivative metabolic process 0.031200 747
13 L-arginine import across plasma membrane 0.031200 4
14 central nervous system development 0.031200 484
15 positive regulation of cell adhesion 0.031200 324
query_size intersection_size
1 100 10
2 100 10
3 100 10
4 100 12
5 100 10
6 100 3
7 100 6
8 100 12
9 100 3
10 100 3
11 100 4
12 100 14
13 100 2
14 100 11
15 100 9
plotdf <- do.call(bind_rows, lapply(clusters_to_consider, function(cl) {
read.delim(paste0("gprofiler/", sub(" ", "_", cl), "_markers_gobp.tsv")) |>
slice_head(n = 5) |>
mutate(cluster = cl) |>
mutate(term_size = as.numeric(term_size),
p_value = as.numeric(p_value))
})) |>
mutate(cluster2 = sub("immunomodulatory", "immuno-\nmodulatory",
gsub(" ", "\n", cluster))) |>
mutate(cluster = factor(cluster, levels = rev(clusters_to_consider))) |>
mutate(cluster2 = factor(cluster2, levels = sub("immunomodulatory", "immuno-\nmodulatory",
gsub(" ", "\n", levels(cluster)))))
(ggprof <- ggplot(plotdf, aes(x = -log10(p_value), y = fct_reorder(term_name, -p_value))) +
geom_point(size = 4, aes(color = cluster)) +
scale_color_manual(values = sc_leiden_res0.125_annot_colors) +
facet_grid2(cluster2 ~ .,
strip = strip_themed(clip = "off", background_y = elem_list_rect(
fill = alpha(sc_leiden_res0.125_annot_colors[levels(plotdf$cluster)], 0.5))),
scales = "free_y") +
labs(x = "-log10 FDR", y = "") +
theme_cowplot() +
theme(legend.position = "none"))pdf("paper_figures/gprofiler_selclusters.pdf", width = 7, height = 8)
ggprof
dev.off()png
2
Save count matrix and cell type annotations
if (!file.exists("upload_data")) {
dir.create("upload_data", showWarnings = FALSE, recursive = FALSE)
}
writeMM(assay(organ, "counts"),
file = "upload_data/ivmc_scrnaseq_counts.mtx")NULL
R.utils::gzip(filename = "upload_data/ivmc_scrnaseq_counts.mtx",
compression = 9, overwrite = TRUE, remove = TRUE)
write.table(rownames(organ),
file = "upload_data/ivmc_scrnaseq_genes.tsv",
row.names = FALSE, col.names = FALSE, sep = "\t", quote = FALSE)
R.utils::gzip(filename = "upload_data/ivmc_scrnaseq_genes.tsv",
compression = 9, overwrite = TRUE, remove = TRUE)
write.table(colnames(organ),
file = "upload_data/ivmc_scrnaseq_cells.tsv",
row.names = FALSE, col.names = FALSE, sep = "\t", quote = FALSE)
R.utils::gzip(filename = "upload_data/ivmc_scrnaseq_cells.tsv",
compression = 9, overwrite = TRUE, remove = TRUE)
write.table(
cbind(colData(organ)[, c("cell", "leiden_res0.125_annot",
"pred_from_invivo", "timepoint")],
reducedDim(organ, "UMAP_sketch")[, 1:2]),
file = "upload_data/ivmc_scrnaseq_annot.tsv",
row.names = FALSE, col.names = TRUE, sep = "\t")
R.utils::gzip(filename = "upload_data/ivmc_scrnaseq_annot.tsv",
compression = 9, overwrite = TRUE, remove = TRUE)Session info
# gProfiler/GO version
get_version_info(organism = "hsapiens")$gprofiler_version[1] "e112_eg59_p19_25aa4782"
get_version_info(organism = "hsapiens")$sources$`GO:MF`$name
[1] "molecular function"
$version
[1] "annotations: BioMart\nclasses: releases/2024-10-27"
sessionInfo()R version 4.4.2 (2024-10-31)
Platform: x86_64-pc-linux-gnu
Running under: Rocky Linux 8.10 (Green Obsidian)
Matrix products: default
BLAS/LAPACK: FlexiBLAS OPENBLAS; LAPACK version 3.12.0
locale:
[1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
[3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
[5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
[7] LC_PAPER=en_US.UTF-8 LC_NAME=C
[9] LC_ADDRESS=C LC_TELEPHONE=C
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
time zone: Europe/Zurich
tzcode source: system (glibc)
attached base packages:
[1] grid stats4 stats graphics grDevices utils datasets
[8] methods base
other attached packages:
[1] forcats_1.0.0 gprofiler2_0.2.3
[3] Matrix_1.7-1 BiocParallel_1.40.2
[5] swissknife_0.43 ggh4x_0.3.0
[7] scales_1.3.0.9000 ggtext_0.1.2
[9] circlize_0.4.16 ComplexHeatmap_2.22.0
[11] cowplot_1.1.3 tidyr_1.3.1
[13] dplyr_1.1.4 scater_1.34.1
[15] ggplot2_3.5.2 scuttle_1.16.0
[17] SingleCellExperiment_1.28.1 SummarizedExperiment_1.36.0
[19] Biobase_2.66.0 GenomicRanges_1.58.0
[21] GenomeInfoDb_1.42.3 IRanges_2.40.1
[23] S4Vectors_0.44.0 BiocGenerics_0.52.0
[25] MatrixGenerics_1.18.1 matrixStats_1.5.0
loaded via a namespace (and not attached):
[1] bitops_1.0-9 gridExtra_2.3 rlang_1.1.6
[4] magrittr_2.0.3 clue_0.3-66 GetoptLong_1.0.5
[7] compiler_4.4.2 png_0.1-8 vctrs_0.6.5
[10] stringr_1.5.1 pkgconfig_2.0.3 shape_1.4.6.1
[13] crayon_1.5.3 fastmap_1.2.0 XVector_0.46.0
[16] labeling_0.4.3 rmarkdown_2.29 markdown_2.0
[19] UCSC.utils_1.2.0 ggbeeswarm_0.7.2 purrr_1.0.4
[22] bluster_1.16.0 xfun_0.52 zlibbioc_1.52.0
[25] beachmat_2.22.0 litedown_0.7 jsonlite_2.0.0
[28] DelayedArray_0.32.0 irlba_2.3.5.1 parallel_4.4.2
[31] cluster_2.1.6 R6_2.6.1 stringi_1.8.7
[34] RColorBrewer_1.1-3 limma_3.62.2 Rcpp_1.0.14
[37] iterators_1.0.14 knitr_1.50 usethis_3.1.0
[40] R.utils_2.13.0 igraph_2.1.4 tidyselect_1.2.1
[43] dichromat_2.0-0.1 abind_1.4-8 yaml_2.3.10
[46] viridis_0.6.5 doParallel_1.0.17 codetools_0.2-20
[49] lattice_0.22-6 tibble_3.2.1 withr_3.0.2
[52] evaluate_1.0.3 xml2_1.3.8 pillar_1.10.2
[55] KernSmooth_2.23-24 foreach_1.5.2 plotly_4.10.4
[58] generics_0.1.3 RCurl_1.98-1.17 commonmark_1.9.5
[61] glue_1.8.0 metapod_1.14.0 lazyeval_0.2.2
[64] tools_4.4.2 BiocNeighbors_2.0.1 data.table_1.17.0
[67] ScaledMatrix_1.14.0 locfit_1.5-9.12 scran_1.34.0
[70] fs_1.6.6 edgeR_4.4.2 colorspace_2.1-1
[73] GenomeInfoDbData_1.2.13 beeswarm_0.4.0 BiocSingular_1.22.0
[76] vipor_0.4.7 cli_3.6.4 rsvd_1.0.5
[79] S4Arrays_1.6.0 viridisLite_0.4.2 gtable_0.3.6
[82] R.methodsS3_1.8.2 digest_0.6.37 dqrng_0.4.1
[85] SparseArray_1.6.2 ggrepel_0.9.6 rjson_0.2.23
[88] htmlwidgets_1.6.4 farver_2.1.2 R.oo_1.27.0
[91] htmltools_0.5.8.1 lifecycle_1.0.4 httr_1.4.7
[94] statmod_1.5.0 GlobalOptions_0.1.2 gridtext_0.1.5