suppressPackageStartupMessages({
library(fishpond)
library(SummarizedExperiment)
library(SingleCellExperiment)
library(scuttle)
library(scater)
library(scran)
library(dplyr)
library(tidyr)
library(ggplot2)
library(msigdbr)
library(batchelor)
library(cowplot)
library(ComplexHeatmap)
library(circlize)
library(scDblFinder)
library(Seurat)
library(sketchR)
library(tricycle)
library(org.Hs.eg.db)
library(parallel)
library(bluster)
library(BiocParallel)
library(igraph)
library(DT)
library(SingleR)
})Generating an endometrial atlas
Load packages
Define parameters and reference files
samples <- c("4861STDY7387181", "4861STDY7387182", "4861STDY7387183",
"4861STDY7771115", "4861STDY7771123", "MRC_Endo8625698",
"MRC_Endo8625699", "MRC_Endo8712024", "MRC_Endo8712032",
"MRC_Endo8715415", "MRC_Endo8715416", "GSM4577306", "GSM4577307",
"GSM4577308", "GSM4577309", "GSM4577310", "GSM4577311",
"GSM4577312", "GSM4577313", "GSM4577314", "GSM4577315",
"Shih_MEtissue1", "Shih_MEtissue2", "Shih_MEtissue3",
"Shih_MEtissue4", "Shih_MEtissue5", "Shih_MEtissue6",
"Shih_MEtissue7", "Huang_GSM6605437", "Huang_GSM6605438",
"Huang_GSM6605440", "Huang_GSM6605439", "Huang_GSM7277296",
"Huang_GSM7277297", "Huang_GSM7277298", "Lai_GSM5572238",
"Lai_GSM5572239", "Lai_GSM5572240")
blockvar <- "sample" ## used as blocking factor in doublet annotation
batchvar <- "donorCol" ## used for batch correction
datasetvar <- "matchDataset" ## used as blocking factor in HVG annotation
## Sample annotation
sampleannot <- "../scRNAseq_invivo_endometrium_epithelial_cell_atlas/invivo_atlas_samples.txt"
## Gene annotation (derived from GENCODE v38 gtf file)
t2g <- "../scRNAseq_invivo_endometrium_epithelial_cell_atlas/reference/gencode.v38.annotation.tx2gene_full.rds"
## Reproductive cell atlas data (downloaded from
## https://www.reproductivecellatlas.org/ and converted to SingleCellExperiment
## format)
rca <- "../scRNAseq_invivo_endometrium_epithelial_cell_atlas/data/reproductivecellatlas/2021/endometrium_all.rds"
heca <- "../scRNAseq_invivo_endometrium_epithelial_cell_atlas/data/reproductivecellatlas/2023/endometriumAtlasV2_cells_with_counts.rds"
hecaimmune <- "../scRNAseq_invivo_endometrium_epithelial_cell_atlas/data/reproductivecellatlas/2023/endometriumAtlasV2_cells_immune.rds"
## Cell cycle marker genes
## https://github.com/ventolab/HECA-Human-Endometrial-Cell-Atlas/tree/main/utils
s_genes <- "../scRNAseq_invivo_endometrium_epithelial_cell_atlas/data/reproductivecellatlas/2023/S_genes_HECA.tsv"
g2m_genes <- "../scRNAseq_invivo_endometrium_epithelial_cell_atlas/data/reproductivecellatlas/2023/G2M_genes_HECA.tsv"
## Shih et al cell annotations (obtained from
## https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE203191)
shih_cellannot <- "../scRNAseq_invivo_endometrium_epithelial_cell_atlas/data/GSE203191/GSE203191_Shih_endo_meta_frame.tsv"
## Epithelial atlas
epiatlas <- "../scRNAseq_invivo_endometrium_epithelial_cell_atlas/invivo_epithelial_cell_atlas.rds"Assemble gene annotation
geneannot <- readRDS(t2g) |>
dplyr::select(gene_id, seqnames, gene_type, gene_name) |>
dplyr::rename(chromosome = seqnames) |>
dplyr::distinct()
## Annotate ribosomal protein genes
## GO term "structural constituent of ribosome" (GO:0003735)
golist <- as.list(org.Hs.egGO2ALLEGS)
ribo_entrez <- unique(golist[["GO:0003735"]])
length(ribo_entrez)[1] 175
tmp <- select(org.Hs.eg.db, keys = ribo_entrez, columns = "ENSEMBL",
keytype = "ENTREZID")'select()' returned 1:many mapping between keys and columns
ribo_ensembl <- unique(na.omit(tmp[, "ENSEMBL"]))
length(ribo_ensembl)[1] 195
summary(ribo_ensembl %in% sub("\\.[0-9]+$", "", geneannot$gene_id)) Mode FALSE TRUE
logical 33 162
geneannot$is_ribosomal_protein <-
sub("\\.[0-9]+$", "", geneannot$gene_id) %in% ribo_ensembl
geneannot$gene_type_ribo <- ifelse(
geneannot$gene_type == "protein_coding",
ifelse(geneannot$is_ribosomal_protein, "protein_coding_ribosome",
geneannot$gene_type),
geneannot$gene_type)Read data
## Get sample IDs
tbl <- read.delim(sampleannot) |>
dplyr::filter(SampleName %in% samples)
DT::datatable(tbl, options = list(scrollX = TRUE, pageLength = 50))sns <- unique(tbl$SampleName)
names(sns) <- sns
## Read alevin-fry data and combine
dat <- do.call(cbind, lapply(sns, function(s) {
x <- fishpond::loadFry(
fryDir = file.path("../scRNAseq_invivo_endometrium_epithelial_cell_atlas",
"data", "alevin_fry_splici", s),
outputFormat = list(counts = c("S", "A"), spliced = c("S", "A"),
unspliced = c("U"), total = c("U", "S", "A")))
rowData(x)$gene_name <-
geneannot$gene_name[match(rowData(x)$gene_ids, geneannot$gene_id)]
rowData(x)$chromosome <-
geneannot$chromosome[match(rowData(x)$gene_ids, geneannot$gene_id)]
rowData(x)$gene_type <-
geneannot$gene_type[match(rowData(x)$gene_ids, geneannot$gene_id)]
rowData(x)$gene_type_ribo <-
geneannot$gene_type_ribo[match(rowData(x)$gene_ids, geneannot$gene_id)]
## Keep only cells with at least 1,000 detected genes
x <- x[, colSums(assay(x, "total") > 0) > 1000]
## Add sample ID to colnames
x$sample <- s
colnames(x) <- paste0(s, "_", colnames(x))
x
}))
datclass: SingleCellExperiment
dim: 60649 274348
metadata(0):
assays(4): counts spliced unspliced total
rownames(60649): ENSG00000223972.5 ENSG00000243485.5 ...
ENSG00000210194.1 ENSG00000210196.2
rowData names(5): gene_ids gene_name chromosome gene_type
gene_type_ribo
colnames(274348): 4861STDY7387181_AGAGTGGGTCTAGCGC
4861STDY7387181_TAAGAGATCTGAGTGT ...
Huang_GSM7277298_CTATCCGTCTTTCGAT Huang_GSM7277298_GGGTTTACAGAACTTC
colData names(2): barcodes sample
reducedDimNames(0):
mainExpName: NULL
altExpNames(0):
## Set rownames to gene names where possible
rownames(dat) <- scater::uniquifyFeatureNames(
ID = rownames(dat),
names = rowData(dat)$gene_name
)Add sample annotations
## Extract barcodes corresponding to control samples in Shih data and remove
## all other barcodes from the Shih samples
cb <- read.delim(shih_cellannot) |>
dplyr::filter(pheno == "Control") |>
dplyr::mutate(barcode = gsub("-[0-9]$", "", barcode)) |>
dplyr::mutate(sample_cb = paste("Shih", run, barcode, sep = "_")) |>
dplyr::pull(sample_cb)
to_remove <- which(grepl("Shih", dat$sample) &
!paste0(dat$sample, "_", dat$barcodes) %in% cb)
dat <- dat[, -to_remove]
dim(dat)[1] 60649 253579
## Add 'cell' column
dat$cell <- colnames(dat)
## Any other provided annotations
tbl2 <- tbl |>
dplyr::filter(RunID != "Shih")
if (nrow(tbl2) > 0 && (length(tbl2$SampleName) == length(unique(tbl2$SampleName)))) {
for (cn in colnames(tbl2)) {
if (!all(is.na(tbl2[[cn]])) && !(cn %in% colnames(colData(dat)))) {
if (cn == "Day") {
colData(dat)[[cn]] <- paste0(
"D", tbl2[[cn]][match(dat$sample, tbl2$SampleName)])
} else if (cn == "RunID") {
colData(dat)[[cn]] <- sub("_[0-9]+$", "",
tbl2[[cn]][match(dat$sample,
tbl2$SampleName)])
} else {
colData(dat)[[cn]] <- tbl2[[cn]][match(dat$sample, tbl2$SampleName)]
}
}
}
}
## Harmonize annotation values
dat$Stage <- gsub(" stage", "", dat$Stage)
## Add information for the Shih data set
shih_annot <- read.delim(shih_cellannot) |>
dplyr::mutate(cell = paste0("Shih_", run, "_", gsub("-[0-9]$", "", barcode)))
if (any(dat$cell %in% shih_annot$cell)) {
for (cn in c("clusterID", "pheno", "Phase", "subjectID", "NKsubclusterID",
"StromalSubclusterID")) {
colData(dat)[[paste0("Shih_", cn)]] <-
shih_annot[[cn]][match(dat$cell, shih_annot$cell)]
}
}
dat$RunID[grep("Shih", dat$sample)] <- "Shih"
dat$Stage[grep("Shih", dat$sample)] <- "menstrual"
dat$StageFactor <- factor(dat$Stage,
levels = c("menstrual", "proliferative",
"early-secretory", "early-mid-secretory",
"mid-secretory", "late-secretory"))
dat$StageCoarse <- sub("early-|early-mid-|mid-|late-", "", dat$Stage)
## Create a 'donorCol' column
dat$donorCol <- NA_character_
if ("DonorID" %in% colnames(colData(dat))) {
idx <- which(!is.na(dat[["DonorID"]]))
dat$donorCol[idx] <- dat[["DonorID"]][idx]
}
if ("Shih_subjectID" %in% colnames(colData(dat))) {
idx <- which(!is.na(dat[["Shih_subjectID"]]))
dat$donorCol[idx] <- dat[["Shih_subjectID"]][idx]
}
## Create a 'sampleCol' column (either sample or Shih_subjectID)
dat$sampleCol <- NA_character_
if ("sample" %in% colnames(colData(dat))) {
idx <- which(!is.na(dat[["sample"]]))
dat$sampleCol[idx] <- dat[["sample"]][idx]
}
if ("Shih_subjectID" %in% colnames(colData(dat))) {
idx <- which(!is.na(dat[["Shih_subjectID"]]))
dat$sampleCol[idx] <- dat[["Shih_subjectID"]][idx]
}
## Generate columns to match to HECA
dat$matchDataset <- ifelse(dat$RunID %in% c("Wang", "Shih", "Lai", "Huang"),
dat$RunID, "GarciaAlonso")
dat$matchCell <- sub("^Huang_|^Lai_|^Shih_", "", dat$cell)
dat$matchId <- paste0(dat$matchCell, "-", dat$matchDataset)
## Add annotations from the reproductive cell atlas (2021)
rca <- readRDS(rca)
table(dat$matchDataset, colnames(dat) %in% colnames(rca), useNA = "ifany")
FALSE TRUE
GarciaAlonso 28396 40337
Huang 63190 0
Lai 18499 0
Shih 3110 0
Wang 47223 52824
dat$RCA_CellCyclePhase <- rca$`CellCycle Phase`[match(colnames(dat), colnames(rca))]
dat$RCA_Celltype <- rca$`Cell type`[match(colnames(dat), colnames(rca))]
dat$RCA_percentmito <- rca$percent_mito[match(colnames(dat), colnames(rca))]
dat$RCA_ngenes <- rca$n_genes[match(colnames(dat), colnames(rca))]
dat$RCA_BiopsyType <- rca$BiopsyType[match(colnames(dat), colnames(rca))]
dat$RCA_Location <- rca$Location[match(colnames(dat), colnames(rca))]
dat$RCA_BroadCelltype <- rca$`Broad cell type`[match(colnames(dat), colnames(rca))]
## Add annotations from HECA (2023)
heca <- readRDS(heca)
hecaimmune <- readRDS(hecaimmune)
table(dat$matchDataset, dat$matchId %in% colnames(heca), useNA = "ifany")
FALSE TRUE
GarciaAlonso 37041 31692
Huang 34420 28770
Lai 6227 12272
Shih 3110 0
Wang 46329 53718
dat$HECA_CellCyclePhase <- as.character(heca$phase[match(dat$matchId, colnames(heca))])
dat$HECA_Celltype <- as.character(heca$celltype[match(dat$matchId, colnames(heca))])
dat$HECA_CelltypeImmune <- as.character(hecaimmune$celltype[match(dat$matchId, colnames(hecaimmune))])
dat$HECA_ngenes <- heca$n_genes[match(dat$matchId, colnames(heca))]
dat$HECA_percentmito <- heca$percent_mito[match(dat$matchId, colnames(heca))]
dat$HECA_ScrubletScore <- heca$scrublet_score[match(dat$matchId, colnames(heca))]
dat$HECA_Stage <- as.character(heca$Stage[match(dat$matchId, colnames(heca))])
dat$HECA_Lineage <- as.character(heca$lineage[match(dat$matchId, colnames(heca))])
dat$HECA_Label <- as.character(heca$label_long[match(dat$matchId, colnames(heca))])
## Filter to only keep cells that have a value for the batch variable
(dat <- dat[, !is.na(dat[[batchvar]])])class: SingleCellExperiment
dim: 60649 253579
metadata(0):
assays(4): counts spliced unspliced total
rownames(60649): DDX11L1 MIR1302-2HG ... MT-TE MT-TP
rowData names(5): gene_ids gene_name chromosome gene_type
gene_type_ribo
colnames(253579): 4861STDY7387181_AGAGTGGGTCTAGCGC
4861STDY7387181_TAAGAGATCTGAGTGT ...
Huang_GSM7277298_CTATCCGTCTTTCGAT Huang_GSM7277298_GGGTTTACAGAACTTC
colData names(50): barcodes sample ... HECA_Lineage HECA_Label
reducedDimNames(0):
mainExpName: NULL
altExpNames(0):
## Sample annotation summary
DT::datatable(as.data.frame(colData(dat)) |>
dplyr::select(sample, SampleType, DonorID, SampleName, RunID,
ReadLength1, ReadLength2, Tissue, Stage,
Digestion, Protocol, Source, Sex,
TimeInMenstrualCycle, DiseaseState,
Fecundity, Phenotype, DevStage,
Shih_pheno, Shih_subjectID,
StageFactor, StageCoarse, donorCol, sampleCol,
matchDataset) |>
dplyr::distinct(),
rownames = FALSE,
options = list(scrollX = TRUE, pageLength = 50))Check total count for each gene type and removed unwanted gene types
keepGeneTypes <- c("protein_coding", "IG_V_gene", "IG_C_gene", "IG_J_gene",
"TR_C_gene", "TR_J_gene", "TR_V_gene", "TR_D_gene",
"IG_D_gene", "Mt_tRNA", "Mt_rRNA")
pd <- data.frame(gene = rownames(dat),
gene_type_ribo = rowData(dat)$gene_type_ribo,
total_count = rowSums(counts(dat))) |>
group_by(gene_type_ribo) |>
summarize(nbr_genes = length(gene),
total_count = sum(total_count)) |>
mutate(keep = gene_type_ribo %in% keepGeneTypes) |>
arrange(desc(nbr_genes))
pd$gene_type_ribo <- factor(pd$gene_type_ribo, levels = unique(pd$gene_type_ribo))
p1 <- ggplot(pd, aes(x = gene_type_ribo, y = nbr_genes, fill = keep)) +
geom_col() +
labs(x = "", y = "Number of genes", fill = "keep") +
theme_cowplot() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
p2 <- ggplot(pd, aes(x = gene_type_ribo, y = total_count, fill = keep)) +
geom_col() +
labs(x = "", y = "Total UMI count", fill = "keep") +
theme_cowplot() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
plot_grid(p1, p2, ncol = 1)dat <- dat[rowData(dat)$gene_type_ribo %in% keepGeneTypes, ]
dim(dat)[1] 20231 253579
Add QC metrics
(MT <- grep("^MT-", rownames(dat), value = TRUE)) [1] "MT-TF" "MT-RNR1" "MT-TV" "MT-RNR2" "MT-TL1" "MT-ND1" "MT-TI"
[8] "MT-TM" "MT-ND2" "MT-TW" "MT-CO1" "MT-TD" "MT-CO2" "MT-TK"
[15] "MT-ATP8" "MT-ATP6" "MT-CO3" "MT-TG" "MT-ND3" "MT-TR" "MT-ND4L"
[22] "MT-ND4" "MT-TH" "MT-TS2" "MT-TL2" "MT-ND5" "MT-CYB" "MT-TT"
[29] "MT-TQ" "MT-TA" "MT-TN" "MT-TC" "MT-TY" "MT-TS1" "MT-ND6"
[36] "MT-TE" "MT-TP"
dat <- scuttle::addPerCellQC(dat, assay.type = "counts", subsets = list(MT = MT))
dat <- scuttle::addPerFeatureQC(dat, assay.type = "counts")
dat$intronic_fraction <-
colSums(assay(dat, "unspliced")) / colSums(assay(dat, "total"))
rowData(dat)$intronic_fraction <-
rowSums(assay(dat, "unspliced")) / rowSums(assay(dat, "total"))QC plots
total_count_threshold <- 1000
detected_threshold <- 500
mt_percent_threshold <- 25
dat$keep <- (dat$total > total_count_threshold) &
(dat$detected > detected_threshold) &
(dat$subsets_MT_percent < mt_percent_threshold)
table(dat$keep)
FALSE TRUE
79933 173646
cdplot <- as.data.frame(colData(dat))
sid <- "sample"
stid <- "StageFactor"
ggplot(cdplot, aes(x = .data[[sid]], y = sum, fill = .data[[stid]])) +
geom_hline(yintercept = total_count_threshold) +
geom_violin(scale = "width") + theme_minimal() +
labs(y = "Total UMI count per cell") +
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))ggplot(cdplot, aes(x = .data[[sid]], y = sum, fill = .data[[stid]])) +
geom_hline(yintercept = total_count_threshold) +
geom_violin(scale = "width") + theme_minimal() +
scale_y_log10() +
labs(y = "Total UMI count per cell") +
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))ggplot(cdplot, aes(x = .data[[sid]], y = detected, fill = .data[[stid]])) +
geom_hline(yintercept = detected_threshold) +
geom_violin(scale = "width") + theme_minimal() +
labs(y = "Number of detected genes per cell") +
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))ggplot(cdplot, aes(x = .data[[sid]], y = subsets_MT_percent, fill = .data[[stid]])) +
geom_hline(yintercept = mt_percent_threshold) +
geom_violin(scale = "width") + theme_minimal() +
labs(y = "Percentage of MT count per cell") +
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))ggplot(cdplot, aes(x = detected, y = subsets_MT_percent, color = keep)) +
geom_point(size = 0.5, alpha = 0.5) + theme_minimal() +
labs(x = "Number of detected genes per cell",
y = "Percentage of MT count per cell")ggplot(cdplot, aes(x = detected, y = intronic_fraction, color = keep)) +
geom_point(size = 0.5, alpha = 0.5) + theme_minimal() +
labs(x = "Number of detected genes per cell",
y = "Intronic fraction per cell")Filter and remove samples with too few cells
(tbl <- table(sample = dat$sample, keep = dat$keep)) keep
sample FALSE TRUE
4861STDY7387181 1640 8886
4861STDY7387182 1500 8571
4861STDY7387183 738 8010
4861STDY7771115 663 9339
4861STDY7771123 431 6338
GSM4577306 2025 1914
GSM4577307 2594 2352
GSM4577308 2704 9881
GSM4577309 13724 19364
GSM4577310 8149 8432
GSM4577311 2749 2339
GSM4577312 1643 5423
GSM4577313 1433 3723
GSM4577314 2378 3080
GSM4577315 1979 4161
Huang_GSM6605437 1779 6882
Huang_GSM6605438 2182 6079
Huang_GSM6605439 635 11971
Huang_GSM6605440 800 5999
Huang_GSM7277296 1071 5766
Huang_GSM7277297 1640 11027
Huang_GSM7277298 546 6813
Lai_GSM5572238 5061 1901
Lai_GSM5572239 3430 2166
Lai_GSM5572240 2664 3277
MRC_Endo8625698 2699 3331
MRC_Endo8625699 4432 1628
MRC_Endo8712024 970 450
MRC_Endo8712032 1450 399
MRC_Endo8715415 563 416
MRC_Endo8715416 4784 1495
Shih_MEtissue1 248 343
Shih_MEtissue2 411 966
Shih_MEtissue3 218 924
ggplot(as.data.frame(tbl), aes(x = sample, y = Freq, fill = keep)) +
geom_col(position = "stack") + theme_bw() +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
labs(x = "", y = "Number of cells")ggplot(as.data.frame(tbl), aes(x = sample, y = Freq, fill = keep)) +
geom_col(position = "fill") + theme_bw() +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
labs(x = "", y = "Fraction of cells")dat <- dat[, which(dat$keep)]
dim(dat)[1] 20231 173646
(tbl <- sort(table(dat$sampleCol)))
TB13021372 TB13021061 MRC_Endo8712032 MRC_Endo8715415
343 387 399 416
MRC_Endo8712024 TB13021576 TB13020851 MRC_Endo8715416
450 579 924 1495
MRC_Endo8625699 Lai_GSM5572238 GSM4577306 Lai_GSM5572239
1628 1901 1914 2166
GSM4577311 GSM4577307 GSM4577314 Lai_GSM5572240
2339 2352 3080 3277
MRC_Endo8625698 GSM4577313 GSM4577315 GSM4577312
3331 3723 4161 5423
Huang_GSM7277296 Huang_GSM6605440 Huang_GSM6605438 4861STDY7771123
5766 5999 6079 6338
Huang_GSM7277298 Huang_GSM6605437 4861STDY7387183 GSM4577310
6813 6882 8010 8432
4861STDY7387182 4861STDY7387181 4861STDY7771115 GSM4577308
8571 8886 9339 9881
Huang_GSM7277297 Huang_GSM6605439 GSM4577309
11027 11971 19364
keepsamples <- names(tbl)[tbl > 30]
dat <- dat[, dat$sampleCol %in% keepsamples]
dim(dat)[1] 20231 173646
tbl <- table(dat[[sid]], dat[[stid]])
draw(Heatmap(tbl, col = circlize::colorRamp2(c(0, max(tbl)), c("white", "steelblue")),
cluster_columns = FALSE, cluster_rows = FALSE,
row_names_gp = gpar(fontsize = 8),
name = "Number of\ncells per\nsample",
rect_gp = gpar(col = "grey", lwd = 1),
cell_fun = function(j, i, x, y, width, height, fill) {
grid.text(sprintf("%d", tbl[i, j]), x, y, gp = gpar(fontsize = 8))
}))Recalculate QC metrics for genes based on remaining cells
dat <- scuttle::addPerFeatureQC(dat, assay.type = "counts")
rowData(dat)$intronic_fraction <-
rowSums(assay(dat, "unspliced")) / rowSums(assay(dat, "total"))Most highly expressed genes
scater::plotHighestExprs(dat, n = 25, colour_cells_by = stid,
exprs_values = "counts")Calculate size factors and logcounts
size_factor_threshold <- 0.1
set.seed(123)
clusters <- scran::quickCluster(
dat, method = "igraph", min.mean = 0.1,
use.ranks = FALSE, BSPARAM = BiocSingular::ExactParam(),
block = dat[[datasetvar]],
min.size = min(table(dat[[datasetvar]]))
)
dat <- scran::computeSumFactors(dat, min.mean = 0.1, cluster = clusters)
summary(sizeFactors(dat)) Min. 1st Qu. Median Mean 3rd Qu. Max.
0.07845 0.36578 0.72227 1.00000 1.25364 22.57632
ggplot(data.frame(libSize = dat$sum,
sizeFactor = sizeFactors(dat),
group = dat[[blockvar]],
stringsAsFactors = FALSE),
aes(x = libSize, y = sizeFactor, color = group)) +
geom_point(alpha = 0.5) + theme_bw() +
geom_hline(yintercept = size_factor_threshold, linetype = "dashed") +
xlab("Total UMI count") + ylab("Size factor")dat <- dat[, sizeFactors(dat) > size_factor_threshold]
dat <- batchelor::multiBatchNorm(dat, batch = dat[[batchvar]], min.mean = 0.1)
ggplot(data.frame(libSize = dat$sum,
sizeFactor = sizeFactors(dat),
group = dat[[blockvar]],
stringsAsFactors = FALSE),
aes(x = libSize, y = sizeFactor, color = group)) +
geom_point(alpha = 0.5) + theme_bw() +
geom_hline(yintercept = size_factor_threshold, linetype = "dashed") +
xlab("Total UMI count") + ylab("Size factor")Cell cycle assignment
## tricycle
set.seed(123)
dat <- project_cycle_space(dat, species = "human", gname.type = "SYMBOL")
dat <- estimate_cycle_position(dat)
dat <- estimate_Schwabe_stage(dat, gname.type = "SYMBOL", species = "human",
batch.v = batchvar, tolerance = 0)
plotReducedDim(dat, "tricycleEmbedding", point_size = 0.5,
colour_by = "CCStage")tbl <- table(dat$StageFactor, dat$CCStage)
draw(Heatmap(tbl, col = circlize::colorRamp2(c(0, max(tbl)), c("white", "steelblue")),
cluster_columns = FALSE, cluster_rows = FALSE,
row_names_gp = gpar(fontsize = 8),
name = "Number of\ncells",
rect_gp = gpar(col = "grey", lwd = 1),
cell_fun = function(j, i, x, y, width, height, fill) {
grid.text(sprintf("%d", tbl[i, j]), x, y, gp = gpar(fontsize = 8))
}))## Seurat (using genes from HECA)
s_genes <- read.delim(s_genes)[, 1]
g2m_genes <- read.delim(g2m_genes)[, 1]
seurat <- CreateSeuratObject(counts = assay(dat, "counts"))
seurat <- NormalizeData(seurat, verbose = FALSE)
seurat <- FindVariableFeatures(seurat, selection.method = "vst", verbose = FALSE)
seurat <- ScaleData(seurat, features = rownames(seurat), verbose = FALSE)
seurat <- CellCycleScoring(seurat, s.features = s_genes, g2m.features = g2m_genes,
set.ident = TRUE)
stopifnot(all(rownames(seurat@meta.data) == colnames(dat)))
dat$SeuratCC <- seurat@meta.data$Phase
tbl <- table(dat$StageFactor, dat$SeuratCC)
draw(Heatmap(tbl, col = circlize::colorRamp2(c(0, max(tbl)), c("white", "steelblue")),
cluster_columns = FALSE, cluster_rows = FALSE,
row_names_gp = gpar(fontsize = 8),
name = "Number of\ncells",
rect_gp = gpar(col = "grey", lwd = 1),
cell_fun = function(j, i, x, y, width, height, fill) {
grid.text(sprintf("%d", tbl[i, j]), x, y, gp = gpar(fontsize = 8))
}))Find and mark doublets
set.seed(123)
dat <- scDblFinder(dat, samples = blockvar, verbose = FALSE,
BPPARAM = MulticoreParam(workers = 8L))
table(dat$scDblFinder.class, dat[[blockvar]])
4861STDY7387181 4861STDY7387182 4861STDY7387183 4861STDY7771115
singlet 7861 7602 7306 8592
doublet 1024 969 704 746
4861STDY7771123 GSM4577306 GSM4577307 GSM4577308 GSM4577309
singlet 5772 1825 2228 8607 16657
doublet 558 89 124 1220 2694
GSM4577310 GSM4577311 GSM4577312 GSM4577313 GSM4577314 GSM4577315
singlet 7783 2209 4982 3449 2897 3864
doublet 649 130 439 274 183 297
Huang_GSM6605437 Huang_GSM6605438 Huang_GSM6605439 Huang_GSM6605440
singlet 6313 5582 10858 5587
doublet 569 497 1113 412
Huang_GSM7277296 Huang_GSM7277297 Huang_GSM7277298 Lai_GSM5572238
singlet 5263 9910 6217 1815
doublet 503 1117 596 86
Lai_GSM5572239 Lai_GSM5572240 MRC_Endo8625698 MRC_Endo8625699
singlet 2054 3087 3118 1551
doublet 112 190 213 77
MRC_Endo8712024 MRC_Endo8712032 MRC_Endo8715415 MRC_Endo8715416
singlet 431 385 404 1430
doublet 19 14 12 65
Shih_MEtissue1 Shih_MEtissue2 Shih_MEtissue3
singlet 331 926 889
doublet 12 40 35
Sketching
set.seed(123)
sketchcells.bybatch <- unlist(lapply(unique(colData(dat)[[batchvar]]), function(b) {
subd <- dat[, which(colData(dat)[[batchvar]] == b)]
geneVar <- scran::modelGeneVar(subd)
subhvgs <- scran::getTopHVGs(geneVar, n = 2000)
subd <- scater::runPCA(subd, exprs_values = "logcounts", ncomponents = 30,
name = "PCAsub",
subset_row = subhvgs, scale = FALSE)
idxsub <- geosketch(reducedDim(subd, "PCAsub"),
N = min(ncol(subd), max(0.1 * ncol(subd), 150)),
seed = 123)
colnames(subd)[idxsub]
}))
dat$sketched.bybatch <- ifelse(colnames(dat) %in% sketchcells.bybatch, TRUE, FALSE)
table(batch = colData(dat)[[batchvar]], sketched.bybatch = dat$sketched.bybatch) sketched.bybatch
batch FALSE TRUE
A13 22920 2546
A30 14102 1566
E1 4464 495
E2 699 150
E3 1720 191
GSM4577306 1723 191
GSM4577307 2117 235
GSM4577308 8845 982
GSM4577309 17416 1935
GSM4577310 7589 843
GSM4577311 2106 233
GSM4577312 4879 542
GSM4577313 3351 372
GSM4577314 2772 308
GSM4577315 3745 416
GSM5572238 1711 190
GSM5572239 1950 216
GSM5572240 2950 327
GSM6605437 6194 688
GSM6605438 5472 607
GSM6605439 10774 1197
GSM6605440 5400 599
GSM7277296 5190 576
GSM7277297 9925 1102
GSM7277298 6132 681
TB13020851 774 150
TB13021061 237 150
TB13021372 193 150
TB13021576 429 150
Find an alternative reduced dimension representation, after sketching
set.seed(123)
## First find a new set of HVGs
dattmp <- dat[, sketchcells.bybatch]
stopifnot(all(rownames(dattmp) == rownames(dat)))
rowData(dat)$scran_geneVar_geosketch <-
scran::modelGeneVar(dattmp, block = dattmp[[datasetvar]])
hvgs <- scran::getTopHVGs(rowData(dat)$scran_geneVar_geosketch, n = 3000)
metadata(dat)$hvg.scran.genevar.geosketch <- hvgs
head(
as.data.frame(
rowData(dat)$scran_geneVar_geosketch[
order(rowData(dat)$scran_geneVar_geosketch$bio,
decreasing = TRUE),
c("mean", "total", "tech", "bio", "p.value", "FDR")]
),
25
) mean total tech bio p.value FDR
GNLY 0.8430368 3.082955 0.4760610 2.6068937 0.000000e+00 0.000000e+00
PAEP 0.6338257 2.269092 0.3586959 1.9103957 0.000000e+00 0.000000e+00
CCL4 0.7156646 2.385446 0.4839192 1.9015270 0.000000e+00 0.000000e+00
HSPA1A 1.8405659 2.554906 0.9156427 1.6392630 4.227595e-45 5.262062e-43
COL3A1 1.3701836 2.108264 0.6047220 1.5035419 3.720120e-75 7.823796e-73
COL1A1 1.2971255 2.054165 0.5880678 1.4660975 1.002319e-76 2.157580e-74
CXCL8 0.8145901 1.844594 0.5457379 1.2988557 1.763084e-123 5.973916e-121
SFRP4 0.7736994 1.669994 0.3779656 1.2920287 4.521651e-107 1.378877e-104
IGFBP7 1.4500679 1.876015 0.7403303 1.1356848 1.664677e-39 1.760613e-37
HLA-DRA 0.5747616 1.530909 0.4216986 1.1092108 8.092654e-216 5.484122e-213
TIMP1 1.1100104 1.788087 0.6934214 1.0946661 5.409529e-27 4.158746e-25
CD74 0.7570072 1.623149 0.5305676 1.0925815 4.206788e-112 1.304604e-109
MT2A 1.0790865 1.703440 0.6173767 1.0860637 3.114396e-52 4.670828e-50
NKG7 0.5250787 1.402792 0.3760315 1.0267602 1.780916e-206 1.086181e-203
CCL3 0.4424003 1.354914 0.3306605 1.0242538 4.530567e-234 3.604165e-231
CCL4L2 0.3739051 1.276560 0.2596221 1.0169381 7.670023e-248 7.386232e-245
LGALS1 2.1602992 2.061316 1.0743581 0.9869577 4.468995e-18 2.412071e-16
WFDC2 0.7853112 1.323889 0.3450149 0.9788741 4.608429e-137 1.720825e-134
APOE 0.7601890 1.441097 0.4837123 0.9573851 1.263635e-53 1.959383e-51
SOD2 1.2907472 1.684880 0.7389731 0.9459069 9.240494e-38 9.289743e-36
TAGLN 0.6784393 1.290123 0.3467882 0.9433345 2.297792e-80 5.321861e-78
SRGN 0.9308689 1.528172 0.5987945 0.9293771 6.784450e-75 1.410626e-72
MT1G 0.5606329 1.151168 0.2301244 0.9210436 4.889999e-235 4.260587e-232
FOS 2.3218867 1.958179 1.0503591 0.9078202 9.190106e-12 3.495870e-10
CCL5 0.4146188 1.154212 0.2747909 0.8794207 4.360188e-192 2.346422e-189
## Next run multibatch PCA on the sketched cells
set.seed(123)
dattmp <- dat[, sketchcells.bybatch]
mbpca <- multiBatchPCA(dattmp, batch = dattmp[[batchvar]], d = 30,
subset.row = metadata(dat)$hvg.scran.genevar.geosketch,
weights = FALSE, preserve.single = TRUE)
projection <- mbpca[[1]]
rotation <- metadata(mbpca)$rotation
centers <- metadata(mbpca)$centers
## Sanity check
tmp <- t(sweep(assay(dattmp[names(centers), ], "logcounts"), MARGIN = 1,
STATS = centers, FUN = "-")) %*% rotation
max(abs(tmp - projection))[1] 1.030287e-13
## Project all cells
pca <- t(sweep(assay(dat[names(centers), ], "logcounts"), MARGIN = 1,
STATS = centers, FUN = "-")) %*% rotationWarning in asMethod(object): sparse->dense coercion: allocating vector of size
3.9 GiB
## Perform batch correction in the PCA space
stopifnot(all(rownames(pca) == colnames(dat)))
reducedDim(dat, "multibatchPCA") <- as.matrix(pca)
mnn <- reducedMNN(as.matrix(pca), batch = dat[[batchvar]], k = 20,
BPPARAM = MulticoreParam(workers = 8L))
stopifnot(all(rownames(mnn) == colnames(dat)))
reducedDim(dat, "fastMNNcorr_sketch") <- mnn$correctedDimension reduction
Apply tSNE and UMAP to sketched cells and project the others.
## TSNE
set.seed(123)
tsne <- snifter::fitsne(
reducedDim(dat[, sketchcells.bybatch], "fastMNNcorr_sketch"),
n_jobs = 8L, random_state = 123L,
perplexity = 30, n_components = 2L,
pca = FALSE)
proj <- snifter::project(
x = tsne,
new = reducedDim(dat, "fastMNNcorr_sketch"),
old = reducedDim(dat[, sketchcells.bybatch], "fastMNNcorr_sketch"))
rownames(proj) <- colnames(dat)
reducedDim(dat, "TSNE_fastMNNcorr_sketch") <- proj
## UMAP
set.seed(123)
umap <- uwot::umap(
X = reducedDim(dat[, sketchcells.bybatch], "fastMNNcorr_sketch"),
n_components = 2, n_neighbors = 50,
pca = NULL, scale = FALSE,
ret_model = TRUE, n_threads = 8L)
proj <- uwot::umap_transform(
X = reducedDim(dat, "fastMNNcorr_sketch"),
model = umap,
n_threads = 8L)
rownames(proj) <- colnames(dat)
reducedDim(dat, "UMAP_fastMNNcorr_sketch") <- projShuffle cells (to avoid plotting in layers by sample later)
set.seed(123)
dat <- dat[, sample(seq_len(ncol(dat)), ncol(dat), replace = FALSE)]Annotate cells
dat$celltype_annot <- rep(NA, ncol(dat))
# Cells from the epithelial lineage are assigned a cell type label based on the
# annotation from there.
epiatlas <- readRDS(epiatlas)
epicells <- intersect(colnames(epiatlas), colnames(dat))
dat$celltype_annot[match(epicells, colnames(dat))] <-
as.character(epiatlas$leiden_res0.4_annot)[match(epicells, colnames(epiatlas))]
# For the other lineages, cells that appear in HECA
# are assigned the label from there.
hecaidx <- which(!is.na(dat$HECA_Celltype) & dat$HECA_Lineage != "Epithelial")
dat$celltype_annot[hecaidx] <- dat$HECA_Celltype[hecaidx]
# Immune cells are assigned the refined immune cell type
dat$celltype_annot[which(dat$celltype_annot %in%
c("Immune_Myeloid", "Immune_Lymphoid"))] <- NA
hecaimmuneidx <- which(!is.na(dat$HECA_CelltypeImmune))
dat$celltype_annot[hecaimmuneidx] <- dat$HECA_CelltypeImmune[hecaimmuneidx]
# Group together some subtypes
dat$celltype_annot[dat$celltype_annot %in%
c("dStromal_early", "dStromal_late", "dStromal_mid")] <- "dStromal"
dat$celltype_annot[dat$celltype_annot %in% c("ePV_1a", "ePV_1b", "ePV_2")] <- "ePV"
dat$celltype_annot[dat$celltype_annot %in%
c("eStromal", "eStromal_cycling", "eStromal_MMPs")] <- "eStromal"
table(dat$celltype_annot, useNA = "ifany")
Arterial B_cell cDC1
609 130 68
cDC2 ciliated cycling
198 3595 6029
dHormones dStromal eM1
1 24519 520
eM2 ePV eStromal
589 3348 26928
Fibroblast_basalis hormone responsive early hormone responsive late
51 4865 3175
HOXA13 ILC3 KRT5+
72 288 81
luminal early luminal late Lymphatic
1274 10564 82
Mast_cell Monocyte mPV
28 73 509
MUC5B+ pDC Peripheral_lymphocyte
152 103 288
Plasma_B_cell pre-ciliated Red_blood_cell
56 178 30
remodeling secretory early secretory late
728 4296 1265
secretory mid sHormones T_cell_CD4
11287 199 1202
T_cell_CD8 T_cell_cycling T_Reg
1475 155 126
transcriptionally active uNK1 uNK1_cycling
1400 509 333
uNK2 uNK3 uSMCs
1037 1117 998
Venous <NA>
5018 54019
# All remaining cells are assigned a label by running SingleR with the
# previously annotated cells as the reference.
datleft <- dat[, is.na(dat$celltype_annot)]
datref <- dat[, !is.na(dat$celltype_annot)]
set.seed(123)
ref_summed <- aggregateAcrossCells(datref, id = datref$celltype_annot,
use.assay.type = "logcounts",
statistics = "mean")
pred <- SingleR(test = datleft, ref = ref_summed,
labels = ref_summed$celltype_annot)
plotScoreHeatmap(pred)dat$celltype_annot[match(rownames(pred), colnames(dat))] <- pred$labels
table(dat$celltype_annot, useNA = "ifany")
Arterial B_cell cDC1
1367 290 104
cDC2 ciliated cycling
282 3604 6101
dHormones dStromal eM1
19 33458 684
eM2 ePV eStromal
774 5041 40204
Fibroblast_basalis hormone responsive early hormone responsive late
1037 5812 3262
HOXA13 ILC3 KRT5+
354 323 103
luminal early luminal late Lymphatic
1311 10860 116
Mast_cell Monocyte mPV
44 305 5167
MUC5B+ pDC Peripheral_lymphocyte
155 132 414
Plasma_B_cell pre-ciliated Red_blood_cell
63 186 49
remodeling secretory early secretory late
776 5500 1359
secretory mid sHormones T_cell_CD4
11620 6821 1860
T_cell_CD8 T_cell_cycling T_Reg
1812 249 186
transcriptionally active uNK1 uNK1_cycling
1565 785 531
uNK2 uNK3 uSMCs
1486 1363 5957
Venous
10076
# Exclude cells that are not assigned a cluster label
table(is.na(dat$celltype_annot))
FALSE
173567
dat <- dat[, !is.na(dat$celltype_annot)]
leiden_res0.4_annot_cols <- c(
"pre-ciliated" = "#AEC7E8",
"ciliated" = "#1F77B4",
"secretory early" = "#FF9896",
"secretory mid" = "#D62728",
"secretory late" = "#C49C94",
"luminal early" = "#C5B0D5",
"luminal late" = "#9467BD",
"transcriptionally active" = "#FF7F0E",
"MUC5B+" = "#2CA02C",
"KRT5+" = "#8C564B",
"remodeling" = "grey20",
"cycling" = "#FFBB78",
"hormone responsive early" = "#F7B6D2",
"hormone responsive late" = "#E377C2"
)Dot plots of clusters and marker genes of interest
Marker genes from HECA paper (https://www.nature.com/articles/s41588-024-01873-w, Fig 2a).
col = circlize::colorRamp2(
breaks = seq(0, 7.5, length.out = 64),
colors = colorRampPalette(c("grey90", "purple"))(64))
markers <- intersect(
c("PAX8", "EPCAM", "FOXA2", "MMP7", "SOX9", "AXIN2", "TRH", "ALDH1A1",
"CDH2", "KLK11", "SLC7A11", "DKK1", "PHLDA1", "KMO", "IHH", "EMID1",
"HPRT1", "SUFU", "OPRK1", "CBR3", "S100P", "SCGB2A2", "ABCG1", "PAEP",
"DPP4", "GPX3", "PTPRR", "FGF7", "FXYD2", "IL32", "WNT7A", "LGR5",
"MMP7", "TNF", "VTCN1", "CLDN22", "SULT1E1", "LEFTY1", "PTGS1", "IL6",
"CCNO", "CDC20B", "MUC12", "PIFO", "FOXJ1", "TP73", "BPIFB1", "TFF3",
"MUC5B", "SAA1", "TP63", "SNCG", "KRT5", "PGR", "AR", "ESR1"),
rownames(dat))
plotDots(dat, features = markers,
group = "celltype_annot",
swap_rownames = "gene_name", zlim = c(0, 7.5),
colour = col(seq(0, 7.5, length.out = 64))) +
scale_size(limits=c(0.2, 1)) +
labs(x = "") +
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))Plot dimension reductions
plotReducedDim(dat, dimred = "UMAP_fastMNNcorr_sketch",
other_fields = "HECA_Celltype", point_size = 0.5) +
geom_point(aes(color = HECA_Celltype), size = 0.5) +
guides(color = guide_legend(override.aes = list(size = 2, alpha = 1)))for (vrb in c("RCA_Celltype", "RCA_Location", "RCA_BiopsyType",
"matchDataset", "StageCoarse", "Stage", "donorCol",
"scDblFinder.class", "SeuratCC", "celltype_annot")) {
print(plotReducedDim(dat, dimred = "UMAP_fastMNNcorr_sketch",
colour_by = vrb, point_size = 0.5) +
guides(color = guide_legend(override.aes = list(size = 2, alpha = 1))))
}print(plotReducedDim(dat, dimred = "UMAP_fastMNNcorr_sketch",
colour_by = "celltype_annot", point_size = 0.5) +
scale_color_manual(values = leiden_res0.4_annot_cols, name = "") +
guides(color = guide_legend(override.aes = list(size = 2, alpha = 1))))Scale for colour is already present.
Adding another scale for colour, which will replace the existing scale.
for (vrb in c("subsets_MT_percent", "detected", "intronic_fraction")) {
print(plotReducedDim(dat, dimred = "UMAP_fastMNNcorr_sketch",
colour_by = vrb, point_size = 0.5))
}Plot cluster abundance over the cycle
col <- "celltype_annot"
df <- data.frame(colData(dat)[, c("StageFactor", col)]) |>
group_by(StageFactor, .data[[col]]) |>
tally() |>
group_by(StageFactor) |>
mutate(perc = n / sum(n) * 100) |>
group_by(.data[[col]]) |>
mutate(scaledperc = perc / sum(perc)) |>
ungroup() |>
dplyr::filter(!is.na(StageFactor))
ggplot(df, aes(x = StageFactor, y = perc, color = .data[[col]])) +
geom_point() +
geom_line(aes(x = as.numeric(StageFactor)), linewidth = 1) +
theme_cowplot() +
labs(x = "", y = "Relative abundance (%)",
title = col) +
theme(legend.position = "bottom") +
scale_color_manual(values = leiden_res0.4_annot_cols, name = "")## One panel per cluster
ggplot(df, aes(x = StageFactor, y = perc, color = .data[[col]])) +
geom_point() +
geom_line(aes(x = as.numeric(StageFactor)), linewidth = 1) +
theme_cowplot() +
labs(x = "", y = "Relative abundance (%)",
title = col) +
facet_wrap(~ .data[[col]], scales = "free_y") +
scale_color_manual(values = leiden_res0.4_annot_cols, name = "") +
theme(legend.position = "none",
axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5),
strip.text = element_text(size = 10))Find marker genes
First, we compare all clusters in a pairwise manner. The comparison is only performed for genes with a non-zero count in at least 0.05% of the cells (corresponding to 87 of the 173567 cells in the data set). These gene IDs are also written to a file, for potential use as the ‘universe’ in over-representation analyses.
frac_detected <- rowMeans(assay(dat, "counts") > 0)
genes_to_test <- rownames(dat)[frac_detected >= (0.05 / 100)]
write.table(genes_to_test, file = "epithelial_stromal_cell_atlas_gene_universe.txt",
row.names = FALSE, col.names = FALSE, quote = FALSE, sep = "\t")
clc <- "celltype_annot"
ptts <- pairwiseTTests(dat, groups = dat[[clc]],
block = dat[[blockvar]], direction = "up",
subset.row = genes_to_test)
markers <- combineMarkers(ptts$statistics, ptts$pairs, pval.type = "all")
## Write results to files
ttestdir <- paste0("pairwise_ttests_", clc)
if (!file.exists(ttestdir)) {
dir.create(ttestdir, recursive = TRUE)
}
for (i in seq_len(nrow(ptts$pairs))) {
df <- cbind(gene = rownames(ptts$statistics[[i]]),
ptts$statistics[[i]])
df <- df[order(df$p.value, -df$logFC), ]
filename <- paste0(clc, "_cluster_", ptts$pairs$first[i],
"_vs_", ptts$pairs$second[i], ".txt")
write.table(df, file = file.path(ttestdir, filename), quote = FALSE,
sep = "\t", row.names = FALSE, col.names = TRUE)
}
markerdir <- paste0("marker_genes_all_", clc)
if (!file.exists(markerdir)) {
dir.create(markerdir, recursive = TRUE)
}
for (i in seq_along(markers)) {
df <- cbind(gene = rownames(markers[[i]]), markers[[i]])
filename <- paste0(clc, "_cluster_", names(markers)[i], "_markers.txt")
write.table(df, file = file.path(markerdir, filename), quote = FALSE,
sep = "\t", row.names = FALSE, col.names = TRUE)
}Save object
saveRDS(dat, file = "invivo_cell_atlas.rds")Session info
sessionInfo()R version 4.4.2 (2024-10-31)
Platform: x86_64-pc-linux-gnu
Running under: Rocky Linux 8.10 (Green Obsidian)
Matrix products: default
BLAS/LAPACK: FlexiBLAS OPENBLAS; LAPACK version 3.12.0
locale:
[1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
[3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
[5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
[7] LC_PAPER=en_US.UTF-8 LC_NAME=C
[9] LC_ADDRESS=C LC_TELEPHONE=C
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
time zone: Europe/Zurich
tzcode source: system (glibc)
attached base packages:
[1] parallel grid stats4 stats graphics grDevices utils
[8] datasets methods base
other attached packages:
[1] SingleR_2.8.0 DT_0.33
[3] igraph_2.1.4 BiocParallel_1.40.2
[5] bluster_1.16.0 org.Hs.eg.db_3.20.0
[7] AnnotationDbi_1.68.0 tricycle_1.14.0
[9] sketchR_1.2.0 Seurat_5.2.1
[11] SeuratObject_5.0.2 sp_2.2-0
[13] scDblFinder_1.20.2 circlize_0.4.16
[15] ComplexHeatmap_2.22.0 cowplot_1.1.3
[17] batchelor_1.22.0 msigdbr_10.0.2
[19] tidyr_1.3.1 dplyr_1.1.4
[21] scran_1.34.0 scater_1.34.1
[23] ggplot2_3.5.2 scuttle_1.16.0
[25] SingleCellExperiment_1.28.1 SummarizedExperiment_1.36.0
[27] Biobase_2.66.0 GenomicRanges_1.58.0
[29] GenomeInfoDb_1.42.3 IRanges_2.40.1
[31] S4Vectors_0.44.0 BiocGenerics_0.52.0
[33] MatrixGenerics_1.18.1 matrixStats_1.5.0
[35] fishpond_2.12.0
loaded via a namespace (and not attached):
[1] spatstat.sparse_3.1-0 bitops_1.0-9
[3] httr_1.4.7 RColorBrewer_1.1-3
[5] doParallel_1.0.17 tools_4.4.2
[7] sctransform_0.4.1 R6_2.6.1
[9] ResidualMatrix_1.16.0 lazyeval_0.2.2
[11] uwot_0.2.3 GetoptLong_1.0.5
[13] withr_3.0.2 gridExtra_2.3
[15] progressr_0.15.1 cli_3.6.4
[17] Cairo_1.6-2 spatstat.explore_3.4-2
[19] fastDummies_1.7.5 labeling_0.4.3
[21] sass_0.4.10 mvtnorm_1.3-3
[23] spatstat.data_3.1-6 readr_2.1.5
[25] ggridges_0.5.6 pbapply_1.7-2
[27] Rsamtools_2.22.0 dichromat_2.0-0.1
[29] parallelly_1.43.0 limma_3.62.2
[31] RSQLite_2.3.9 circular_0.5-1
[33] generics_0.1.3 shape_1.4.6.1
[35] BiocIO_1.16.0 crosstalk_1.2.1
[37] gtools_3.9.5 ica_1.0-3
[39] spatstat.random_3.3-3 Matrix_1.7-1
[41] ggbeeswarm_0.7.2 abind_1.4-8
[43] lifecycle_1.0.4 yaml_2.3.10
[45] edgeR_4.4.2 SparseArray_1.6.2
[47] Rtsne_0.17 blob_1.2.4
[49] promises_1.3.2 dqrng_0.4.1
[51] crayon_1.5.3 dir.expiry_1.14.0
[53] miniUI_0.1.2 lattice_0.22-6
[55] beachmat_2.22.0 KEGGREST_1.46.0
[57] magick_2.8.6 pillar_1.10.2
[59] knitr_1.50 metapod_1.14.0
[61] boot_1.3-31 rjson_0.2.23
[63] xgboost_1.7.9.1 future.apply_1.11.3
[65] codetools_0.2-20 glue_1.8.0
[67] spatstat.univar_3.1-2 data.table_1.17.0
[69] vctrs_0.6.5 png_0.1-8
[71] spam_2.11-1 gtable_0.3.6
[73] assertthat_0.2.1 cachem_1.1.0
[75] xfun_0.52 S4Arrays_1.6.0
[77] mime_0.13 survival_3.7-0
[79] pheatmap_1.0.12 iterators_1.0.14
[81] statmod_1.5.0 fitdistrplus_1.2-2
[83] ROCR_1.0-11 nlme_3.1-166
[85] bit64_4.6.0-1 filelock_1.0.3
[87] RcppAnnoy_0.0.22 bslib_0.9.0
[89] irlba_2.3.5.1 vipor_0.4.7
[91] KernSmooth_2.23-24 DBI_1.2.3
[93] colorspace_2.1-1 tidyselect_1.2.1
[95] bit_4.6.0 compiler_4.4.2
[97] curl_6.2.2 BiocNeighbors_2.0.1
[99] basilisk.utils_1.18.0 DelayedArray_0.32.0
[101] plotly_4.10.4 msigdbdf_24.1.0
[103] rtracklayer_1.66.0 scales_1.3.0.9000
[105] lmtest_0.9-40 stringr_1.5.1
[107] digest_0.6.37 goftest_1.2-3
[109] spatstat.utils_3.1-3 rmarkdown_2.29
[111] basilisk_1.18.0 XVector_0.46.0
[113] htmltools_0.5.8.1 pkgconfig_2.0.3
[115] sparseMatrixStats_1.18.0 fastmap_1.2.0
[117] rlang_1.1.6 GlobalOptions_0.1.2
[119] htmlwidgets_1.6.4 UCSC.utils_1.2.0
[121] shiny_1.10.0 DelayedMatrixStats_1.28.1
[123] jquerylib_0.1.4 farver_2.1.2
[125] zoo_1.8-14 jsonlite_2.0.0
[127] BiocSingular_1.22.0 RCurl_1.98-1.17
[129] magrittr_2.0.3 GenomeInfoDbData_1.2.13
[131] dotCall64_1.2 patchwork_1.3.0
[133] Rcpp_1.0.14 ggnewscale_0.5.1
[135] babelgene_22.9 viridis_0.6.5
[137] reticulate_1.42.0 stringi_1.8.7
[139] zlibbioc_1.52.0 MASS_7.3-61
[141] plyr_1.8.9 listenv_0.9.1
[143] ggrepel_0.9.6 deldir_2.0-4
[145] Biostrings_2.74.1 splines_4.4.2
[147] tensor_1.5 hms_1.1.3
[149] locfit_1.5-9.12 spatstat.geom_3.3-6
[151] RcppHNSW_0.6.0 reshape2_1.4.4
[153] ScaledMatrix_1.14.0 XML_3.99-0.18
[155] evaluate_1.0.3 snifter_1.16.0
[157] tzdb_0.5.0 foreach_1.5.2
[159] httpuv_1.6.16 RANN_2.6.2
[161] purrr_1.0.4 polyclip_1.10-7
[163] future_1.40.0 clue_0.3-66
[165] scattermore_1.2 rsvd_1.0.5
[167] xtable_1.8-4 restfulr_0.0.15
[169] svMisc_1.4.3 RSpectra_0.16-2
[171] later_1.4.2 viridisLite_0.4.2
[173] tibble_3.2.1 memoise_2.0.1
[175] beeswarm_0.4.0 GenomicAlignments_1.42.0
[177] cluster_2.1.6 globals_0.17.0