IVMC analysis and plots

Published

2025-07-03

Description

This script will generate the following plots:

  • Fig 1 d, e, f, g

  • Fig 3 c

  • Fig 5 d

  • ExtDat Fig 1 b, b, d

  • ExtDat Fig 2 a

  • ExtDat Fig 3 a, d, e

  • ExtDat Fig 4 a

  • ExtDat Fig 5 a, b

  • ExtDat Fig 9 a

  • ExtDat Fig 10 a, b

preparation

load libraries

library(sva)
library(edgeR) 
library(dplyr)
library(PCAtools)
library(ComplexHeatmap)
library(gprofiler2)
gprofiler2::set_base_url("https://biit.cs.ut.ee/gprofiler_archive3/e112_eg59_p19")

new folders

plot_dir <- "paper_figures_IVMC"
if (!file.exists(plot_dir)) {
    dir.create(plot_dir, showWarnings = FALSE, recursive = FALSE)
}

table_dir <- "paper_tables_IVMC"
if (!file.exists(table_dir)) {
    dir.create(table_dir, showWarnings = FALSE, recursive = FALSE)
}

reading raw counts and tpm values

all_counts <- read.delim("data/IVMC.raw_counts.20250403.tab")
# calculate cpm values on the complete data set first
all_counts.cpm <- cpm(all_counts, log=FALSE)

all_tpm <- read.delim("data/IVMC.TPM.20250403.tab")

set up metadata

colnames(all_counts)
  [1] "B032_tp0_NB_NH" "B044_tp0_NB_NH" "B050_tp0_NB_NH" "B066_tp0_NB_NH"
  [5] "B078_tp0_NB_NH" "B080_tp0_NB_NH" "B032_tp1_NB_H"  "B032_tp1_NB_NH"
  [9] "B044_tp1_NB_H"  "B044_tp1_NB_NH" "B050_tp1_NB_H"  "B050_tp1_NB_NH"
 [13] "B066_tp1_NB_H"  "B066_tp1_NB_NH" "B078_tp1_NB_H"  "B078_tp1_NB_NH"
 [17] "B080_tp1_NB_H"  "B080_tp1_NB_NH" "B032_tp2_NB_H"  "B032_tp2_NB_NH"
 [21] "B044_tp2_NB_H"  "B044_tp2_NB_NH" "B050_tp2_NB_H"  "B050_tp2_NB_NH"
 [25] "B066_tp2_NB_H"  "B066_tp2_NB_NH" "B078_tp2_NB_H"  "B078_tp2_NB_NH"
 [29] "B080_tp2_NB_H"  "B080_tp2_NB_NH" "B032_tp3_NB_H"  "B032_tp3_NB_NH"
 [33] "B044_tp3_NB_H"  "B044_tp3_NB_NH" "B050_tp3_NB_H"  "B050_tp3_NB_NH"
 [37] "B066_tp3_NB_H"  "B066_tp3_NB_NH" "B078_tp3_NB_H"  "B078_tp3_NB_NH"
 [41] "B080_tp3_NB_H"  "B080_tp3_NB_NH" "B032_tp4_B_H"   "B032_tp4_B_NH" 
 [45] "B032_tp4_NB_H"  "B032_tp4_NB_NH" "B044_tp4_B_H"   "B044_tp4_B_NH" 
 [49] "B044_tp4_NB_H"  "B044_tp4_NB_NH" "B050_tp4_B_H"   "B050_tp4_B_NH" 
 [53] "B050_tp4_NB_H"  "B050_tp4_NB_NH" "B066_tp4_B_H"   "B066_tp4_B_NH" 
 [57] "B066_tp4_NB_H"  "B066_tp4_NB_NH" "B078_tp4_B_H"   "B078_tp4_B_NH" 
 [61] "B078_tp4_NB_H"  "B078_tp4_NB_NH" "B080_tp4_B_H"   "B080_tp4_B_NH" 
 [65] "B080_tp4_NB_H"  "B080_tp4_NB_NH" "B032_tp5_B_H"   "B032_tp5_B_NH" 
 [69] "B032_tp5_NB_H"  "B032_tp5_NB_NH" "B044_tp5_B_H"   "B044_tp5_B_NH" 
 [73] "B044_tp5_NB_H"  "B044_tp5_NB_NH" "B050_tp5_B_H"   "B050_tp5_B_NH" 
 [77] "B050_tp5_NB_H"  "B050_tp5_NB_NH" "B066_tp5_B_H"   "B066_tp5_B_NH" 
 [81] "B066_tp5_NB_H"  "B066_tp5_NB_NH" "B078_tp5_B_H"   "B078_tp5_B_NH" 
 [85] "B078_tp5_NB_H"  "B078_tp5_NB_NH" "B080_tp5_B_H"   "B080_tp5_B_NH" 
 [89] "B080_tp5_NB_H"  "B080_tp5_NB_NH" "B032_tp6_B_H"   "B032_tp6_B_NH" 
 [93] "B032_tp6_NB_H"  "B032_tp6_NB_NH" "B044_tp6_B_H"   "B044_tp6_B_NH" 
 [97] "B044_tp6_NB_H"  "B044_tp6_NB_NH" "B050_tp6_B_H"   "B050_tp6_B_NH" 
[101] "B050_tp6_NB_H"  "B050_tp6_NB_NH" "B066_tp6_B_H"   "B066_tp6_B_NH" 
[105] "B066_tp6_NB_H"  "B066_tp6_NB_NH" "B078_tp6_B_H"   "B078_tp6_B_NH" 
[109] "B078_tp6_NB_H"  "B078_tp6_NB_NH" "B080_tp6_B_H"   "B080_tp6_B_NH" 
[113] "B080_tp6_NB_H"  "B080_tp6_NB_NH"
metadata <- data.frame(sample = colnames(all_counts),
                       donor = unlist(strsplit(colnames(all_counts),"_"))[rep(c(TRUE,FALSE,FALSE,FALSE),114)],
                       timepoint = unlist(strsplit(colnames(all_counts),"_"))[rep(c(FALSE,TRUE,FALSE,FALSE),114)],
                       breakdown = unlist(strsplit(colnames(all_counts),"_"))[rep(c(FALSE,FALSE,TRUE,FALSE),114)],
                       hormones  = unlist(strsplit(colnames(all_counts),"_"))[rep(c(FALSE,FALSE,FALSE,TRUE),114)] )
rownames(metadata) <- metadata$sample
metadata$timepoint_B_H <- paste(metadata$timepoint,metadata$breakdown,metadata$hormones, sep = "_")
metadata
                       sample donor timepoint breakdown hormones timepoint_B_H
B032_tp0_NB_NH B032_tp0_NB_NH  B032       tp0        NB       NH     tp0_NB_NH
B044_tp0_NB_NH B044_tp0_NB_NH  B044       tp0        NB       NH     tp0_NB_NH
B050_tp0_NB_NH B050_tp0_NB_NH  B050       tp0        NB       NH     tp0_NB_NH
B066_tp0_NB_NH B066_tp0_NB_NH  B066       tp0        NB       NH     tp0_NB_NH
B078_tp0_NB_NH B078_tp0_NB_NH  B078       tp0        NB       NH     tp0_NB_NH
B080_tp0_NB_NH B080_tp0_NB_NH  B080       tp0        NB       NH     tp0_NB_NH
B032_tp1_NB_H   B032_tp1_NB_H  B032       tp1        NB        H      tp1_NB_H
B032_tp1_NB_NH B032_tp1_NB_NH  B032       tp1        NB       NH     tp1_NB_NH
B044_tp1_NB_H   B044_tp1_NB_H  B044       tp1        NB        H      tp1_NB_H
B044_tp1_NB_NH B044_tp1_NB_NH  B044       tp1        NB       NH     tp1_NB_NH
B050_tp1_NB_H   B050_tp1_NB_H  B050       tp1        NB        H      tp1_NB_H
B050_tp1_NB_NH B050_tp1_NB_NH  B050       tp1        NB       NH     tp1_NB_NH
B066_tp1_NB_H   B066_tp1_NB_H  B066       tp1        NB        H      tp1_NB_H
B066_tp1_NB_NH B066_tp1_NB_NH  B066       tp1        NB       NH     tp1_NB_NH
B078_tp1_NB_H   B078_tp1_NB_H  B078       tp1        NB        H      tp1_NB_H
B078_tp1_NB_NH B078_tp1_NB_NH  B078       tp1        NB       NH     tp1_NB_NH
B080_tp1_NB_H   B080_tp1_NB_H  B080       tp1        NB        H      tp1_NB_H
B080_tp1_NB_NH B080_tp1_NB_NH  B080       tp1        NB       NH     tp1_NB_NH
B032_tp2_NB_H   B032_tp2_NB_H  B032       tp2        NB        H      tp2_NB_H
B032_tp2_NB_NH B032_tp2_NB_NH  B032       tp2        NB       NH     tp2_NB_NH
B044_tp2_NB_H   B044_tp2_NB_H  B044       tp2        NB        H      tp2_NB_H
B044_tp2_NB_NH B044_tp2_NB_NH  B044       tp2        NB       NH     tp2_NB_NH
B050_tp2_NB_H   B050_tp2_NB_H  B050       tp2        NB        H      tp2_NB_H
B050_tp2_NB_NH B050_tp2_NB_NH  B050       tp2        NB       NH     tp2_NB_NH
B066_tp2_NB_H   B066_tp2_NB_H  B066       tp2        NB        H      tp2_NB_H
B066_tp2_NB_NH B066_tp2_NB_NH  B066       tp2        NB       NH     tp2_NB_NH
B078_tp2_NB_H   B078_tp2_NB_H  B078       tp2        NB        H      tp2_NB_H
B078_tp2_NB_NH B078_tp2_NB_NH  B078       tp2        NB       NH     tp2_NB_NH
B080_tp2_NB_H   B080_tp2_NB_H  B080       tp2        NB        H      tp2_NB_H
B080_tp2_NB_NH B080_tp2_NB_NH  B080       tp2        NB       NH     tp2_NB_NH
B032_tp3_NB_H   B032_tp3_NB_H  B032       tp3        NB        H      tp3_NB_H
B032_tp3_NB_NH B032_tp3_NB_NH  B032       tp3        NB       NH     tp3_NB_NH
B044_tp3_NB_H   B044_tp3_NB_H  B044       tp3        NB        H      tp3_NB_H
B044_tp3_NB_NH B044_tp3_NB_NH  B044       tp3        NB       NH     tp3_NB_NH
B050_tp3_NB_H   B050_tp3_NB_H  B050       tp3        NB        H      tp3_NB_H
B050_tp3_NB_NH B050_tp3_NB_NH  B050       tp3        NB       NH     tp3_NB_NH
B066_tp3_NB_H   B066_tp3_NB_H  B066       tp3        NB        H      tp3_NB_H
B066_tp3_NB_NH B066_tp3_NB_NH  B066       tp3        NB       NH     tp3_NB_NH
B078_tp3_NB_H   B078_tp3_NB_H  B078       tp3        NB        H      tp3_NB_H
B078_tp3_NB_NH B078_tp3_NB_NH  B078       tp3        NB       NH     tp3_NB_NH
B080_tp3_NB_H   B080_tp3_NB_H  B080       tp3        NB        H      tp3_NB_H
B080_tp3_NB_NH B080_tp3_NB_NH  B080       tp3        NB       NH     tp3_NB_NH
B032_tp4_B_H     B032_tp4_B_H  B032       tp4         B        H       tp4_B_H
B032_tp4_B_NH   B032_tp4_B_NH  B032       tp4         B       NH      tp4_B_NH
B032_tp4_NB_H   B032_tp4_NB_H  B032       tp4        NB        H      tp4_NB_H
B032_tp4_NB_NH B032_tp4_NB_NH  B032       tp4        NB       NH     tp4_NB_NH
B044_tp4_B_H     B044_tp4_B_H  B044       tp4         B        H       tp4_B_H
B044_tp4_B_NH   B044_tp4_B_NH  B044       tp4         B       NH      tp4_B_NH
B044_tp4_NB_H   B044_tp4_NB_H  B044       tp4        NB        H      tp4_NB_H
B044_tp4_NB_NH B044_tp4_NB_NH  B044       tp4        NB       NH     tp4_NB_NH
B050_tp4_B_H     B050_tp4_B_H  B050       tp4         B        H       tp4_B_H
B050_tp4_B_NH   B050_tp4_B_NH  B050       tp4         B       NH      tp4_B_NH
B050_tp4_NB_H   B050_tp4_NB_H  B050       tp4        NB        H      tp4_NB_H
B050_tp4_NB_NH B050_tp4_NB_NH  B050       tp4        NB       NH     tp4_NB_NH
B066_tp4_B_H     B066_tp4_B_H  B066       tp4         B        H       tp4_B_H
B066_tp4_B_NH   B066_tp4_B_NH  B066       tp4         B       NH      tp4_B_NH
B066_tp4_NB_H   B066_tp4_NB_H  B066       tp4        NB        H      tp4_NB_H
B066_tp4_NB_NH B066_tp4_NB_NH  B066       tp4        NB       NH     tp4_NB_NH
B078_tp4_B_H     B078_tp4_B_H  B078       tp4         B        H       tp4_B_H
B078_tp4_B_NH   B078_tp4_B_NH  B078       tp4         B       NH      tp4_B_NH
B078_tp4_NB_H   B078_tp4_NB_H  B078       tp4        NB        H      tp4_NB_H
B078_tp4_NB_NH B078_tp4_NB_NH  B078       tp4        NB       NH     tp4_NB_NH
B080_tp4_B_H     B080_tp4_B_H  B080       tp4         B        H       tp4_B_H
B080_tp4_B_NH   B080_tp4_B_NH  B080       tp4         B       NH      tp4_B_NH
B080_tp4_NB_H   B080_tp4_NB_H  B080       tp4        NB        H      tp4_NB_H
B080_tp4_NB_NH B080_tp4_NB_NH  B080       tp4        NB       NH     tp4_NB_NH
B032_tp5_B_H     B032_tp5_B_H  B032       tp5         B        H       tp5_B_H
B032_tp5_B_NH   B032_tp5_B_NH  B032       tp5         B       NH      tp5_B_NH
B032_tp5_NB_H   B032_tp5_NB_H  B032       tp5        NB        H      tp5_NB_H
B032_tp5_NB_NH B032_tp5_NB_NH  B032       tp5        NB       NH     tp5_NB_NH
B044_tp5_B_H     B044_tp5_B_H  B044       tp5         B        H       tp5_B_H
B044_tp5_B_NH   B044_tp5_B_NH  B044       tp5         B       NH      tp5_B_NH
B044_tp5_NB_H   B044_tp5_NB_H  B044       tp5        NB        H      tp5_NB_H
B044_tp5_NB_NH B044_tp5_NB_NH  B044       tp5        NB       NH     tp5_NB_NH
B050_tp5_B_H     B050_tp5_B_H  B050       tp5         B        H       tp5_B_H
B050_tp5_B_NH   B050_tp5_B_NH  B050       tp5         B       NH      tp5_B_NH
B050_tp5_NB_H   B050_tp5_NB_H  B050       tp5        NB        H      tp5_NB_H
B050_tp5_NB_NH B050_tp5_NB_NH  B050       tp5        NB       NH     tp5_NB_NH
B066_tp5_B_H     B066_tp5_B_H  B066       tp5         B        H       tp5_B_H
B066_tp5_B_NH   B066_tp5_B_NH  B066       tp5         B       NH      tp5_B_NH
B066_tp5_NB_H   B066_tp5_NB_H  B066       tp5        NB        H      tp5_NB_H
B066_tp5_NB_NH B066_tp5_NB_NH  B066       tp5        NB       NH     tp5_NB_NH
B078_tp5_B_H     B078_tp5_B_H  B078       tp5         B        H       tp5_B_H
B078_tp5_B_NH   B078_tp5_B_NH  B078       tp5         B       NH      tp5_B_NH
B078_tp5_NB_H   B078_tp5_NB_H  B078       tp5        NB        H      tp5_NB_H
B078_tp5_NB_NH B078_tp5_NB_NH  B078       tp5        NB       NH     tp5_NB_NH
B080_tp5_B_H     B080_tp5_B_H  B080       tp5         B        H       tp5_B_H
B080_tp5_B_NH   B080_tp5_B_NH  B080       tp5         B       NH      tp5_B_NH
B080_tp5_NB_H   B080_tp5_NB_H  B080       tp5        NB        H      tp5_NB_H
B080_tp5_NB_NH B080_tp5_NB_NH  B080       tp5        NB       NH     tp5_NB_NH
B032_tp6_B_H     B032_tp6_B_H  B032       tp6         B        H       tp6_B_H
B032_tp6_B_NH   B032_tp6_B_NH  B032       tp6         B       NH      tp6_B_NH
B032_tp6_NB_H   B032_tp6_NB_H  B032       tp6        NB        H      tp6_NB_H
B032_tp6_NB_NH B032_tp6_NB_NH  B032       tp6        NB       NH     tp6_NB_NH
B044_tp6_B_H     B044_tp6_B_H  B044       tp6         B        H       tp6_B_H
B044_tp6_B_NH   B044_tp6_B_NH  B044       tp6         B       NH      tp6_B_NH
B044_tp6_NB_H   B044_tp6_NB_H  B044       tp6        NB        H      tp6_NB_H
B044_tp6_NB_NH B044_tp6_NB_NH  B044       tp6        NB       NH     tp6_NB_NH
B050_tp6_B_H     B050_tp6_B_H  B050       tp6         B        H       tp6_B_H
B050_tp6_B_NH   B050_tp6_B_NH  B050       tp6         B       NH      tp6_B_NH
B050_tp6_NB_H   B050_tp6_NB_H  B050       tp6        NB        H      tp6_NB_H
B050_tp6_NB_NH B050_tp6_NB_NH  B050       tp6        NB       NH     tp6_NB_NH
B066_tp6_B_H     B066_tp6_B_H  B066       tp6         B        H       tp6_B_H
B066_tp6_B_NH   B066_tp6_B_NH  B066       tp6         B       NH      tp6_B_NH
B066_tp6_NB_H   B066_tp6_NB_H  B066       tp6        NB        H      tp6_NB_H
B066_tp6_NB_NH B066_tp6_NB_NH  B066       tp6        NB       NH     tp6_NB_NH
B078_tp6_B_H     B078_tp6_B_H  B078       tp6         B        H       tp6_B_H
B078_tp6_B_NH   B078_tp6_B_NH  B078       tp6         B       NH      tp6_B_NH
B078_tp6_NB_H   B078_tp6_NB_H  B078       tp6        NB        H      tp6_NB_H
B078_tp6_NB_NH B078_tp6_NB_NH  B078       tp6        NB       NH     tp6_NB_NH
B080_tp6_B_H     B080_tp6_B_H  B080       tp6         B        H       tp6_B_H
B080_tp6_B_NH   B080_tp6_B_NH  B080       tp6         B       NH      tp6_B_NH
B080_tp6_NB_H   B080_tp6_NB_H  B080       tp6        NB        H      tp6_NB_H
B080_tp6_NB_NH B080_tp6_NB_NH  B080       tp6        NB       NH     tp6_NB_NH

color and description scheme

source("../params.R")

timepoint_colors <- bulk_timepoint_colors[c("tp0","tp1","tp2","tp3","tp4","tp5","tp6")]
timepoint_colors.alt_nlb <- bulk_timepoint_colors[c("Pre-diff","P4-diff","H. W. 24h","H. W. 48h","P. B. 24h","E2-diff 24h","E2-diff 48h")]

heatmap_labels_colors <- c("black","white","black","black","white","black","black")

protocol_colors <- hormones_colors
names(protocol_colors) <- c("B_NH","IVMC")
metadata$description <- bulk_timepoint_map_sl["tp0"]
metadata$description[metadata$timepoint == "tp1"] <- bulk_timepoint_map_sl["tp1"]
metadata$description[metadata$timepoint == "tp2"] <- bulk_timepoint_map_sl["tp2"]
metadata$description[metadata$timepoint == "tp3"] <- bulk_timepoint_map_sl["tp3"]
metadata$description[metadata$timepoint == "tp4"] <- bulk_timepoint_map_sl["tp4"]
metadata$description[metadata$timepoint == "tp5"] <- bulk_timepoint_map_sl["tp5"]
metadata$description[metadata$timepoint == "tp6"] <- bulk_timepoint_map_sl["tp6"]
# alternative
metadata$description.alt <- bulk_timepoint_map_mls["tp0"]
metadata$description.alt[metadata$timepoint == "tp1"] <- bulk_timepoint_map_mls["tp1"]
metadata$description.alt[metadata$timepoint == "tp2"] <- bulk_timepoint_map_mls["tp2"]
metadata$description.alt[metadata$timepoint == "tp3"] <- bulk_timepoint_map_mls["tp3"]
metadata$description.alt[metadata$timepoint == "tp4"] <- bulk_timepoint_map_mls["tp4"]
metadata$description.alt[metadata$timepoint == "tp5"] <- bulk_timepoint_map_mls["tp5"]
metadata$description.alt[metadata$timepoint == "tp6"] <- bulk_timepoint_map_mls["tp6"]
# alternative no line break
metadata$description.alt_nlb <- gsub("\n", " ", bulk_timepoint_map_mls["tp0"])
metadata$description.alt_nlb[metadata$timepoint == "tp1"] <- gsub("\n", " ", bulk_timepoint_map_mls["tp1"])
metadata$description.alt_nlb[metadata$timepoint == "tp2"] <- gsub("\n", " ", bulk_timepoint_map_mls["tp2"])
metadata$description.alt_nlb[metadata$timepoint == "tp3"] <- gsub("\n", " ", bulk_timepoint_map_mls["tp3"])
metadata$description.alt_nlb[metadata$timepoint == "tp4"] <- gsub("\n", " ", bulk_timepoint_map_mls["tp4"])
metadata$description.alt_nlb[metadata$timepoint == "tp5"] <- gsub("\n", " ", bulk_timepoint_map_mls["tp5"])
metadata$description.alt_nlb[metadata$timepoint == "tp6"] <- gsub("\n", " ", bulk_timepoint_map_mls["tp6"])
# alternative2
metadata$description.alt2 <- bulk_timepoint_map_mlm["tp0"]
metadata$description.alt2[metadata$timepoint == "tp1"] <- bulk_timepoint_map_mlm["tp1"]
metadata$description.alt2[metadata$timepoint == "tp2"] <- bulk_timepoint_map_mlm["tp2"]
metadata$description.alt2[metadata$timepoint == "tp3"] <- bulk_timepoint_map_mlm["tp3"]
metadata$description.alt2[metadata$timepoint == "tp4"] <- bulk_timepoint_map_mlm["tp4"]
metadata$description.alt2[metadata$timepoint == "tp5"] <- bulk_timepoint_map_mlm["tp5"]
metadata$description.alt2[metadata$timepoint == "tp6"] <- bulk_timepoint_map_mlm["tp6"]
# Pre-diff
metadata$timeline <- "other"
metadata$timeline[metadata$timepoint == "tp0"] <- "Pre-diff"

batch correction by donor using ComBat-seq

# 'ComBat-seq()' requires integers 
forceMatrixToInteger <- function(m){
    apply (m, c (1, 2), function (x) {
         (as.integer(x))
    })
}

all_counts.integer <- forceMatrixToInteger(as.matrix(all_counts))
row.names(all_counts.integer) <- row.names(all_counts)

# actual batch correction
all_counts.CBSeq_byD <- ComBat_seq(all_counts.integer, batch=metadata$donor, group=NULL)
Found 6 batches
Using null model in ComBat-seq.
Adjusting for 0 covariate(s) or covariate level(s)
Estimating dispersions
Fitting the GLM model
Shrinkage off - using GLM estimates for parameters
Adjusting the data
row.names(all_counts.CBSeq_byD) <- row.names(all_counts)

# calculate cpm values on the complete data set first
all_counts.bc.cpm <- cpm(all_counts.CBSeq_byD, log=FALSE)

set up DGE

only one design matrix for Figures: 1f, ED3a, ED3d, ED3e, ED5a

groups_all <- metadata$timepoint_B_H
donor <- metadata$donor
# generate DGEList object (we work with the raw counts)
counts.all.asDGEList <- DGEList(counts=all_counts,
                                     genes=rownames(all_counts), 
                                     group = groups_all )

counts.all.asDGEList
An object of class "DGEList"
$counts
         B032_tp0_NB_NH B044_tp0_NB_NH B050_tp0_NB_NH B066_tp0_NB_NH
TSPAN6         2330.149       1669.757       2323.153       2178.668
TNMD              0.000          0.000          0.000          0.000
DPM1           1057.640        947.221       1898.128       1782.786
SCYL3           454.987        408.387        516.702        585.629
C1orf112        297.605        257.085        652.944        540.747
         B078_tp0_NB_NH B080_tp0_NB_NH B032_tp1_NB_H B032_tp1_NB_NH
TSPAN6         2701.845       1941.214      2974.309       3363.321
TNMD              0.000          0.000         1.000          1.000
DPM1           1555.317       1670.424      1355.103       1017.533
SCYL3           491.001        505.327       569.013        635.134
C1orf112        562.380        776.523       207.487        238.741
         B044_tp1_NB_H B044_tp1_NB_NH B050_tp1_NB_H B050_tp1_NB_NH
TSPAN6        1785.138       2988.257      3364.456       2172.189
TNMD             0.000          0.000         0.000          0.000
DPM1           833.345       1167.385      1371.059        964.385
SCYL3          450.662        639.321       615.183        523.009
C1orf112       106.805        242.218       183.012        201.001
         B066_tp1_NB_H B066_tp1_NB_NH B078_tp1_NB_H B078_tp1_NB_NH
TSPAN6        1620.574       1486.001      1953.345       1571.570
TNMD             0.000          0.000         0.000          0.000
DPM1           898.820        789.880       857.830       1010.559
SCYL3          393.422        510.000       368.000        501.726
C1orf112        88.999        113.001        98.000        156.078
         B080_tp1_NB_H B080_tp1_NB_NH B032_tp2_NB_H B032_tp2_NB_NH
TSPAN6        2214.665       1988.146      2545.154       4336.623
TNMD             0.000          0.000         0.000          1.004
DPM1           825.908        895.224       842.792       1120.162
SCYL3          546.883        441.936       500.231        824.888
C1orf112        96.618        146.963       257.938        292.632
         B044_tp2_NB_H B044_tp2_NB_NH B050_tp2_NB_H B050_tp2_NB_NH
TSPAN6        1876.503       2702.298      2934.437       2295.110
TNMD             0.000          0.000         2.000          0.000
DPM1           723.453        974.237      1405.790       1127.157
SCYL3          431.433        688.475       698.526        563.000
C1orf112       114.247        252.666       261.026        359.000
         B066_tp2_NB_H B066_tp2_NB_NH B078_tp2_NB_H B078_tp2_NB_NH
TSPAN6        1651.000       1907.695      1959.843       1928.319
TNMD             0.000          0.000         0.000          0.000
DPM1           734.001       1137.242       921.103       1024.922
SCYL3          380.062        625.350       500.304        480.033
C1orf112       154.001        266.930       166.999        276.000
         B080_tp2_NB_H B080_tp2_NB_NH B032_tp3_NB_H B032_tp3_NB_NH
TSPAN6        1279.781        832.087      1907.078       3079.175
TNMD             0.000          0.000         0.000          0.000
DPM1           511.003        512.105       491.394        813.765
SCYL3          452.715        295.261       525.007        576.113
C1orf112       118.177        150.360       274.909        202.922
         B044_tp3_NB_H B044_tp3_NB_NH B050_tp3_NB_H B050_tp3_NB_NH
TSPAN6        2022.268       2281.337      2705.818       2026.441
TNMD             0.000          0.000         0.000          1.000
DPM1           769.528        785.079      1145.941       1006.223
SCYL3          460.795        472.769       525.977        445.299
C1orf112       128.736        204.576       223.257        228.026
         B066_tp3_NB_H B066_tp3_NB_NH B078_tp3_NB_H B078_tp3_NB_NH
TSPAN6        1418.719       1253.236      2006.202       1781.443
TNMD             0.000          0.000         0.000          0.000
DPM1           679.983        681.048       668.810        923.999
SCYL3          365.000        355.007       395.009        452.134
C1orf112       170.000        129.000       104.000        247.001
         B080_tp3_NB_H B080_tp3_NB_NH B032_tp4_B_H B032_tp4_B_NH B032_tp4_NB_H
TSPAN6        1139.296       1665.859     1712.537      2589.469      1511.922
TNMD             0.000          0.000        1.000         0.000         0.000
DPM1           501.126        816.451     1211.173      1399.578       365.061
SCYL3          397.463        403.690      365.607       448.356       335.678
C1orf112       206.566        317.140      514.024       570.754       202.999
         B032_tp4_NB_NH B044_tp4_B_H B044_tp4_B_NH B044_tp4_NB_H B044_tp4_NB_NH
TSPAN6         2832.630     1781.079      1626.481      2193.526       2103.678
TNMD              0.000        0.000         0.000         0.000          0.000
DPM1            523.200     1574.285      1588.111       812.236        909.040
SCYL3           497.140      454.168       319.324       439.047        431.911
C1orf112        156.971      400.628       451.690       163.855        210.561
         B050_tp4_B_H B050_tp4_B_NH B050_tp4_NB_H B050_tp4_NB_NH B066_tp4_B_H
TSPAN6       2340.607      2207.438      2758.369       2588.953     2111.229
TNMD            0.000         0.000         0.000          0.000        0.000
DPM1         1995.332      1854.014      1160.185       1193.282     2159.178
SCYL3         654.503       567.400       600.064        515.000      620.316
C1orf112      630.985       540.000       213.063        233.002      605.000
         B066_tp4_B_NH B066_tp4_NB_H B066_tp4_NB_NH B078_tp4_B_H B078_tp4_B_NH
TSPAN6        1712.219      1231.137       1238.061     2398.771      1561.133
TNMD             0.000         0.000          0.000        0.000         0.000
DPM1          1558.188       712.001        710.351     2079.461      1747.254
SCYL3          538.044       409.001        456.117      669.248       501.522
C1orf112       617.598       214.000        161.956      517.216       522.585
         B078_tp4_NB_H B078_tp4_NB_NH B080_tp4_B_H B080_tp4_B_NH B080_tp4_NB_H
TSPAN6        1941.175       1498.209     1337.427      1965.989      1416.113
TNMD             0.000          0.000        0.000         0.000         0.000
DPM1           685.101        770.595     1122.677      1605.993       528.655
SCYL3          404.001        355.999      447.412       528.414       397.029
C1orf112       151.259        211.064      516.636       601.728       214.142
         B080_tp4_NB_NH B032_tp5_B_H B032_tp5_B_NH B032_tp5_NB_H B032_tp5_NB_NH
TSPAN6         1045.357     2293.025      2703.466       774.906       3826.274
TNMD              0.000        1.000         1.000         0.000          0.000
DPM1            524.020     1625.211      1306.424       250.923        814.101
SCYL3           347.922      309.703       377.699       194.260        619.203
C1orf112        132.906      683.186       501.659       114.270        281.023
         B044_tp5_B_H B044_tp5_B_NH B044_tp5_NB_H B044_tp5_NB_NH B050_tp5_B_H
TSPAN6       2787.210      1744.425      2582.013       2138.224     2951.892
TNMD            0.000         0.000         2.000          0.000        0.000
DPM1         1705.423      1222.215       927.060        860.911     2157.301
SCYL3         480.506       312.133       383.643        394.666      563.000
C1orf112      506.535       476.100       176.082        231.507      656.351
         B050_tp5_B_NH B050_tp5_NB_H B050_tp5_NB_NH B066_tp5_B_H B066_tp5_B_NH
TSPAN6        3027.696      2856.884       2026.269     1438.731      1847.317
TNMD             0.000         0.000          0.000        0.000         0.000
DPM1          2143.770      1210.814        941.042     1450.999      2061.645
SCYL3          599.000       522.143        413.002      316.000       505.579
C1orf112       724.012       199.000        183.039      517.101       714.007
         B066_tp5_NB_H B066_tp5_NB_NH B078_tp5_B_H B078_tp5_B_NH B078_tp5_NB_H
TSPAN6        2196.342       1366.697     3292.663      2734.971      1868.026
TNMD             0.000          0.000        0.000         0.000         0.000
DPM1          1226.282        757.420     1842.883      1907.728       646.581
SCYL3          441.458        348.261      645.144       535.413       296.257
C1orf112       150.001        153.000      560.135       670.537       145.000
         B078_tp5_NB_NH B080_tp5_B_H B080_tp5_B_NH B080_tp5_NB_H B080_tp5_NB_NH
TSPAN6         1370.435     1544.203      1861.981      1336.852       1294.003
TNMD              0.000        0.000         0.000         0.000          0.000
DPM1            704.719     1182.420      1456.517       578.657        533.926
SCYL3           299.497      376.380       415.435       317.958        451.563
C1orf112        173.008      419.747       495.404       114.323        126.854
         B032_tp6_B_H B032_tp6_B_NH B032_tp6_NB_H B032_tp6_NB_NH B044_tp6_B_H
TSPAN6       2140.944      2302.157      1260.407       2118.888     1967.731
TNMD            1.000         1.000         0.000          1.000        0.000
DPM1         1225.324       733.668       470.140        628.239     1453.737
SCYL3         338.175       375.167       319.911        580.092      420.029
C1orf112      439.444       359.567       159.121        208.633      556.262
         B044_tp6_B_NH B044_tp6_NB_H B044_tp6_NB_NH B050_tp6_B_H B050_tp6_B_NH
TSPAN6        1674.798      1746.140       1532.562     3034.905      2505.743
TNMD             1.000         0.000          0.000        3.000         2.000
DPM1          1260.477       766.634        698.540     1884.420      1658.094
SCYL3          365.444       390.249        413.001      542.169       476.142
C1orf112       509.695       150.858        178.230      615.150       615.392
         B050_tp6_NB_H B050_tp6_NB_NH B066_tp6_B_H B066_tp6_B_NH B066_tp6_NB_H
TSPAN6        3434.312       2182.102     1661.101      1887.268      1563.185
TNMD             0.000          1.000        0.000         0.000         0.000
DPM1          1124.856        944.508     2206.904      2293.305       652.809
SCYL3          405.188        354.011      435.413       466.060       261.000
C1orf112       204.124        235.001      852.744       942.025       165.000
         B066_tp6_NB_NH B078_tp6_B_H B078_tp6_B_NH B078_tp6_NB_H B078_tp6_NB_NH
TSPAN6         1458.798     2651.344      2745.033      2378.005       1124.000
TNMD              0.000        0.000         3.000         0.000          0.000
DPM1            648.289     1550.912      1715.664       963.277        633.126
SCYL3           316.150      535.177       499.204       331.003        267.000
C1orf112        156.063      605.840       648.215       185.073        148.000
         B080_tp6_B_H B080_tp6_B_NH B080_tp6_NB_H B080_tp6_NB_NH
TSPAN6       1388.112      2388.716      1195.576       1464.287
TNMD            0.000         0.000         0.000          0.000
DPM1         1183.336      2158.858       382.530        555.890
SCYL3         372.075       669.658       281.414        549.054
C1orf112      488.236       939.782       103.227        215.637
60225 more rows ...

$samples
                   group lib.size norm.factors
B032_tp0_NB_NH tp0_NB_NH 30144167            1
B044_tp0_NB_NH tp0_NB_NH 28732415            1
B050_tp0_NB_NH tp0_NB_NH 37521705            1
B066_tp0_NB_NH tp0_NB_NH 37201395            1
B078_tp0_NB_NH tp0_NB_NH 32579189            1
109 more rows ...

$genes
            genes
TSPAN6     TSPAN6
TNMD         TNMD
DPM1         DPM1
SCYL3       SCYL3
C1orf112 C1orf112
60225 more rows ...
# filter lowly expressed genes 
# we have 6 replicas (aka donors)
mincpm <- 1
minsamples <- 6

counts.all.asDGEList <- counts.all.asDGEList[ rowSums(all_counts.cpm > mincpm) >= minsamples, , keep.lib.size=FALSE] 
dim(counts.all.asDGEList)
[1] 17438   114
# normalization
counts.all.asDGEList <- calcNormFactors(counts.all.asDGEList)   
head(counts.all.asDGEList$samples)
                   group lib.size norm.factors
B032_tp0_NB_NH tp0_NB_NH 30113869     1.122469
B044_tp0_NB_NH tp0_NB_NH 28701049     1.090766
B050_tp0_NB_NH tp0_NB_NH 37487039     1.180351
B066_tp0_NB_NH tp0_NB_NH 37168756     1.135219
B078_tp0_NB_NH tp0_NB_NH 32548843     1.170481
B080_tp0_NB_NH tp0_NB_NH 41483708     1.154863
plotMDS(counts.all.asDGEList)

set up design matrix

(B032_tp0_NB_NH is used as intercept)

# use model.matrix
design_all <- model.matrix(~donor +group, data = counts.all.asDGEList$samples)
colnames(design_all)[2:24] <- substring(colnames(design_all)[2:24],6,15)
design_all
               (Intercept) B044 B050 B066 B078 B080 tp1_NB_H tp1_NB_NH tp2_NB_H
B032_tp0_NB_NH           1    0    0    0    0    0        0         0        0
B044_tp0_NB_NH           1    1    0    0    0    0        0         0        0
B050_tp0_NB_NH           1    0    1    0    0    0        0         0        0
B066_tp0_NB_NH           1    0    0    1    0    0        0         0        0
B078_tp0_NB_NH           1    0    0    0    1    0        0         0        0
B080_tp0_NB_NH           1    0    0    0    0    1        0         0        0
B032_tp1_NB_H            1    0    0    0    0    0        1         0        0
B032_tp1_NB_NH           1    0    0    0    0    0        0         1        0
B044_tp1_NB_H            1    1    0    0    0    0        1         0        0
B044_tp1_NB_NH           1    1    0    0    0    0        0         1        0
B050_tp1_NB_H            1    0    1    0    0    0        1         0        0
B050_tp1_NB_NH           1    0    1    0    0    0        0         1        0
B066_tp1_NB_H            1    0    0    1    0    0        1         0        0
B066_tp1_NB_NH           1    0    0    1    0    0        0         1        0
B078_tp1_NB_H            1    0    0    0    1    0        1         0        0
B078_tp1_NB_NH           1    0    0    0    1    0        0         1        0
B080_tp1_NB_H            1    0    0    0    0    1        1         0        0
B080_tp1_NB_NH           1    0    0    0    0    1        0         1        0
B032_tp2_NB_H            1    0    0    0    0    0        0         0        1
B032_tp2_NB_NH           1    0    0    0    0    0        0         0        0
B044_tp2_NB_H            1    1    0    0    0    0        0         0        1
B044_tp2_NB_NH           1    1    0    0    0    0        0         0        0
B050_tp2_NB_H            1    0    1    0    0    0        0         0        1
B050_tp2_NB_NH           1    0    1    0    0    0        0         0        0
B066_tp2_NB_H            1    0    0    1    0    0        0         0        1
B066_tp2_NB_NH           1    0    0    1    0    0        0         0        0
B078_tp2_NB_H            1    0    0    0    1    0        0         0        1
B078_tp2_NB_NH           1    0    0    0    1    0        0         0        0
B080_tp2_NB_H            1    0    0    0    0    1        0         0        1
B080_tp2_NB_NH           1    0    0    0    0    1        0         0        0
B032_tp3_NB_H            1    0    0    0    0    0        0         0        0
B032_tp3_NB_NH           1    0    0    0    0    0        0         0        0
B044_tp3_NB_H            1    1    0    0    0    0        0         0        0
B044_tp3_NB_NH           1    1    0    0    0    0        0         0        0
B050_tp3_NB_H            1    0    1    0    0    0        0         0        0
B050_tp3_NB_NH           1    0    1    0    0    0        0         0        0
B066_tp3_NB_H            1    0    0    1    0    0        0         0        0
B066_tp3_NB_NH           1    0    0    1    0    0        0         0        0
B078_tp3_NB_H            1    0    0    0    1    0        0         0        0
B078_tp3_NB_NH           1    0    0    0    1    0        0         0        0
B080_tp3_NB_H            1    0    0    0    0    1        0         0        0
B080_tp3_NB_NH           1    0    0    0    0    1        0         0        0
B032_tp4_B_H             1    0    0    0    0    0        0         0        0
B032_tp4_B_NH            1    0    0    0    0    0        0         0        0
B032_tp4_NB_H            1    0    0    0    0    0        0         0        0
B032_tp4_NB_NH           1    0    0    0    0    0        0         0        0
B044_tp4_B_H             1    1    0    0    0    0        0         0        0
B044_tp4_B_NH            1    1    0    0    0    0        0         0        0
B044_tp4_NB_H            1    1    0    0    0    0        0         0        0
B044_tp4_NB_NH           1    1    0    0    0    0        0         0        0
B050_tp4_B_H             1    0    1    0    0    0        0         0        0
B050_tp4_B_NH            1    0    1    0    0    0        0         0        0
B050_tp4_NB_H            1    0    1    0    0    0        0         0        0
B050_tp4_NB_NH           1    0    1    0    0    0        0         0        0
B066_tp4_B_H             1    0    0    1    0    0        0         0        0
B066_tp4_B_NH            1    0    0    1    0    0        0         0        0
B066_tp4_NB_H            1    0    0    1    0    0        0         0        0
B066_tp4_NB_NH           1    0    0    1    0    0        0         0        0
B078_tp4_B_H             1    0    0    0    1    0        0         0        0
B078_tp4_B_NH            1    0    0    0    1    0        0         0        0
B078_tp4_NB_H            1    0    0    0    1    0        0         0        0
B078_tp4_NB_NH           1    0    0    0    1    0        0         0        0
B080_tp4_B_H             1    0    0    0    0    1        0         0        0
B080_tp4_B_NH            1    0    0    0    0    1        0         0        0
B080_tp4_NB_H            1    0    0    0    0    1        0         0        0
B080_tp4_NB_NH           1    0    0    0    0    1        0         0        0
B032_tp5_B_H             1    0    0    0    0    0        0         0        0
B032_tp5_B_NH            1    0    0    0    0    0        0         0        0
B032_tp5_NB_H            1    0    0    0    0    0        0         0        0
B032_tp5_NB_NH           1    0    0    0    0    0        0         0        0
B044_tp5_B_H             1    1    0    0    0    0        0         0        0
B044_tp5_B_NH            1    1    0    0    0    0        0         0        0
B044_tp5_NB_H            1    1    0    0    0    0        0         0        0
B044_tp5_NB_NH           1    1    0    0    0    0        0         0        0
B050_tp5_B_H             1    0    1    0    0    0        0         0        0
B050_tp5_B_NH            1    0    1    0    0    0        0         0        0
B050_tp5_NB_H            1    0    1    0    0    0        0         0        0
B050_tp5_NB_NH           1    0    1    0    0    0        0         0        0
B066_tp5_B_H             1    0    0    1    0    0        0         0        0
B066_tp5_B_NH            1    0    0    1    0    0        0         0        0
B066_tp5_NB_H            1    0    0    1    0    0        0         0        0
B066_tp5_NB_NH           1    0    0    1    0    0        0         0        0
B078_tp5_B_H             1    0    0    0    1    0        0         0        0
B078_tp5_B_NH            1    0    0    0    1    0        0         0        0
B078_tp5_NB_H            1    0    0    0    1    0        0         0        0
B078_tp5_NB_NH           1    0    0    0    1    0        0         0        0
B080_tp5_B_H             1    0    0    0    0    1        0         0        0
B080_tp5_B_NH            1    0    0    0    0    1        0         0        0
B080_tp5_NB_H            1    0    0    0    0    1        0         0        0
B080_tp5_NB_NH           1    0    0    0    0    1        0         0        0
B032_tp6_B_H             1    0    0    0    0    0        0         0        0
B032_tp6_B_NH            1    0    0    0    0    0        0         0        0
B032_tp6_NB_H            1    0    0    0    0    0        0         0        0
B032_tp6_NB_NH           1    0    0    0    0    0        0         0        0
B044_tp6_B_H             1    1    0    0    0    0        0         0        0
B044_tp6_B_NH            1    1    0    0    0    0        0         0        0
B044_tp6_NB_H            1    1    0    0    0    0        0         0        0
B044_tp6_NB_NH           1    1    0    0    0    0        0         0        0
B050_tp6_B_H             1    0    1    0    0    0        0         0        0
B050_tp6_B_NH            1    0    1    0    0    0        0         0        0
B050_tp6_NB_H            1    0    1    0    0    0        0         0        0
B050_tp6_NB_NH           1    0    1    0    0    0        0         0        0
B066_tp6_B_H             1    0    0    1    0    0        0         0        0
B066_tp6_B_NH            1    0    0    1    0    0        0         0        0
B066_tp6_NB_H            1    0    0    1    0    0        0         0        0
B066_tp6_NB_NH           1    0    0    1    0    0        0         0        0
B078_tp6_B_H             1    0    0    0    1    0        0         0        0
B078_tp6_B_NH            1    0    0    0    1    0        0         0        0
B078_tp6_NB_H            1    0    0    0    1    0        0         0        0
B078_tp6_NB_NH           1    0    0    0    1    0        0         0        0
B080_tp6_B_H             1    0    0    0    0    1        0         0        0
B080_tp6_B_NH            1    0    0    0    0    1        0         0        0
B080_tp6_NB_H            1    0    0    0    0    1        0         0        0
B080_tp6_NB_NH           1    0    0    0    0    1        0         0        0
               tp2_NB_NH tp3_NB_H tp3_NB_NH tp4_B_H tp4_B_NH tp4_NB_H tp4_NB_NH
B032_tp0_NB_NH         0        0         0       0        0        0         0
B044_tp0_NB_NH         0        0         0       0        0        0         0
B050_tp0_NB_NH         0        0         0       0        0        0         0
B066_tp0_NB_NH         0        0         0       0        0        0         0
B078_tp0_NB_NH         0        0         0       0        0        0         0
B080_tp0_NB_NH         0        0         0       0        0        0         0
B032_tp1_NB_H          0        0         0       0        0        0         0
B032_tp1_NB_NH         0        0         0       0        0        0         0
B044_tp1_NB_H          0        0         0       0        0        0         0
B044_tp1_NB_NH         0        0         0       0        0        0         0
B050_tp1_NB_H          0        0         0       0        0        0         0
B050_tp1_NB_NH         0        0         0       0        0        0         0
B066_tp1_NB_H          0        0         0       0        0        0         0
B066_tp1_NB_NH         0        0         0       0        0        0         0
B078_tp1_NB_H          0        0         0       0        0        0         0
B078_tp1_NB_NH         0        0         0       0        0        0         0
B080_tp1_NB_H          0        0         0       0        0        0         0
B080_tp1_NB_NH         0        0         0       0        0        0         0
B032_tp2_NB_H          0        0         0       0        0        0         0
B032_tp2_NB_NH         1        0         0       0        0        0         0
B044_tp2_NB_H          0        0         0       0        0        0         0
B044_tp2_NB_NH         1        0         0       0        0        0         0
B050_tp2_NB_H          0        0         0       0        0        0         0
B050_tp2_NB_NH         1        0         0       0        0        0         0
B066_tp2_NB_H          0        0         0       0        0        0         0
B066_tp2_NB_NH         1        0         0       0        0        0         0
B078_tp2_NB_H          0        0         0       0        0        0         0
B078_tp2_NB_NH         1        0         0       0        0        0         0
B080_tp2_NB_H          0        0         0       0        0        0         0
B080_tp2_NB_NH         1        0         0       0        0        0         0
B032_tp3_NB_H          0        1         0       0        0        0         0
B032_tp3_NB_NH         0        0         1       0        0        0         0
B044_tp3_NB_H          0        1         0       0        0        0         0
B044_tp3_NB_NH         0        0         1       0        0        0         0
B050_tp3_NB_H          0        1         0       0        0        0         0
B050_tp3_NB_NH         0        0         1       0        0        0         0
B066_tp3_NB_H          0        1         0       0        0        0         0
B066_tp3_NB_NH         0        0         1       0        0        0         0
B078_tp3_NB_H          0        1         0       0        0        0         0
B078_tp3_NB_NH         0        0         1       0        0        0         0
B080_tp3_NB_H          0        1         0       0        0        0         0
B080_tp3_NB_NH         0        0         1       0        0        0         0
B032_tp4_B_H           0        0         0       1        0        0         0
B032_tp4_B_NH          0        0         0       0        1        0         0
B032_tp4_NB_H          0        0         0       0        0        1         0
B032_tp4_NB_NH         0        0         0       0        0        0         1
B044_tp4_B_H           0        0         0       1        0        0         0
B044_tp4_B_NH          0        0         0       0        1        0         0
B044_tp4_NB_H          0        0         0       0        0        1         0
B044_tp4_NB_NH         0        0         0       0        0        0         1
B050_tp4_B_H           0        0         0       1        0        0         0
B050_tp4_B_NH          0        0         0       0        1        0         0
B050_tp4_NB_H          0        0         0       0        0        1         0
B050_tp4_NB_NH         0        0         0       0        0        0         1
B066_tp4_B_H           0        0         0       1        0        0         0
B066_tp4_B_NH          0        0         0       0        1        0         0
B066_tp4_NB_H          0        0         0       0        0        1         0
B066_tp4_NB_NH         0        0         0       0        0        0         1
B078_tp4_B_H           0        0         0       1        0        0         0
B078_tp4_B_NH          0        0         0       0        1        0         0
B078_tp4_NB_H          0        0         0       0        0        1         0
B078_tp4_NB_NH         0        0         0       0        0        0         1
B080_tp4_B_H           0        0         0       1        0        0         0
B080_tp4_B_NH          0        0         0       0        1        0         0
B080_tp4_NB_H          0        0         0       0        0        1         0
B080_tp4_NB_NH         0        0         0       0        0        0         1
B032_tp5_B_H           0        0         0       0        0        0         0
B032_tp5_B_NH          0        0         0       0        0        0         0
B032_tp5_NB_H          0        0         0       0        0        0         0
B032_tp5_NB_NH         0        0         0       0        0        0         0
B044_tp5_B_H           0        0         0       0        0        0         0
B044_tp5_B_NH          0        0         0       0        0        0         0
B044_tp5_NB_H          0        0         0       0        0        0         0
B044_tp5_NB_NH         0        0         0       0        0        0         0
B050_tp5_B_H           0        0         0       0        0        0         0
B050_tp5_B_NH          0        0         0       0        0        0         0
B050_tp5_NB_H          0        0         0       0        0        0         0
B050_tp5_NB_NH         0        0         0       0        0        0         0
B066_tp5_B_H           0        0         0       0        0        0         0
B066_tp5_B_NH          0        0         0       0        0        0         0
B066_tp5_NB_H          0        0         0       0        0        0         0
B066_tp5_NB_NH         0        0         0       0        0        0         0
B078_tp5_B_H           0        0         0       0        0        0         0
B078_tp5_B_NH          0        0         0       0        0        0         0
B078_tp5_NB_H          0        0         0       0        0        0         0
B078_tp5_NB_NH         0        0         0       0        0        0         0
B080_tp5_B_H           0        0         0       0        0        0         0
B080_tp5_B_NH          0        0         0       0        0        0         0
B080_tp5_NB_H          0        0         0       0        0        0         0
B080_tp5_NB_NH         0        0         0       0        0        0         0
B032_tp6_B_H           0        0         0       0        0        0         0
B032_tp6_B_NH          0        0         0       0        0        0         0
B032_tp6_NB_H          0        0         0       0        0        0         0
B032_tp6_NB_NH         0        0         0       0        0        0         0
B044_tp6_B_H           0        0         0       0        0        0         0
B044_tp6_B_NH          0        0         0       0        0        0         0
B044_tp6_NB_H          0        0         0       0        0        0         0
B044_tp6_NB_NH         0        0         0       0        0        0         0
B050_tp6_B_H           0        0         0       0        0        0         0
B050_tp6_B_NH          0        0         0       0        0        0         0
B050_tp6_NB_H          0        0         0       0        0        0         0
B050_tp6_NB_NH         0        0         0       0        0        0         0
B066_tp6_B_H           0        0         0       0        0        0         0
B066_tp6_B_NH          0        0         0       0        0        0         0
B066_tp6_NB_H          0        0         0       0        0        0         0
B066_tp6_NB_NH         0        0         0       0        0        0         0
B078_tp6_B_H           0        0         0       0        0        0         0
B078_tp6_B_NH          0        0         0       0        0        0         0
B078_tp6_NB_H          0        0         0       0        0        0         0
B078_tp6_NB_NH         0        0         0       0        0        0         0
B080_tp6_B_H           0        0         0       0        0        0         0
B080_tp6_B_NH          0        0         0       0        0        0         0
B080_tp6_NB_H          0        0         0       0        0        0         0
B080_tp6_NB_NH         0        0         0       0        0        0         0
               tp5_B_H tp5_B_NH tp5_NB_H tp5_NB_NH tp6_B_H tp6_B_NH tp6_NB_H
B032_tp0_NB_NH       0        0        0         0       0        0        0
B044_tp0_NB_NH       0        0        0         0       0        0        0
B050_tp0_NB_NH       0        0        0         0       0        0        0
B066_tp0_NB_NH       0        0        0         0       0        0        0
B078_tp0_NB_NH       0        0        0         0       0        0        0
B080_tp0_NB_NH       0        0        0         0       0        0        0
B032_tp1_NB_H        0        0        0         0       0        0        0
B032_tp1_NB_NH       0        0        0         0       0        0        0
B044_tp1_NB_H        0        0        0         0       0        0        0
B044_tp1_NB_NH       0        0        0         0       0        0        0
B050_tp1_NB_H        0        0        0         0       0        0        0
B050_tp1_NB_NH       0        0        0         0       0        0        0
B066_tp1_NB_H        0        0        0         0       0        0        0
B066_tp1_NB_NH       0        0        0         0       0        0        0
B078_tp1_NB_H        0        0        0         0       0        0        0
B078_tp1_NB_NH       0        0        0         0       0        0        0
B080_tp1_NB_H        0        0        0         0       0        0        0
B080_tp1_NB_NH       0        0        0         0       0        0        0
B032_tp2_NB_H        0        0        0         0       0        0        0
B032_tp2_NB_NH       0        0        0         0       0        0        0
B044_tp2_NB_H        0        0        0         0       0        0        0
B044_tp2_NB_NH       0        0        0         0       0        0        0
B050_tp2_NB_H        0        0        0         0       0        0        0
B050_tp2_NB_NH       0        0        0         0       0        0        0
B066_tp2_NB_H        0        0        0         0       0        0        0
B066_tp2_NB_NH       0        0        0         0       0        0        0
B078_tp2_NB_H        0        0        0         0       0        0        0
B078_tp2_NB_NH       0        0        0         0       0        0        0
B080_tp2_NB_H        0        0        0         0       0        0        0
B080_tp2_NB_NH       0        0        0         0       0        0        0
B032_tp3_NB_H        0        0        0         0       0        0        0
B032_tp3_NB_NH       0        0        0         0       0        0        0
B044_tp3_NB_H        0        0        0         0       0        0        0
B044_tp3_NB_NH       0        0        0         0       0        0        0
B050_tp3_NB_H        0        0        0         0       0        0        0
B050_tp3_NB_NH       0        0        0         0       0        0        0
B066_tp3_NB_H        0        0        0         0       0        0        0
B066_tp3_NB_NH       0        0        0         0       0        0        0
B078_tp3_NB_H        0        0        0         0       0        0        0
B078_tp3_NB_NH       0        0        0         0       0        0        0
B080_tp3_NB_H        0        0        0         0       0        0        0
B080_tp3_NB_NH       0        0        0         0       0        0        0
B032_tp4_B_H         0        0        0         0       0        0        0
B032_tp4_B_NH        0        0        0         0       0        0        0
B032_tp4_NB_H        0        0        0         0       0        0        0
B032_tp4_NB_NH       0        0        0         0       0        0        0
B044_tp4_B_H         0        0        0         0       0        0        0
B044_tp4_B_NH        0        0        0         0       0        0        0
B044_tp4_NB_H        0        0        0         0       0        0        0
B044_tp4_NB_NH       0        0        0         0       0        0        0
B050_tp4_B_H         0        0        0         0       0        0        0
B050_tp4_B_NH        0        0        0         0       0        0        0
B050_tp4_NB_H        0        0        0         0       0        0        0
B050_tp4_NB_NH       0        0        0         0       0        0        0
B066_tp4_B_H         0        0        0         0       0        0        0
B066_tp4_B_NH        0        0        0         0       0        0        0
B066_tp4_NB_H        0        0        0         0       0        0        0
B066_tp4_NB_NH       0        0        0         0       0        0        0
B078_tp4_B_H         0        0        0         0       0        0        0
B078_tp4_B_NH        0        0        0         0       0        0        0
B078_tp4_NB_H        0        0        0         0       0        0        0
B078_tp4_NB_NH       0        0        0         0       0        0        0
B080_tp4_B_H         0        0        0         0       0        0        0
B080_tp4_B_NH        0        0        0         0       0        0        0
B080_tp4_NB_H        0        0        0         0       0        0        0
B080_tp4_NB_NH       0        0        0         0       0        0        0
B032_tp5_B_H         1        0        0         0       0        0        0
B032_tp5_B_NH        0        1        0         0       0        0        0
B032_tp5_NB_H        0        0        1         0       0        0        0
B032_tp5_NB_NH       0        0        0         1       0        0        0
B044_tp5_B_H         1        0        0         0       0        0        0
B044_tp5_B_NH        0        1        0         0       0        0        0
B044_tp5_NB_H        0        0        1         0       0        0        0
B044_tp5_NB_NH       0        0        0         1       0        0        0
B050_tp5_B_H         1        0        0         0       0        0        0
B050_tp5_B_NH        0        1        0         0       0        0        0
B050_tp5_NB_H        0        0        1         0       0        0        0
B050_tp5_NB_NH       0        0        0         1       0        0        0
B066_tp5_B_H         1        0        0         0       0        0        0
B066_tp5_B_NH        0        1        0         0       0        0        0
B066_tp5_NB_H        0        0        1         0       0        0        0
B066_tp5_NB_NH       0        0        0         1       0        0        0
B078_tp5_B_H         1        0        0         0       0        0        0
B078_tp5_B_NH        0        1        0         0       0        0        0
B078_tp5_NB_H        0        0        1         0       0        0        0
B078_tp5_NB_NH       0        0        0         1       0        0        0
B080_tp5_B_H         1        0        0         0       0        0        0
B080_tp5_B_NH        0        1        0         0       0        0        0
B080_tp5_NB_H        0        0        1         0       0        0        0
B080_tp5_NB_NH       0        0        0         1       0        0        0
B032_tp6_B_H         0        0        0         0       1        0        0
B032_tp6_B_NH        0        0        0         0       0        1        0
B032_tp6_NB_H        0        0        0         0       0        0        1
B032_tp6_NB_NH       0        0        0         0       0        0        0
B044_tp6_B_H         0        0        0         0       1        0        0
B044_tp6_B_NH        0        0        0         0       0        1        0
B044_tp6_NB_H        0        0        0         0       0        0        1
B044_tp6_NB_NH       0        0        0         0       0        0        0
B050_tp6_B_H         0        0        0         0       1        0        0
B050_tp6_B_NH        0        0        0         0       0        1        0
B050_tp6_NB_H        0        0        0         0       0        0        1
B050_tp6_NB_NH       0        0        0         0       0        0        0
B066_tp6_B_H         0        0        0         0       1        0        0
B066_tp6_B_NH        0        0        0         0       0        1        0
B066_tp6_NB_H        0        0        0         0       0        0        1
B066_tp6_NB_NH       0        0        0         0       0        0        0
B078_tp6_B_H         0        0        0         0       1        0        0
B078_tp6_B_NH        0        0        0         0       0        1        0
B078_tp6_NB_H        0        0        0         0       0        0        1
B078_tp6_NB_NH       0        0        0         0       0        0        0
B080_tp6_B_H         0        0        0         0       1        0        0
B080_tp6_B_NH        0        0        0         0       0        1        0
B080_tp6_NB_H        0        0        0         0       0        0        1
B080_tp6_NB_NH       0        0        0         0       0        0        0
               tp6_NB_NH
B032_tp0_NB_NH         0
B044_tp0_NB_NH         0
B050_tp0_NB_NH         0
B066_tp0_NB_NH         0
B078_tp0_NB_NH         0
B080_tp0_NB_NH         0
B032_tp1_NB_H          0
B032_tp1_NB_NH         0
B044_tp1_NB_H          0
B044_tp1_NB_NH         0
B050_tp1_NB_H          0
B050_tp1_NB_NH         0
B066_tp1_NB_H          0
B066_tp1_NB_NH         0
B078_tp1_NB_H          0
B078_tp1_NB_NH         0
B080_tp1_NB_H          0
B080_tp1_NB_NH         0
B032_tp2_NB_H          0
B032_tp2_NB_NH         0
B044_tp2_NB_H          0
B044_tp2_NB_NH         0
B050_tp2_NB_H          0
B050_tp2_NB_NH         0
B066_tp2_NB_H          0
B066_tp2_NB_NH         0
B078_tp2_NB_H          0
B078_tp2_NB_NH         0
B080_tp2_NB_H          0
B080_tp2_NB_NH         0
B032_tp3_NB_H          0
B032_tp3_NB_NH         0
B044_tp3_NB_H          0
B044_tp3_NB_NH         0
B050_tp3_NB_H          0
B050_tp3_NB_NH         0
B066_tp3_NB_H          0
B066_tp3_NB_NH         0
B078_tp3_NB_H          0
B078_tp3_NB_NH         0
B080_tp3_NB_H          0
B080_tp3_NB_NH         0
B032_tp4_B_H           0
B032_tp4_B_NH          0
B032_tp4_NB_H          0
B032_tp4_NB_NH         0
B044_tp4_B_H           0
B044_tp4_B_NH          0
B044_tp4_NB_H          0
B044_tp4_NB_NH         0
B050_tp4_B_H           0
B050_tp4_B_NH          0
B050_tp4_NB_H          0
B050_tp4_NB_NH         0
B066_tp4_B_H           0
B066_tp4_B_NH          0
B066_tp4_NB_H          0
B066_tp4_NB_NH         0
B078_tp4_B_H           0
B078_tp4_B_NH          0
B078_tp4_NB_H          0
B078_tp4_NB_NH         0
B080_tp4_B_H           0
B080_tp4_B_NH          0
B080_tp4_NB_H          0
B080_tp4_NB_NH         0
B032_tp5_B_H           0
B032_tp5_B_NH          0
B032_tp5_NB_H          0
B032_tp5_NB_NH         0
B044_tp5_B_H           0
B044_tp5_B_NH          0
B044_tp5_NB_H          0
B044_tp5_NB_NH         0
B050_tp5_B_H           0
B050_tp5_B_NH          0
B050_tp5_NB_H          0
B050_tp5_NB_NH         0
B066_tp5_B_H           0
B066_tp5_B_NH          0
B066_tp5_NB_H          0
B066_tp5_NB_NH         0
B078_tp5_B_H           0
B078_tp5_B_NH          0
B078_tp5_NB_H          0
B078_tp5_NB_NH         0
B080_tp5_B_H           0
B080_tp5_B_NH          0
B080_tp5_NB_H          0
B080_tp5_NB_NH         0
B032_tp6_B_H           0
B032_tp6_B_NH          0
B032_tp6_NB_H          0
B032_tp6_NB_NH         1
B044_tp6_B_H           0
B044_tp6_B_NH          0
B044_tp6_NB_H          0
B044_tp6_NB_NH         1
B050_tp6_B_H           0
B050_tp6_B_NH          0
B050_tp6_NB_H          0
B050_tp6_NB_NH         1
B066_tp6_B_H           0
B066_tp6_B_NH          0
B066_tp6_NB_H          0
B066_tp6_NB_NH         1
B078_tp6_B_H           0
B078_tp6_B_NH          0
B078_tp6_NB_H          0
B078_tp6_NB_NH         1
B080_tp6_B_H           0
B080_tp6_B_NH          0
B080_tp6_NB_H          0
B080_tp6_NB_NH         1
attr(,"assign")
 [1] 0 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
attr(,"contrasts")
attr(,"contrasts")$donor
[1] "contr.treatment"

attr(,"contrasts")$group
[1] "contr.treatment"

dispersion estimation and fitting

# dispersion estimation
counts.all.asDGEList <- estimateDisp(counts.all.asDGEList, design_all)
counts.all.asDGEList$common.dispersion
[1] 0.09312949
plotBCV(counts.all.asDGEList)

# fitting
fit_all <- glmQLFit(counts.all.asDGEList, design_all)

plotQLDisp(fit_all)

Fig 1 d

PCA of batch corrected samples in the IVMC protocol analysed by bulk RNA sequencing, coloured by timepoint (n=6 independent organoid lines).

using B0xx_tp0_NB_NH, B0xx_tp1_NB_H, B0xx_tp2_NB_H, B0xx_tp3_NB_H, B0xx_tp4_B_H, B0xx_tp5_B_H, B0xx_tp6_B_H => “sel_IVMC”

metadata.sel_IVMC <- mutate(as.data.frame(metadata), sel_IVMC = case_when(
                                timepoint == "tp0" ~ TRUE,
                                timepoint == "tp1" & hormones == "H" ~ TRUE,
                                timepoint == "tp2" & hormones == "H" ~ TRUE,
                                timepoint == "tp3" & hormones == "H" ~ TRUE,
                                timepoint == "tp4" & hormones == "H" & breakdown == "B" ~ TRUE,
                                timepoint == "tp5" & hormones == "H" & breakdown == "B" ~ TRUE,
                                timepoint == "tp6" & hormones == "H" & breakdown == "B" ~ TRUE,
                                TRUE ~ FALSE))

sel_IVMC <- metadata$sample[metadata.sel_IVMC$sel_IVMC]
sel_IVMC
 [1] "B032_tp0_NB_NH" "B044_tp0_NB_NH" "B050_tp0_NB_NH" "B066_tp0_NB_NH"
 [5] "B078_tp0_NB_NH" "B080_tp0_NB_NH" "B032_tp1_NB_H"  "B044_tp1_NB_H" 
 [9] "B050_tp1_NB_H"  "B066_tp1_NB_H"  "B078_tp1_NB_H"  "B080_tp1_NB_H" 
[13] "B032_tp2_NB_H"  "B044_tp2_NB_H"  "B050_tp2_NB_H"  "B066_tp2_NB_H" 
[17] "B078_tp2_NB_H"  "B080_tp2_NB_H"  "B032_tp3_NB_H"  "B044_tp3_NB_H" 
[21] "B050_tp3_NB_H"  "B066_tp3_NB_H"  "B078_tp3_NB_H"  "B080_tp3_NB_H" 
[25] "B032_tp4_B_H"   "B044_tp4_B_H"   "B050_tp4_B_H"   "B066_tp4_B_H"  
[29] "B078_tp4_B_H"   "B080_tp4_B_H"   "B032_tp5_B_H"   "B044_tp5_B_H"  
[33] "B050_tp5_B_H"   "B066_tp5_B_H"   "B078_tp5_B_H"   "B080_tp5_B_H"  
[37] "B032_tp6_B_H"   "B044_tp6_B_H"   "B050_tp6_B_H"   "B066_tp6_B_H"  
[41] "B078_tp6_B_H"   "B080_tp6_B_H"  
counts.sel_IVMC<- all_counts[,sel_IVMC]
metadata.sel_IVMC <- metadata[metadata.sel_IVMC$sel_IVMC,]

metadata.sel_IVMC$description.alt_nlb <- factor(metadata.sel_IVMC$description.alt_nlb,
                                                levels = unique(metadata.sel_IVMC$description.alt_nlb))

all_counts.CBSeq_byD.sel_IVMC <- all_counts.CBSeq_byD[,sel_IVMC]

all_counts.CBSeq_byD.sel_IVMC.log.cpm.pca <- pca(cpm(all_counts.CBSeq_byD.sel_IVMC, log=TRUE), 
                              metadata = metadata.sel_IVMC, removeVar = 0.1)
-- removing the lower 10% of variables based on variance
xlab = paste("PC1, ",round(all_counts.CBSeq_byD.sel_IVMC.log.cpm.pca$variance["PC1"],digits = 2),"% variance",sep = "")
ylab = paste("PC2, ",round(all_counts.CBSeq_byD.sel_IVMC.log.cpm.pca$variance["PC2"],digits = 2),"% variance",sep = "")

plot_object<- biplot(all_counts.CBSeq_byD.sel_IVMC.log.cpm.pca, 
       colby = 'description.alt_nlb',
       colkey = timepoint_colors.alt_nlb,
       pointSize = 8,
       lab = NULL,
       legendPosition = 'right',
       legendIconSize = 8,
       colLegendTitle = " ",
       gridlines.major = FALSE,
       gridlines.minor = FALSE,
       xlab = xlab,
       ylab = ylab,
       title = "")
Scale for colour is already present.
Adding another scale for colour, which will replace the existing scale.
plot_object

file_name <- paste(plot_dir,"/Figure_1_d.pdf", sep = "")
pdf(file_name, width = 8, height = 5)
    plot_object
dev.off()
png 
  2 

Fig 1 e

Heatmap depicting batch corrected centered and scaled counts per million (cpm) of selected genes across the IVMC protocol

genes <- c("MKI67", "PCNA", "MCM2", "MCM7", "EXO1", "E2F2", "AXIN2", "CDH2", "ALDH1A1", "LGR5","MMP10", "RXRG",
           "SCGB1D2", "SCGB1D4", "SCGB2A1", "S100P", "PAEP", "PTGS1", "PTGS2", "ENPP3",
           "SERPINE1", "SERPINA5", "SERPINA6", "KRT17", "KRT16", "NKD2", "CNN1", "FOXJ1", "PIFO",
           "RSPH1", "TPPP3", "KISS1R", "EGLN3", "HIF3A", "VEGFA", "ALPP", "ALPG", "CHGA",
           "TNFRSF11B", "TNFRSF12A", "CSF2", "FGF2", "DKK1","SOX9", "FUT4", "PTGES", "TIGAR", "CD44",
           "PLAUR", "PLAU", "PLAT", "KRT6A", "CXCL8", "CXCL10", "CXCL1", "CXCL11", "IL1RN", "IL23A",
           "MMP7","OLFM4", "PGR", "ESR1", "GREB1", "COL1A2")

# select the samples (using selection vector from Fig 1 d)
counts.bc.cpm.sel_IVMC <- all_counts.bc.cpm[,sel_IVMC]

# define data to plot
data2plot.bc <- data.frame()

for (gene in genes) {
    data2plot.bc <- rbind(data2plot.bc, counts.bc.cpm.sel_IVMC[rownames(counts.bc.cpm.sel_IVMC) == gene,])
}

rownames(data2plot.bc) <- genes
colnames(data2plot.bc) <- colnames(counts.bc.cpm.sel_IVMC)

# center and scale
data2plot.bc.sc <- t(scale(t(data2plot.bc)))

heatmap_selected_genes<- Heatmap(data2plot.bc.sc, cluster_columns=FALSE, cluster_rows=FALSE, 
        heatmap_legend_param = list(legend_direction = "horizontal", 
                                    title = "cpm (centered \nand scaled)", 
                                    color_bar = "continuous"),
        column_split = c(rep(0, 6),rep(1, 6),rep(2,6),rep(3, 6),rep(4,6),rep(5, 6),rep(6,6)),
        column_title = "",
        bottom_annotation = HeatmapAnnotation(foo = anno_block(gp = gpar(fill = timepoint_colors, 
                                                                         col = timepoint_colors),
        labels = unique(metadata.sel_IVMC$description.alt), 
        labels_gp = gpar(col = heatmap_labels_colors, fontsize = 8))),
        show_column_names = FALSE)

draw(heatmap_selected_genes, heatmap_legend_side="bottom")

file_name <- paste(plot_dir,"/Figure_1_e.pdf", sep = "")
pdf(file_name,height = 10,width = 8)
    draw(heatmap_selected_genes, heatmap_legend_side="bottom")
dev.off()
png 
  2 

Fig 1 f

Volcano plot highlighting selected DEGs of organoids in post-breakdown phase of the IVMC protocol (Post-breakdown 24h) compared to organoids treated with hormones but not subjected to breakdown (NB_H; no breakdown hormones)

compare “tp4_B_H” versus “tp4_NB_H”

define contrast

(colnames(design_all) == "tp4_B_H") - (colnames(design_all) == "tp4_NB_H")
 [1]  0  0  0  0  0  0  0  0  0  0  0  0  1  0 -1  0  0  0  0  0  0  0  0  0
qlf_1F<- glmQLFTest(fit_all, contrast = as.vector((colnames(design_all) == "tp4_B_H") - (colnames(design_all) == "tp4_NB_H")))
tt_1F <- as.data.frame(topTags(qlf_1F,n=nrow(counts.all.asDGEList)))
head(tt_1F)
        genes    logFC     logCPM        F       PValue          FDR
GALNT5 GALNT5 5.532962  1.8389142 286.1138 5.260153e-32 9.172655e-28
YKT6     YKT6 1.902848  6.0728781 273.8387 1.112590e-29 8.660889e-26
FGF2     FGF2 5.634464  0.4405043 230.1738 1.490003e-29 8.660889e-26
SBDS     SBDS 2.076409  5.1949412 269.1516 2.044519e-29 8.913082e-26
CSF2     CSF2 7.312718 -1.1908599 203.6735 9.235046e-29 3.220815e-25
NIPA2   NIPA2 2.115973  5.1145905 252.2612 1.954534e-28 5.680528e-25

define cut offs

FDR < 0.05 & logFC > 2.0 ~ up; FDR < 0.05 & logFC < -2.0 ~ down

# Create significant (sig) column
results_1F <- mutate(tt_1F, sig = case_when(
                                FDR < 0.05 & logFC > 2.0 ~ up,
                                FDR < 0.05 & logFC < -2.0 ~ down,
                                TRUE ~ notsig))
sum(results_1F$sig == "Up")
[1] 438
sum(results_1F$sig == "Down")
[1] 1540
file_name.table  <- paste(table_dir,"/Figure_1_f.DEG.tab",sep = "")
write.table(results_1F, file_name.table,
            quote = FALSE, row.names = FALSE, sep = "\t")

define genes to label

genes_1F_up_label <- c("HS3ST2", "CSF2", "KRT6A", "DKK1", "FGF2", "KRT13", "EDAR", "CXCL6", 
                       "THBS1", "CXCL8", "CEACAM5", "EREG", "ADAMTS9", "FUT4", "AREG", "MMP7", 
                       "IL23A", "ITGA2", "PLAUR", "PCNA")
genes_1F_down_label <- c("TBC1D3K", "TGM5", "LMO1", "SEMA5B", "COL23A1", "PNCK", "ANGPTL4", 
                         "KISS1R", "CA9", "HIF3A", "ALDOC", "INHA", "PTPRR", "CXCR4", "HILPDA", 
                         "OLFM2", "NUPR1", "PAX2", "EGLN3", "VEGFA") 
genes_1F_label <- c(genes_1F_up_label,genes_1F_down_label)

# add gene labels
results_1F <- mutate(results_1F, labels = ifelse(genes %in% genes_1F_label, genes, ""))

volcano plot

# set up base plot
p <- ggplot(data = results_1F, aes(x = logFC, y = -log10(PValue))) +
  geom_point(aes(colour = sig)) +
    xlim(-12, +12) +
  scale_color_manual(name = "|log2FC| > 2, FDR < 0.05", values = volcano_colors) +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    axis.line = element_line(colour = "black"),
    legend.key = element_blank(),
    legend.position = "bottom",
    
    # increase text sizes
    axis.text = element_text(size = 18), 
    axis.title = element_text(size = 22),  
    plot.title = element_text(size = 26, hjust = 0.5), 
    legend.title = element_text(size = 20), 
    legend.text = element_text(size = 20) 
  ) +
  xlab('log2 FC') +
  ylab('-log10 p-value') +
  guides(color = guide_legend(override.aes = list(size=5))) +
  
  # add gene labels
  geom_text_repel(data = filter(results_1F, labels != ""), 
                         aes(label = labels),
                         min.segment.length = 0,
                         max.overlaps = Inf,
                         size = 7,  
                         show.legend = FALSE) +
  ggtitle('Post-breakdown 24h (IVMC vs NB_H)')

print(p)

file_name <- paste(plot_dir,"/Figure_1_f.pdf", sep = "")
pdf(file_name,height = 8,width = 10)
    p
dev.off()
png 
  2 

Fig 1 g

Biological processes enriched from genes upregulated during post-breakdown phase of the IVMC protocol.

# all upregulated genes
genes_1F_up <- results_1F$genes[results_1F$sig == "Up"]

gostres <- gost(query = genes_1F_up, custom_bg = results_1F$genes, organism = "hsapiens", correction_method = "fdr")
Detected custom background input, domain scope is set to 'custom'.
head(gostres$result)
    query significant      p_value term_size query_size intersection_size
1 query_1        TRUE 3.956148e-02         2        377                 2
2 query_1        TRUE 3.956148e-02         2        377                 2
3 query_1        TRUE 4.494566e-06        62        377                13
4 query_1        TRUE 4.494566e-06        40        377                11
5 query_1        TRUE 2.157698e-05       278        377                25
6 query_1        TRUE 2.570997e-05       310        377                26
   precision     recall    term_id source
1 0.00530504 1.00000000 CORUM:7112  CORUM
2 0.00530504 1.00000000 CORUM:7109  CORUM
3 0.03448276 0.20967742 GO:0019730  GO:BP
4 0.02917772 0.27500000 GO:0061844  GO:BP
5 0.06631300 0.08992806 GO:0050900  GO:BP
6 0.06896552 0.08387097 GO:0042330  GO:BP
                                                                term_name
1                                                      PLAUR-PLAU complex
2                                                IGF2R-PLAUR-PLAU complex
3                                          antimicrobial humoral response
4 antimicrobial humoral immune response mediated by antimicrobial peptide
5                                                     leukocyte migration
6                                                                   taxis
  effective_domain_size source_order      parents
1                 15781         2609 CORUM:00....
2                 15781         2606 CORUM:00....
3                 15781         6021 GO:00069....
4                 15781        15685   GO:0019730
5                 15781        13342 GO:00023....
6                 15781        10096 GO:00096....
unique(gostres$result$source)
 [1] "CORUM" "GO:BP" "GO:CC" "GO:MF" "HP"    "KEGG"  "MIRNA" "REAC"  "TF"   
[10] "WP"   
gostres_results.BP <- gostres$result[gostres$result$source == "GO:BP",]
unique(gostres_results.BP$source)
[1] "GO:BP"
gostres_results.BP$neg_log10Pvalue <- ( -log10(gostres_results.BP$p_value) )
gostres_results.BP <- gostres_results.BP[order(gostres_results.BP$neg_log10Pvalue, decreasing = TRUE),]

head(gostres_results.BP[,c("term_name","neg_log10Pvalue")])
                                                                term_name
3                                          antimicrobial humoral response
4 antimicrobial humoral immune response mediated by antimicrobial peptide
5                                                     leukocyte migration
6                                                                   taxis
7                                                   response to chemokine
8                                                              chemotaxis
  neg_log10Pvalue
3        5.347312
4        5.347312
5        4.666009
6        4.589898
7        4.589898
8        4.589898
file_name.table  <- paste(table_dir,"/Figure_1_g.tab",sep = "")
write.table(gostres_results.BP[,c("term_name","neg_log10Pvalue")], file_name.table,
            quote = FALSE, row.names = FALSE, sep = "\t")

representative_GO_terms <- c("antimicrobial humoral response",
                             "leukocyte migration",
                             "response to chemokine",
                             "chemotaxis",
                             "response to bacterium",
                             "locomotion",
                             "G protein-coupled receptor signaling pathway",
                             "cellular response to biotic stimulus",
                             "granulocyte migration",
                             "response to external stimulus",
                             "cellular response to lipopolysaccharide",
                             "humoral immune response",
                             "cell-cell signaling",
                             "negative regulation of wound healing",
                             "skin development",
                             "fibrinolysis",
                             "inflammatory response",
                             "regulation of wound healing",
                             "response to stress",
                             "intermediate filament organization",
                             "response to oxygen-containing compound",
                             "negative regulation of sprouting angiogenesis",
                             "regulation of angiogenesis")

data2plot <- gostres_results.BP[gostres_results.BP$term_name %in% representative_GO_terms,]

data2plot$term_name <- factor(data2plot$term_name, levels = rev(data2plot$term_name))

p <- ggplot(data2plot, aes(x=neg_log10Pvalue,y=term_name)) +
  geom_point(size = 10, color = "#8B1A1A") +
  xlab('-log10 p-value') +
  xlim(0, 6) +
  ylab('') +
  theme_bw() +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
  theme(axis.text.y = element_text(size=22)) +
  theme(axis.text.x = element_text(size=18)) +
  theme(axis.title.x =element_text(size=18)) +
  theme(panel.border = element_blank(), axis.line = element_line())

print(p)    

file_name <- paste(plot_dir,"/Figure_1_g.pdf", sep = "")
pdf(file_name,height = 8,width = 12)
    print(p)
dev.off() 
png 
  2 

Fig 3 c

Line plots depicting cpm of WNT7A in organoids undergoing the IVMC protocol (solid lines) and organoids not subjected to breakdown (NB_H) (dotted lines), analysed with bulk RNAseq.

using B0xx_tp0_NB_NH, B0xx_tp1_NB_H, B0xx_tp2_NB_H, B0xx_tp3_NB_H, B0xx_tp4_B_H, B0xx_tp5_B_H, B0xx_tp6_B_H this has been “sel_IVMC”

plus B0xx_tp3_NB_H, B0xx_tp4_NB_H, B0xx_tp5_NB_H, B0xx_tp6_NB_H => “sel_IVMC_NB”

# make a new selection (the ‘no passaging’)
metadata.sel_IVMC_NB <- mutate(as.data.frame(metadata), sel_IVMC_NB = case_when(
                                timepoint == "tp3" & hormones == "H" ~ TRUE,
                                timepoint == "tp4" & hormones == "H" & breakdown == "NB" ~ TRUE,
                                timepoint == "tp5" & hormones == "H" & breakdown == "NB" ~ TRUE,
                                timepoint == "tp6" & hormones == "H" & breakdown == "NB" ~ TRUE,
                                TRUE ~ FALSE))
    
sel_IVMC_NB <- metadata$sample[metadata.sel_IVMC_NB$sel_IVMC_NB]
sel_IVMC_NB
 [1] "B032_tp3_NB_H" "B044_tp3_NB_H" "B050_tp3_NB_H" "B066_tp3_NB_H"
 [5] "B078_tp3_NB_H" "B080_tp3_NB_H" "B032_tp4_NB_H" "B044_tp4_NB_H"
 [9] "B050_tp4_NB_H" "B066_tp4_NB_H" "B078_tp4_NB_H" "B080_tp4_NB_H"
[13] "B032_tp5_NB_H" "B044_tp5_NB_H" "B050_tp5_NB_H" "B066_tp5_NB_H"
[17] "B078_tp5_NB_H" "B080_tp5_NB_H" "B032_tp6_NB_H" "B044_tp6_NB_H"
[21] "B050_tp6_NB_H" "B066_tp6_NB_H" "B078_tp6_NB_H" "B080_tp6_NB_H"
metadata.sel_IVMC_NB <- metadata[metadata.sel_IVMC_NB$sel_IVMC_NB == TRUE,]

# batch corrected data
counts.bc.cpm.sel_IVMC_NB <- all_counts.bc.cpm[,sel_IVMC_NB]
metadata.sel_IVMC$general_timepoint <- factor(metadata.sel_IVMC$description.alt2, levels = (unique(metadata.sel_IVMC$description.alt2))) 
metadata.sel_IVMC_NB$general_timepoint <- factor(metadata.sel_IVMC_NB$description.alt2, levels = (unique(metadata.sel_IVMC_NB$description.alt2)))

genes_for_line_plots <- c("WNT7A")
 
for (gene in genes_for_line_plots ) {
    expr.values.sel_IVMC <- counts.bc.cpm.sel_IVMC[gene,]
    expr.values.sel_IVMC_NB <- counts.bc.cpm.sel_IVMC_NB[gene,]

    # data to plot
    df2plot <- data.frame(samples=colnames(counts.bc.cpm.sel_IVMC), cpm=expr.values.sel_IVMC)
    df2plot.sel_IVMC_NB <- data.frame(samples=colnames(counts.bc.cpm.sel_IVMC_NB), cpm=expr.values.sel_IVMC_NB)
    # combine with metadata
    df2plot_withMetadata <- merge(df2plot, metadata.sel_IVMC, by.x = "samples", by.y = "sample", sort = FALSE)
    df2plot_withMetadata.sel_IVMC_NB <- merge(df2plot.sel_IVMC_NB, metadata.sel_IVMC_NB, by.x = "samples", by.y = "sample", sort = FALSE)

    p <- ggplot(df2plot_withMetadata, aes(y=cpm)) +
          geom_line(aes(x=general_timepoint, color = donor, group = donor), linewidth=1.5, data = df2plot_withMetadata) +
          geom_line(aes(y=cpm, x=general_timepoint, color = donor, group = donor), linetype = 2, linewidth=1.5, data = df2plot_withMetadata.sel_IVMC_NB)+
          scale_color_manual(values = donor_colors)  +
          ylab(bquote(italic(.(gene))~' cpm')) +
          xlab("") +
          theme_bw() +
          theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
          theme(axis.line = element_line(colour = "black")) +
          theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 0.95)) +
          theme(axis.text.x = element_text(size = 24)) + 
          theme(axis.text.y = element_text(size = 22)) +
          theme(axis.title.y = element_text(size = 24)) +
          theme(panel.border = element_blank()) + 
          theme(legend.text=element_text(size=22)) +
          theme(legend.title=element_text(size=22)) 
    print(p)

    file_name <- paste(plot_dir,"/Figure_3_c__",gene,".pdf", sep = "")
    pdf(file_name, height = 8,width = 10)
        print(p)
    dev.off()
}

Fig 5 d

using B0xx_tp0_NB_NH, B0xx_tp1_NB_H, B0xx_tp2_NB_H, B0xx_tp3_NB_H, B0xx_tp4_B_H, B0xx_tp5_B_H, B0xx_tp6_B_H this has been “sel_IVMC”

Line plots showing the (batch coreccted) mean expression levels (in cpm) of ANXA1 and CXCL8 across different timepoints in the IVMC protocol. The red line represents the mean cpm values across donors, while the shaded blue region indicates the standard deviation around the mean.

genes_for_line_plots <- c("ANXA1","CXCL8")

for (gene in genes_for_line_plots ) {
    expr.values.sel_IVMC <- counts.bc.cpm.sel_IVMC[gene,]

    # data to plot
    df2plot <- data.frame(samples=colnames(counts.bc.cpm.sel_IVMC), cpm=expr.values.sel_IVMC)

    # combine with metadata
    df2plot_withMetadata <- merge(df2plot, metadata.sel_IVMC, by.x = "samples", by.y = "sample", sort = FALSE)
 
    # aggregate
    df2plot_aggregate <- data.frame(general_timepoint = aggregate(df2plot_withMetadata$cpm, list(df2plot_withMetadata$general_timepoint), FUN=mean)[,1],
                                    cpm_mean = aggregate(df2plot_withMetadata$cpm, list(df2plot_withMetadata$general_timepoint), FUN=mean)[,2],
                                    cpm_mean_plus_sd = aggregate(df2plot_withMetadata$cpm, list(df2plot_withMetadata$general_timepoint), FUN=mean)[,2] +
                                                       aggregate(df2plot_withMetadata$cpm, list(df2plot_withMetadata$general_timepoint), FUN=sd)[,2],
                                    cpm_mean_minus_sd = aggregate(df2plot_withMetadata$cpm, list(df2plot_withMetadata$general_timepoint), FUN=mean)[,2] -
                                                        aggregate(df2plot_withMetadata$cpm, list(df2plot_withMetadata$general_timepoint), FUN=sd)[,2] )
    
    p <- ggplot(df2plot_aggregate, aes(y=cpm_mean, x=general_timepoint, group = 1)) + 
          geom_line(color = "red") +
          geom_point(color = "red", size = 2) +
          geom_ribbon(aes(x = general_timepoint, ymax = cpm_mean_plus_sd, ymin = cpm_mean_minus_sd), 
                      alpha = 0.3, fill = "lightblue") +
          ggtitle(bquote(italic(.(gene)))) +
          ylab("cpm (Mean ± SD across donors)") +
          xlab("") +
          theme_bw() +
          theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
          theme(axis.line = element_line(colour = "black")) + 
          theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 24)) +
          theme(axis.text.y = element_text(size = 22)) +
          theme(axis.title.y = element_text(size = 24)) +
          theme(panel.border = element_blank())
    print(p)
    
    file_name <- paste(plot_dir,"/Figure_5_d__",gene,".pdf", sep = "")
    pdf(file_name, height = 8,width = 10)
        print(p)
    dev.off()
}

Extended Data Fig 1 b

PCA of all organoid samples from all conditions colored by the breakdown variable.

colnames(metadata)[c(4,5,11)] <- c("Breakdown","Hormones","Timeline")
all_counts.CBSeq_byD.log.cpm.pca <- pca(cpm(all_counts.CBSeq_byD, log=TRUE), 
                                        metadata = metadata, removeVar = 0.1)
-- removing the lower 10% of variables based on variance
xlab = paste("PC1, ",round(all_counts.CBSeq_byD.log.cpm.pca$variance["PC1"],digits = 2),"% variance",sep = "")
ylab = paste("PC2, ",round(all_counts.CBSeq_byD.log.cpm.pca$variance["PC2"],digits = 2),"% variance",sep = "")

plot_object<- biplot(all_counts.CBSeq_byD.log.cpm.pca, 
       colby = 'Breakdown',
       colkey = breakdown_colors,
       shape = 'Timeline',
       pointSize = 7,
       lab = NULL,
       legendPosition = 'bottom',
       gridlines.major = FALSE,
       gridlines.minor = FALSE,
       xlab = xlab,
       ylab = ylab,
       title = "")
Scale for colour is already present.
Adding another scale for colour, which will replace the existing scale.
plot_object

file_name <- paste(plot_dir,"/EDFigure_1_b.pdf", sep = "")
    pdf(file_name, width = 8, height = 7.5)
plot_object
dev.off()
png 
  2 

Extended Data Fig 1 c

PCA of all organoid samples from all conditions colored by the hormonal treatment variable.

plot_object<- biplot(all_counts.CBSeq_byD.log.cpm.pca, 
       colby = 'Hormones',
       colkey = hormones_colors,
       shape = 'Timeline',
       pointSize = 7,
       lab = NULL,
       legendPosition = 'bottom',
       gridlines.major = FALSE,
       gridlines.minor = FALSE,
       xlab = xlab,
       ylab = ylab,
       title = "")
Scale for colour is already present.
Adding another scale for colour, which will replace the existing scale.
plot_object

file_name <- paste(plot_dir,"/EDFigure_1_c.pdf", sep = "")
    pdf(file_name, width = 8, height = 7.5)
plot_object
dev.off()
png 
  2 
colnames(metadata)[c(4,5,11)] <- c("breakdown","hormones","timeline")

Extended Data Fig 1 d

Hierarchically clustered heatmap displaying the top 50 differentially upregulated genes from pairwise comparisons of each timepoint against all timepoints.

using B0xx_tp0_NB_NH, B0xx_tp1_NB_H, B0xx_tp2_NB_H, B0xx_tp3_NB_H, B0xx_tp4_B_H, B0xx_tp5_B_H, B0xx_tp6_B_H this has been “sel_IVMC”

DGE (specific for each time point) using the raw data

groups <- metadata.sel_IVMC$timepoint_B_H
donor <- metadata.sel_IVMC$donor

setting up DGEList

counts.sel_IVMC.asDGEList <- DGEList(counts=counts.sel_IVMC,
                                     genes=rownames(counts.sel_IVMC), 
                                     group = groups )

counts.sel_IVMC.asDGEList
An object of class "DGEList"
$counts
         B032_tp0_NB_NH B044_tp0_NB_NH B050_tp0_NB_NH B066_tp0_NB_NH
TSPAN6         2330.149       1669.757       2323.153       2178.668
TNMD              0.000          0.000          0.000          0.000
DPM1           1057.640        947.221       1898.128       1782.786
SCYL3           454.987        408.387        516.702        585.629
C1orf112        297.605        257.085        652.944        540.747
         B078_tp0_NB_NH B080_tp0_NB_NH B032_tp1_NB_H B044_tp1_NB_H
TSPAN6         2701.845       1941.214      2974.309      1785.138
TNMD              0.000          0.000         1.000         0.000
DPM1           1555.317       1670.424      1355.103       833.345
SCYL3           491.001        505.327       569.013       450.662
C1orf112        562.380        776.523       207.487       106.805
         B050_tp1_NB_H B066_tp1_NB_H B078_tp1_NB_H B080_tp1_NB_H B032_tp2_NB_H
TSPAN6        3364.456      1620.574      1953.345      2214.665      2545.154
TNMD             0.000         0.000         0.000         0.000         0.000
DPM1          1371.059       898.820       857.830       825.908       842.792
SCYL3          615.183       393.422       368.000       546.883       500.231
C1orf112       183.012        88.999        98.000        96.618       257.938
         B044_tp2_NB_H B050_tp2_NB_H B066_tp2_NB_H B078_tp2_NB_H B080_tp2_NB_H
TSPAN6        1876.503      2934.437      1651.000      1959.843      1279.781
TNMD             0.000         2.000         0.000         0.000         0.000
DPM1           723.453      1405.790       734.001       921.103       511.003
SCYL3          431.433       698.526       380.062       500.304       452.715
C1orf112       114.247       261.026       154.001       166.999       118.177
         B032_tp3_NB_H B044_tp3_NB_H B050_tp3_NB_H B066_tp3_NB_H B078_tp3_NB_H
TSPAN6        1907.078      2022.268      2705.818      1418.719      2006.202
TNMD             0.000         0.000         0.000         0.000         0.000
DPM1           491.394       769.528      1145.941       679.983       668.810
SCYL3          525.007       460.795       525.977       365.000       395.009
C1orf112       274.909       128.736       223.257       170.000       104.000
         B080_tp3_NB_H B032_tp4_B_H B044_tp4_B_H B050_tp4_B_H B066_tp4_B_H
TSPAN6        1139.296     1712.537     1781.079     2340.607     2111.229
TNMD             0.000        1.000        0.000        0.000        0.000
DPM1           501.126     1211.173     1574.285     1995.332     2159.178
SCYL3          397.463      365.607      454.168      654.503      620.316
C1orf112       206.566      514.024      400.628      630.985      605.000
         B078_tp4_B_H B080_tp4_B_H B032_tp5_B_H B044_tp5_B_H B050_tp5_B_H
TSPAN6       2398.771     1337.427     2293.025     2787.210     2951.892
TNMD            0.000        0.000        1.000        0.000        0.000
DPM1         2079.461     1122.677     1625.211     1705.423     2157.301
SCYL3         669.248      447.412      309.703      480.506      563.000
C1orf112      517.216      516.636      683.186      506.535      656.351
         B066_tp5_B_H B078_tp5_B_H B080_tp5_B_H B032_tp6_B_H B044_tp6_B_H
TSPAN6       1438.731     3292.663     1544.203     2140.944     1967.731
TNMD            0.000        0.000        0.000        1.000        0.000
DPM1         1450.999     1842.883     1182.420     1225.324     1453.737
SCYL3         316.000      645.144      376.380      338.175      420.029
C1orf112      517.101      560.135      419.747      439.444      556.262
         B050_tp6_B_H B066_tp6_B_H B078_tp6_B_H B080_tp6_B_H
TSPAN6       3034.905     1661.101     2651.344     1388.112
TNMD            3.000        0.000        0.000        0.000
DPM1         1884.420     2206.904     1550.912     1183.336
SCYL3         542.169      435.413      535.177      372.075
C1orf112      615.150      852.744      605.840      488.236
60225 more rows ...

$samples
                   group lib.size norm.factors
B032_tp0_NB_NH tp0_NB_NH 30144167            1
B044_tp0_NB_NH tp0_NB_NH 28732415            1
B050_tp0_NB_NH tp0_NB_NH 37521705            1
B066_tp0_NB_NH tp0_NB_NH 37201395            1
B078_tp0_NB_NH tp0_NB_NH 32579189            1
37 more rows ...

$genes
            genes
TSPAN6     TSPAN6
TNMD         TNMD
DPM1         DPM1
SCYL3       SCYL3
C1orf112 C1orf112
60225 more rows ...
# filter lowly expressed genes 
counts.cpm.sel_IVMC <- all_counts.cpm[,metadata.sel_IVMC$sample]
counts.sel_IVMC.asDGEList <- counts.sel_IVMC.asDGEList[ rowSums(counts.cpm.sel_IVMC > mincpm) >= minsamples, , keep.lib.size=FALSE] 
dim(counts.sel_IVMC.asDGEList)
[1] 16451    42
# normalization
counts.sel_IVMC.asDGEList <- calcNormFactors(counts.sel_IVMC.asDGEList)   # same as normLibSizes(y)
head(counts.sel_IVMC.asDGEList$samples)
                   group lib.size norm.factors
B032_tp0_NB_NH tp0_NB_NH 30099923     1.077073
B044_tp0_NB_NH tp0_NB_NH 28687377     1.047688
B050_tp0_NB_NH tp0_NB_NH 37472680     1.140602
B066_tp0_NB_NH tp0_NB_NH 37157178     1.105160
B078_tp0_NB_NH tp0_NB_NH 32534805     1.126232
B080_tp0_NB_NH tp0_NB_NH 41470928     1.113365
plotMDS(counts.sel_IVMC.asDGEList)

setting up the design matrix

# use model.matrix
design <- model.matrix(~donor +group, data = counts.sel_IVMC.asDGEList$samples)
colnames(design)[2:12] <- substring(colnames(design)[2:12],6,14)
design
               (Intercept) B044 B050 B066 B078 B080 tp1_NB_H tp2_NB_H tp3_NB_H
B032_tp0_NB_NH           1    0    0    0    0    0        0        0        0
B044_tp0_NB_NH           1    1    0    0    0    0        0        0        0
B050_tp0_NB_NH           1    0    1    0    0    0        0        0        0
B066_tp0_NB_NH           1    0    0    1    0    0        0        0        0
B078_tp0_NB_NH           1    0    0    0    1    0        0        0        0
B080_tp0_NB_NH           1    0    0    0    0    1        0        0        0
B032_tp1_NB_H            1    0    0    0    0    0        1        0        0
B044_tp1_NB_H            1    1    0    0    0    0        1        0        0
B050_tp1_NB_H            1    0    1    0    0    0        1        0        0
B066_tp1_NB_H            1    0    0    1    0    0        1        0        0
B078_tp1_NB_H            1    0    0    0    1    0        1        0        0
B080_tp1_NB_H            1    0    0    0    0    1        1        0        0
B032_tp2_NB_H            1    0    0    0    0    0        0        1        0
B044_tp2_NB_H            1    1    0    0    0    0        0        1        0
B050_tp2_NB_H            1    0    1    0    0    0        0        1        0
B066_tp2_NB_H            1    0    0    1    0    0        0        1        0
B078_tp2_NB_H            1    0    0    0    1    0        0        1        0
B080_tp2_NB_H            1    0    0    0    0    1        0        1        0
B032_tp3_NB_H            1    0    0    0    0    0        0        0        1
B044_tp3_NB_H            1    1    0    0    0    0        0        0        1
B050_tp3_NB_H            1    0    1    0    0    0        0        0        1
B066_tp3_NB_H            1    0    0    1    0    0        0        0        1
B078_tp3_NB_H            1    0    0    0    1    0        0        0        1
B080_tp3_NB_H            1    0    0    0    0    1        0        0        1
B032_tp4_B_H             1    0    0    0    0    0        0        0        0
B044_tp4_B_H             1    1    0    0    0    0        0        0        0
B050_tp4_B_H             1    0    1    0    0    0        0        0        0
B066_tp4_B_H             1    0    0    1    0    0        0        0        0
B078_tp4_B_H             1    0    0    0    1    0        0        0        0
B080_tp4_B_H             1    0    0    0    0    1        0        0        0
B032_tp5_B_H             1    0    0    0    0    0        0        0        0
B044_tp5_B_H             1    1    0    0    0    0        0        0        0
B050_tp5_B_H             1    0    1    0    0    0        0        0        0
B066_tp5_B_H             1    0    0    1    0    0        0        0        0
B078_tp5_B_H             1    0    0    0    1    0        0        0        0
B080_tp5_B_H             1    0    0    0    0    1        0        0        0
B032_tp6_B_H             1    0    0    0    0    0        0        0        0
B044_tp6_B_H             1    1    0    0    0    0        0        0        0
B050_tp6_B_H             1    0    1    0    0    0        0        0        0
B066_tp6_B_H             1    0    0    1    0    0        0        0        0
B078_tp6_B_H             1    0    0    0    1    0        0        0        0
B080_tp6_B_H             1    0    0    0    0    1        0        0        0
               tp4_B_H tp5_B_H tp6_B_H
B032_tp0_NB_NH       0       0       0
B044_tp0_NB_NH       0       0       0
B050_tp0_NB_NH       0       0       0
B066_tp0_NB_NH       0       0       0
B078_tp0_NB_NH       0       0       0
B080_tp0_NB_NH       0       0       0
B032_tp1_NB_H        0       0       0
B044_tp1_NB_H        0       0       0
B050_tp1_NB_H        0       0       0
B066_tp1_NB_H        0       0       0
B078_tp1_NB_H        0       0       0
B080_tp1_NB_H        0       0       0
B032_tp2_NB_H        0       0       0
B044_tp2_NB_H        0       0       0
B050_tp2_NB_H        0       0       0
B066_tp2_NB_H        0       0       0
B078_tp2_NB_H        0       0       0
B080_tp2_NB_H        0       0       0
B032_tp3_NB_H        0       0       0
B044_tp3_NB_H        0       0       0
B050_tp3_NB_H        0       0       0
B066_tp3_NB_H        0       0       0
B078_tp3_NB_H        0       0       0
B080_tp3_NB_H        0       0       0
B032_tp4_B_H         1       0       0
B044_tp4_B_H         1       0       0
B050_tp4_B_H         1       0       0
B066_tp4_B_H         1       0       0
B078_tp4_B_H         1       0       0
B080_tp4_B_H         1       0       0
B032_tp5_B_H         0       1       0
B044_tp5_B_H         0       1       0
B050_tp5_B_H         0       1       0
B066_tp5_B_H         0       1       0
B078_tp5_B_H         0       1       0
B080_tp5_B_H         0       1       0
B032_tp6_B_H         0       0       1
B044_tp6_B_H         0       0       1
B050_tp6_B_H         0       0       1
B066_tp6_B_H         0       0       1
B078_tp6_B_H         0       0       1
B080_tp6_B_H         0       0       1
attr(,"assign")
 [1] 0 1 1 1 1 1 2 2 2 2 2 2
attr(,"contrasts")
attr(,"contrasts")$donor
[1] "contr.treatment"

attr(,"contrasts")$group
[1] "contr.treatment"

dispersion estimation and fitting

# dispersion estimation
counts.sel_IVMC.asDGEList <- estimateDisp(counts.sel_IVMC.asDGEList, design)
counts.sel_IVMC.asDGEList$common.dispersion
[1] 0.07067298
plotBCV(counts.sel_IVMC.asDGEList)

# fitting
fit <- glmQLFit(counts.sel_IVMC.asDGEList, design)

plotQLDisp(fit)

0) “Pre-diff” (tp0_NB_NH) specific

contrast_0 <- 1/6 * c(- (colnames(design) == "tp1_NB_H") -  (colnames(design) == "tp2_NB_H") - (colnames(design) == "tp3_NB_H") 
                      - (colnames(design) == "tp4_B_H") - (colnames(design) == "tp5_B_H") - (colnames(design) == "tp6_B_H") )
contrast_0
 [1]  0.0000000  0.0000000  0.0000000  0.0000000  0.0000000  0.0000000
 [7] -0.1666667 -0.1666667 -0.1666667 -0.1666667 -0.1666667 -0.1666667
qlf_0 <- glmQLFTest(fit, contrast = contrast_0)
tt_0 <- as.data.frame(topTags(qlf_0,n=nrow(counts.sel_IVMC.asDGEList)))
head(tt_0)
        genes    logFC     logCPM         F       PValue          FDR
KRT4     KRT4 6.637973  1.4672627 1416.5371 6.950293e-41 1.143393e-36
CALCA   CALCA 6.550895  1.6920612 1132.5048 1.870543e-37 1.538615e-33
PLXNA4 PLXNA4 6.866916  3.5815887 1126.6076 1.938841e-33 1.063196e-29
LGI2     LGI2 4.735619 -0.5563552  622.7545 7.720354e-32 3.175189e-28
SHISA3 SHISA3 4.913608  0.5925268  813.9653 1.095696e-31 3.605059e-28
FBN2     FBN2 3.715567  5.6922941 1222.4537 8.542720e-29 2.342271e-25
file_name.table <- paste(table_dir,"/EDFigure_1_d.tp0_TopTable.tab",sep = "")
write.table(tt_0, file_name.table, quote = FALSE, row.names = FALSE, sep = "\t")

1) “P4-diff” (tp1_NB_H) specific

contrast_1 <- c((colnames(design) == "tp1_NB_H") - 1/6* (colnames(design) == "tp2_NB_H") - 1/6* (colnames(design) == "tp3_NB_H") 
                                - 1/6* (colnames(design) == "tp4_B_H") - 1/6* (colnames(design) == "tp5_B_H") - 1/6* (colnames(design) == "tp6_B_H") )
contrast_1
 [1]  0.0000000  0.0000000  0.0000000  0.0000000  0.0000000  0.0000000
 [7]  1.0000000 -0.1666667 -0.1666667 -0.1666667 -0.1666667 -0.1666667
qlf_1 <- glmQLFTest(fit, contrast = contrast_1)
tt_1 <- as.data.frame(topTags(qlf_1,n=nrow(counts.sel_IVMC.asDGEList)))
head(tt_1)
                    genes    logFC     logCPM        F       PValue
SST                   SST 5.678946  4.3893633 702.8144 2.092213e-28
COL23A1           COL23A1 4.280639  2.8836585 490.3516 7.719881e-26
CD55                 CD55 2.526505  7.9750274 482.5971 4.780045e-22
RP11-856M9.1 RP11-856M9.1 3.286598  0.6042339 313.5169 9.562260e-22
RP1-153G14.4 RP1-153G14.4 3.410210 -0.5422749 235.9544 1.041374e-21
PHACTR3           PHACTR3 4.045445  0.3593289 282.7125 6.843084e-21
                      FDR
SST          3.441900e-24
COL23A1      6.349988e-22
CD55         2.621218e-18
RP11-856M9.1 3.426328e-18
RP1-153G14.4 3.426328e-18
PHACTR3      1.876260e-17
file_name.table <- paste(table_dir,"/EDFigure_1_d.tp1_TopTable.tab",sep = "")
write.table(tt_1, file_name.table, quote = FALSE, row.names = FALSE, sep = "\t")

2) “Horm Withdr 24h” (tp2_NB_H) specific

contrast_2 <-  c((colnames(design) == "tp2_NB_H") - 1/6* (colnames(design) == "tp1_NB_H") - 1/6* (colnames(design) == "tp3_NB_H") 
                                - 1/6* (colnames(design) == "tp4_B_H") - 1/6* (colnames(design) == "tp5_B_H") - 1/6* (colnames(design) == "tp6_B_H") )
contrast_2
 [1]  0.0000000  0.0000000  0.0000000  0.0000000  0.0000000  0.0000000
 [7] -0.1666667  1.0000000 -0.1666667 -0.1666667 -0.1666667 -0.1666667
qlf_2 <- glmQLFTest(fit, contrast = contrast_2)
tt_2 <- as.data.frame(topTags(qlf_2,n=nrow(counts.sel_IVMC.asDGEList)))
head(tt_2)
              genes    logFC     logCPM        F       PValue          FDR
TGM5           TGM5 3.926647  2.7556014 510.7290 8.182216e-25 1.346056e-20
COL23A1     COL23A1 4.068428  2.8836585 421.1466 1.752220e-24 1.441289e-20
ZMIZ1-AS1 ZMIZ1-AS1 3.547144 -0.5122914 241.2649 5.201602e-22 2.852385e-18
NDUFA4L2   NDUFA4L2 3.760156  5.5628212 383.4921 8.300260e-22 3.413689e-18
LINC02649 LINC02649 3.270517  0.7894746 237.5867 1.910928e-20 6.287336e-17
FBXO32       FBXO32 2.638391  3.0973920 349.0151 1.029472e-19 2.822641e-16
file_name.table <- paste(table_dir,"/EDFigure_1_d.tp2_TopTable.tab",sep = "")
write.table(tt_2, file_name.table, quote = FALSE, row.names = FALSE, sep = "\t")

3) “Horm Withdr 48h” (tp3_NB_H) specific

contrast_3 <-  c((colnames(design) == "tp3_NB_H") - 1/6* (colnames(design) == "tp1_NB_H") - 1/6* (colnames(design) == "tp2_NB_H") 
                                - 1/6* (colnames(design) == "tp4_B_H") - 1/6* (colnames(design) == "tp5_B_H") - 1/6* (colnames(design) == "tp6_B_H") )
contrast_3
 [1]  0.0000000  0.0000000  0.0000000  0.0000000  0.0000000  0.0000000
 [7] -0.1666667 -0.1666667  1.0000000 -0.1666667 -0.1666667 -0.1666667
qlf_3 <- glmQLFTest(fit, contrast = contrast_3)
tt_3 <- as.data.frame(topTags(qlf_3,n=nrow(counts.sel_IVMC.asDGEList)))
head(tt_3)
            genes    logFC    logCPM        F       PValue          FDR
TGM5         TGM5 4.952334 2.7556014 935.3743 6.416044e-30 1.055503e-25
COL23A1   COL23A1 4.543722 2.8836585 546.3425 8.193049e-27 6.739192e-23
NDUFA4L2 NDUFA4L2 4.484035 5.5628212 589.1112 3.528043e-25 1.934661e-21
PRDM16     PRDM16 5.106874 2.1512301 400.7150 1.150140e-24 4.730238e-21
PDE4C       PDE4C 4.713820 0.6501294 364.1578 5.719239e-23 1.881744e-19
PNCK         PNCK 4.797822 3.5242771 399.8600 7.821529e-23 2.144533e-19
file_name.table <- paste(table_dir,"/EDFigure_1_d.tp3_TopTable.tab",sep = "")
write.table(tt_3, file_name.table, quote = FALSE, row.names = FALSE, sep = "\t")

4) “Post-breakdown 24h” (tp4_B_H) specific

contrast_4 <-  c((colnames(design) == "tp4_B_H") - 1/6* (colnames(design) == "tp1_NB_H") - 1/6* (colnames(design) == "tp2_NB_H") - 1/6* (colnames(design) == "tp3_NB_H") 
                                - 1/6* (colnames(design) == "tp5_B_H") - 1/6* (colnames(design) == "tp6_B_H") )
contrast_4
 [1]  0.0000000  0.0000000  0.0000000  0.0000000  0.0000000  0.0000000
 [7] -0.1666667 -0.1666667 -0.1666667  1.0000000 -0.1666667 -0.1666667
qlf_4 <- glmQLFTest(fit, contrast = contrast_4)
tt_4 <- as.data.frame(topTags(qlf_4,n=nrow(counts.sel_IVMC.asDGEList)))
head(tt_4)
              genes    logFC      logCPM        F       PValue          FDR
CSF2           CSF2 7.172484 -0.09129763 670.1599 5.229538e-25 5.372197e-21
TNFRSF11B TNFRSF11B 3.977181  0.94760286 498.0793 8.168232e-25 5.372197e-21
IGFL1         IGFL1 5.819212 -0.42929220 436.4569 9.796725e-25 5.372197e-21
BCL2L15     BCL2L15 3.532552  5.00765998 666.5908 2.358005e-24 9.697884e-21
GALNT5       GALNT5 4.363803  2.16316015 476.9216 1.934371e-23 6.364467e-20
GLIPR1       GLIPR1 3.988663  1.74433247 434.7055 1.746932e-22 4.789796e-19
file_name.table <- paste(table_dir,"/EDFigure_1_d.tp4_TopTable.tab",sep = "")
write.table(tt_4, file_name.table, quote = FALSE, row.names = FALSE, sep = "\t")

5) “E2-diff 24h” (tp5_B_H) specific

contrast_5 <- c((colnames(design) == "tp5_B_H") - 1/6* (colnames(design) == "tp1_NB_H") - 1/6* (colnames(design) == "tp2_NB_H") - 1/6* (colnames(design) == "tp3_NB_H") 
                                - 1/6* (colnames(design) == "tp4_B_H") - 1/6* (colnames(design) == "tp6_B_H") )
qlf_5 <- glmQLFTest(fit, contrast = contrast_5)
contrast_5
 [1]  0.0000000  0.0000000  0.0000000  0.0000000  0.0000000  0.0000000
 [7] -0.1666667 -0.1666667 -0.1666667 -0.1666667  1.0000000 -0.1666667
tt_5 <- as.data.frame(topTags(qlf_5,n=nrow(counts.sel_IVMC.asDGEList)))
head(tt_5)
          genes    logFC   logCPM        F       PValue          FDR
CXCL10   CXCL10 4.939035 5.405468 454.7192 5.139231e-23 8.454549e-19
CXCL11   CXCL11 5.006808 3.184803 388.5890 9.554431e-22 7.858997e-18
OAS1       OAS1 3.189627 6.132966 370.4554 3.447883e-20 1.890704e-16
BCL2L15 BCL2L15 2.591279 5.007660 328.5224 2.365402e-19 9.471202e-16
OAS3       OAS3 2.813307 7.168217 320.8204 3.409625e-19 9.471202e-16
IFIT1     IFIT1 4.074539 6.304042 317.0045 3.454332e-19 9.471202e-16
file_name.table <- paste(table_dir,"/EDFigure_1_d.tp5_TopTable.tab",sep = "")
write.table(tt_5, file_name.table, quote = FALSE, row.names = FALSE, sep = "\t")

6) “E2 diff 48h” (tp6_B_H) specific

contrast_6 <-  c((colnames(design) == "tp6_B_H") - 1/6* (colnames(design) == "tp1_NB_H") - 1/6* (colnames(design) == "tp2_NB_H") - 1/6* (colnames(design) == "tp3_NB_H") 
                                - 1/6* (colnames(design) == "tp4_B_H") - 1/6* (colnames(design) == "tp5_B_H") )
qlf_6 <- glmQLFTest(fit, contrast = contrast_6)
contrast_6
 [1]  0.0000000  0.0000000  0.0000000  0.0000000  0.0000000  0.0000000
 [7] -0.1666667 -0.1666667 -0.1666667 -0.1666667 -0.1666667  1.0000000
tt_6 <- as.data.frame(topTags(qlf_6,n=nrow(counts.sel_IVMC.asDGEList)))
head(tt_6)
        genes    logFC   logCPM        F       PValue          FDR
OAS3     OAS3 2.972377 7.168217 356.0451 6.514406e-20 4.927612e-16
CXCL10 CXCL10 4.205358 5.405468 300.4120 7.493286e-20 4.927612e-16
CXCL11 CXCL11 4.596896 3.184803 299.1716 8.985980e-20 4.927612e-16
OAS1     OAS1 3.069696 6.132966 330.6896 2.106641e-19 8.664089e-16
IFI27   IFI27 3.251867 8.425607 322.4245 3.191606e-19 1.010010e-15
IFI6     IFI6 3.727161 8.710519 319.2709 3.683704e-19 1.010010e-15
file_name.table <- paste(table_dir,"/EDFigure_1_d.tp6_TopTable.tab",sep = "")
write.table(tt_6, file_name.table, quote = FALSE, row.names = FALSE, sep = "\t")

gene list Heatmaps

# top 50 genes with logFC > 2
genes_top50up_all <- c(tt_0[tt_0$logFC > 2,]$genes[1:50],
                       tt_1[tt_1$logFC > 2,]$genes[1:50],
                       tt_2[tt_2$logFC > 2,]$genes[1:50],
                       tt_3[tt_3$logFC > 2,]$genes[1:50],
                       tt_4[tt_4$logFC > 2,]$genes[1:50],
                       tt_5[tt_5$logFC > 2,]$genes[1:50],
                       tt_6[tt_6$logFC > 2,]$genes[1:50])

# define data to plot
data2plot <- data.frame()

for (gene in genes_top50up_all) {
    data2plot <- rbind(data2plot, counts.bc.cpm.sel_IVMC[rownames(counts.bc.cpm.sel_IVMC) == gene,])
}
# rownames(data2plot) <- genes
colnames(data2plot) <- colnames(counts.bc.cpm.sel_IVMC)

# center and scale
data2plot.sc <- t(scale(t(data2plot)))

h <- Heatmap(data2plot.sc, cluster_columns=FALSE, cluster_rows=FALSE, 
        heatmap_legend_param = list(title = "cpm\n(centered\nscaled)", color_bar = "continuous"),
        column_split = c(rep(0, 6),rep(1, 6),rep(2,6),rep(3, 6),rep(4,6),rep(5, 6),rep(6,6)),
        row_split = c(rep(0,50),rep(1,50),rep(2,50),rep(3,50),rep(4,50),rep(5,50),rep(6,50)),
        column_title = " ",
        bottom_annotation = HeatmapAnnotation(foo = anno_block(gp = gpar(fill = timepoint_colors, 
                                                                         col = timepoint_colors),
                                                               labels = unique(metadata.sel_IVMC$description.alt2), 
                                                               labels_gp = gpar(col = heatmap_labels_colors, 
                                                                                fontsize = 6))),
        show_column_names = FALSE,
        row_title = NULL)

print(h)

file_name <- paste(plot_dir,"/EDFigure_1_d.pdf",sep = "")
pdf(file_name, width = 6, height = 7)    
    print(h)
dev.off()
png 
  2 
png(sub("\\.pdf$", ".png", file_name), width = 6, height = 7,
    unit = "in", res = 400)    
    print(h)
dev.off()
png 
  2 

Extended Data Fig 2 a

using B0xx_tp0_NB_NH, B0xx_tp1_NB_H, B0xx_tp2_NB_H, B0xx_tp3_NB_H, B0xx_tp4_B_H, B0xx_tp5_B_H, B0xx_tp6_B_H this has been “sel_IVMC”

Line plots showing the (batch coreccted) mean expression levels (in cpm) of genes reported in literature to mark stem cells of the endometrial epithelium (MKI67, ALDH1A1, CDH2) across different timepoints in the IVMC protocol. The red line represents the mean cpm values across donors, while the shaded blue region indicates the standard deviation around the mean.

genes_for_line_plots <- c("MKI67","ALDH1A1","CDH2")

for (gene in genes_for_line_plots ) {
    expr.values.sel_IVMC <- counts.bc.cpm.sel_IVMC[gene,]

    # data to plot
    df2plot <- data.frame(samples=colnames(counts.bc.cpm.sel_IVMC), cpm=expr.values.sel_IVMC)

    # combine with metadata
    df2plot_withMetadata <- merge(df2plot, metadata.sel_IVMC, by.x = "samples", by.y = "sample", sort = FALSE)
 
    # aggregate
    df2plot_aggregate <- data.frame(general_timepoint = aggregate(df2plot_withMetadata$cpm, list(df2plot_withMetadata$general_timepoint), FUN=mean)[,1],
                                    cpm_mean = aggregate(df2plot_withMetadata$cpm, list(df2plot_withMetadata$general_timepoint), FUN=mean)[,2],
                                    cpm_mean_plus_sd = aggregate(df2plot_withMetadata$cpm, list(df2plot_withMetadata$general_timepoint), FUN=mean)[,2] +
                                                       aggregate(df2plot_withMetadata$cpm, list(df2plot_withMetadata$general_timepoint), FUN=sd)[,2],
                                    cpm_mean_minus_sd = aggregate(df2plot_withMetadata$cpm, list(df2plot_withMetadata$general_timepoint), FUN=mean)[,2] -
                                                        aggregate(df2plot_withMetadata$cpm, list(df2plot_withMetadata$general_timepoint), FUN=sd)[,2] )
    

    p <- ggplot(df2plot_aggregate, aes(y=cpm_mean, x=general_timepoint, group = 1)) + 
          geom_line(color = "red") +
          geom_point(color = "red", size = 2) +
          geom_ribbon(aes(x = general_timepoint, ymax = cpm_mean_plus_sd, ymin = cpm_mean_minus_sd), 
                      alpha = 0.3, fill = "lightblue") +
          ggtitle(bquote(italic(.(gene)))) +
          ylab("cpm (Mean ± SD across donors)") +
          xlab("") +
          theme_bw() +
          theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
          theme(axis.line = element_line(colour = "black")) + 
          theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 24)) +
          theme(axis.text.y = element_text(size = 22)) +
          theme(axis.title.y = element_text(size = 24)) +
          theme(panel.border = element_blank())
    print(p)
    
    file_name <- paste(plot_dir,"/EDFigure_2_a__",gene,".pdf", sep = "")
    pdf(file_name, height = 8,width = 10)
        print(p)
    dev.off()
}

Extended Data Fig 3 a

Volcano plot highlighting selected from comparison between organoids at P4-diff phase of the IVMC protocol to organoids not treated with hormones but undergone breakdown (B_NH) at the same timepoint.

define contrast

(colnames(design_all) == "tp1_NB_H") - (colnames(design_all) == "tp1_NB_NH")
 [1]  0  0  0  0  0  0  1 -1  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
qlf_3A<- glmQLFTest(fit_all, contrast = as.vector((colnames(design_all) == "tp1_NB_H") - (colnames(design_all) == "tp1_NB_NH")))
tt_3A <- as.data.frame(topTags(qlf_3A,n=nrow(counts.all.asDGEList)))
head(tt_3A)
          genes     logFC     logCPM        F       PValue          FDR
PHACTR3 PHACTR3  6.128475 -0.6838466 187.3729 6.317181e-28 1.101590e-23
CALCA     CALCA -5.766951  2.2525394 176.1686 5.760801e-26 4.586631e-22
PFKM       PFKM -1.891974  5.7037535 211.0908 7.890752e-26 4.586631e-22
VSTM2L   VSTM2L  4.708261  0.4139683 182.9378 2.069740e-25 9.023032e-22
GATA2     GATA2  9.389917  0.7377046 171.6309 2.743281e-24 9.567466e-21
EVA1C     EVA1C  3.113237  3.8054836 177.7423 1.924080e-23 5.592018e-20

define cut offs

FDR < 0.05 & logFC > 2.0 ~ up; FDR < 0.05 & logFC < -2.0 ~ down

# create significant (sig) column
results_3A <- mutate(tt_3A, sig = case_when(
                                FDR < 0.05 & logFC > 2.0 ~ up,
                                FDR < 0.05 & logFC < -2.0 ~ down,
                                TRUE ~ notsig))
sum(results_3A$sig == "Up")
[1] 781
sum(results_3A$sig == "Down")
[1] 290
file_name.table  <- paste(table_dir,"/EDFigure_3_A.DEG.tab",sep = "")
write.table(results_3A, file_name.table,
            quote = FALSE, row.names = FALSE, sep = "\t")

define genes to label

genes_3A_up_label <- c("SULT1E1", "GATA2", "SCGB1D4", "ALPG", "PAEP", "SCGB1D2", 
                       "SERPINA5", "S100P", "SCGB2A2", "SCGB2A1", "PTGS2", "ENPP3", 
                       "KRT17", "TPPP3", "IL1A")
genes_3A_down_label <- c("NCAM1", "SHISA3", "KRT4", "FZD10", "SSTR1", "SCTR", 
                         "FAM20A", "PTPRG", "RXRG", "MMP10", "FN1", "S100A9", 
                         "MMP3", "MMP1", "ASCL2")
genes_3A_label <- c(genes_3A_up_label,genes_3A_down_label)

# add gene labels
results_3A <- mutate(results_3A, labels = ifelse(genes %in% genes_3A_label, genes, ""))

volcano plot

# set up base plot
p <- ggplot(data = results_3A, aes(x = logFC, y = -log10(PValue))) +
  geom_point(aes(colour = sig)) +
    xlim(-12, +12) +
  scale_color_manual(name = "|log2FC| > 2, FDR < 0.05", values = volcano_colors) +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    axis.line = element_line(colour = "black"),
    legend.key = element_blank(),
    legend.position = "bottom",
    
    # increase text sizes
    axis.text = element_text(size = 18), 
    axis.title = element_text(size = 22),  
    plot.title = element_text(size = 26, hjust = 0.5), 
    legend.title = element_text(size = 20), 
    legend.text = element_text(size = 20) 
  ) +
  xlab('log2 FC') +
  ylab('-log10 p-value') +
  guides(color = guide_legend(override.aes = list(size=5))) +
  
  # add gene labels
  geom_text_repel(data = filter(results_3A, labels != ""), 
                         aes(label = labels),
                         min.segment.length = 0,
                         max.overlaps = Inf,
                         size = 7,  
                         show.legend = FALSE) +
  ggtitle('P4-diff (IVMC vs NH)')

print(p)

file_name <- paste(plot_dir,"/EDFigure_3_a.pdf", sep = "")
pdf(file_name,height = 8,width = 10)
    p
dev.off()
png 
  2 

Extended Data Fig 3 d

Volcano plot highlighting selected DEGs from comparison between organoids of the IVMC protocol at P4-diff phase to organoids at hormonal withdrawal 48 h.

define contrast

(colnames(design_all) == "tp3_NB_H") - (colnames(design_all) == "tp1_NB_H")  - ((colnames(design_all) == "tp3_NB_NH") - (colnames(design_all) == "tp1_NB_NH") )
 [1]  0  0  0  0  0  0 -1  1  0  0  1 -1  0  0  0  0  0  0  0  0  0  0  0  0
qlf_3D<- glmQLFTest(fit_all, contrast = as.vector( (colnames(design_all) == "tp3_NB_H") - (colnames(design_all) == "tp1_NB_H")  - ((colnames(design_all) == "tp3_NB_NH") - (colnames(design_all) == "tp1_NB_NH") ) ) )
tt_3D <- as.data.frame(topTags(qlf_3D,n=nrow(counts.all.asDGEList)))
head(tt_3D)
              genes     logFC   logCPM        F       PValue          FDR
HMGA2-AS1 HMGA2-AS1  3.790418 1.595473 49.92480 1.749615e-10 1.926207e-06
SIK1B         SIK1B -2.174675 4.245860 50.13533 2.509336e-10 1.926207e-06
PCCA           PCCA -1.535965 4.690401 48.55769 4.255336e-10 1.926207e-06
NR4A1         NR4A1 -2.513059 3.856891 48.44720 4.418413e-10 1.926207e-06
KRT17         KRT17 -3.379015 7.477419 45.55016 1.185183e-09 3.715991e-06
KRT16         KRT16 -4.403312 2.707835 45.32271 1.289910e-09 3.715991e-06

define cut offs

FDR < 0.05 & logFC > 2.0 ~ up; FDR < 0.05 & logFC < -2.0 ~ down

# create significant (sig) column
results_3D <- mutate(tt_3D, sig = case_when(
                                FDR < 0.05 & logFC > 2.0 ~ up,
                                FDR < 0.05 & logFC < -2.0 ~ down,
                                TRUE ~ notsig))
sum(results_3D$sig == "Up")
[1] 41
sum(results_3D$sig == "Down")
[1] 63
file_name.table  <- paste(table_dir,"/EDFigure_3_d.DEG.tab",sep = "")
write.table(results_3D, file_name.table,
            quote = FALSE, row.names = FALSE, sep = "\t")

define genes to label

genes_3D_up_label <- c("HMGA2-AS1", "SIGLEC15", "MEOX1", "VTCN1", "RXRG", "MT1E", 
                       "MMP10", "CASP1", "MT1G", "SSTR1")
genes_3D_down_label <- c("PHACTR3", "KRT16", "SERPINA3", "KRT17", "SFRP1", "NOTUM", 
                         "S100P", "CXCL8", "PAEP", "PLAUR", "AREG")
genes_3D_label <- c(genes_3D_up_label,genes_3D_down_label)

# add gene labels
results_3D <- mutate(results_3D, labels = ifelse(genes %in% genes_3D_label, genes, ""))

volcano plot

# set up base plot
p <- ggplot(data = results_3D, aes(x = logFC, y = -log10(PValue))) +
  geom_point(aes(colour = sig)) +
    xlim(-12, +12) +
  scale_color_manual(name = "|log2FC| > 2, FDR < 0.05", values = volcano_colors) +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    axis.line = element_line(colour = "black"),
    legend.key = element_blank(),
    legend.position = "bottom",
    
    # increase text sizes
    axis.text = element_text(size = 18), 
    axis.title = element_text(size = 22),  
    plot.title = element_text(size = 26, hjust = 0.5), 
    legend.title = element_text(size = 20), 
    legend.text = element_text(size = 20) 
  ) +
  xlab('log2 FC') +
  ylab('-log10 p-value') +
  guides(color = guide_legend(override.aes = list(size=5))) +
  
  # add gene labels 4
  geom_text_repel(data = filter(results_3D, labels != ""), 
                         aes(label = labels),
                         min.segment.length = 0,
                         max.overlaps = Inf,
                         size = 7,  
                         show.legend = FALSE) +
  ggtitle('Horm.Withdr. 48h vs P4-diff (controlled for hormone)')

print(p)

file_name <- paste(plot_dir,"/EDFigure_3_d.pdf", sep = "")
pdf(file_name,height = 8,width = 10)
    p
dev.off()
png 
  2 

Extended Data Fig 3 e

Volcano plot highlighting selected DEGs from comparison of organoids between E2-diff 48h phase of the IVMC protocol to organoids not treated with hormones but undergone breakdown (B_NH) at the same timepoint.

define contrast

(colnames(design_all) == "tp6_B_H") - (colnames(design_all) == "tp6_B_NH")
 [1]  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  1 -1  0  0
qlf_3E<- glmQLFTest(fit_all, contrast = as.vector((colnames(design_all) == "tp6_B_H") - (colnames(design_all) == "tp6_B_NH")))
tt_3E <- as.data.frame(topTags(qlf_3E,n=nrow(counts.all.asDGEList)))
head(tt_3E)
              genes     logFC     logCPM        F       PValue          FDR
GATA2-AS1 GATA2-AS1  2.920502 -0.6587471 88.07993 2.244719e-16 3.328586e-12
FBN2           FBN2 -2.313733  5.9715179 96.78891 3.817624e-16 3.328586e-12
SOBP           SOBP -2.109622  2.0542466 74.84218 1.322183e-13 7.685408e-10
OLFM4         OLFM4  8.552628  1.1146224 76.64165 3.246182e-13 1.415173e-09
CYP1A1       CYP1A1 -6.033534  0.6892229 64.94589 7.830694e-13 2.731033e-09
CALCA         CALCA -3.676129  2.2525394 60.06990 2.289224e-12 6.653249e-09

define cut offs

FDR < 0.05 & logFC > 2.0 ~ up; FDR < 0.05 & logFC < -2.0 ~ down

# create significant (sig) column
results_3E <- mutate(tt_3E, sig = case_when(
                                FDR < 0.05 & logFC > 2.0 ~ up,
                                FDR < 0.05 & logFC < -2.0 ~ down,
                                TRUE ~ notsig))
sum(results_3E$sig == "Up")
[1] 145
sum(results_3E$sig == "Down")
[1] 76
file_name.table  <- paste(table_dir,"/EDFigure_3_e.DEG.tab",sep = "")
write.table(results_3E, file_name.table,
            quote = FALSE, row.names = FALSE, sep = "\t")

define genes to label

genes_3E_up_label <- c("OLFM4", "COL1A2", "PGR", "CDC20B", "GMNC", "GREB1", "GATA5", "CCN5")
genes_3E_down_label <- c("KRT5", "MMP1", "PAX2", "FRZB", "MMP10", "SHISA3", "PLXNA4", "SSTR1")
genes_3E_label <- c(genes_3E_up_label,genes_3E_down_label)
 
# add gene labels
results_3E <- mutate(results_3E, labels = ifelse(genes %in% genes_3E_label, genes, ""))

volcano plot

# set up base plot
p <- ggplot(data = results_3E, aes(x = logFC, y = -log10(PValue))) +
  geom_point(aes(colour = sig)) +
    xlim(-12, +12) +
  scale_color_manual(name = "|log2FC| > 2, FDR < 0.05", values = volcano_colors) +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    axis.line = element_line(colour = "black"),
    legend.key = element_blank(),
    legend.position = "bottom",
    
    # increase text sizes
    axis.text = element_text(size = 18), 
    axis.title = element_text(size = 22),  
    plot.title = element_text(size = 26, hjust = 0.5), 
    legend.title = element_text(size = 20), 
    legend.text = element_text(size = 20) 
  ) +
  xlab('log2 FC') +
  ylab('-log10 p-value') +
  guides(color = guide_legend(override.aes = list(size=5))) +
  
  # add gene labels 4
  geom_text_repel(data = filter(results_3E, labels != ""), 
                         aes(label = labels),
                         min.segment.length = 0,
                         max.overlaps = Inf,
                         size = 7,  
                         show.legend = FALSE) +
  ggtitle('E2-diff 48h (IVMC vs B_NH)')

print(p)

file_name <- paste(plot_dir,"/EDFigure_3_e.pdf", sep = "")
pdf(file_name,height = 8,width = 10)
    p
dev.off()
png 
  2 

Extended Data Fig 4 a

using B0xx_tp0_NB_NH, B0xx_tp1_NB_H, B0xx_tp2_NB_H, B0xx_tp3_NB_H, B0xx_tp4_B_H, B0xx_tp5_B_H, B0xx_tp6_B_H this has been “sel_IVMC”

Line plots showing the mean expression levels (in cpm) of SOX9, FUT4, THBS1 and KRT13 genes across different timepoints in the IVMC protocol. The red line represents the mean cpm values across donors, while the shaded blue region indicates the standard deviation around the mean.

genes_for_line_plots <- c("SOX9", "FUT4","THBS1", "KRT13")

for (gene in genes_for_line_plots ) {
    expr.values.sel_IVMC <- counts.bc.cpm.sel_IVMC[gene,]

    # data to plot
    df2plot <- data.frame(samples=colnames(counts.bc.cpm.sel_IVMC), cpm=expr.values.sel_IVMC)

    # combine with metadata
    df2plot_withMetadata <- merge(df2plot, metadata.sel_IVMC, by.x = "samples", by.y = "sample", sort = FALSE)
 
    # aggregate
    df2plot_aggregate <- data.frame(general_timepoint = aggregate(df2plot_withMetadata$cpm, list(df2plot_withMetadata$general_timepoint), FUN=mean)[,1],
                                    cpm_mean = aggregate(df2plot_withMetadata$cpm, list(df2plot_withMetadata$general_timepoint), FUN=mean)[,2],
                                    cpm_mean_plus_sd = aggregate(df2plot_withMetadata$cpm, list(df2plot_withMetadata$general_timepoint), FUN=mean)[,2] +
                                                       aggregate(df2plot_withMetadata$cpm, list(df2plot_withMetadata$general_timepoint), FUN=sd)[,2],
                                    cpm_mean_minus_sd = aggregate(df2plot_withMetadata$cpm, list(df2plot_withMetadata$general_timepoint), FUN=mean)[,2] -
                                                        aggregate(df2plot_withMetadata$cpm, list(df2plot_withMetadata$general_timepoint), FUN=sd)[,2] )
    

    p <- ggplot(df2plot_aggregate, aes(y=cpm_mean, x=general_timepoint, group = 1)) + 
          geom_line(color = "red") +
          geom_point(color = "red", size = 2) +
          geom_ribbon(aes(x = general_timepoint, ymax = cpm_mean_plus_sd, ymin = cpm_mean_minus_sd), 
                      alpha = 0.3, fill = "lightblue") +
          ggtitle(bquote(italic(.(gene))) ) +
          ylab("cpm (Mean ± SD across donors)") +
          xlab("") +
          theme_bw() +
          theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
          theme(axis.line = element_line(colour = "black")) + 
          theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 24)) +
          theme(axis.text.y = element_text(size = 22)) +
          theme(axis.title.y = element_text(size = 24)) +
          theme(panel.border = element_blank())
    print(p)
    
    file_name <- paste(plot_dir,"/EDFigure_4_a__",gene,".pdf", sep = "")
    pdf(file_name, height = 8,width = 10)
        print(p)
    dev.off()
}

Extended Data Fig 5 a

Volcano plot highlighting selected DEGs comparing EOs at Post-breakdown 24h phase of the IVMC protocol to control EOs (B_NH) at the same timepoint.

(colnames(design_all) == "tp4_B_H") - (colnames(design_all) == "tp4_B_NH")
 [1]  0  0  0  0  0  0  0  0  0  0  0  0  1 -1  0  0  0  0  0  0  0  0  0  0
qlf_5A<- glmQLFTest(fit_all, contrast = as.vector((colnames(design_all) == "tp4_B_H") - (colnames(design_all) == "tp4_B_NH")))
tt_5A <- as.data.frame(topTags(qlf_5A,n=nrow(counts.all.asDGEList)))
head(tt_5A)
        genes     logFC     logCPM         F       PValue          FDR
CSF2     CSF2  3.142131 -1.1908599 118.28243 3.496674e-20 6.097500e-16
DMBT1   DMBT1  5.182767  0.2282031  83.83561 1.504947e-15 1.312163e-11
ADGRF5 ADGRF5  4.411076 -0.4612762  76.12266 4.642136e-14 2.698319e-10
FBN2     FBN2 -2.007908  5.9715179  75.67786 1.022128e-13 4.455968e-10
DUOXA2 DUOXA2  5.340019  2.9917768  67.57640 8.298885e-13 2.894319e-09
CALCA   CALCA -3.477386  2.2525394  53.30974 2.511962e-11 7.300598e-08

define cut offs

FDR < 0.05 & logFC > 2.0 ~ up; FDR < 0.05 & logFC < -2.0 ~ down

# create significant (sig) column
results_5A <- mutate(tt_5A, sig = case_when(
                                FDR < 0.05 & logFC > 2.0 ~ up,
                                FDR < 0.05 & logFC < -2.0 ~ down,
                                TRUE ~ notsig))
sum(results_5A$sig == "Up")
[1] 134
sum(results_5A$sig == "Down")
[1] 25
file_name.table  <- paste(table_dir,"/EDFigure_5_a.DEG.tab",sep = "")
write.table(results_5A, file_name.table,
            quote = FALSE, row.names = FALSE, sep = "\t")

define genes to label

genes_5A_up_label <- c("DUOXA2", "DUOX2", " IL19", "SAA2", "SAA1", "SAA4", "TREM1", 
                       "CCR7", "MUC13", "IL10", "CCL20", "IRAK3", "PF4V1")
genes_5A_down_label <- c("FBN2", "CALCA", "SHISA3", "KRT4", "CYP1A2", "NGFR", "KRT5",
                         "ESRRB", "SSTR1", "PLXNA4", "FRZB")
genes_5A_label <- c(genes_5A_up_label,genes_5A_down_label)

# add gene labels
results_5A <- mutate(results_5A, labels = ifelse(genes %in% genes_5A_label, genes, ""))

volcano plot

# set up base plot
p <- ggplot(data = results_5A, aes(x = logFC, y = -log10(PValue))) +
  geom_point(aes(colour = sig)) +
    xlim(-12, +12) +
  scale_color_manual(name = "|log2FC| > 2, FDR < 0.05", values = volcano_colors) +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    axis.line = element_line(colour = "black"),
    legend.key = element_blank(),
    legend.position = "bottom",
    
    # increase text sizes
    axis.text = element_text(size = 18), 
    axis.title = element_text(size = 22),  
    plot.title = element_text(size = 26, hjust = 0.5), 
    legend.title = element_text(size = 20), 
    legend.text = element_text(size = 20) 
  ) +
  xlab('log2 FC') +
  ylab('-log10 p-value') +
  guides(color = guide_legend(override.aes = list(size=5))) +
  
  # add gene labels 
  geom_text_repel(data = filter(results_5A, labels != ""), 
                         aes(label = labels),
                         min.segment.length = 0,
                         max.overlaps = Inf,
                         size = 7,  
                         show.legend = FALSE) +
  ggtitle('Post-breakdown 24h (IVMC vs B_NH)')

print(p)

file_name <- paste(plot_dir,"/EDFigure_5_a.pdf", sep = "")
pdf(file_name,height = 8,width = 10)
    p
dev.off()
png 
  2 

Extended Data Fig 5 b

Biological processes enriched in Post-breakdown 24h phase of the IVMC protocol using genes upregulated in A.

# all upregulated genes
genes_5A_up <- results_5A$genes[results_5A$sig == "Up"]

gostres <- gost(query = genes_5A_up, custom_bg = results_5A$genes, organism = "hsapiens", correction_method = "fdr")
Detected custom background input, domain scope is set to 'custom'.
head(gostres$result)
    query significant    p_value term_size query_size intersection_size
1 query_1        TRUE 0.01054274         1        121                 1
2 query_1        TRUE 0.01054274         1        121                 1
3 query_1        TRUE 0.01054274         1        121                 1
4 query_1        TRUE 0.01054274         1        121                 1
5 query_1        TRUE 0.01054274         1        121                 1
6 query_1        TRUE 0.01054274         1        121                 1
    precision recall    term_id source                                term_name
1 0.008264463      1 CORUM:7507  CORUM Potassium channel complex (KCNB1, KCNE1)
2 0.008264463      1  CORUM:426  CORUM                                 Meprin A
3 0.008264463      1  CORUM:845  CORUM                     PCI-PSA-SCG2 complex
4 0.008264463      1 CORUM:1439  CORUM                  PTGS2 homodimer complex
5 0.008264463      1 CORUM:5389  CORUM                    SERPINA3-CTSG complex
6 0.008264463      1 CORUM:7506  CORUM Potassium channel complex (KCNB1, KCNE2)
  effective_domain_size source_order      parents
1                 15781         2863 CORUM:00....
2                 15781          244 CORUM:00....
3                 15781          481 CORUM:00....
4                 15781          752 CORUM:00....
5                 15781         1666 CORUM:00....
6                 15781         2862 CORUM:00....
unique(gostres$result$source)
 [1] "CORUM" "GO:BP" "GO:CC" "GO:MF" "HP"    "HPA"   "KEGG"  "REAC"  "TF"   
[10] "WP"   
gostres_results.BP <- gostres$result[gostres$result$source == "GO:BP",]
unique(gostres_results.BP$source)
[1] "GO:BP"
gostres_results.BP$neg_log10Pvalue <- ( -log10(gostres_results.BP$p_value) )
gostres_results.BP <- gostres_results.BP[order(gostres_results.BP$neg_log10Pvalue, decreasing = TRUE),]
head(gostres_results.BP[,c("term_name","neg_log10Pvalue")])
                                     term_name neg_log10Pvalue
10                             cilium movement        8.691102
11   cilium movement involved in cell motility        8.317602
12              cilium-dependent cell motility        8.317602
13 cilium or flagellum-dependent cell motility        8.317602
14                               cell motility        6.706811
15                 acute inflammatory response        6.473471
file_name.table  <- paste(table_dir,"/EDFigure_5_b.tab",sep = "")
write.table(gostres_results.BP[,c("term_name","neg_log10Pvalue")], file_name.table,
            quote = FALSE, row.names = FALSE, sep = "\t")

representative_GO_terms <- c("cilium movement", 
                             "cell motility", 
                             "acute inflammatory response", 
                             "acute-phase response", 
                             "defense response",
                             "sperm motility",
                             "response to bacterium",
                             "leukocyte migration",
                             "neutrophil chemotaxis",
                             "extracellular transport")

head(gostres_results.BP[,c("term_name","neg_log10Pvalue")])
                                     term_name neg_log10Pvalue
10                             cilium movement        8.691102
11   cilium movement involved in cell motility        8.317602
12              cilium-dependent cell motility        8.317602
13 cilium or flagellum-dependent cell motility        8.317602
14                               cell motility        6.706811
15                 acute inflammatory response        6.473471
gostres_results.BP$term_name[gostres_results.BP$term_name %in% representative_GO_terms]
 [1] "cilium movement"             "cell motility"              
 [3] "acute inflammatory response" "acute-phase response"       
 [5] "defense response"            "sperm motility"             
 [7] "response to bacterium"       "leukocyte migration"        
 [9] "neutrophil chemotaxis"       "extracellular transport"    
data2plot <- gostres_results.BP[gostres_results.BP$term_name %in% representative_GO_terms,]

data2plot$term_name <- factor(data2plot$term_name, levels = rev(data2plot$term_name))

p <- ggplot(data2plot, aes(x=neg_log10Pvalue,y=term_name)) +
  geom_point(size = 10, color = "#00C5CD") +
  xlab('-log10 p-value') +
  xlim(0, 10) +
  ylab('') +
  theme_bw() +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
  theme(axis.text.y = element_text(size=22)) +
  theme(axis.text.x = element_text(size=18)) +
  theme(axis.title.x =element_text(size=18)) +
  theme(panel.border = element_blank(), axis.line = element_line())

print(p)    

file_name <- paste(plot_dir,"/EDFigure_5_b.pdf", sep = "")
pdf(file_name,height = 8,width = 7)
    print(p)
dev.off() 
png 
  2 

Extended Data Fig 9 a

Heatmap depicting centered and scaled counts per million (cpm) of ligands involved in interactions between luminal early epithelial cells and other cell types, across batch corrected samples of EOs in the IVMC protocol.

genes <- c("WNT7A", "IL6", "CXCL1", "CXCL2", "CXCL3", "CXCL6", "CXCL8", "CXCL16", "ANXA1", "EDN1", "PDGFA",
           "PDGFB", "AREG", "C3", "CSF1", "CSF3", "NAMPT", "VEGFA", "TNF", "LIF", "ADM", "GDF15", "SEMA3A")

# define data to plot
data2plot.bc <- data.frame()

for (gene in genes) {
    # use counts.bc.cpm.sel_IVMC from figure 1e
    data2plot.bc <- rbind(data2plot.bc, counts.bc.cpm.sel_IVMC[rownames(counts.bc.cpm.sel_IVMC) == gene,])
}

rownames(data2plot.bc) <- genes
colnames(data2plot.bc) <- colnames(counts.bc.cpm.sel_IVMC)

# center and scale
data2plot.bc.sc <- t(scale(t(data2plot.bc)))

heatmap_selected_genes<- Heatmap(data2plot.bc.sc, cluster_columns=FALSE, cluster_rows=TRUE,
        heatmap_legend_param = list(legend_direction = "horizontal", 
                                    title = "cpm (centered \nand scaled)", 
                                    color_bar = "continuous"),
        column_split = c(rep(0, 6),rep(1, 6),rep(2,6),rep(3, 6),rep(4,6),rep(5, 6),rep(6,6)),
        column_title = "",
        bottom_annotation = HeatmapAnnotation(foo = anno_block(gp = gpar(fill = timepoint_colors, 
                                                                         col = timepoint_colors),
        labels = unique(metadata.sel_IVMC$description.alt), 
        labels_gp = gpar(col = heatmap_labels_colors, fontsize = 8))),
        show_column_names = FALSE)

draw(heatmap_selected_genes, heatmap_legend_side="bottom")

file_name <- paste(plot_dir,"/EDFigure_9_a.pdf", sep = "")
pdf(file_name,height = 10,width = 8)
    draw(heatmap_selected_genes, heatmap_legend_side="bottom")
dev.off()
png 
  2 

Extended Data Fig 10 a

Bar plot showing the average transcripts per million (TPM) of WNT ligands at Post-breakdown 24h in EOs undergoing the IVMC protocol, analysed with bulk RNAseq.

using TPM for B0xx_tp4_H_B

metadata.sel3 <- mutate(as.data.frame(metadata), sel3 = case_when(
                                timepoint == "tp4" & hormones == "H" & breakdown == "B" ~ TRUE,
                                TRUE ~ FALSE))

sel3 <- metadata$sample[metadata.sel3$sel3]
sel3
[1] "B032_tp4_B_H" "B044_tp4_B_H" "B050_tp4_B_H" "B066_tp4_B_H" "B078_tp4_B_H"
[6] "B080_tp4_B_H"
tpm.selected <- all_tpm[,sel3]
    
paralogues.WNT <- rownames(tpm.selected)[grep("WNT",rownames(tpm.selected))]
# exclude ncRNA and order    
paralogues.WNT <- paralogues.WNT[c(10,5,11,6,14,17,8,7,9,15,19,2,3,13,16,12,18,4,1)]  
paralogues.WNT
 [1] "WNT1"   "WNT2"   "WNT2B"  "WNT3"   "WNT3A"  "WNT4"   "WNT5A"  "WNT5B" 
 [9] "WNT6"   "WNT7A"  "WNT7B"  "WNT8A"  "WNT8B"  "WNT9A"  "WNT9B"  "WNT10A"
[17] "WNT10B" "WNT11"  "WNT16" 
tpm.selected.WNT <- data.frame(WNT = as.vector(as.matrix(tpm.selected[paralogues.WNT,])), 
                                paralogue = rep(paralogues.WNT,6), 
                                donor = (c(rep("B032",19), rep("B044",19), rep("B050",19), rep("B066",19), rep("B078",19), rep("B080",19))))
tpm.selected.WNT$paralogue <- factor(tpm.selected.WNT$paralogue, levels = tpm.selected.WNT$paralogue[1:19])

# and plot  
ggplot(tpm.selected.WNT, aes(y=WNT)) + 
          geom_bar(aes(x=paralogue, fill = donor), data = tpm.selected.WNT, stat="identity", position = "dodge") + 
          ylab('TPM') +
          ggtitle("Post-breakdown 24h") +
          scale_fill_manual(values = donor_colors) +
          theme_bw() +
          theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
          theme(axis.line = element_line(colour = "black")) + 
          theme(axis.text.x = element_text(angle = 90)) +
          theme(panel.border = element_blank()) 

p <- ggplot(tpm.selected.WNT, aes(x=paralogue,y=WNT)) + 
          stat_summary(fun=mean, geom="bar", fill="orange", ) +
          geom_point(size = 4) + 
          labs(y='TPM',x=NULL) +
          ggtitle("Post-breakdown 24h") +
          theme_bw() +
          theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
          theme(axis.line = element_line(colour = "black")) + 
          theme(axis.text.x = element_text(angle = 90, size=24, hjust=0.90,vjust=0.5)) +
          theme(axis.text.y = element_text(size=18)) +
          theme(axis.title.y =element_text(size=18)) +
          theme(axis.ticks = element_blank()) +
          theme(panel.border = element_blank()) 
    
print(p)    

file_name <- paste(plot_dir,"/EDFigure_10_a.pdf", sep = "")
pdf(file_name,height = 6,width = 8)
    print(p)
dev.off()    
png 
  2 

Extended Data Fig 10 b

Bar plot showing the average transcripts per million (TPM) of FZD receptors during at Post-breakdown 24h in EOs undergoing the IVMC protocol, analysed with bulk RNAseq.

using TPM for B0xx_tp4_H_B

# use tpm.selected from 10 a
paralogues.FZD <- rownames(tpm.selected)[grep("FZD",rownames(tpm.selected))]
# exclude ncRNA and order    
paralogues.FZD <- paralogues.FZD[c(4,9,1,7,5,6,3,8,10,2)]  
paralogues.FZD
 [1] "FZD1"  "FZD2"  "FZD3"  "FZD4"  "FZD5"  "FZD6"  "FZD7"  "FZD8"  "FZD9" 
[10] "FZD10"
tpm.selected.FZD <- data.frame(FZD = as.vector(as.matrix(tpm.selected[paralogues.FZD,])), 
                                paralogue = rep(paralogues.FZD,6), 
                                donor = (c(rep("B032",10), rep("B044",10), rep("B050",10), rep("B066",10), rep("B078",10), rep("B080",10))))
tpm.selected.FZD$paralogue <- factor(tpm.selected.FZD$paralogue, levels = tpm.selected.FZD$paralogue[1:10])

# and plot  
ggplot(tpm.selected.FZD, aes(y=FZD)) + 
          geom_bar(aes(x=paralogue, fill = donor), data = tpm.selected.FZD, stat="identity", position = "dodge") + 
          ylab('TPM') +
          ggtitle("Post-breakdown 24h") +
          scale_fill_manual(values = donor_colors) +
          theme_bw() +
          theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
          theme(axis.line = element_line(colour = "black")) + 
          theme(axis.text.x = element_text(angle = 90)) +
          theme(panel.border = element_blank()) 

p <- ggplot(tpm.selected.FZD, aes(x=paralogue,y=FZD)) + 
          stat_summary(fun=mean, geom="bar", fill="magenta4", ) +
          geom_point(size = 4) + 
          labs(y='TPM',x=NULL) +
          ggtitle("Post-breakdown 24h") +
          theme_bw() +
          theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
          theme(axis.line = element_line(colour = "black")) + 
          theme(axis.text.x = element_text(angle = 90, size=24, hjust=0.90,vjust=0.5)) +
          theme(axis.text.y = element_text(size=18)) +
          theme(axis.title.y =element_text(size=18)) +
          theme(axis.ticks = element_blank()) +
          theme(panel.border = element_blank()) 
    
print(p)    

file_name <- paste(plot_dir,"/EDFigure_10_b.pdf", sep = "")
pdf(file_name,height = 6,width = 8)
    print(p)
dev.off()    
png 
  2 

sessionInfo

sessionInfo()
R version 4.5.0 (2025-04-11)
Platform: x86_64-pc-linux-gnu
Running under: Rocky Linux 8.10 (Green Obsidian)

Matrix products: default
BLAS/LAPACK: FlexiBLAS OPENBLAS;  LAPACK version 3.12.0

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       

time zone: Europe/Zurich
tzcode source: system (glibc)

attached base packages:
[1] grid      stats     graphics  grDevices utils     datasets  methods  
[8] base     

other attached packages:
 [1] gprofiler2_0.2.3      ComplexHeatmap_2.24.0 PCAtools_2.20.0      
 [4] ggrepel_0.9.6         ggplot2_3.5.2         dplyr_1.1.4          
 [7] edgeR_4.6.2           limma_3.64.1          sva_3.56.0           
[10] BiocParallel_1.42.1   genefilter_1.90.0     mgcv_1.9-3           
[13] nlme_3.1-168         

loaded via a namespace (and not attached):
  [1] bitops_1.0-9              DBI_1.2.3                
  [3] rlang_1.1.6               magrittr_2.0.3           
  [5] clue_0.3-66               GetoptLong_1.0.5         
  [7] matrixStats_1.5.0         compiler_4.5.0           
  [9] RSQLite_2.4.1             DelayedMatrixStats_1.30.0
 [11] png_0.1-8                 vctrs_0.6.5              
 [13] reshape2_1.4.4            stringr_1.5.1            
 [15] shape_1.4.6.1             pkgconfig_2.0.3          
 [17] crayon_1.5.3              fastmap_1.2.0            
 [19] magick_2.8.7              XVector_0.48.0           
 [21] labeling_0.4.3            rmarkdown_2.29           
 [23] UCSC.utils_1.4.0          purrr_1.0.4              
 [25] bit_4.6.0                 xfun_0.52                
 [27] cachem_1.1.0              beachmat_2.24.0          
 [29] GenomeInfoDb_1.44.0       jsonlite_2.0.0           
 [31] blob_1.2.4                DelayedArray_0.34.1      
 [33] cluster_2.1.8.1           irlba_2.3.5.1            
 [35] parallel_4.5.0            R6_2.6.1                 
 [37] stringi_1.8.7             RColorBrewer_1.1-3       
 [39] Rcpp_1.0.14               iterators_1.0.14         
 [41] knitr_1.50                IRanges_2.42.0           
 [43] Matrix_1.7-3              splines_4.5.0            
 [45] tidyselect_1.2.1          dichromat_2.0-0.1        
 [47] abind_1.4-8               yaml_2.3.10              
 [49] doParallel_1.0.17         codetools_0.2-20         
 [51] lattice_0.22-7            tibble_3.3.0             
 [53] plyr_1.8.9                Biobase_2.68.0           
 [55] withr_3.0.2               KEGGREST_1.48.1          
 [57] evaluate_1.0.4            survival_3.8-3           
 [59] circlize_0.4.16           Biostrings_2.76.0        
 [61] pillar_1.10.2             MatrixGenerics_1.20.0    
 [63] foreach_1.5.2             stats4_4.5.0             
 [65] plotly_4.11.0             generics_0.1.4           
 [67] RCurl_1.98-1.17           S4Vectors_0.46.0         
 [69] sparseMatrixStats_1.20.0  scales_1.4.0             
 [71] xtable_1.8-4              glue_1.8.0               
 [73] lazyeval_0.2.2            tools_4.5.0              
 [75] data.table_1.17.6         ScaledMatrix_1.16.0      
 [77] annotate_1.86.1           locfit_1.5-9.12          
 [79] XML_3.99-0.18             Cairo_1.6-2              
 [81] cowplot_1.1.3             tidyr_1.3.1              
 [83] colorspace_2.1-1          AnnotationDbi_1.70.0     
 [85] GenomeInfoDbData_1.2.14   BiocSingular_1.24.0      
 [87] cli_3.6.5                 rsvd_1.0.5               
 [89] viridisLite_0.4.2         S4Arrays_1.8.1           
 [91] gtable_0.3.6              digest_0.6.37            
 [93] BiocGenerics_0.54.0       SparseArray_1.8.0        
 [95] dqrng_0.4.1               rjson_0.2.23             
 [97] htmlwidgets_1.6.4         farver_2.1.2             
 [99] memoise_2.0.1             htmltools_0.5.8.1        
[101] lifecycle_1.0.4           httr_1.4.7               
[103] GlobalOptions_0.1.2       statmod_1.5.0            
[105] bit64_4.6.0-1