library(sva)
library(edgeR)
library(dplyr)
library(PCAtools)
library(ComplexHeatmap)
library(gprofiler2)
gprofiler2::set_base_url("https://biit.cs.ut.ee/gprofiler_archive3/e112_eg59_p19")IVMC analysis and plots
Description
This script will generate the following plots:
Fig 1 d, e, f, g
Fig 3 c
Fig 5 d
ExtDat Fig 1 b, b, d
ExtDat Fig 2 a
ExtDat Fig 3 a, d, e
ExtDat Fig 4 a
ExtDat Fig 5 a, b
ExtDat Fig 9 a
ExtDat Fig 10 a, b
preparation
load libraries
new folders
plot_dir <- "paper_figures_IVMC"
if (!file.exists(plot_dir)) {
dir.create(plot_dir, showWarnings = FALSE, recursive = FALSE)
}
table_dir <- "paper_tables_IVMC"
if (!file.exists(table_dir)) {
dir.create(table_dir, showWarnings = FALSE, recursive = FALSE)
}reading raw counts and tpm values
all_counts <- read.delim("data/IVMC.raw_counts.20250403.tab")
# calculate cpm values on the complete data set first
all_counts.cpm <- cpm(all_counts, log=FALSE)
all_tpm <- read.delim("data/IVMC.TPM.20250403.tab")set up metadata
colnames(all_counts) [1] "B032_tp0_NB_NH" "B044_tp0_NB_NH" "B050_tp0_NB_NH" "B066_tp0_NB_NH"
[5] "B078_tp0_NB_NH" "B080_tp0_NB_NH" "B032_tp1_NB_H" "B032_tp1_NB_NH"
[9] "B044_tp1_NB_H" "B044_tp1_NB_NH" "B050_tp1_NB_H" "B050_tp1_NB_NH"
[13] "B066_tp1_NB_H" "B066_tp1_NB_NH" "B078_tp1_NB_H" "B078_tp1_NB_NH"
[17] "B080_tp1_NB_H" "B080_tp1_NB_NH" "B032_tp2_NB_H" "B032_tp2_NB_NH"
[21] "B044_tp2_NB_H" "B044_tp2_NB_NH" "B050_tp2_NB_H" "B050_tp2_NB_NH"
[25] "B066_tp2_NB_H" "B066_tp2_NB_NH" "B078_tp2_NB_H" "B078_tp2_NB_NH"
[29] "B080_tp2_NB_H" "B080_tp2_NB_NH" "B032_tp3_NB_H" "B032_tp3_NB_NH"
[33] "B044_tp3_NB_H" "B044_tp3_NB_NH" "B050_tp3_NB_H" "B050_tp3_NB_NH"
[37] "B066_tp3_NB_H" "B066_tp3_NB_NH" "B078_tp3_NB_H" "B078_tp3_NB_NH"
[41] "B080_tp3_NB_H" "B080_tp3_NB_NH" "B032_tp4_B_H" "B032_tp4_B_NH"
[45] "B032_tp4_NB_H" "B032_tp4_NB_NH" "B044_tp4_B_H" "B044_tp4_B_NH"
[49] "B044_tp4_NB_H" "B044_tp4_NB_NH" "B050_tp4_B_H" "B050_tp4_B_NH"
[53] "B050_tp4_NB_H" "B050_tp4_NB_NH" "B066_tp4_B_H" "B066_tp4_B_NH"
[57] "B066_tp4_NB_H" "B066_tp4_NB_NH" "B078_tp4_B_H" "B078_tp4_B_NH"
[61] "B078_tp4_NB_H" "B078_tp4_NB_NH" "B080_tp4_B_H" "B080_tp4_B_NH"
[65] "B080_tp4_NB_H" "B080_tp4_NB_NH" "B032_tp5_B_H" "B032_tp5_B_NH"
[69] "B032_tp5_NB_H" "B032_tp5_NB_NH" "B044_tp5_B_H" "B044_tp5_B_NH"
[73] "B044_tp5_NB_H" "B044_tp5_NB_NH" "B050_tp5_B_H" "B050_tp5_B_NH"
[77] "B050_tp5_NB_H" "B050_tp5_NB_NH" "B066_tp5_B_H" "B066_tp5_B_NH"
[81] "B066_tp5_NB_H" "B066_tp5_NB_NH" "B078_tp5_B_H" "B078_tp5_B_NH"
[85] "B078_tp5_NB_H" "B078_tp5_NB_NH" "B080_tp5_B_H" "B080_tp5_B_NH"
[89] "B080_tp5_NB_H" "B080_tp5_NB_NH" "B032_tp6_B_H" "B032_tp6_B_NH"
[93] "B032_tp6_NB_H" "B032_tp6_NB_NH" "B044_tp6_B_H" "B044_tp6_B_NH"
[97] "B044_tp6_NB_H" "B044_tp6_NB_NH" "B050_tp6_B_H" "B050_tp6_B_NH"
[101] "B050_tp6_NB_H" "B050_tp6_NB_NH" "B066_tp6_B_H" "B066_tp6_B_NH"
[105] "B066_tp6_NB_H" "B066_tp6_NB_NH" "B078_tp6_B_H" "B078_tp6_B_NH"
[109] "B078_tp6_NB_H" "B078_tp6_NB_NH" "B080_tp6_B_H" "B080_tp6_B_NH"
[113] "B080_tp6_NB_H" "B080_tp6_NB_NH"
metadata <- data.frame(sample = colnames(all_counts),
donor = unlist(strsplit(colnames(all_counts),"_"))[rep(c(TRUE,FALSE,FALSE,FALSE),114)],
timepoint = unlist(strsplit(colnames(all_counts),"_"))[rep(c(FALSE,TRUE,FALSE,FALSE),114)],
breakdown = unlist(strsplit(colnames(all_counts),"_"))[rep(c(FALSE,FALSE,TRUE,FALSE),114)],
hormones = unlist(strsplit(colnames(all_counts),"_"))[rep(c(FALSE,FALSE,FALSE,TRUE),114)] )
rownames(metadata) <- metadata$sample
metadata$timepoint_B_H <- paste(metadata$timepoint,metadata$breakdown,metadata$hormones, sep = "_")
metadata sample donor timepoint breakdown hormones timepoint_B_H
B032_tp0_NB_NH B032_tp0_NB_NH B032 tp0 NB NH tp0_NB_NH
B044_tp0_NB_NH B044_tp0_NB_NH B044 tp0 NB NH tp0_NB_NH
B050_tp0_NB_NH B050_tp0_NB_NH B050 tp0 NB NH tp0_NB_NH
B066_tp0_NB_NH B066_tp0_NB_NH B066 tp0 NB NH tp0_NB_NH
B078_tp0_NB_NH B078_tp0_NB_NH B078 tp0 NB NH tp0_NB_NH
B080_tp0_NB_NH B080_tp0_NB_NH B080 tp0 NB NH tp0_NB_NH
B032_tp1_NB_H B032_tp1_NB_H B032 tp1 NB H tp1_NB_H
B032_tp1_NB_NH B032_tp1_NB_NH B032 tp1 NB NH tp1_NB_NH
B044_tp1_NB_H B044_tp1_NB_H B044 tp1 NB H tp1_NB_H
B044_tp1_NB_NH B044_tp1_NB_NH B044 tp1 NB NH tp1_NB_NH
B050_tp1_NB_H B050_tp1_NB_H B050 tp1 NB H tp1_NB_H
B050_tp1_NB_NH B050_tp1_NB_NH B050 tp1 NB NH tp1_NB_NH
B066_tp1_NB_H B066_tp1_NB_H B066 tp1 NB H tp1_NB_H
B066_tp1_NB_NH B066_tp1_NB_NH B066 tp1 NB NH tp1_NB_NH
B078_tp1_NB_H B078_tp1_NB_H B078 tp1 NB H tp1_NB_H
B078_tp1_NB_NH B078_tp1_NB_NH B078 tp1 NB NH tp1_NB_NH
B080_tp1_NB_H B080_tp1_NB_H B080 tp1 NB H tp1_NB_H
B080_tp1_NB_NH B080_tp1_NB_NH B080 tp1 NB NH tp1_NB_NH
B032_tp2_NB_H B032_tp2_NB_H B032 tp2 NB H tp2_NB_H
B032_tp2_NB_NH B032_tp2_NB_NH B032 tp2 NB NH tp2_NB_NH
B044_tp2_NB_H B044_tp2_NB_H B044 tp2 NB H tp2_NB_H
B044_tp2_NB_NH B044_tp2_NB_NH B044 tp2 NB NH tp2_NB_NH
B050_tp2_NB_H B050_tp2_NB_H B050 tp2 NB H tp2_NB_H
B050_tp2_NB_NH B050_tp2_NB_NH B050 tp2 NB NH tp2_NB_NH
B066_tp2_NB_H B066_tp2_NB_H B066 tp2 NB H tp2_NB_H
B066_tp2_NB_NH B066_tp2_NB_NH B066 tp2 NB NH tp2_NB_NH
B078_tp2_NB_H B078_tp2_NB_H B078 tp2 NB H tp2_NB_H
B078_tp2_NB_NH B078_tp2_NB_NH B078 tp2 NB NH tp2_NB_NH
B080_tp2_NB_H B080_tp2_NB_H B080 tp2 NB H tp2_NB_H
B080_tp2_NB_NH B080_tp2_NB_NH B080 tp2 NB NH tp2_NB_NH
B032_tp3_NB_H B032_tp3_NB_H B032 tp3 NB H tp3_NB_H
B032_tp3_NB_NH B032_tp3_NB_NH B032 tp3 NB NH tp3_NB_NH
B044_tp3_NB_H B044_tp3_NB_H B044 tp3 NB H tp3_NB_H
B044_tp3_NB_NH B044_tp3_NB_NH B044 tp3 NB NH tp3_NB_NH
B050_tp3_NB_H B050_tp3_NB_H B050 tp3 NB H tp3_NB_H
B050_tp3_NB_NH B050_tp3_NB_NH B050 tp3 NB NH tp3_NB_NH
B066_tp3_NB_H B066_tp3_NB_H B066 tp3 NB H tp3_NB_H
B066_tp3_NB_NH B066_tp3_NB_NH B066 tp3 NB NH tp3_NB_NH
B078_tp3_NB_H B078_tp3_NB_H B078 tp3 NB H tp3_NB_H
B078_tp3_NB_NH B078_tp3_NB_NH B078 tp3 NB NH tp3_NB_NH
B080_tp3_NB_H B080_tp3_NB_H B080 tp3 NB H tp3_NB_H
B080_tp3_NB_NH B080_tp3_NB_NH B080 tp3 NB NH tp3_NB_NH
B032_tp4_B_H B032_tp4_B_H B032 tp4 B H tp4_B_H
B032_tp4_B_NH B032_tp4_B_NH B032 tp4 B NH tp4_B_NH
B032_tp4_NB_H B032_tp4_NB_H B032 tp4 NB H tp4_NB_H
B032_tp4_NB_NH B032_tp4_NB_NH B032 tp4 NB NH tp4_NB_NH
B044_tp4_B_H B044_tp4_B_H B044 tp4 B H tp4_B_H
B044_tp4_B_NH B044_tp4_B_NH B044 tp4 B NH tp4_B_NH
B044_tp4_NB_H B044_tp4_NB_H B044 tp4 NB H tp4_NB_H
B044_tp4_NB_NH B044_tp4_NB_NH B044 tp4 NB NH tp4_NB_NH
B050_tp4_B_H B050_tp4_B_H B050 tp4 B H tp4_B_H
B050_tp4_B_NH B050_tp4_B_NH B050 tp4 B NH tp4_B_NH
B050_tp4_NB_H B050_tp4_NB_H B050 tp4 NB H tp4_NB_H
B050_tp4_NB_NH B050_tp4_NB_NH B050 tp4 NB NH tp4_NB_NH
B066_tp4_B_H B066_tp4_B_H B066 tp4 B H tp4_B_H
B066_tp4_B_NH B066_tp4_B_NH B066 tp4 B NH tp4_B_NH
B066_tp4_NB_H B066_tp4_NB_H B066 tp4 NB H tp4_NB_H
B066_tp4_NB_NH B066_tp4_NB_NH B066 tp4 NB NH tp4_NB_NH
B078_tp4_B_H B078_tp4_B_H B078 tp4 B H tp4_B_H
B078_tp4_B_NH B078_tp4_B_NH B078 tp4 B NH tp4_B_NH
B078_tp4_NB_H B078_tp4_NB_H B078 tp4 NB H tp4_NB_H
B078_tp4_NB_NH B078_tp4_NB_NH B078 tp4 NB NH tp4_NB_NH
B080_tp4_B_H B080_tp4_B_H B080 tp4 B H tp4_B_H
B080_tp4_B_NH B080_tp4_B_NH B080 tp4 B NH tp4_B_NH
B080_tp4_NB_H B080_tp4_NB_H B080 tp4 NB H tp4_NB_H
B080_tp4_NB_NH B080_tp4_NB_NH B080 tp4 NB NH tp4_NB_NH
B032_tp5_B_H B032_tp5_B_H B032 tp5 B H tp5_B_H
B032_tp5_B_NH B032_tp5_B_NH B032 tp5 B NH tp5_B_NH
B032_tp5_NB_H B032_tp5_NB_H B032 tp5 NB H tp5_NB_H
B032_tp5_NB_NH B032_tp5_NB_NH B032 tp5 NB NH tp5_NB_NH
B044_tp5_B_H B044_tp5_B_H B044 tp5 B H tp5_B_H
B044_tp5_B_NH B044_tp5_B_NH B044 tp5 B NH tp5_B_NH
B044_tp5_NB_H B044_tp5_NB_H B044 tp5 NB H tp5_NB_H
B044_tp5_NB_NH B044_tp5_NB_NH B044 tp5 NB NH tp5_NB_NH
B050_tp5_B_H B050_tp5_B_H B050 tp5 B H tp5_B_H
B050_tp5_B_NH B050_tp5_B_NH B050 tp5 B NH tp5_B_NH
B050_tp5_NB_H B050_tp5_NB_H B050 tp5 NB H tp5_NB_H
B050_tp5_NB_NH B050_tp5_NB_NH B050 tp5 NB NH tp5_NB_NH
B066_tp5_B_H B066_tp5_B_H B066 tp5 B H tp5_B_H
B066_tp5_B_NH B066_tp5_B_NH B066 tp5 B NH tp5_B_NH
B066_tp5_NB_H B066_tp5_NB_H B066 tp5 NB H tp5_NB_H
B066_tp5_NB_NH B066_tp5_NB_NH B066 tp5 NB NH tp5_NB_NH
B078_tp5_B_H B078_tp5_B_H B078 tp5 B H tp5_B_H
B078_tp5_B_NH B078_tp5_B_NH B078 tp5 B NH tp5_B_NH
B078_tp5_NB_H B078_tp5_NB_H B078 tp5 NB H tp5_NB_H
B078_tp5_NB_NH B078_tp5_NB_NH B078 tp5 NB NH tp5_NB_NH
B080_tp5_B_H B080_tp5_B_H B080 tp5 B H tp5_B_H
B080_tp5_B_NH B080_tp5_B_NH B080 tp5 B NH tp5_B_NH
B080_tp5_NB_H B080_tp5_NB_H B080 tp5 NB H tp5_NB_H
B080_tp5_NB_NH B080_tp5_NB_NH B080 tp5 NB NH tp5_NB_NH
B032_tp6_B_H B032_tp6_B_H B032 tp6 B H tp6_B_H
B032_tp6_B_NH B032_tp6_B_NH B032 tp6 B NH tp6_B_NH
B032_tp6_NB_H B032_tp6_NB_H B032 tp6 NB H tp6_NB_H
B032_tp6_NB_NH B032_tp6_NB_NH B032 tp6 NB NH tp6_NB_NH
B044_tp6_B_H B044_tp6_B_H B044 tp6 B H tp6_B_H
B044_tp6_B_NH B044_tp6_B_NH B044 tp6 B NH tp6_B_NH
B044_tp6_NB_H B044_tp6_NB_H B044 tp6 NB H tp6_NB_H
B044_tp6_NB_NH B044_tp6_NB_NH B044 tp6 NB NH tp6_NB_NH
B050_tp6_B_H B050_tp6_B_H B050 tp6 B H tp6_B_H
B050_tp6_B_NH B050_tp6_B_NH B050 tp6 B NH tp6_B_NH
B050_tp6_NB_H B050_tp6_NB_H B050 tp6 NB H tp6_NB_H
B050_tp6_NB_NH B050_tp6_NB_NH B050 tp6 NB NH tp6_NB_NH
B066_tp6_B_H B066_tp6_B_H B066 tp6 B H tp6_B_H
B066_tp6_B_NH B066_tp6_B_NH B066 tp6 B NH tp6_B_NH
B066_tp6_NB_H B066_tp6_NB_H B066 tp6 NB H tp6_NB_H
B066_tp6_NB_NH B066_tp6_NB_NH B066 tp6 NB NH tp6_NB_NH
B078_tp6_B_H B078_tp6_B_H B078 tp6 B H tp6_B_H
B078_tp6_B_NH B078_tp6_B_NH B078 tp6 B NH tp6_B_NH
B078_tp6_NB_H B078_tp6_NB_H B078 tp6 NB H tp6_NB_H
B078_tp6_NB_NH B078_tp6_NB_NH B078 tp6 NB NH tp6_NB_NH
B080_tp6_B_H B080_tp6_B_H B080 tp6 B H tp6_B_H
B080_tp6_B_NH B080_tp6_B_NH B080 tp6 B NH tp6_B_NH
B080_tp6_NB_H B080_tp6_NB_H B080 tp6 NB H tp6_NB_H
B080_tp6_NB_NH B080_tp6_NB_NH B080 tp6 NB NH tp6_NB_NH
color and description scheme
source("../params.R")
timepoint_colors <- bulk_timepoint_colors[c("tp0","tp1","tp2","tp3","tp4","tp5","tp6")]
timepoint_colors.alt_nlb <- bulk_timepoint_colors[c("Pre-diff","P4-diff","H. W. 24h","H. W. 48h","P. B. 24h","E2-diff 24h","E2-diff 48h")]
heatmap_labels_colors <- c("black","white","black","black","white","black","black")
protocol_colors <- hormones_colors
names(protocol_colors) <- c("B_NH","IVMC")metadata$description <- bulk_timepoint_map_sl["tp0"]
metadata$description[metadata$timepoint == "tp1"] <- bulk_timepoint_map_sl["tp1"]
metadata$description[metadata$timepoint == "tp2"] <- bulk_timepoint_map_sl["tp2"]
metadata$description[metadata$timepoint == "tp3"] <- bulk_timepoint_map_sl["tp3"]
metadata$description[metadata$timepoint == "tp4"] <- bulk_timepoint_map_sl["tp4"]
metadata$description[metadata$timepoint == "tp5"] <- bulk_timepoint_map_sl["tp5"]
metadata$description[metadata$timepoint == "tp6"] <- bulk_timepoint_map_sl["tp6"]
# alternative
metadata$description.alt <- bulk_timepoint_map_mls["tp0"]
metadata$description.alt[metadata$timepoint == "tp1"] <- bulk_timepoint_map_mls["tp1"]
metadata$description.alt[metadata$timepoint == "tp2"] <- bulk_timepoint_map_mls["tp2"]
metadata$description.alt[metadata$timepoint == "tp3"] <- bulk_timepoint_map_mls["tp3"]
metadata$description.alt[metadata$timepoint == "tp4"] <- bulk_timepoint_map_mls["tp4"]
metadata$description.alt[metadata$timepoint == "tp5"] <- bulk_timepoint_map_mls["tp5"]
metadata$description.alt[metadata$timepoint == "tp6"] <- bulk_timepoint_map_mls["tp6"]
# alternative no line break
metadata$description.alt_nlb <- gsub("\n", " ", bulk_timepoint_map_mls["tp0"])
metadata$description.alt_nlb[metadata$timepoint == "tp1"] <- gsub("\n", " ", bulk_timepoint_map_mls["tp1"])
metadata$description.alt_nlb[metadata$timepoint == "tp2"] <- gsub("\n", " ", bulk_timepoint_map_mls["tp2"])
metadata$description.alt_nlb[metadata$timepoint == "tp3"] <- gsub("\n", " ", bulk_timepoint_map_mls["tp3"])
metadata$description.alt_nlb[metadata$timepoint == "tp4"] <- gsub("\n", " ", bulk_timepoint_map_mls["tp4"])
metadata$description.alt_nlb[metadata$timepoint == "tp5"] <- gsub("\n", " ", bulk_timepoint_map_mls["tp5"])
metadata$description.alt_nlb[metadata$timepoint == "tp6"] <- gsub("\n", " ", bulk_timepoint_map_mls["tp6"])
# alternative2
metadata$description.alt2 <- bulk_timepoint_map_mlm["tp0"]
metadata$description.alt2[metadata$timepoint == "tp1"] <- bulk_timepoint_map_mlm["tp1"]
metadata$description.alt2[metadata$timepoint == "tp2"] <- bulk_timepoint_map_mlm["tp2"]
metadata$description.alt2[metadata$timepoint == "tp3"] <- bulk_timepoint_map_mlm["tp3"]
metadata$description.alt2[metadata$timepoint == "tp4"] <- bulk_timepoint_map_mlm["tp4"]
metadata$description.alt2[metadata$timepoint == "tp5"] <- bulk_timepoint_map_mlm["tp5"]
metadata$description.alt2[metadata$timepoint == "tp6"] <- bulk_timepoint_map_mlm["tp6"]
# Pre-diff
metadata$timeline <- "other"
metadata$timeline[metadata$timepoint == "tp0"] <- "Pre-diff"batch correction by donor using ComBat-seq
# 'ComBat-seq()' requires integers
forceMatrixToInteger <- function(m){
apply (m, c (1, 2), function (x) {
(as.integer(x))
})
}
all_counts.integer <- forceMatrixToInteger(as.matrix(all_counts))
row.names(all_counts.integer) <- row.names(all_counts)
# actual batch correction
all_counts.CBSeq_byD <- ComBat_seq(all_counts.integer, batch=metadata$donor, group=NULL)Found 6 batches
Using null model in ComBat-seq.
Adjusting for 0 covariate(s) or covariate level(s)
Estimating dispersions
Fitting the GLM model
Shrinkage off - using GLM estimates for parameters
Adjusting the data
row.names(all_counts.CBSeq_byD) <- row.names(all_counts)
# calculate cpm values on the complete data set first
all_counts.bc.cpm <- cpm(all_counts.CBSeq_byD, log=FALSE)set up DGE
only one design matrix for Figures: 1f, ED3a, ED3d, ED3e, ED5a
groups_all <- metadata$timepoint_B_H
donor <- metadata$donor
# generate DGEList object (we work with the raw counts)
counts.all.asDGEList <- DGEList(counts=all_counts,
genes=rownames(all_counts),
group = groups_all )
counts.all.asDGEListAn object of class "DGEList"
$counts
B032_tp0_NB_NH B044_tp0_NB_NH B050_tp0_NB_NH B066_tp0_NB_NH
TSPAN6 2330.149 1669.757 2323.153 2178.668
TNMD 0.000 0.000 0.000 0.000
DPM1 1057.640 947.221 1898.128 1782.786
SCYL3 454.987 408.387 516.702 585.629
C1orf112 297.605 257.085 652.944 540.747
B078_tp0_NB_NH B080_tp0_NB_NH B032_tp1_NB_H B032_tp1_NB_NH
TSPAN6 2701.845 1941.214 2974.309 3363.321
TNMD 0.000 0.000 1.000 1.000
DPM1 1555.317 1670.424 1355.103 1017.533
SCYL3 491.001 505.327 569.013 635.134
C1orf112 562.380 776.523 207.487 238.741
B044_tp1_NB_H B044_tp1_NB_NH B050_tp1_NB_H B050_tp1_NB_NH
TSPAN6 1785.138 2988.257 3364.456 2172.189
TNMD 0.000 0.000 0.000 0.000
DPM1 833.345 1167.385 1371.059 964.385
SCYL3 450.662 639.321 615.183 523.009
C1orf112 106.805 242.218 183.012 201.001
B066_tp1_NB_H B066_tp1_NB_NH B078_tp1_NB_H B078_tp1_NB_NH
TSPAN6 1620.574 1486.001 1953.345 1571.570
TNMD 0.000 0.000 0.000 0.000
DPM1 898.820 789.880 857.830 1010.559
SCYL3 393.422 510.000 368.000 501.726
C1orf112 88.999 113.001 98.000 156.078
B080_tp1_NB_H B080_tp1_NB_NH B032_tp2_NB_H B032_tp2_NB_NH
TSPAN6 2214.665 1988.146 2545.154 4336.623
TNMD 0.000 0.000 0.000 1.004
DPM1 825.908 895.224 842.792 1120.162
SCYL3 546.883 441.936 500.231 824.888
C1orf112 96.618 146.963 257.938 292.632
B044_tp2_NB_H B044_tp2_NB_NH B050_tp2_NB_H B050_tp2_NB_NH
TSPAN6 1876.503 2702.298 2934.437 2295.110
TNMD 0.000 0.000 2.000 0.000
DPM1 723.453 974.237 1405.790 1127.157
SCYL3 431.433 688.475 698.526 563.000
C1orf112 114.247 252.666 261.026 359.000
B066_tp2_NB_H B066_tp2_NB_NH B078_tp2_NB_H B078_tp2_NB_NH
TSPAN6 1651.000 1907.695 1959.843 1928.319
TNMD 0.000 0.000 0.000 0.000
DPM1 734.001 1137.242 921.103 1024.922
SCYL3 380.062 625.350 500.304 480.033
C1orf112 154.001 266.930 166.999 276.000
B080_tp2_NB_H B080_tp2_NB_NH B032_tp3_NB_H B032_tp3_NB_NH
TSPAN6 1279.781 832.087 1907.078 3079.175
TNMD 0.000 0.000 0.000 0.000
DPM1 511.003 512.105 491.394 813.765
SCYL3 452.715 295.261 525.007 576.113
C1orf112 118.177 150.360 274.909 202.922
B044_tp3_NB_H B044_tp3_NB_NH B050_tp3_NB_H B050_tp3_NB_NH
TSPAN6 2022.268 2281.337 2705.818 2026.441
TNMD 0.000 0.000 0.000 1.000
DPM1 769.528 785.079 1145.941 1006.223
SCYL3 460.795 472.769 525.977 445.299
C1orf112 128.736 204.576 223.257 228.026
B066_tp3_NB_H B066_tp3_NB_NH B078_tp3_NB_H B078_tp3_NB_NH
TSPAN6 1418.719 1253.236 2006.202 1781.443
TNMD 0.000 0.000 0.000 0.000
DPM1 679.983 681.048 668.810 923.999
SCYL3 365.000 355.007 395.009 452.134
C1orf112 170.000 129.000 104.000 247.001
B080_tp3_NB_H B080_tp3_NB_NH B032_tp4_B_H B032_tp4_B_NH B032_tp4_NB_H
TSPAN6 1139.296 1665.859 1712.537 2589.469 1511.922
TNMD 0.000 0.000 1.000 0.000 0.000
DPM1 501.126 816.451 1211.173 1399.578 365.061
SCYL3 397.463 403.690 365.607 448.356 335.678
C1orf112 206.566 317.140 514.024 570.754 202.999
B032_tp4_NB_NH B044_tp4_B_H B044_tp4_B_NH B044_tp4_NB_H B044_tp4_NB_NH
TSPAN6 2832.630 1781.079 1626.481 2193.526 2103.678
TNMD 0.000 0.000 0.000 0.000 0.000
DPM1 523.200 1574.285 1588.111 812.236 909.040
SCYL3 497.140 454.168 319.324 439.047 431.911
C1orf112 156.971 400.628 451.690 163.855 210.561
B050_tp4_B_H B050_tp4_B_NH B050_tp4_NB_H B050_tp4_NB_NH B066_tp4_B_H
TSPAN6 2340.607 2207.438 2758.369 2588.953 2111.229
TNMD 0.000 0.000 0.000 0.000 0.000
DPM1 1995.332 1854.014 1160.185 1193.282 2159.178
SCYL3 654.503 567.400 600.064 515.000 620.316
C1orf112 630.985 540.000 213.063 233.002 605.000
B066_tp4_B_NH B066_tp4_NB_H B066_tp4_NB_NH B078_tp4_B_H B078_tp4_B_NH
TSPAN6 1712.219 1231.137 1238.061 2398.771 1561.133
TNMD 0.000 0.000 0.000 0.000 0.000
DPM1 1558.188 712.001 710.351 2079.461 1747.254
SCYL3 538.044 409.001 456.117 669.248 501.522
C1orf112 617.598 214.000 161.956 517.216 522.585
B078_tp4_NB_H B078_tp4_NB_NH B080_tp4_B_H B080_tp4_B_NH B080_tp4_NB_H
TSPAN6 1941.175 1498.209 1337.427 1965.989 1416.113
TNMD 0.000 0.000 0.000 0.000 0.000
DPM1 685.101 770.595 1122.677 1605.993 528.655
SCYL3 404.001 355.999 447.412 528.414 397.029
C1orf112 151.259 211.064 516.636 601.728 214.142
B080_tp4_NB_NH B032_tp5_B_H B032_tp5_B_NH B032_tp5_NB_H B032_tp5_NB_NH
TSPAN6 1045.357 2293.025 2703.466 774.906 3826.274
TNMD 0.000 1.000 1.000 0.000 0.000
DPM1 524.020 1625.211 1306.424 250.923 814.101
SCYL3 347.922 309.703 377.699 194.260 619.203
C1orf112 132.906 683.186 501.659 114.270 281.023
B044_tp5_B_H B044_tp5_B_NH B044_tp5_NB_H B044_tp5_NB_NH B050_tp5_B_H
TSPAN6 2787.210 1744.425 2582.013 2138.224 2951.892
TNMD 0.000 0.000 2.000 0.000 0.000
DPM1 1705.423 1222.215 927.060 860.911 2157.301
SCYL3 480.506 312.133 383.643 394.666 563.000
C1orf112 506.535 476.100 176.082 231.507 656.351
B050_tp5_B_NH B050_tp5_NB_H B050_tp5_NB_NH B066_tp5_B_H B066_tp5_B_NH
TSPAN6 3027.696 2856.884 2026.269 1438.731 1847.317
TNMD 0.000 0.000 0.000 0.000 0.000
DPM1 2143.770 1210.814 941.042 1450.999 2061.645
SCYL3 599.000 522.143 413.002 316.000 505.579
C1orf112 724.012 199.000 183.039 517.101 714.007
B066_tp5_NB_H B066_tp5_NB_NH B078_tp5_B_H B078_tp5_B_NH B078_tp5_NB_H
TSPAN6 2196.342 1366.697 3292.663 2734.971 1868.026
TNMD 0.000 0.000 0.000 0.000 0.000
DPM1 1226.282 757.420 1842.883 1907.728 646.581
SCYL3 441.458 348.261 645.144 535.413 296.257
C1orf112 150.001 153.000 560.135 670.537 145.000
B078_tp5_NB_NH B080_tp5_B_H B080_tp5_B_NH B080_tp5_NB_H B080_tp5_NB_NH
TSPAN6 1370.435 1544.203 1861.981 1336.852 1294.003
TNMD 0.000 0.000 0.000 0.000 0.000
DPM1 704.719 1182.420 1456.517 578.657 533.926
SCYL3 299.497 376.380 415.435 317.958 451.563
C1orf112 173.008 419.747 495.404 114.323 126.854
B032_tp6_B_H B032_tp6_B_NH B032_tp6_NB_H B032_tp6_NB_NH B044_tp6_B_H
TSPAN6 2140.944 2302.157 1260.407 2118.888 1967.731
TNMD 1.000 1.000 0.000 1.000 0.000
DPM1 1225.324 733.668 470.140 628.239 1453.737
SCYL3 338.175 375.167 319.911 580.092 420.029
C1orf112 439.444 359.567 159.121 208.633 556.262
B044_tp6_B_NH B044_tp6_NB_H B044_tp6_NB_NH B050_tp6_B_H B050_tp6_B_NH
TSPAN6 1674.798 1746.140 1532.562 3034.905 2505.743
TNMD 1.000 0.000 0.000 3.000 2.000
DPM1 1260.477 766.634 698.540 1884.420 1658.094
SCYL3 365.444 390.249 413.001 542.169 476.142
C1orf112 509.695 150.858 178.230 615.150 615.392
B050_tp6_NB_H B050_tp6_NB_NH B066_tp6_B_H B066_tp6_B_NH B066_tp6_NB_H
TSPAN6 3434.312 2182.102 1661.101 1887.268 1563.185
TNMD 0.000 1.000 0.000 0.000 0.000
DPM1 1124.856 944.508 2206.904 2293.305 652.809
SCYL3 405.188 354.011 435.413 466.060 261.000
C1orf112 204.124 235.001 852.744 942.025 165.000
B066_tp6_NB_NH B078_tp6_B_H B078_tp6_B_NH B078_tp6_NB_H B078_tp6_NB_NH
TSPAN6 1458.798 2651.344 2745.033 2378.005 1124.000
TNMD 0.000 0.000 3.000 0.000 0.000
DPM1 648.289 1550.912 1715.664 963.277 633.126
SCYL3 316.150 535.177 499.204 331.003 267.000
C1orf112 156.063 605.840 648.215 185.073 148.000
B080_tp6_B_H B080_tp6_B_NH B080_tp6_NB_H B080_tp6_NB_NH
TSPAN6 1388.112 2388.716 1195.576 1464.287
TNMD 0.000 0.000 0.000 0.000
DPM1 1183.336 2158.858 382.530 555.890
SCYL3 372.075 669.658 281.414 549.054
C1orf112 488.236 939.782 103.227 215.637
60225 more rows ...
$samples
group lib.size norm.factors
B032_tp0_NB_NH tp0_NB_NH 30144167 1
B044_tp0_NB_NH tp0_NB_NH 28732415 1
B050_tp0_NB_NH tp0_NB_NH 37521705 1
B066_tp0_NB_NH tp0_NB_NH 37201395 1
B078_tp0_NB_NH tp0_NB_NH 32579189 1
109 more rows ...
$genes
genes
TSPAN6 TSPAN6
TNMD TNMD
DPM1 DPM1
SCYL3 SCYL3
C1orf112 C1orf112
60225 more rows ...
# filter lowly expressed genes
# we have 6 replicas (aka donors)
mincpm <- 1
minsamples <- 6
counts.all.asDGEList <- counts.all.asDGEList[ rowSums(all_counts.cpm > mincpm) >= minsamples, , keep.lib.size=FALSE]
dim(counts.all.asDGEList)[1] 17438 114
# normalization
counts.all.asDGEList <- calcNormFactors(counts.all.asDGEList)
head(counts.all.asDGEList$samples) group lib.size norm.factors
B032_tp0_NB_NH tp0_NB_NH 30113869 1.122469
B044_tp0_NB_NH tp0_NB_NH 28701049 1.090766
B050_tp0_NB_NH tp0_NB_NH 37487039 1.180351
B066_tp0_NB_NH tp0_NB_NH 37168756 1.135219
B078_tp0_NB_NH tp0_NB_NH 32548843 1.170481
B080_tp0_NB_NH tp0_NB_NH 41483708 1.154863
plotMDS(counts.all.asDGEList)set up design matrix
(B032_tp0_NB_NH is used as intercept)
# use model.matrix
design_all <- model.matrix(~donor +group, data = counts.all.asDGEList$samples)
colnames(design_all)[2:24] <- substring(colnames(design_all)[2:24],6,15)
design_all (Intercept) B044 B050 B066 B078 B080 tp1_NB_H tp1_NB_NH tp2_NB_H
B032_tp0_NB_NH 1 0 0 0 0 0 0 0 0
B044_tp0_NB_NH 1 1 0 0 0 0 0 0 0
B050_tp0_NB_NH 1 0 1 0 0 0 0 0 0
B066_tp0_NB_NH 1 0 0 1 0 0 0 0 0
B078_tp0_NB_NH 1 0 0 0 1 0 0 0 0
B080_tp0_NB_NH 1 0 0 0 0 1 0 0 0
B032_tp1_NB_H 1 0 0 0 0 0 1 0 0
B032_tp1_NB_NH 1 0 0 0 0 0 0 1 0
B044_tp1_NB_H 1 1 0 0 0 0 1 0 0
B044_tp1_NB_NH 1 1 0 0 0 0 0 1 0
B050_tp1_NB_H 1 0 1 0 0 0 1 0 0
B050_tp1_NB_NH 1 0 1 0 0 0 0 1 0
B066_tp1_NB_H 1 0 0 1 0 0 1 0 0
B066_tp1_NB_NH 1 0 0 1 0 0 0 1 0
B078_tp1_NB_H 1 0 0 0 1 0 1 0 0
B078_tp1_NB_NH 1 0 0 0 1 0 0 1 0
B080_tp1_NB_H 1 0 0 0 0 1 1 0 0
B080_tp1_NB_NH 1 0 0 0 0 1 0 1 0
B032_tp2_NB_H 1 0 0 0 0 0 0 0 1
B032_tp2_NB_NH 1 0 0 0 0 0 0 0 0
B044_tp2_NB_H 1 1 0 0 0 0 0 0 1
B044_tp2_NB_NH 1 1 0 0 0 0 0 0 0
B050_tp2_NB_H 1 0 1 0 0 0 0 0 1
B050_tp2_NB_NH 1 0 1 0 0 0 0 0 0
B066_tp2_NB_H 1 0 0 1 0 0 0 0 1
B066_tp2_NB_NH 1 0 0 1 0 0 0 0 0
B078_tp2_NB_H 1 0 0 0 1 0 0 0 1
B078_tp2_NB_NH 1 0 0 0 1 0 0 0 0
B080_tp2_NB_H 1 0 0 0 0 1 0 0 1
B080_tp2_NB_NH 1 0 0 0 0 1 0 0 0
B032_tp3_NB_H 1 0 0 0 0 0 0 0 0
B032_tp3_NB_NH 1 0 0 0 0 0 0 0 0
B044_tp3_NB_H 1 1 0 0 0 0 0 0 0
B044_tp3_NB_NH 1 1 0 0 0 0 0 0 0
B050_tp3_NB_H 1 0 1 0 0 0 0 0 0
B050_tp3_NB_NH 1 0 1 0 0 0 0 0 0
B066_tp3_NB_H 1 0 0 1 0 0 0 0 0
B066_tp3_NB_NH 1 0 0 1 0 0 0 0 0
B078_tp3_NB_H 1 0 0 0 1 0 0 0 0
B078_tp3_NB_NH 1 0 0 0 1 0 0 0 0
B080_tp3_NB_H 1 0 0 0 0 1 0 0 0
B080_tp3_NB_NH 1 0 0 0 0 1 0 0 0
B032_tp4_B_H 1 0 0 0 0 0 0 0 0
B032_tp4_B_NH 1 0 0 0 0 0 0 0 0
B032_tp4_NB_H 1 0 0 0 0 0 0 0 0
B032_tp4_NB_NH 1 0 0 0 0 0 0 0 0
B044_tp4_B_H 1 1 0 0 0 0 0 0 0
B044_tp4_B_NH 1 1 0 0 0 0 0 0 0
B044_tp4_NB_H 1 1 0 0 0 0 0 0 0
B044_tp4_NB_NH 1 1 0 0 0 0 0 0 0
B050_tp4_B_H 1 0 1 0 0 0 0 0 0
B050_tp4_B_NH 1 0 1 0 0 0 0 0 0
B050_tp4_NB_H 1 0 1 0 0 0 0 0 0
B050_tp4_NB_NH 1 0 1 0 0 0 0 0 0
B066_tp4_B_H 1 0 0 1 0 0 0 0 0
B066_tp4_B_NH 1 0 0 1 0 0 0 0 0
B066_tp4_NB_H 1 0 0 1 0 0 0 0 0
B066_tp4_NB_NH 1 0 0 1 0 0 0 0 0
B078_tp4_B_H 1 0 0 0 1 0 0 0 0
B078_tp4_B_NH 1 0 0 0 1 0 0 0 0
B078_tp4_NB_H 1 0 0 0 1 0 0 0 0
B078_tp4_NB_NH 1 0 0 0 1 0 0 0 0
B080_tp4_B_H 1 0 0 0 0 1 0 0 0
B080_tp4_B_NH 1 0 0 0 0 1 0 0 0
B080_tp4_NB_H 1 0 0 0 0 1 0 0 0
B080_tp4_NB_NH 1 0 0 0 0 1 0 0 0
B032_tp5_B_H 1 0 0 0 0 0 0 0 0
B032_tp5_B_NH 1 0 0 0 0 0 0 0 0
B032_tp5_NB_H 1 0 0 0 0 0 0 0 0
B032_tp5_NB_NH 1 0 0 0 0 0 0 0 0
B044_tp5_B_H 1 1 0 0 0 0 0 0 0
B044_tp5_B_NH 1 1 0 0 0 0 0 0 0
B044_tp5_NB_H 1 1 0 0 0 0 0 0 0
B044_tp5_NB_NH 1 1 0 0 0 0 0 0 0
B050_tp5_B_H 1 0 1 0 0 0 0 0 0
B050_tp5_B_NH 1 0 1 0 0 0 0 0 0
B050_tp5_NB_H 1 0 1 0 0 0 0 0 0
B050_tp5_NB_NH 1 0 1 0 0 0 0 0 0
B066_tp5_B_H 1 0 0 1 0 0 0 0 0
B066_tp5_B_NH 1 0 0 1 0 0 0 0 0
B066_tp5_NB_H 1 0 0 1 0 0 0 0 0
B066_tp5_NB_NH 1 0 0 1 0 0 0 0 0
B078_tp5_B_H 1 0 0 0 1 0 0 0 0
B078_tp5_B_NH 1 0 0 0 1 0 0 0 0
B078_tp5_NB_H 1 0 0 0 1 0 0 0 0
B078_tp5_NB_NH 1 0 0 0 1 0 0 0 0
B080_tp5_B_H 1 0 0 0 0 1 0 0 0
B080_tp5_B_NH 1 0 0 0 0 1 0 0 0
B080_tp5_NB_H 1 0 0 0 0 1 0 0 0
B080_tp5_NB_NH 1 0 0 0 0 1 0 0 0
B032_tp6_B_H 1 0 0 0 0 0 0 0 0
B032_tp6_B_NH 1 0 0 0 0 0 0 0 0
B032_tp6_NB_H 1 0 0 0 0 0 0 0 0
B032_tp6_NB_NH 1 0 0 0 0 0 0 0 0
B044_tp6_B_H 1 1 0 0 0 0 0 0 0
B044_tp6_B_NH 1 1 0 0 0 0 0 0 0
B044_tp6_NB_H 1 1 0 0 0 0 0 0 0
B044_tp6_NB_NH 1 1 0 0 0 0 0 0 0
B050_tp6_B_H 1 0 1 0 0 0 0 0 0
B050_tp6_B_NH 1 0 1 0 0 0 0 0 0
B050_tp6_NB_H 1 0 1 0 0 0 0 0 0
B050_tp6_NB_NH 1 0 1 0 0 0 0 0 0
B066_tp6_B_H 1 0 0 1 0 0 0 0 0
B066_tp6_B_NH 1 0 0 1 0 0 0 0 0
B066_tp6_NB_H 1 0 0 1 0 0 0 0 0
B066_tp6_NB_NH 1 0 0 1 0 0 0 0 0
B078_tp6_B_H 1 0 0 0 1 0 0 0 0
B078_tp6_B_NH 1 0 0 0 1 0 0 0 0
B078_tp6_NB_H 1 0 0 0 1 0 0 0 0
B078_tp6_NB_NH 1 0 0 0 1 0 0 0 0
B080_tp6_B_H 1 0 0 0 0 1 0 0 0
B080_tp6_B_NH 1 0 0 0 0 1 0 0 0
B080_tp6_NB_H 1 0 0 0 0 1 0 0 0
B080_tp6_NB_NH 1 0 0 0 0 1 0 0 0
tp2_NB_NH tp3_NB_H tp3_NB_NH tp4_B_H tp4_B_NH tp4_NB_H tp4_NB_NH
B032_tp0_NB_NH 0 0 0 0 0 0 0
B044_tp0_NB_NH 0 0 0 0 0 0 0
B050_tp0_NB_NH 0 0 0 0 0 0 0
B066_tp0_NB_NH 0 0 0 0 0 0 0
B078_tp0_NB_NH 0 0 0 0 0 0 0
B080_tp0_NB_NH 0 0 0 0 0 0 0
B032_tp1_NB_H 0 0 0 0 0 0 0
B032_tp1_NB_NH 0 0 0 0 0 0 0
B044_tp1_NB_H 0 0 0 0 0 0 0
B044_tp1_NB_NH 0 0 0 0 0 0 0
B050_tp1_NB_H 0 0 0 0 0 0 0
B050_tp1_NB_NH 0 0 0 0 0 0 0
B066_tp1_NB_H 0 0 0 0 0 0 0
B066_tp1_NB_NH 0 0 0 0 0 0 0
B078_tp1_NB_H 0 0 0 0 0 0 0
B078_tp1_NB_NH 0 0 0 0 0 0 0
B080_tp1_NB_H 0 0 0 0 0 0 0
B080_tp1_NB_NH 0 0 0 0 0 0 0
B032_tp2_NB_H 0 0 0 0 0 0 0
B032_tp2_NB_NH 1 0 0 0 0 0 0
B044_tp2_NB_H 0 0 0 0 0 0 0
B044_tp2_NB_NH 1 0 0 0 0 0 0
B050_tp2_NB_H 0 0 0 0 0 0 0
B050_tp2_NB_NH 1 0 0 0 0 0 0
B066_tp2_NB_H 0 0 0 0 0 0 0
B066_tp2_NB_NH 1 0 0 0 0 0 0
B078_tp2_NB_H 0 0 0 0 0 0 0
B078_tp2_NB_NH 1 0 0 0 0 0 0
B080_tp2_NB_H 0 0 0 0 0 0 0
B080_tp2_NB_NH 1 0 0 0 0 0 0
B032_tp3_NB_H 0 1 0 0 0 0 0
B032_tp3_NB_NH 0 0 1 0 0 0 0
B044_tp3_NB_H 0 1 0 0 0 0 0
B044_tp3_NB_NH 0 0 1 0 0 0 0
B050_tp3_NB_H 0 1 0 0 0 0 0
B050_tp3_NB_NH 0 0 1 0 0 0 0
B066_tp3_NB_H 0 1 0 0 0 0 0
B066_tp3_NB_NH 0 0 1 0 0 0 0
B078_tp3_NB_H 0 1 0 0 0 0 0
B078_tp3_NB_NH 0 0 1 0 0 0 0
B080_tp3_NB_H 0 1 0 0 0 0 0
B080_tp3_NB_NH 0 0 1 0 0 0 0
B032_tp4_B_H 0 0 0 1 0 0 0
B032_tp4_B_NH 0 0 0 0 1 0 0
B032_tp4_NB_H 0 0 0 0 0 1 0
B032_tp4_NB_NH 0 0 0 0 0 0 1
B044_tp4_B_H 0 0 0 1 0 0 0
B044_tp4_B_NH 0 0 0 0 1 0 0
B044_tp4_NB_H 0 0 0 0 0 1 0
B044_tp4_NB_NH 0 0 0 0 0 0 1
B050_tp4_B_H 0 0 0 1 0 0 0
B050_tp4_B_NH 0 0 0 0 1 0 0
B050_tp4_NB_H 0 0 0 0 0 1 0
B050_tp4_NB_NH 0 0 0 0 0 0 1
B066_tp4_B_H 0 0 0 1 0 0 0
B066_tp4_B_NH 0 0 0 0 1 0 0
B066_tp4_NB_H 0 0 0 0 0 1 0
B066_tp4_NB_NH 0 0 0 0 0 0 1
B078_tp4_B_H 0 0 0 1 0 0 0
B078_tp4_B_NH 0 0 0 0 1 0 0
B078_tp4_NB_H 0 0 0 0 0 1 0
B078_tp4_NB_NH 0 0 0 0 0 0 1
B080_tp4_B_H 0 0 0 1 0 0 0
B080_tp4_B_NH 0 0 0 0 1 0 0
B080_tp4_NB_H 0 0 0 0 0 1 0
B080_tp4_NB_NH 0 0 0 0 0 0 1
B032_tp5_B_H 0 0 0 0 0 0 0
B032_tp5_B_NH 0 0 0 0 0 0 0
B032_tp5_NB_H 0 0 0 0 0 0 0
B032_tp5_NB_NH 0 0 0 0 0 0 0
B044_tp5_B_H 0 0 0 0 0 0 0
B044_tp5_B_NH 0 0 0 0 0 0 0
B044_tp5_NB_H 0 0 0 0 0 0 0
B044_tp5_NB_NH 0 0 0 0 0 0 0
B050_tp5_B_H 0 0 0 0 0 0 0
B050_tp5_B_NH 0 0 0 0 0 0 0
B050_tp5_NB_H 0 0 0 0 0 0 0
B050_tp5_NB_NH 0 0 0 0 0 0 0
B066_tp5_B_H 0 0 0 0 0 0 0
B066_tp5_B_NH 0 0 0 0 0 0 0
B066_tp5_NB_H 0 0 0 0 0 0 0
B066_tp5_NB_NH 0 0 0 0 0 0 0
B078_tp5_B_H 0 0 0 0 0 0 0
B078_tp5_B_NH 0 0 0 0 0 0 0
B078_tp5_NB_H 0 0 0 0 0 0 0
B078_tp5_NB_NH 0 0 0 0 0 0 0
B080_tp5_B_H 0 0 0 0 0 0 0
B080_tp5_B_NH 0 0 0 0 0 0 0
B080_tp5_NB_H 0 0 0 0 0 0 0
B080_tp5_NB_NH 0 0 0 0 0 0 0
B032_tp6_B_H 0 0 0 0 0 0 0
B032_tp6_B_NH 0 0 0 0 0 0 0
B032_tp6_NB_H 0 0 0 0 0 0 0
B032_tp6_NB_NH 0 0 0 0 0 0 0
B044_tp6_B_H 0 0 0 0 0 0 0
B044_tp6_B_NH 0 0 0 0 0 0 0
B044_tp6_NB_H 0 0 0 0 0 0 0
B044_tp6_NB_NH 0 0 0 0 0 0 0
B050_tp6_B_H 0 0 0 0 0 0 0
B050_tp6_B_NH 0 0 0 0 0 0 0
B050_tp6_NB_H 0 0 0 0 0 0 0
B050_tp6_NB_NH 0 0 0 0 0 0 0
B066_tp6_B_H 0 0 0 0 0 0 0
B066_tp6_B_NH 0 0 0 0 0 0 0
B066_tp6_NB_H 0 0 0 0 0 0 0
B066_tp6_NB_NH 0 0 0 0 0 0 0
B078_tp6_B_H 0 0 0 0 0 0 0
B078_tp6_B_NH 0 0 0 0 0 0 0
B078_tp6_NB_H 0 0 0 0 0 0 0
B078_tp6_NB_NH 0 0 0 0 0 0 0
B080_tp6_B_H 0 0 0 0 0 0 0
B080_tp6_B_NH 0 0 0 0 0 0 0
B080_tp6_NB_H 0 0 0 0 0 0 0
B080_tp6_NB_NH 0 0 0 0 0 0 0
tp5_B_H tp5_B_NH tp5_NB_H tp5_NB_NH tp6_B_H tp6_B_NH tp6_NB_H
B032_tp0_NB_NH 0 0 0 0 0 0 0
B044_tp0_NB_NH 0 0 0 0 0 0 0
B050_tp0_NB_NH 0 0 0 0 0 0 0
B066_tp0_NB_NH 0 0 0 0 0 0 0
B078_tp0_NB_NH 0 0 0 0 0 0 0
B080_tp0_NB_NH 0 0 0 0 0 0 0
B032_tp1_NB_H 0 0 0 0 0 0 0
B032_tp1_NB_NH 0 0 0 0 0 0 0
B044_tp1_NB_H 0 0 0 0 0 0 0
B044_tp1_NB_NH 0 0 0 0 0 0 0
B050_tp1_NB_H 0 0 0 0 0 0 0
B050_tp1_NB_NH 0 0 0 0 0 0 0
B066_tp1_NB_H 0 0 0 0 0 0 0
B066_tp1_NB_NH 0 0 0 0 0 0 0
B078_tp1_NB_H 0 0 0 0 0 0 0
B078_tp1_NB_NH 0 0 0 0 0 0 0
B080_tp1_NB_H 0 0 0 0 0 0 0
B080_tp1_NB_NH 0 0 0 0 0 0 0
B032_tp2_NB_H 0 0 0 0 0 0 0
B032_tp2_NB_NH 0 0 0 0 0 0 0
B044_tp2_NB_H 0 0 0 0 0 0 0
B044_tp2_NB_NH 0 0 0 0 0 0 0
B050_tp2_NB_H 0 0 0 0 0 0 0
B050_tp2_NB_NH 0 0 0 0 0 0 0
B066_tp2_NB_H 0 0 0 0 0 0 0
B066_tp2_NB_NH 0 0 0 0 0 0 0
B078_tp2_NB_H 0 0 0 0 0 0 0
B078_tp2_NB_NH 0 0 0 0 0 0 0
B080_tp2_NB_H 0 0 0 0 0 0 0
B080_tp2_NB_NH 0 0 0 0 0 0 0
B032_tp3_NB_H 0 0 0 0 0 0 0
B032_tp3_NB_NH 0 0 0 0 0 0 0
B044_tp3_NB_H 0 0 0 0 0 0 0
B044_tp3_NB_NH 0 0 0 0 0 0 0
B050_tp3_NB_H 0 0 0 0 0 0 0
B050_tp3_NB_NH 0 0 0 0 0 0 0
B066_tp3_NB_H 0 0 0 0 0 0 0
B066_tp3_NB_NH 0 0 0 0 0 0 0
B078_tp3_NB_H 0 0 0 0 0 0 0
B078_tp3_NB_NH 0 0 0 0 0 0 0
B080_tp3_NB_H 0 0 0 0 0 0 0
B080_tp3_NB_NH 0 0 0 0 0 0 0
B032_tp4_B_H 0 0 0 0 0 0 0
B032_tp4_B_NH 0 0 0 0 0 0 0
B032_tp4_NB_H 0 0 0 0 0 0 0
B032_tp4_NB_NH 0 0 0 0 0 0 0
B044_tp4_B_H 0 0 0 0 0 0 0
B044_tp4_B_NH 0 0 0 0 0 0 0
B044_tp4_NB_H 0 0 0 0 0 0 0
B044_tp4_NB_NH 0 0 0 0 0 0 0
B050_tp4_B_H 0 0 0 0 0 0 0
B050_tp4_B_NH 0 0 0 0 0 0 0
B050_tp4_NB_H 0 0 0 0 0 0 0
B050_tp4_NB_NH 0 0 0 0 0 0 0
B066_tp4_B_H 0 0 0 0 0 0 0
B066_tp4_B_NH 0 0 0 0 0 0 0
B066_tp4_NB_H 0 0 0 0 0 0 0
B066_tp4_NB_NH 0 0 0 0 0 0 0
B078_tp4_B_H 0 0 0 0 0 0 0
B078_tp4_B_NH 0 0 0 0 0 0 0
B078_tp4_NB_H 0 0 0 0 0 0 0
B078_tp4_NB_NH 0 0 0 0 0 0 0
B080_tp4_B_H 0 0 0 0 0 0 0
B080_tp4_B_NH 0 0 0 0 0 0 0
B080_tp4_NB_H 0 0 0 0 0 0 0
B080_tp4_NB_NH 0 0 0 0 0 0 0
B032_tp5_B_H 1 0 0 0 0 0 0
B032_tp5_B_NH 0 1 0 0 0 0 0
B032_tp5_NB_H 0 0 1 0 0 0 0
B032_tp5_NB_NH 0 0 0 1 0 0 0
B044_tp5_B_H 1 0 0 0 0 0 0
B044_tp5_B_NH 0 1 0 0 0 0 0
B044_tp5_NB_H 0 0 1 0 0 0 0
B044_tp5_NB_NH 0 0 0 1 0 0 0
B050_tp5_B_H 1 0 0 0 0 0 0
B050_tp5_B_NH 0 1 0 0 0 0 0
B050_tp5_NB_H 0 0 1 0 0 0 0
B050_tp5_NB_NH 0 0 0 1 0 0 0
B066_tp5_B_H 1 0 0 0 0 0 0
B066_tp5_B_NH 0 1 0 0 0 0 0
B066_tp5_NB_H 0 0 1 0 0 0 0
B066_tp5_NB_NH 0 0 0 1 0 0 0
B078_tp5_B_H 1 0 0 0 0 0 0
B078_tp5_B_NH 0 1 0 0 0 0 0
B078_tp5_NB_H 0 0 1 0 0 0 0
B078_tp5_NB_NH 0 0 0 1 0 0 0
B080_tp5_B_H 1 0 0 0 0 0 0
B080_tp5_B_NH 0 1 0 0 0 0 0
B080_tp5_NB_H 0 0 1 0 0 0 0
B080_tp5_NB_NH 0 0 0 1 0 0 0
B032_tp6_B_H 0 0 0 0 1 0 0
B032_tp6_B_NH 0 0 0 0 0 1 0
B032_tp6_NB_H 0 0 0 0 0 0 1
B032_tp6_NB_NH 0 0 0 0 0 0 0
B044_tp6_B_H 0 0 0 0 1 0 0
B044_tp6_B_NH 0 0 0 0 0 1 0
B044_tp6_NB_H 0 0 0 0 0 0 1
B044_tp6_NB_NH 0 0 0 0 0 0 0
B050_tp6_B_H 0 0 0 0 1 0 0
B050_tp6_B_NH 0 0 0 0 0 1 0
B050_tp6_NB_H 0 0 0 0 0 0 1
B050_tp6_NB_NH 0 0 0 0 0 0 0
B066_tp6_B_H 0 0 0 0 1 0 0
B066_tp6_B_NH 0 0 0 0 0 1 0
B066_tp6_NB_H 0 0 0 0 0 0 1
B066_tp6_NB_NH 0 0 0 0 0 0 0
B078_tp6_B_H 0 0 0 0 1 0 0
B078_tp6_B_NH 0 0 0 0 0 1 0
B078_tp6_NB_H 0 0 0 0 0 0 1
B078_tp6_NB_NH 0 0 0 0 0 0 0
B080_tp6_B_H 0 0 0 0 1 0 0
B080_tp6_B_NH 0 0 0 0 0 1 0
B080_tp6_NB_H 0 0 0 0 0 0 1
B080_tp6_NB_NH 0 0 0 0 0 0 0
tp6_NB_NH
B032_tp0_NB_NH 0
B044_tp0_NB_NH 0
B050_tp0_NB_NH 0
B066_tp0_NB_NH 0
B078_tp0_NB_NH 0
B080_tp0_NB_NH 0
B032_tp1_NB_H 0
B032_tp1_NB_NH 0
B044_tp1_NB_H 0
B044_tp1_NB_NH 0
B050_tp1_NB_H 0
B050_tp1_NB_NH 0
B066_tp1_NB_H 0
B066_tp1_NB_NH 0
B078_tp1_NB_H 0
B078_tp1_NB_NH 0
B080_tp1_NB_H 0
B080_tp1_NB_NH 0
B032_tp2_NB_H 0
B032_tp2_NB_NH 0
B044_tp2_NB_H 0
B044_tp2_NB_NH 0
B050_tp2_NB_H 0
B050_tp2_NB_NH 0
B066_tp2_NB_H 0
B066_tp2_NB_NH 0
B078_tp2_NB_H 0
B078_tp2_NB_NH 0
B080_tp2_NB_H 0
B080_tp2_NB_NH 0
B032_tp3_NB_H 0
B032_tp3_NB_NH 0
B044_tp3_NB_H 0
B044_tp3_NB_NH 0
B050_tp3_NB_H 0
B050_tp3_NB_NH 0
B066_tp3_NB_H 0
B066_tp3_NB_NH 0
B078_tp3_NB_H 0
B078_tp3_NB_NH 0
B080_tp3_NB_H 0
B080_tp3_NB_NH 0
B032_tp4_B_H 0
B032_tp4_B_NH 0
B032_tp4_NB_H 0
B032_tp4_NB_NH 0
B044_tp4_B_H 0
B044_tp4_B_NH 0
B044_tp4_NB_H 0
B044_tp4_NB_NH 0
B050_tp4_B_H 0
B050_tp4_B_NH 0
B050_tp4_NB_H 0
B050_tp4_NB_NH 0
B066_tp4_B_H 0
B066_tp4_B_NH 0
B066_tp4_NB_H 0
B066_tp4_NB_NH 0
B078_tp4_B_H 0
B078_tp4_B_NH 0
B078_tp4_NB_H 0
B078_tp4_NB_NH 0
B080_tp4_B_H 0
B080_tp4_B_NH 0
B080_tp4_NB_H 0
B080_tp4_NB_NH 0
B032_tp5_B_H 0
B032_tp5_B_NH 0
B032_tp5_NB_H 0
B032_tp5_NB_NH 0
B044_tp5_B_H 0
B044_tp5_B_NH 0
B044_tp5_NB_H 0
B044_tp5_NB_NH 0
B050_tp5_B_H 0
B050_tp5_B_NH 0
B050_tp5_NB_H 0
B050_tp5_NB_NH 0
B066_tp5_B_H 0
B066_tp5_B_NH 0
B066_tp5_NB_H 0
B066_tp5_NB_NH 0
B078_tp5_B_H 0
B078_tp5_B_NH 0
B078_tp5_NB_H 0
B078_tp5_NB_NH 0
B080_tp5_B_H 0
B080_tp5_B_NH 0
B080_tp5_NB_H 0
B080_tp5_NB_NH 0
B032_tp6_B_H 0
B032_tp6_B_NH 0
B032_tp6_NB_H 0
B032_tp6_NB_NH 1
B044_tp6_B_H 0
B044_tp6_B_NH 0
B044_tp6_NB_H 0
B044_tp6_NB_NH 1
B050_tp6_B_H 0
B050_tp6_B_NH 0
B050_tp6_NB_H 0
B050_tp6_NB_NH 1
B066_tp6_B_H 0
B066_tp6_B_NH 0
B066_tp6_NB_H 0
B066_tp6_NB_NH 1
B078_tp6_B_H 0
B078_tp6_B_NH 0
B078_tp6_NB_H 0
B078_tp6_NB_NH 1
B080_tp6_B_H 0
B080_tp6_B_NH 0
B080_tp6_NB_H 0
B080_tp6_NB_NH 1
attr(,"assign")
[1] 0 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
attr(,"contrasts")
attr(,"contrasts")$donor
[1] "contr.treatment"
attr(,"contrasts")$group
[1] "contr.treatment"
dispersion estimation and fitting
# dispersion estimation
counts.all.asDGEList <- estimateDisp(counts.all.asDGEList, design_all)
counts.all.asDGEList$common.dispersion[1] 0.09312949
plotBCV(counts.all.asDGEList)# fitting
fit_all <- glmQLFit(counts.all.asDGEList, design_all)
plotQLDisp(fit_all)Fig 1 d
PCA of batch corrected samples in the IVMC protocol analysed by bulk RNA sequencing, coloured by timepoint (n=6 independent organoid lines).
using B0xx_tp0_NB_NH, B0xx_tp1_NB_H, B0xx_tp2_NB_H, B0xx_tp3_NB_H, B0xx_tp4_B_H, B0xx_tp5_B_H, B0xx_tp6_B_H => “sel_IVMC”
metadata.sel_IVMC <- mutate(as.data.frame(metadata), sel_IVMC = case_when(
timepoint == "tp0" ~ TRUE,
timepoint == "tp1" & hormones == "H" ~ TRUE,
timepoint == "tp2" & hormones == "H" ~ TRUE,
timepoint == "tp3" & hormones == "H" ~ TRUE,
timepoint == "tp4" & hormones == "H" & breakdown == "B" ~ TRUE,
timepoint == "tp5" & hormones == "H" & breakdown == "B" ~ TRUE,
timepoint == "tp6" & hormones == "H" & breakdown == "B" ~ TRUE,
TRUE ~ FALSE))
sel_IVMC <- metadata$sample[metadata.sel_IVMC$sel_IVMC]
sel_IVMC [1] "B032_tp0_NB_NH" "B044_tp0_NB_NH" "B050_tp0_NB_NH" "B066_tp0_NB_NH"
[5] "B078_tp0_NB_NH" "B080_tp0_NB_NH" "B032_tp1_NB_H" "B044_tp1_NB_H"
[9] "B050_tp1_NB_H" "B066_tp1_NB_H" "B078_tp1_NB_H" "B080_tp1_NB_H"
[13] "B032_tp2_NB_H" "B044_tp2_NB_H" "B050_tp2_NB_H" "B066_tp2_NB_H"
[17] "B078_tp2_NB_H" "B080_tp2_NB_H" "B032_tp3_NB_H" "B044_tp3_NB_H"
[21] "B050_tp3_NB_H" "B066_tp3_NB_H" "B078_tp3_NB_H" "B080_tp3_NB_H"
[25] "B032_tp4_B_H" "B044_tp4_B_H" "B050_tp4_B_H" "B066_tp4_B_H"
[29] "B078_tp4_B_H" "B080_tp4_B_H" "B032_tp5_B_H" "B044_tp5_B_H"
[33] "B050_tp5_B_H" "B066_tp5_B_H" "B078_tp5_B_H" "B080_tp5_B_H"
[37] "B032_tp6_B_H" "B044_tp6_B_H" "B050_tp6_B_H" "B066_tp6_B_H"
[41] "B078_tp6_B_H" "B080_tp6_B_H"
counts.sel_IVMC<- all_counts[,sel_IVMC]
metadata.sel_IVMC <- metadata[metadata.sel_IVMC$sel_IVMC,]
metadata.sel_IVMC$description.alt_nlb <- factor(metadata.sel_IVMC$description.alt_nlb,
levels = unique(metadata.sel_IVMC$description.alt_nlb))
all_counts.CBSeq_byD.sel_IVMC <- all_counts.CBSeq_byD[,sel_IVMC]
all_counts.CBSeq_byD.sel_IVMC.log.cpm.pca <- pca(cpm(all_counts.CBSeq_byD.sel_IVMC, log=TRUE),
metadata = metadata.sel_IVMC, removeVar = 0.1)-- removing the lower 10% of variables based on variance
xlab = paste("PC1, ",round(all_counts.CBSeq_byD.sel_IVMC.log.cpm.pca$variance["PC1"],digits = 2),"% variance",sep = "")
ylab = paste("PC2, ",round(all_counts.CBSeq_byD.sel_IVMC.log.cpm.pca$variance["PC2"],digits = 2),"% variance",sep = "")
plot_object<- biplot(all_counts.CBSeq_byD.sel_IVMC.log.cpm.pca,
colby = 'description.alt_nlb',
colkey = timepoint_colors.alt_nlb,
pointSize = 8,
lab = NULL,
legendPosition = 'right',
legendIconSize = 8,
colLegendTitle = " ",
gridlines.major = FALSE,
gridlines.minor = FALSE,
xlab = xlab,
ylab = ylab,
title = "")Scale for colour is already present.
Adding another scale for colour, which will replace the existing scale.
plot_objectfile_name <- paste(plot_dir,"/Figure_1_d.pdf", sep = "")
pdf(file_name, width = 8, height = 5)
plot_object
dev.off()png
2
Fig 1 e
Heatmap depicting batch corrected centered and scaled counts per million (cpm) of selected genes across the IVMC protocol
genes <- c("MKI67", "PCNA", "MCM2", "MCM7", "EXO1", "E2F2", "AXIN2", "CDH2", "ALDH1A1", "LGR5","MMP10", "RXRG",
"SCGB1D2", "SCGB1D4", "SCGB2A1", "S100P", "PAEP", "PTGS1", "PTGS2", "ENPP3",
"SERPINE1", "SERPINA5", "SERPINA6", "KRT17", "KRT16", "NKD2", "CNN1", "FOXJ1", "PIFO",
"RSPH1", "TPPP3", "KISS1R", "EGLN3", "HIF3A", "VEGFA", "ALPP", "ALPG", "CHGA",
"TNFRSF11B", "TNFRSF12A", "CSF2", "FGF2", "DKK1","SOX9", "FUT4", "PTGES", "TIGAR", "CD44",
"PLAUR", "PLAU", "PLAT", "KRT6A", "CXCL8", "CXCL10", "CXCL1", "CXCL11", "IL1RN", "IL23A",
"MMP7","OLFM4", "PGR", "ESR1", "GREB1", "COL1A2")
# select the samples (using selection vector from Fig 1 d)
counts.bc.cpm.sel_IVMC <- all_counts.bc.cpm[,sel_IVMC]
# define data to plot
data2plot.bc <- data.frame()
for (gene in genes) {
data2plot.bc <- rbind(data2plot.bc, counts.bc.cpm.sel_IVMC[rownames(counts.bc.cpm.sel_IVMC) == gene,])
}
rownames(data2plot.bc) <- genes
colnames(data2plot.bc) <- colnames(counts.bc.cpm.sel_IVMC)
# center and scale
data2plot.bc.sc <- t(scale(t(data2plot.bc)))
heatmap_selected_genes<- Heatmap(data2plot.bc.sc, cluster_columns=FALSE, cluster_rows=FALSE,
heatmap_legend_param = list(legend_direction = "horizontal",
title = "cpm (centered \nand scaled)",
color_bar = "continuous"),
column_split = c(rep(0, 6),rep(1, 6),rep(2,6),rep(3, 6),rep(4,6),rep(5, 6),rep(6,6)),
column_title = "",
bottom_annotation = HeatmapAnnotation(foo = anno_block(gp = gpar(fill = timepoint_colors,
col = timepoint_colors),
labels = unique(metadata.sel_IVMC$description.alt),
labels_gp = gpar(col = heatmap_labels_colors, fontsize = 8))),
show_column_names = FALSE)
draw(heatmap_selected_genes, heatmap_legend_side="bottom")file_name <- paste(plot_dir,"/Figure_1_e.pdf", sep = "")
pdf(file_name,height = 10,width = 8)
draw(heatmap_selected_genes, heatmap_legend_side="bottom")
dev.off()png
2
Fig 1 f
Volcano plot highlighting selected DEGs of organoids in post-breakdown phase of the IVMC protocol (Post-breakdown 24h) compared to organoids treated with hormones but not subjected to breakdown (NB_H; no breakdown hormones)
compare “tp4_B_H” versus “tp4_NB_H”
define contrast
(colnames(design_all) == "tp4_B_H") - (colnames(design_all) == "tp4_NB_H") [1] 0 0 0 0 0 0 0 0 0 0 0 0 1 0 -1 0 0 0 0 0 0 0 0 0
qlf_1F<- glmQLFTest(fit_all, contrast = as.vector((colnames(design_all) == "tp4_B_H") - (colnames(design_all) == "tp4_NB_H")))
tt_1F <- as.data.frame(topTags(qlf_1F,n=nrow(counts.all.asDGEList)))
head(tt_1F) genes logFC logCPM F PValue FDR
GALNT5 GALNT5 5.532962 1.8389142 286.1138 5.260153e-32 9.172655e-28
YKT6 YKT6 1.902848 6.0728781 273.8387 1.112590e-29 8.660889e-26
FGF2 FGF2 5.634464 0.4405043 230.1738 1.490003e-29 8.660889e-26
SBDS SBDS 2.076409 5.1949412 269.1516 2.044519e-29 8.913082e-26
CSF2 CSF2 7.312718 -1.1908599 203.6735 9.235046e-29 3.220815e-25
NIPA2 NIPA2 2.115973 5.1145905 252.2612 1.954534e-28 5.680528e-25
define cut offs
FDR < 0.05 & logFC > 2.0 ~ up; FDR < 0.05 & logFC < -2.0 ~ down
# Create significant (sig) column
results_1F <- mutate(tt_1F, sig = case_when(
FDR < 0.05 & logFC > 2.0 ~ up,
FDR < 0.05 & logFC < -2.0 ~ down,
TRUE ~ notsig))
sum(results_1F$sig == "Up")[1] 438
sum(results_1F$sig == "Down")[1] 1540
file_name.table <- paste(table_dir,"/Figure_1_f.DEG.tab",sep = "")
write.table(results_1F, file_name.table,
quote = FALSE, row.names = FALSE, sep = "\t")define genes to label
genes_1F_up_label <- c("HS3ST2", "CSF2", "KRT6A", "DKK1", "FGF2", "KRT13", "EDAR", "CXCL6",
"THBS1", "CXCL8", "CEACAM5", "EREG", "ADAMTS9", "FUT4", "AREG", "MMP7",
"IL23A", "ITGA2", "PLAUR", "PCNA")
genes_1F_down_label <- c("TBC1D3K", "TGM5", "LMO1", "SEMA5B", "COL23A1", "PNCK", "ANGPTL4",
"KISS1R", "CA9", "HIF3A", "ALDOC", "INHA", "PTPRR", "CXCR4", "HILPDA",
"OLFM2", "NUPR1", "PAX2", "EGLN3", "VEGFA")
genes_1F_label <- c(genes_1F_up_label,genes_1F_down_label)
# add gene labels
results_1F <- mutate(results_1F, labels = ifelse(genes %in% genes_1F_label, genes, ""))volcano plot
# set up base plot
p <- ggplot(data = results_1F, aes(x = logFC, y = -log10(PValue))) +
geom_point(aes(colour = sig)) +
xlim(-12, +12) +
scale_color_manual(name = "|log2FC| > 2, FDR < 0.05", values = volcano_colors) +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
axis.line = element_line(colour = "black"),
legend.key = element_blank(),
legend.position = "bottom",
# increase text sizes
axis.text = element_text(size = 18),
axis.title = element_text(size = 22),
plot.title = element_text(size = 26, hjust = 0.5),
legend.title = element_text(size = 20),
legend.text = element_text(size = 20)
) +
xlab('log2 FC') +
ylab('-log10 p-value') +
guides(color = guide_legend(override.aes = list(size=5))) +
# add gene labels
geom_text_repel(data = filter(results_1F, labels != ""),
aes(label = labels),
min.segment.length = 0,
max.overlaps = Inf,
size = 7,
show.legend = FALSE) +
ggtitle('Post-breakdown 24h (IVMC vs NB_H)')
print(p)file_name <- paste(plot_dir,"/Figure_1_f.pdf", sep = "")
pdf(file_name,height = 8,width = 10)
p
dev.off()png
2
Fig 1 g
Biological processes enriched from genes upregulated during post-breakdown phase of the IVMC protocol.
# all upregulated genes
genes_1F_up <- results_1F$genes[results_1F$sig == "Up"]
gostres <- gost(query = genes_1F_up, custom_bg = results_1F$genes, organism = "hsapiens", correction_method = "fdr")Detected custom background input, domain scope is set to 'custom'.
head(gostres$result) query significant p_value term_size query_size intersection_size
1 query_1 TRUE 3.956148e-02 2 377 2
2 query_1 TRUE 3.956148e-02 2 377 2
3 query_1 TRUE 4.494566e-06 62 377 13
4 query_1 TRUE 4.494566e-06 40 377 11
5 query_1 TRUE 2.157698e-05 278 377 25
6 query_1 TRUE 2.570997e-05 310 377 26
precision recall term_id source
1 0.00530504 1.00000000 CORUM:7112 CORUM
2 0.00530504 1.00000000 CORUM:7109 CORUM
3 0.03448276 0.20967742 GO:0019730 GO:BP
4 0.02917772 0.27500000 GO:0061844 GO:BP
5 0.06631300 0.08992806 GO:0050900 GO:BP
6 0.06896552 0.08387097 GO:0042330 GO:BP
term_name
1 PLAUR-PLAU complex
2 IGF2R-PLAUR-PLAU complex
3 antimicrobial humoral response
4 antimicrobial humoral immune response mediated by antimicrobial peptide
5 leukocyte migration
6 taxis
effective_domain_size source_order parents
1 15781 2609 CORUM:00....
2 15781 2606 CORUM:00....
3 15781 6021 GO:00069....
4 15781 15685 GO:0019730
5 15781 13342 GO:00023....
6 15781 10096 GO:00096....
unique(gostres$result$source) [1] "CORUM" "GO:BP" "GO:CC" "GO:MF" "HP" "KEGG" "MIRNA" "REAC" "TF"
[10] "WP"
gostres_results.BP <- gostres$result[gostres$result$source == "GO:BP",]
unique(gostres_results.BP$source)[1] "GO:BP"
gostres_results.BP$neg_log10Pvalue <- ( -log10(gostres_results.BP$p_value) )
gostres_results.BP <- gostres_results.BP[order(gostres_results.BP$neg_log10Pvalue, decreasing = TRUE),]
head(gostres_results.BP[,c("term_name","neg_log10Pvalue")]) term_name
3 antimicrobial humoral response
4 antimicrobial humoral immune response mediated by antimicrobial peptide
5 leukocyte migration
6 taxis
7 response to chemokine
8 chemotaxis
neg_log10Pvalue
3 5.347312
4 5.347312
5 4.666009
6 4.589898
7 4.589898
8 4.589898
file_name.table <- paste(table_dir,"/Figure_1_g.tab",sep = "")
write.table(gostres_results.BP[,c("term_name","neg_log10Pvalue")], file_name.table,
quote = FALSE, row.names = FALSE, sep = "\t")
representative_GO_terms <- c("antimicrobial humoral response",
"leukocyte migration",
"response to chemokine",
"chemotaxis",
"response to bacterium",
"locomotion",
"G protein-coupled receptor signaling pathway",
"cellular response to biotic stimulus",
"granulocyte migration",
"response to external stimulus",
"cellular response to lipopolysaccharide",
"humoral immune response",
"cell-cell signaling",
"negative regulation of wound healing",
"skin development",
"fibrinolysis",
"inflammatory response",
"regulation of wound healing",
"response to stress",
"intermediate filament organization",
"response to oxygen-containing compound",
"negative regulation of sprouting angiogenesis",
"regulation of angiogenesis")
data2plot <- gostres_results.BP[gostres_results.BP$term_name %in% representative_GO_terms,]
data2plot$term_name <- factor(data2plot$term_name, levels = rev(data2plot$term_name))
p <- ggplot(data2plot, aes(x=neg_log10Pvalue,y=term_name)) +
geom_point(size = 10, color = "#8B1A1A") +
xlab('-log10 p-value') +
xlim(0, 6) +
ylab('') +
theme_bw() +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
theme(axis.text.y = element_text(size=22)) +
theme(axis.text.x = element_text(size=18)) +
theme(axis.title.x =element_text(size=18)) +
theme(panel.border = element_blank(), axis.line = element_line())
print(p) file_name <- paste(plot_dir,"/Figure_1_g.pdf", sep = "")
pdf(file_name,height = 8,width = 12)
print(p)
dev.off() png
2
Fig 3 c
Line plots depicting cpm of WNT7A in organoids undergoing the IVMC protocol (solid lines) and organoids not subjected to breakdown (NB_H) (dotted lines), analysed with bulk RNAseq.
using B0xx_tp0_NB_NH, B0xx_tp1_NB_H, B0xx_tp2_NB_H, B0xx_tp3_NB_H, B0xx_tp4_B_H, B0xx_tp5_B_H, B0xx_tp6_B_H this has been “sel_IVMC”
plus B0xx_tp3_NB_H, B0xx_tp4_NB_H, B0xx_tp5_NB_H, B0xx_tp6_NB_H => “sel_IVMC_NB”
# make a new selection (the ‘no passaging’)
metadata.sel_IVMC_NB <- mutate(as.data.frame(metadata), sel_IVMC_NB = case_when(
timepoint == "tp3" & hormones == "H" ~ TRUE,
timepoint == "tp4" & hormones == "H" & breakdown == "NB" ~ TRUE,
timepoint == "tp5" & hormones == "H" & breakdown == "NB" ~ TRUE,
timepoint == "tp6" & hormones == "H" & breakdown == "NB" ~ TRUE,
TRUE ~ FALSE))
sel_IVMC_NB <- metadata$sample[metadata.sel_IVMC_NB$sel_IVMC_NB]
sel_IVMC_NB [1] "B032_tp3_NB_H" "B044_tp3_NB_H" "B050_tp3_NB_H" "B066_tp3_NB_H"
[5] "B078_tp3_NB_H" "B080_tp3_NB_H" "B032_tp4_NB_H" "B044_tp4_NB_H"
[9] "B050_tp4_NB_H" "B066_tp4_NB_H" "B078_tp4_NB_H" "B080_tp4_NB_H"
[13] "B032_tp5_NB_H" "B044_tp5_NB_H" "B050_tp5_NB_H" "B066_tp5_NB_H"
[17] "B078_tp5_NB_H" "B080_tp5_NB_H" "B032_tp6_NB_H" "B044_tp6_NB_H"
[21] "B050_tp6_NB_H" "B066_tp6_NB_H" "B078_tp6_NB_H" "B080_tp6_NB_H"
metadata.sel_IVMC_NB <- metadata[metadata.sel_IVMC_NB$sel_IVMC_NB == TRUE,]
# batch corrected data
counts.bc.cpm.sel_IVMC_NB <- all_counts.bc.cpm[,sel_IVMC_NB]metadata.sel_IVMC$general_timepoint <- factor(metadata.sel_IVMC$description.alt2, levels = (unique(metadata.sel_IVMC$description.alt2)))
metadata.sel_IVMC_NB$general_timepoint <- factor(metadata.sel_IVMC_NB$description.alt2, levels = (unique(metadata.sel_IVMC_NB$description.alt2)))
genes_for_line_plots <- c("WNT7A")
for (gene in genes_for_line_plots ) {
expr.values.sel_IVMC <- counts.bc.cpm.sel_IVMC[gene,]
expr.values.sel_IVMC_NB <- counts.bc.cpm.sel_IVMC_NB[gene,]
# data to plot
df2plot <- data.frame(samples=colnames(counts.bc.cpm.sel_IVMC), cpm=expr.values.sel_IVMC)
df2plot.sel_IVMC_NB <- data.frame(samples=colnames(counts.bc.cpm.sel_IVMC_NB), cpm=expr.values.sel_IVMC_NB)
# combine with metadata
df2plot_withMetadata <- merge(df2plot, metadata.sel_IVMC, by.x = "samples", by.y = "sample", sort = FALSE)
df2plot_withMetadata.sel_IVMC_NB <- merge(df2plot.sel_IVMC_NB, metadata.sel_IVMC_NB, by.x = "samples", by.y = "sample", sort = FALSE)
p <- ggplot(df2plot_withMetadata, aes(y=cpm)) +
geom_line(aes(x=general_timepoint, color = donor, group = donor), linewidth=1.5, data = df2plot_withMetadata) +
geom_line(aes(y=cpm, x=general_timepoint, color = donor, group = donor), linetype = 2, linewidth=1.5, data = df2plot_withMetadata.sel_IVMC_NB)+
scale_color_manual(values = donor_colors) +
ylab(bquote(italic(.(gene))~' cpm')) +
xlab("") +
theme_bw() +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
theme(axis.line = element_line(colour = "black")) +
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 0.95)) +
theme(axis.text.x = element_text(size = 24)) +
theme(axis.text.y = element_text(size = 22)) +
theme(axis.title.y = element_text(size = 24)) +
theme(panel.border = element_blank()) +
theme(legend.text=element_text(size=22)) +
theme(legend.title=element_text(size=22))
print(p)
file_name <- paste(plot_dir,"/Figure_3_c__",gene,".pdf", sep = "")
pdf(file_name, height = 8,width = 10)
print(p)
dev.off()
}Fig 5 d
using B0xx_tp0_NB_NH, B0xx_tp1_NB_H, B0xx_tp2_NB_H, B0xx_tp3_NB_H, B0xx_tp4_B_H, B0xx_tp5_B_H, B0xx_tp6_B_H this has been “sel_IVMC”
Line plots showing the (batch coreccted) mean expression levels (in cpm) of ANXA1 and CXCL8 across different timepoints in the IVMC protocol. The red line represents the mean cpm values across donors, while the shaded blue region indicates the standard deviation around the mean.
genes_for_line_plots <- c("ANXA1","CXCL8")
for (gene in genes_for_line_plots ) {
expr.values.sel_IVMC <- counts.bc.cpm.sel_IVMC[gene,]
# data to plot
df2plot <- data.frame(samples=colnames(counts.bc.cpm.sel_IVMC), cpm=expr.values.sel_IVMC)
# combine with metadata
df2plot_withMetadata <- merge(df2plot, metadata.sel_IVMC, by.x = "samples", by.y = "sample", sort = FALSE)
# aggregate
df2plot_aggregate <- data.frame(general_timepoint = aggregate(df2plot_withMetadata$cpm, list(df2plot_withMetadata$general_timepoint), FUN=mean)[,1],
cpm_mean = aggregate(df2plot_withMetadata$cpm, list(df2plot_withMetadata$general_timepoint), FUN=mean)[,2],
cpm_mean_plus_sd = aggregate(df2plot_withMetadata$cpm, list(df2plot_withMetadata$general_timepoint), FUN=mean)[,2] +
aggregate(df2plot_withMetadata$cpm, list(df2plot_withMetadata$general_timepoint), FUN=sd)[,2],
cpm_mean_minus_sd = aggregate(df2plot_withMetadata$cpm, list(df2plot_withMetadata$general_timepoint), FUN=mean)[,2] -
aggregate(df2plot_withMetadata$cpm, list(df2plot_withMetadata$general_timepoint), FUN=sd)[,2] )
p <- ggplot(df2plot_aggregate, aes(y=cpm_mean, x=general_timepoint, group = 1)) +
geom_line(color = "red") +
geom_point(color = "red", size = 2) +
geom_ribbon(aes(x = general_timepoint, ymax = cpm_mean_plus_sd, ymin = cpm_mean_minus_sd),
alpha = 0.3, fill = "lightblue") +
ggtitle(bquote(italic(.(gene)))) +
ylab("cpm (Mean ± SD across donors)") +
xlab("") +
theme_bw() +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
theme(axis.line = element_line(colour = "black")) +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 24)) +
theme(axis.text.y = element_text(size = 22)) +
theme(axis.title.y = element_text(size = 24)) +
theme(panel.border = element_blank())
print(p)
file_name <- paste(plot_dir,"/Figure_5_d__",gene,".pdf", sep = "")
pdf(file_name, height = 8,width = 10)
print(p)
dev.off()
}Extended Data Fig 1 b
PCA of all organoid samples from all conditions colored by the breakdown variable.
colnames(metadata)[c(4,5,11)] <- c("Breakdown","Hormones","Timeline")
all_counts.CBSeq_byD.log.cpm.pca <- pca(cpm(all_counts.CBSeq_byD, log=TRUE),
metadata = metadata, removeVar = 0.1)-- removing the lower 10% of variables based on variance
xlab = paste("PC1, ",round(all_counts.CBSeq_byD.log.cpm.pca$variance["PC1"],digits = 2),"% variance",sep = "")
ylab = paste("PC2, ",round(all_counts.CBSeq_byD.log.cpm.pca$variance["PC2"],digits = 2),"% variance",sep = "")
plot_object<- biplot(all_counts.CBSeq_byD.log.cpm.pca,
colby = 'Breakdown',
colkey = breakdown_colors,
shape = 'Timeline',
pointSize = 7,
lab = NULL,
legendPosition = 'bottom',
gridlines.major = FALSE,
gridlines.minor = FALSE,
xlab = xlab,
ylab = ylab,
title = "")Scale for colour is already present.
Adding another scale for colour, which will replace the existing scale.
plot_objectfile_name <- paste(plot_dir,"/EDFigure_1_b.pdf", sep = "")
pdf(file_name, width = 8, height = 7.5)
plot_object
dev.off()png
2
Extended Data Fig 1 c
PCA of all organoid samples from all conditions colored by the hormonal treatment variable.
plot_object<- biplot(all_counts.CBSeq_byD.log.cpm.pca,
colby = 'Hormones',
colkey = hormones_colors,
shape = 'Timeline',
pointSize = 7,
lab = NULL,
legendPosition = 'bottom',
gridlines.major = FALSE,
gridlines.minor = FALSE,
xlab = xlab,
ylab = ylab,
title = "")Scale for colour is already present.
Adding another scale for colour, which will replace the existing scale.
plot_objectfile_name <- paste(plot_dir,"/EDFigure_1_c.pdf", sep = "")
pdf(file_name, width = 8, height = 7.5)
plot_object
dev.off()png
2
colnames(metadata)[c(4,5,11)] <- c("breakdown","hormones","timeline")Extended Data Fig 1 d
Hierarchically clustered heatmap displaying the top 50 differentially upregulated genes from pairwise comparisons of each timepoint against all timepoints.
using B0xx_tp0_NB_NH, B0xx_tp1_NB_H, B0xx_tp2_NB_H, B0xx_tp3_NB_H, B0xx_tp4_B_H, B0xx_tp5_B_H, B0xx_tp6_B_H this has been “sel_IVMC”
DGE (specific for each time point) using the raw data
groups <- metadata.sel_IVMC$timepoint_B_H
donor <- metadata.sel_IVMC$donorsetting up DGEList
counts.sel_IVMC.asDGEList <- DGEList(counts=counts.sel_IVMC,
genes=rownames(counts.sel_IVMC),
group = groups )
counts.sel_IVMC.asDGEListAn object of class "DGEList"
$counts
B032_tp0_NB_NH B044_tp0_NB_NH B050_tp0_NB_NH B066_tp0_NB_NH
TSPAN6 2330.149 1669.757 2323.153 2178.668
TNMD 0.000 0.000 0.000 0.000
DPM1 1057.640 947.221 1898.128 1782.786
SCYL3 454.987 408.387 516.702 585.629
C1orf112 297.605 257.085 652.944 540.747
B078_tp0_NB_NH B080_tp0_NB_NH B032_tp1_NB_H B044_tp1_NB_H
TSPAN6 2701.845 1941.214 2974.309 1785.138
TNMD 0.000 0.000 1.000 0.000
DPM1 1555.317 1670.424 1355.103 833.345
SCYL3 491.001 505.327 569.013 450.662
C1orf112 562.380 776.523 207.487 106.805
B050_tp1_NB_H B066_tp1_NB_H B078_tp1_NB_H B080_tp1_NB_H B032_tp2_NB_H
TSPAN6 3364.456 1620.574 1953.345 2214.665 2545.154
TNMD 0.000 0.000 0.000 0.000 0.000
DPM1 1371.059 898.820 857.830 825.908 842.792
SCYL3 615.183 393.422 368.000 546.883 500.231
C1orf112 183.012 88.999 98.000 96.618 257.938
B044_tp2_NB_H B050_tp2_NB_H B066_tp2_NB_H B078_tp2_NB_H B080_tp2_NB_H
TSPAN6 1876.503 2934.437 1651.000 1959.843 1279.781
TNMD 0.000 2.000 0.000 0.000 0.000
DPM1 723.453 1405.790 734.001 921.103 511.003
SCYL3 431.433 698.526 380.062 500.304 452.715
C1orf112 114.247 261.026 154.001 166.999 118.177
B032_tp3_NB_H B044_tp3_NB_H B050_tp3_NB_H B066_tp3_NB_H B078_tp3_NB_H
TSPAN6 1907.078 2022.268 2705.818 1418.719 2006.202
TNMD 0.000 0.000 0.000 0.000 0.000
DPM1 491.394 769.528 1145.941 679.983 668.810
SCYL3 525.007 460.795 525.977 365.000 395.009
C1orf112 274.909 128.736 223.257 170.000 104.000
B080_tp3_NB_H B032_tp4_B_H B044_tp4_B_H B050_tp4_B_H B066_tp4_B_H
TSPAN6 1139.296 1712.537 1781.079 2340.607 2111.229
TNMD 0.000 1.000 0.000 0.000 0.000
DPM1 501.126 1211.173 1574.285 1995.332 2159.178
SCYL3 397.463 365.607 454.168 654.503 620.316
C1orf112 206.566 514.024 400.628 630.985 605.000
B078_tp4_B_H B080_tp4_B_H B032_tp5_B_H B044_tp5_B_H B050_tp5_B_H
TSPAN6 2398.771 1337.427 2293.025 2787.210 2951.892
TNMD 0.000 0.000 1.000 0.000 0.000
DPM1 2079.461 1122.677 1625.211 1705.423 2157.301
SCYL3 669.248 447.412 309.703 480.506 563.000
C1orf112 517.216 516.636 683.186 506.535 656.351
B066_tp5_B_H B078_tp5_B_H B080_tp5_B_H B032_tp6_B_H B044_tp6_B_H
TSPAN6 1438.731 3292.663 1544.203 2140.944 1967.731
TNMD 0.000 0.000 0.000 1.000 0.000
DPM1 1450.999 1842.883 1182.420 1225.324 1453.737
SCYL3 316.000 645.144 376.380 338.175 420.029
C1orf112 517.101 560.135 419.747 439.444 556.262
B050_tp6_B_H B066_tp6_B_H B078_tp6_B_H B080_tp6_B_H
TSPAN6 3034.905 1661.101 2651.344 1388.112
TNMD 3.000 0.000 0.000 0.000
DPM1 1884.420 2206.904 1550.912 1183.336
SCYL3 542.169 435.413 535.177 372.075
C1orf112 615.150 852.744 605.840 488.236
60225 more rows ...
$samples
group lib.size norm.factors
B032_tp0_NB_NH tp0_NB_NH 30144167 1
B044_tp0_NB_NH tp0_NB_NH 28732415 1
B050_tp0_NB_NH tp0_NB_NH 37521705 1
B066_tp0_NB_NH tp0_NB_NH 37201395 1
B078_tp0_NB_NH tp0_NB_NH 32579189 1
37 more rows ...
$genes
genes
TSPAN6 TSPAN6
TNMD TNMD
DPM1 DPM1
SCYL3 SCYL3
C1orf112 C1orf112
60225 more rows ...
# filter lowly expressed genes
counts.cpm.sel_IVMC <- all_counts.cpm[,metadata.sel_IVMC$sample]
counts.sel_IVMC.asDGEList <- counts.sel_IVMC.asDGEList[ rowSums(counts.cpm.sel_IVMC > mincpm) >= minsamples, , keep.lib.size=FALSE]
dim(counts.sel_IVMC.asDGEList)[1] 16451 42
# normalization
counts.sel_IVMC.asDGEList <- calcNormFactors(counts.sel_IVMC.asDGEList) # same as normLibSizes(y)
head(counts.sel_IVMC.asDGEList$samples) group lib.size norm.factors
B032_tp0_NB_NH tp0_NB_NH 30099923 1.077073
B044_tp0_NB_NH tp0_NB_NH 28687377 1.047688
B050_tp0_NB_NH tp0_NB_NH 37472680 1.140602
B066_tp0_NB_NH tp0_NB_NH 37157178 1.105160
B078_tp0_NB_NH tp0_NB_NH 32534805 1.126232
B080_tp0_NB_NH tp0_NB_NH 41470928 1.113365
plotMDS(counts.sel_IVMC.asDGEList)setting up the design matrix
# use model.matrix
design <- model.matrix(~donor +group, data = counts.sel_IVMC.asDGEList$samples)
colnames(design)[2:12] <- substring(colnames(design)[2:12],6,14)
design (Intercept) B044 B050 B066 B078 B080 tp1_NB_H tp2_NB_H tp3_NB_H
B032_tp0_NB_NH 1 0 0 0 0 0 0 0 0
B044_tp0_NB_NH 1 1 0 0 0 0 0 0 0
B050_tp0_NB_NH 1 0 1 0 0 0 0 0 0
B066_tp0_NB_NH 1 0 0 1 0 0 0 0 0
B078_tp0_NB_NH 1 0 0 0 1 0 0 0 0
B080_tp0_NB_NH 1 0 0 0 0 1 0 0 0
B032_tp1_NB_H 1 0 0 0 0 0 1 0 0
B044_tp1_NB_H 1 1 0 0 0 0 1 0 0
B050_tp1_NB_H 1 0 1 0 0 0 1 0 0
B066_tp1_NB_H 1 0 0 1 0 0 1 0 0
B078_tp1_NB_H 1 0 0 0 1 0 1 0 0
B080_tp1_NB_H 1 0 0 0 0 1 1 0 0
B032_tp2_NB_H 1 0 0 0 0 0 0 1 0
B044_tp2_NB_H 1 1 0 0 0 0 0 1 0
B050_tp2_NB_H 1 0 1 0 0 0 0 1 0
B066_tp2_NB_H 1 0 0 1 0 0 0 1 0
B078_tp2_NB_H 1 0 0 0 1 0 0 1 0
B080_tp2_NB_H 1 0 0 0 0 1 0 1 0
B032_tp3_NB_H 1 0 0 0 0 0 0 0 1
B044_tp3_NB_H 1 1 0 0 0 0 0 0 1
B050_tp3_NB_H 1 0 1 0 0 0 0 0 1
B066_tp3_NB_H 1 0 0 1 0 0 0 0 1
B078_tp3_NB_H 1 0 0 0 1 0 0 0 1
B080_tp3_NB_H 1 0 0 0 0 1 0 0 1
B032_tp4_B_H 1 0 0 0 0 0 0 0 0
B044_tp4_B_H 1 1 0 0 0 0 0 0 0
B050_tp4_B_H 1 0 1 0 0 0 0 0 0
B066_tp4_B_H 1 0 0 1 0 0 0 0 0
B078_tp4_B_H 1 0 0 0 1 0 0 0 0
B080_tp4_B_H 1 0 0 0 0 1 0 0 0
B032_tp5_B_H 1 0 0 0 0 0 0 0 0
B044_tp5_B_H 1 1 0 0 0 0 0 0 0
B050_tp5_B_H 1 0 1 0 0 0 0 0 0
B066_tp5_B_H 1 0 0 1 0 0 0 0 0
B078_tp5_B_H 1 0 0 0 1 0 0 0 0
B080_tp5_B_H 1 0 0 0 0 1 0 0 0
B032_tp6_B_H 1 0 0 0 0 0 0 0 0
B044_tp6_B_H 1 1 0 0 0 0 0 0 0
B050_tp6_B_H 1 0 1 0 0 0 0 0 0
B066_tp6_B_H 1 0 0 1 0 0 0 0 0
B078_tp6_B_H 1 0 0 0 1 0 0 0 0
B080_tp6_B_H 1 0 0 0 0 1 0 0 0
tp4_B_H tp5_B_H tp6_B_H
B032_tp0_NB_NH 0 0 0
B044_tp0_NB_NH 0 0 0
B050_tp0_NB_NH 0 0 0
B066_tp0_NB_NH 0 0 0
B078_tp0_NB_NH 0 0 0
B080_tp0_NB_NH 0 0 0
B032_tp1_NB_H 0 0 0
B044_tp1_NB_H 0 0 0
B050_tp1_NB_H 0 0 0
B066_tp1_NB_H 0 0 0
B078_tp1_NB_H 0 0 0
B080_tp1_NB_H 0 0 0
B032_tp2_NB_H 0 0 0
B044_tp2_NB_H 0 0 0
B050_tp2_NB_H 0 0 0
B066_tp2_NB_H 0 0 0
B078_tp2_NB_H 0 0 0
B080_tp2_NB_H 0 0 0
B032_tp3_NB_H 0 0 0
B044_tp3_NB_H 0 0 0
B050_tp3_NB_H 0 0 0
B066_tp3_NB_H 0 0 0
B078_tp3_NB_H 0 0 0
B080_tp3_NB_H 0 0 0
B032_tp4_B_H 1 0 0
B044_tp4_B_H 1 0 0
B050_tp4_B_H 1 0 0
B066_tp4_B_H 1 0 0
B078_tp4_B_H 1 0 0
B080_tp4_B_H 1 0 0
B032_tp5_B_H 0 1 0
B044_tp5_B_H 0 1 0
B050_tp5_B_H 0 1 0
B066_tp5_B_H 0 1 0
B078_tp5_B_H 0 1 0
B080_tp5_B_H 0 1 0
B032_tp6_B_H 0 0 1
B044_tp6_B_H 0 0 1
B050_tp6_B_H 0 0 1
B066_tp6_B_H 0 0 1
B078_tp6_B_H 0 0 1
B080_tp6_B_H 0 0 1
attr(,"assign")
[1] 0 1 1 1 1 1 2 2 2 2 2 2
attr(,"contrasts")
attr(,"contrasts")$donor
[1] "contr.treatment"
attr(,"contrasts")$group
[1] "contr.treatment"
dispersion estimation and fitting
# dispersion estimation
counts.sel_IVMC.asDGEList <- estimateDisp(counts.sel_IVMC.asDGEList, design)
counts.sel_IVMC.asDGEList$common.dispersion[1] 0.07067298
plotBCV(counts.sel_IVMC.asDGEList)# fitting
fit <- glmQLFit(counts.sel_IVMC.asDGEList, design)
plotQLDisp(fit)0) “Pre-diff” (tp0_NB_NH) specific
contrast_0 <- 1/6 * c(- (colnames(design) == "tp1_NB_H") - (colnames(design) == "tp2_NB_H") - (colnames(design) == "tp3_NB_H")
- (colnames(design) == "tp4_B_H") - (colnames(design) == "tp5_B_H") - (colnames(design) == "tp6_B_H") )
contrast_0 [1] 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000
[7] -0.1666667 -0.1666667 -0.1666667 -0.1666667 -0.1666667 -0.1666667
qlf_0 <- glmQLFTest(fit, contrast = contrast_0)
tt_0 <- as.data.frame(topTags(qlf_0,n=nrow(counts.sel_IVMC.asDGEList)))
head(tt_0) genes logFC logCPM F PValue FDR
KRT4 KRT4 6.637973 1.4672627 1416.5371 6.950293e-41 1.143393e-36
CALCA CALCA 6.550895 1.6920612 1132.5048 1.870543e-37 1.538615e-33
PLXNA4 PLXNA4 6.866916 3.5815887 1126.6076 1.938841e-33 1.063196e-29
LGI2 LGI2 4.735619 -0.5563552 622.7545 7.720354e-32 3.175189e-28
SHISA3 SHISA3 4.913608 0.5925268 813.9653 1.095696e-31 3.605059e-28
FBN2 FBN2 3.715567 5.6922941 1222.4537 8.542720e-29 2.342271e-25
file_name.table <- paste(table_dir,"/EDFigure_1_d.tp0_TopTable.tab",sep = "")
write.table(tt_0, file_name.table, quote = FALSE, row.names = FALSE, sep = "\t")1) “P4-diff” (tp1_NB_H) specific
contrast_1 <- c((colnames(design) == "tp1_NB_H") - 1/6* (colnames(design) == "tp2_NB_H") - 1/6* (colnames(design) == "tp3_NB_H")
- 1/6* (colnames(design) == "tp4_B_H") - 1/6* (colnames(design) == "tp5_B_H") - 1/6* (colnames(design) == "tp6_B_H") )
contrast_1 [1] 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000
[7] 1.0000000 -0.1666667 -0.1666667 -0.1666667 -0.1666667 -0.1666667
qlf_1 <- glmQLFTest(fit, contrast = contrast_1)
tt_1 <- as.data.frame(topTags(qlf_1,n=nrow(counts.sel_IVMC.asDGEList)))
head(tt_1) genes logFC logCPM F PValue
SST SST 5.678946 4.3893633 702.8144 2.092213e-28
COL23A1 COL23A1 4.280639 2.8836585 490.3516 7.719881e-26
CD55 CD55 2.526505 7.9750274 482.5971 4.780045e-22
RP11-856M9.1 RP11-856M9.1 3.286598 0.6042339 313.5169 9.562260e-22
RP1-153G14.4 RP1-153G14.4 3.410210 -0.5422749 235.9544 1.041374e-21
PHACTR3 PHACTR3 4.045445 0.3593289 282.7125 6.843084e-21
FDR
SST 3.441900e-24
COL23A1 6.349988e-22
CD55 2.621218e-18
RP11-856M9.1 3.426328e-18
RP1-153G14.4 3.426328e-18
PHACTR3 1.876260e-17
file_name.table <- paste(table_dir,"/EDFigure_1_d.tp1_TopTable.tab",sep = "")
write.table(tt_1, file_name.table, quote = FALSE, row.names = FALSE, sep = "\t")2) “Horm Withdr 24h” (tp2_NB_H) specific
contrast_2 <- c((colnames(design) == "tp2_NB_H") - 1/6* (colnames(design) == "tp1_NB_H") - 1/6* (colnames(design) == "tp3_NB_H")
- 1/6* (colnames(design) == "tp4_B_H") - 1/6* (colnames(design) == "tp5_B_H") - 1/6* (colnames(design) == "tp6_B_H") )
contrast_2 [1] 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000
[7] -0.1666667 1.0000000 -0.1666667 -0.1666667 -0.1666667 -0.1666667
qlf_2 <- glmQLFTest(fit, contrast = contrast_2)
tt_2 <- as.data.frame(topTags(qlf_2,n=nrow(counts.sel_IVMC.asDGEList)))
head(tt_2) genes logFC logCPM F PValue FDR
TGM5 TGM5 3.926647 2.7556014 510.7290 8.182216e-25 1.346056e-20
COL23A1 COL23A1 4.068428 2.8836585 421.1466 1.752220e-24 1.441289e-20
ZMIZ1-AS1 ZMIZ1-AS1 3.547144 -0.5122914 241.2649 5.201602e-22 2.852385e-18
NDUFA4L2 NDUFA4L2 3.760156 5.5628212 383.4921 8.300260e-22 3.413689e-18
LINC02649 LINC02649 3.270517 0.7894746 237.5867 1.910928e-20 6.287336e-17
FBXO32 FBXO32 2.638391 3.0973920 349.0151 1.029472e-19 2.822641e-16
file_name.table <- paste(table_dir,"/EDFigure_1_d.tp2_TopTable.tab",sep = "")
write.table(tt_2, file_name.table, quote = FALSE, row.names = FALSE, sep = "\t")3) “Horm Withdr 48h” (tp3_NB_H) specific
contrast_3 <- c((colnames(design) == "tp3_NB_H") - 1/6* (colnames(design) == "tp1_NB_H") - 1/6* (colnames(design) == "tp2_NB_H")
- 1/6* (colnames(design) == "tp4_B_H") - 1/6* (colnames(design) == "tp5_B_H") - 1/6* (colnames(design) == "tp6_B_H") )
contrast_3 [1] 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000
[7] -0.1666667 -0.1666667 1.0000000 -0.1666667 -0.1666667 -0.1666667
qlf_3 <- glmQLFTest(fit, contrast = contrast_3)
tt_3 <- as.data.frame(topTags(qlf_3,n=nrow(counts.sel_IVMC.asDGEList)))
head(tt_3) genes logFC logCPM F PValue FDR
TGM5 TGM5 4.952334 2.7556014 935.3743 6.416044e-30 1.055503e-25
COL23A1 COL23A1 4.543722 2.8836585 546.3425 8.193049e-27 6.739192e-23
NDUFA4L2 NDUFA4L2 4.484035 5.5628212 589.1112 3.528043e-25 1.934661e-21
PRDM16 PRDM16 5.106874 2.1512301 400.7150 1.150140e-24 4.730238e-21
PDE4C PDE4C 4.713820 0.6501294 364.1578 5.719239e-23 1.881744e-19
PNCK PNCK 4.797822 3.5242771 399.8600 7.821529e-23 2.144533e-19
file_name.table <- paste(table_dir,"/EDFigure_1_d.tp3_TopTable.tab",sep = "")
write.table(tt_3, file_name.table, quote = FALSE, row.names = FALSE, sep = "\t")4) “Post-breakdown 24h” (tp4_B_H) specific
contrast_4 <- c((colnames(design) == "tp4_B_H") - 1/6* (colnames(design) == "tp1_NB_H") - 1/6* (colnames(design) == "tp2_NB_H") - 1/6* (colnames(design) == "tp3_NB_H")
- 1/6* (colnames(design) == "tp5_B_H") - 1/6* (colnames(design) == "tp6_B_H") )
contrast_4 [1] 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000
[7] -0.1666667 -0.1666667 -0.1666667 1.0000000 -0.1666667 -0.1666667
qlf_4 <- glmQLFTest(fit, contrast = contrast_4)
tt_4 <- as.data.frame(topTags(qlf_4,n=nrow(counts.sel_IVMC.asDGEList)))
head(tt_4) genes logFC logCPM F PValue FDR
CSF2 CSF2 7.172484 -0.09129763 670.1599 5.229538e-25 5.372197e-21
TNFRSF11B TNFRSF11B 3.977181 0.94760286 498.0793 8.168232e-25 5.372197e-21
IGFL1 IGFL1 5.819212 -0.42929220 436.4569 9.796725e-25 5.372197e-21
BCL2L15 BCL2L15 3.532552 5.00765998 666.5908 2.358005e-24 9.697884e-21
GALNT5 GALNT5 4.363803 2.16316015 476.9216 1.934371e-23 6.364467e-20
GLIPR1 GLIPR1 3.988663 1.74433247 434.7055 1.746932e-22 4.789796e-19
file_name.table <- paste(table_dir,"/EDFigure_1_d.tp4_TopTable.tab",sep = "")
write.table(tt_4, file_name.table, quote = FALSE, row.names = FALSE, sep = "\t")5) “E2-diff 24h” (tp5_B_H) specific
contrast_5 <- c((colnames(design) == "tp5_B_H") - 1/6* (colnames(design) == "tp1_NB_H") - 1/6* (colnames(design) == "tp2_NB_H") - 1/6* (colnames(design) == "tp3_NB_H")
- 1/6* (colnames(design) == "tp4_B_H") - 1/6* (colnames(design) == "tp6_B_H") )
qlf_5 <- glmQLFTest(fit, contrast = contrast_5)
contrast_5 [1] 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000
[7] -0.1666667 -0.1666667 -0.1666667 -0.1666667 1.0000000 -0.1666667
tt_5 <- as.data.frame(topTags(qlf_5,n=nrow(counts.sel_IVMC.asDGEList)))
head(tt_5) genes logFC logCPM F PValue FDR
CXCL10 CXCL10 4.939035 5.405468 454.7192 5.139231e-23 8.454549e-19
CXCL11 CXCL11 5.006808 3.184803 388.5890 9.554431e-22 7.858997e-18
OAS1 OAS1 3.189627 6.132966 370.4554 3.447883e-20 1.890704e-16
BCL2L15 BCL2L15 2.591279 5.007660 328.5224 2.365402e-19 9.471202e-16
OAS3 OAS3 2.813307 7.168217 320.8204 3.409625e-19 9.471202e-16
IFIT1 IFIT1 4.074539 6.304042 317.0045 3.454332e-19 9.471202e-16
file_name.table <- paste(table_dir,"/EDFigure_1_d.tp5_TopTable.tab",sep = "")
write.table(tt_5, file_name.table, quote = FALSE, row.names = FALSE, sep = "\t")6) “E2 diff 48h” (tp6_B_H) specific
contrast_6 <- c((colnames(design) == "tp6_B_H") - 1/6* (colnames(design) == "tp1_NB_H") - 1/6* (colnames(design) == "tp2_NB_H") - 1/6* (colnames(design) == "tp3_NB_H")
- 1/6* (colnames(design) == "tp4_B_H") - 1/6* (colnames(design) == "tp5_B_H") )
qlf_6 <- glmQLFTest(fit, contrast = contrast_6)
contrast_6 [1] 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000
[7] -0.1666667 -0.1666667 -0.1666667 -0.1666667 -0.1666667 1.0000000
tt_6 <- as.data.frame(topTags(qlf_6,n=nrow(counts.sel_IVMC.asDGEList)))
head(tt_6) genes logFC logCPM F PValue FDR
OAS3 OAS3 2.972377 7.168217 356.0451 6.514406e-20 4.927612e-16
CXCL10 CXCL10 4.205358 5.405468 300.4120 7.493286e-20 4.927612e-16
CXCL11 CXCL11 4.596896 3.184803 299.1716 8.985980e-20 4.927612e-16
OAS1 OAS1 3.069696 6.132966 330.6896 2.106641e-19 8.664089e-16
IFI27 IFI27 3.251867 8.425607 322.4245 3.191606e-19 1.010010e-15
IFI6 IFI6 3.727161 8.710519 319.2709 3.683704e-19 1.010010e-15
file_name.table <- paste(table_dir,"/EDFigure_1_d.tp6_TopTable.tab",sep = "")
write.table(tt_6, file_name.table, quote = FALSE, row.names = FALSE, sep = "\t")gene list Heatmaps
# top 50 genes with logFC > 2
genes_top50up_all <- c(tt_0[tt_0$logFC > 2,]$genes[1:50],
tt_1[tt_1$logFC > 2,]$genes[1:50],
tt_2[tt_2$logFC > 2,]$genes[1:50],
tt_3[tt_3$logFC > 2,]$genes[1:50],
tt_4[tt_4$logFC > 2,]$genes[1:50],
tt_5[tt_5$logFC > 2,]$genes[1:50],
tt_6[tt_6$logFC > 2,]$genes[1:50])
# define data to plot
data2plot <- data.frame()
for (gene in genes_top50up_all) {
data2plot <- rbind(data2plot, counts.bc.cpm.sel_IVMC[rownames(counts.bc.cpm.sel_IVMC) == gene,])
}
# rownames(data2plot) <- genes
colnames(data2plot) <- colnames(counts.bc.cpm.sel_IVMC)
# center and scale
data2plot.sc <- t(scale(t(data2plot)))
h <- Heatmap(data2plot.sc, cluster_columns=FALSE, cluster_rows=FALSE,
heatmap_legend_param = list(title = "cpm\n(centered\nscaled)", color_bar = "continuous"),
column_split = c(rep(0, 6),rep(1, 6),rep(2,6),rep(3, 6),rep(4,6),rep(5, 6),rep(6,6)),
row_split = c(rep(0,50),rep(1,50),rep(2,50),rep(3,50),rep(4,50),rep(5,50),rep(6,50)),
column_title = " ",
bottom_annotation = HeatmapAnnotation(foo = anno_block(gp = gpar(fill = timepoint_colors,
col = timepoint_colors),
labels = unique(metadata.sel_IVMC$description.alt2),
labels_gp = gpar(col = heatmap_labels_colors,
fontsize = 6))),
show_column_names = FALSE,
row_title = NULL)
print(h)file_name <- paste(plot_dir,"/EDFigure_1_d.pdf",sep = "")
pdf(file_name, width = 6, height = 7)
print(h)
dev.off()png
2
png(sub("\\.pdf$", ".png", file_name), width = 6, height = 7,
unit = "in", res = 400)
print(h)
dev.off()png
2
Extended Data Fig 2 a
using B0xx_tp0_NB_NH, B0xx_tp1_NB_H, B0xx_tp2_NB_H, B0xx_tp3_NB_H, B0xx_tp4_B_H, B0xx_tp5_B_H, B0xx_tp6_B_H this has been “sel_IVMC”
Line plots showing the (batch coreccted) mean expression levels (in cpm) of genes reported in literature to mark stem cells of the endometrial epithelium (MKI67, ALDH1A1, CDH2) across different timepoints in the IVMC protocol. The red line represents the mean cpm values across donors, while the shaded blue region indicates the standard deviation around the mean.
genes_for_line_plots <- c("MKI67","ALDH1A1","CDH2")
for (gene in genes_for_line_plots ) {
expr.values.sel_IVMC <- counts.bc.cpm.sel_IVMC[gene,]
# data to plot
df2plot <- data.frame(samples=colnames(counts.bc.cpm.sel_IVMC), cpm=expr.values.sel_IVMC)
# combine with metadata
df2plot_withMetadata <- merge(df2plot, metadata.sel_IVMC, by.x = "samples", by.y = "sample", sort = FALSE)
# aggregate
df2plot_aggregate <- data.frame(general_timepoint = aggregate(df2plot_withMetadata$cpm, list(df2plot_withMetadata$general_timepoint), FUN=mean)[,1],
cpm_mean = aggregate(df2plot_withMetadata$cpm, list(df2plot_withMetadata$general_timepoint), FUN=mean)[,2],
cpm_mean_plus_sd = aggregate(df2plot_withMetadata$cpm, list(df2plot_withMetadata$general_timepoint), FUN=mean)[,2] +
aggregate(df2plot_withMetadata$cpm, list(df2plot_withMetadata$general_timepoint), FUN=sd)[,2],
cpm_mean_minus_sd = aggregate(df2plot_withMetadata$cpm, list(df2plot_withMetadata$general_timepoint), FUN=mean)[,2] -
aggregate(df2plot_withMetadata$cpm, list(df2plot_withMetadata$general_timepoint), FUN=sd)[,2] )
p <- ggplot(df2plot_aggregate, aes(y=cpm_mean, x=general_timepoint, group = 1)) +
geom_line(color = "red") +
geom_point(color = "red", size = 2) +
geom_ribbon(aes(x = general_timepoint, ymax = cpm_mean_plus_sd, ymin = cpm_mean_minus_sd),
alpha = 0.3, fill = "lightblue") +
ggtitle(bquote(italic(.(gene)))) +
ylab("cpm (Mean ± SD across donors)") +
xlab("") +
theme_bw() +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
theme(axis.line = element_line(colour = "black")) +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 24)) +
theme(axis.text.y = element_text(size = 22)) +
theme(axis.title.y = element_text(size = 24)) +
theme(panel.border = element_blank())
print(p)
file_name <- paste(plot_dir,"/EDFigure_2_a__",gene,".pdf", sep = "")
pdf(file_name, height = 8,width = 10)
print(p)
dev.off()
}Extended Data Fig 3 a
Volcano plot highlighting selected from comparison between organoids at P4-diff phase of the IVMC protocol to organoids not treated with hormones but undergone breakdown (B_NH) at the same timepoint.
define contrast
(colnames(design_all) == "tp1_NB_H") - (colnames(design_all) == "tp1_NB_NH") [1] 0 0 0 0 0 0 1 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
qlf_3A<- glmQLFTest(fit_all, contrast = as.vector((colnames(design_all) == "tp1_NB_H") - (colnames(design_all) == "tp1_NB_NH")))
tt_3A <- as.data.frame(topTags(qlf_3A,n=nrow(counts.all.asDGEList)))
head(tt_3A) genes logFC logCPM F PValue FDR
PHACTR3 PHACTR3 6.128475 -0.6838466 187.3729 6.317181e-28 1.101590e-23
CALCA CALCA -5.766951 2.2525394 176.1686 5.760801e-26 4.586631e-22
PFKM PFKM -1.891974 5.7037535 211.0908 7.890752e-26 4.586631e-22
VSTM2L VSTM2L 4.708261 0.4139683 182.9378 2.069740e-25 9.023032e-22
GATA2 GATA2 9.389917 0.7377046 171.6309 2.743281e-24 9.567466e-21
EVA1C EVA1C 3.113237 3.8054836 177.7423 1.924080e-23 5.592018e-20
define cut offs
FDR < 0.05 & logFC > 2.0 ~ up; FDR < 0.05 & logFC < -2.0 ~ down
# create significant (sig) column
results_3A <- mutate(tt_3A, sig = case_when(
FDR < 0.05 & logFC > 2.0 ~ up,
FDR < 0.05 & logFC < -2.0 ~ down,
TRUE ~ notsig))
sum(results_3A$sig == "Up")[1] 781
sum(results_3A$sig == "Down")[1] 290
file_name.table <- paste(table_dir,"/EDFigure_3_A.DEG.tab",sep = "")
write.table(results_3A, file_name.table,
quote = FALSE, row.names = FALSE, sep = "\t")define genes to label
genes_3A_up_label <- c("SULT1E1", "GATA2", "SCGB1D4", "ALPG", "PAEP", "SCGB1D2",
"SERPINA5", "S100P", "SCGB2A2", "SCGB2A1", "PTGS2", "ENPP3",
"KRT17", "TPPP3", "IL1A")
genes_3A_down_label <- c("NCAM1", "SHISA3", "KRT4", "FZD10", "SSTR1", "SCTR",
"FAM20A", "PTPRG", "RXRG", "MMP10", "FN1", "S100A9",
"MMP3", "MMP1", "ASCL2")
genes_3A_label <- c(genes_3A_up_label,genes_3A_down_label)
# add gene labels
results_3A <- mutate(results_3A, labels = ifelse(genes %in% genes_3A_label, genes, ""))volcano plot
# set up base plot
p <- ggplot(data = results_3A, aes(x = logFC, y = -log10(PValue))) +
geom_point(aes(colour = sig)) +
xlim(-12, +12) +
scale_color_manual(name = "|log2FC| > 2, FDR < 0.05", values = volcano_colors) +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
axis.line = element_line(colour = "black"),
legend.key = element_blank(),
legend.position = "bottom",
# increase text sizes
axis.text = element_text(size = 18),
axis.title = element_text(size = 22),
plot.title = element_text(size = 26, hjust = 0.5),
legend.title = element_text(size = 20),
legend.text = element_text(size = 20)
) +
xlab('log2 FC') +
ylab('-log10 p-value') +
guides(color = guide_legend(override.aes = list(size=5))) +
# add gene labels
geom_text_repel(data = filter(results_3A, labels != ""),
aes(label = labels),
min.segment.length = 0,
max.overlaps = Inf,
size = 7,
show.legend = FALSE) +
ggtitle('P4-diff (IVMC vs NH)')
print(p)file_name <- paste(plot_dir,"/EDFigure_3_a.pdf", sep = "")
pdf(file_name,height = 8,width = 10)
p
dev.off()png
2
Extended Data Fig 3 d
Volcano plot highlighting selected DEGs from comparison between organoids of the IVMC protocol at P4-diff phase to organoids at hormonal withdrawal 48 h.
define contrast
(colnames(design_all) == "tp3_NB_H") - (colnames(design_all) == "tp1_NB_H") - ((colnames(design_all) == "tp3_NB_NH") - (colnames(design_all) == "tp1_NB_NH") ) [1] 0 0 0 0 0 0 -1 1 0 0 1 -1 0 0 0 0 0 0 0 0 0 0 0 0
qlf_3D<- glmQLFTest(fit_all, contrast = as.vector( (colnames(design_all) == "tp3_NB_H") - (colnames(design_all) == "tp1_NB_H") - ((colnames(design_all) == "tp3_NB_NH") - (colnames(design_all) == "tp1_NB_NH") ) ) )
tt_3D <- as.data.frame(topTags(qlf_3D,n=nrow(counts.all.asDGEList)))
head(tt_3D) genes logFC logCPM F PValue FDR
HMGA2-AS1 HMGA2-AS1 3.790418 1.595473 49.92480 1.749615e-10 1.926207e-06
SIK1B SIK1B -2.174675 4.245860 50.13533 2.509336e-10 1.926207e-06
PCCA PCCA -1.535965 4.690401 48.55769 4.255336e-10 1.926207e-06
NR4A1 NR4A1 -2.513059 3.856891 48.44720 4.418413e-10 1.926207e-06
KRT17 KRT17 -3.379015 7.477419 45.55016 1.185183e-09 3.715991e-06
KRT16 KRT16 -4.403312 2.707835 45.32271 1.289910e-09 3.715991e-06
define cut offs
FDR < 0.05 & logFC > 2.0 ~ up; FDR < 0.05 & logFC < -2.0 ~ down
# create significant (sig) column
results_3D <- mutate(tt_3D, sig = case_when(
FDR < 0.05 & logFC > 2.0 ~ up,
FDR < 0.05 & logFC < -2.0 ~ down,
TRUE ~ notsig))
sum(results_3D$sig == "Up")[1] 41
sum(results_3D$sig == "Down")[1] 63
file_name.table <- paste(table_dir,"/EDFigure_3_d.DEG.tab",sep = "")
write.table(results_3D, file_name.table,
quote = FALSE, row.names = FALSE, sep = "\t")define genes to label
genes_3D_up_label <- c("HMGA2-AS1", "SIGLEC15", "MEOX1", "VTCN1", "RXRG", "MT1E",
"MMP10", "CASP1", "MT1G", "SSTR1")
genes_3D_down_label <- c("PHACTR3", "KRT16", "SERPINA3", "KRT17", "SFRP1", "NOTUM",
"S100P", "CXCL8", "PAEP", "PLAUR", "AREG")
genes_3D_label <- c(genes_3D_up_label,genes_3D_down_label)
# add gene labels
results_3D <- mutate(results_3D, labels = ifelse(genes %in% genes_3D_label, genes, ""))volcano plot
# set up base plot
p <- ggplot(data = results_3D, aes(x = logFC, y = -log10(PValue))) +
geom_point(aes(colour = sig)) +
xlim(-12, +12) +
scale_color_manual(name = "|log2FC| > 2, FDR < 0.05", values = volcano_colors) +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
axis.line = element_line(colour = "black"),
legend.key = element_blank(),
legend.position = "bottom",
# increase text sizes
axis.text = element_text(size = 18),
axis.title = element_text(size = 22),
plot.title = element_text(size = 26, hjust = 0.5),
legend.title = element_text(size = 20),
legend.text = element_text(size = 20)
) +
xlab('log2 FC') +
ylab('-log10 p-value') +
guides(color = guide_legend(override.aes = list(size=5))) +
# add gene labels 4
geom_text_repel(data = filter(results_3D, labels != ""),
aes(label = labels),
min.segment.length = 0,
max.overlaps = Inf,
size = 7,
show.legend = FALSE) +
ggtitle('Horm.Withdr. 48h vs P4-diff (controlled for hormone)')
print(p)file_name <- paste(plot_dir,"/EDFigure_3_d.pdf", sep = "")
pdf(file_name,height = 8,width = 10)
p
dev.off()png
2
Extended Data Fig 3 e
Volcano plot highlighting selected DEGs from comparison of organoids between E2-diff 48h phase of the IVMC protocol to organoids not treated with hormones but undergone breakdown (B_NH) at the same timepoint.
define contrast
(colnames(design_all) == "tp6_B_H") - (colnames(design_all) == "tp6_B_NH") [1] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 -1 0 0
qlf_3E<- glmQLFTest(fit_all, contrast = as.vector((colnames(design_all) == "tp6_B_H") - (colnames(design_all) == "tp6_B_NH")))
tt_3E <- as.data.frame(topTags(qlf_3E,n=nrow(counts.all.asDGEList)))
head(tt_3E) genes logFC logCPM F PValue FDR
GATA2-AS1 GATA2-AS1 2.920502 -0.6587471 88.07993 2.244719e-16 3.328586e-12
FBN2 FBN2 -2.313733 5.9715179 96.78891 3.817624e-16 3.328586e-12
SOBP SOBP -2.109622 2.0542466 74.84218 1.322183e-13 7.685408e-10
OLFM4 OLFM4 8.552628 1.1146224 76.64165 3.246182e-13 1.415173e-09
CYP1A1 CYP1A1 -6.033534 0.6892229 64.94589 7.830694e-13 2.731033e-09
CALCA CALCA -3.676129 2.2525394 60.06990 2.289224e-12 6.653249e-09
define cut offs
FDR < 0.05 & logFC > 2.0 ~ up; FDR < 0.05 & logFC < -2.0 ~ down
# create significant (sig) column
results_3E <- mutate(tt_3E, sig = case_when(
FDR < 0.05 & logFC > 2.0 ~ up,
FDR < 0.05 & logFC < -2.0 ~ down,
TRUE ~ notsig))
sum(results_3E$sig == "Up")[1] 145
sum(results_3E$sig == "Down")[1] 76
file_name.table <- paste(table_dir,"/EDFigure_3_e.DEG.tab",sep = "")
write.table(results_3E, file_name.table,
quote = FALSE, row.names = FALSE, sep = "\t")define genes to label
genes_3E_up_label <- c("OLFM4", "COL1A2", "PGR", "CDC20B", "GMNC", "GREB1", "GATA5", "CCN5")
genes_3E_down_label <- c("KRT5", "MMP1", "PAX2", "FRZB", "MMP10", "SHISA3", "PLXNA4", "SSTR1")
genes_3E_label <- c(genes_3E_up_label,genes_3E_down_label)
# add gene labels
results_3E <- mutate(results_3E, labels = ifelse(genes %in% genes_3E_label, genes, ""))volcano plot
# set up base plot
p <- ggplot(data = results_3E, aes(x = logFC, y = -log10(PValue))) +
geom_point(aes(colour = sig)) +
xlim(-12, +12) +
scale_color_manual(name = "|log2FC| > 2, FDR < 0.05", values = volcano_colors) +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
axis.line = element_line(colour = "black"),
legend.key = element_blank(),
legend.position = "bottom",
# increase text sizes
axis.text = element_text(size = 18),
axis.title = element_text(size = 22),
plot.title = element_text(size = 26, hjust = 0.5),
legend.title = element_text(size = 20),
legend.text = element_text(size = 20)
) +
xlab('log2 FC') +
ylab('-log10 p-value') +
guides(color = guide_legend(override.aes = list(size=5))) +
# add gene labels 4
geom_text_repel(data = filter(results_3E, labels != ""),
aes(label = labels),
min.segment.length = 0,
max.overlaps = Inf,
size = 7,
show.legend = FALSE) +
ggtitle('E2-diff 48h (IVMC vs B_NH)')
print(p)file_name <- paste(plot_dir,"/EDFigure_3_e.pdf", sep = "")
pdf(file_name,height = 8,width = 10)
p
dev.off()png
2
Extended Data Fig 4 a
using B0xx_tp0_NB_NH, B0xx_tp1_NB_H, B0xx_tp2_NB_H, B0xx_tp3_NB_H, B0xx_tp4_B_H, B0xx_tp5_B_H, B0xx_tp6_B_H this has been “sel_IVMC”
Line plots showing the mean expression levels (in cpm) of SOX9, FUT4, THBS1 and KRT13 genes across different timepoints in the IVMC protocol. The red line represents the mean cpm values across donors, while the shaded blue region indicates the standard deviation around the mean.
genes_for_line_plots <- c("SOX9", "FUT4","THBS1", "KRT13")
for (gene in genes_for_line_plots ) {
expr.values.sel_IVMC <- counts.bc.cpm.sel_IVMC[gene,]
# data to plot
df2plot <- data.frame(samples=colnames(counts.bc.cpm.sel_IVMC), cpm=expr.values.sel_IVMC)
# combine with metadata
df2plot_withMetadata <- merge(df2plot, metadata.sel_IVMC, by.x = "samples", by.y = "sample", sort = FALSE)
# aggregate
df2plot_aggregate <- data.frame(general_timepoint = aggregate(df2plot_withMetadata$cpm, list(df2plot_withMetadata$general_timepoint), FUN=mean)[,1],
cpm_mean = aggregate(df2plot_withMetadata$cpm, list(df2plot_withMetadata$general_timepoint), FUN=mean)[,2],
cpm_mean_plus_sd = aggregate(df2plot_withMetadata$cpm, list(df2plot_withMetadata$general_timepoint), FUN=mean)[,2] +
aggregate(df2plot_withMetadata$cpm, list(df2plot_withMetadata$general_timepoint), FUN=sd)[,2],
cpm_mean_minus_sd = aggregate(df2plot_withMetadata$cpm, list(df2plot_withMetadata$general_timepoint), FUN=mean)[,2] -
aggregate(df2plot_withMetadata$cpm, list(df2plot_withMetadata$general_timepoint), FUN=sd)[,2] )
p <- ggplot(df2plot_aggregate, aes(y=cpm_mean, x=general_timepoint, group = 1)) +
geom_line(color = "red") +
geom_point(color = "red", size = 2) +
geom_ribbon(aes(x = general_timepoint, ymax = cpm_mean_plus_sd, ymin = cpm_mean_minus_sd),
alpha = 0.3, fill = "lightblue") +
ggtitle(bquote(italic(.(gene))) ) +
ylab("cpm (Mean ± SD across donors)") +
xlab("") +
theme_bw() +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
theme(axis.line = element_line(colour = "black")) +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 24)) +
theme(axis.text.y = element_text(size = 22)) +
theme(axis.title.y = element_text(size = 24)) +
theme(panel.border = element_blank())
print(p)
file_name <- paste(plot_dir,"/EDFigure_4_a__",gene,".pdf", sep = "")
pdf(file_name, height = 8,width = 10)
print(p)
dev.off()
}Extended Data Fig 5 a
Volcano plot highlighting selected DEGs comparing EOs at Post-breakdown 24h phase of the IVMC protocol to control EOs (B_NH) at the same timepoint.
(colnames(design_all) == "tp4_B_H") - (colnames(design_all) == "tp4_B_NH") [1] 0 0 0 0 0 0 0 0 0 0 0 0 1 -1 0 0 0 0 0 0 0 0 0 0
qlf_5A<- glmQLFTest(fit_all, contrast = as.vector((colnames(design_all) == "tp4_B_H") - (colnames(design_all) == "tp4_B_NH")))
tt_5A <- as.data.frame(topTags(qlf_5A,n=nrow(counts.all.asDGEList)))
head(tt_5A) genes logFC logCPM F PValue FDR
CSF2 CSF2 3.142131 -1.1908599 118.28243 3.496674e-20 6.097500e-16
DMBT1 DMBT1 5.182767 0.2282031 83.83561 1.504947e-15 1.312163e-11
ADGRF5 ADGRF5 4.411076 -0.4612762 76.12266 4.642136e-14 2.698319e-10
FBN2 FBN2 -2.007908 5.9715179 75.67786 1.022128e-13 4.455968e-10
DUOXA2 DUOXA2 5.340019 2.9917768 67.57640 8.298885e-13 2.894319e-09
CALCA CALCA -3.477386 2.2525394 53.30974 2.511962e-11 7.300598e-08
define cut offs
FDR < 0.05 & logFC > 2.0 ~ up; FDR < 0.05 & logFC < -2.0 ~ down
# create significant (sig) column
results_5A <- mutate(tt_5A, sig = case_when(
FDR < 0.05 & logFC > 2.0 ~ up,
FDR < 0.05 & logFC < -2.0 ~ down,
TRUE ~ notsig))
sum(results_5A$sig == "Up")[1] 134
sum(results_5A$sig == "Down")[1] 25
file_name.table <- paste(table_dir,"/EDFigure_5_a.DEG.tab",sep = "")
write.table(results_5A, file_name.table,
quote = FALSE, row.names = FALSE, sep = "\t")define genes to label
genes_5A_up_label <- c("DUOXA2", "DUOX2", " IL19", "SAA2", "SAA1", "SAA4", "TREM1",
"CCR7", "MUC13", "IL10", "CCL20", "IRAK3", "PF4V1")
genes_5A_down_label <- c("FBN2", "CALCA", "SHISA3", "KRT4", "CYP1A2", "NGFR", "KRT5",
"ESRRB", "SSTR1", "PLXNA4", "FRZB")
genes_5A_label <- c(genes_5A_up_label,genes_5A_down_label)
# add gene labels
results_5A <- mutate(results_5A, labels = ifelse(genes %in% genes_5A_label, genes, ""))volcano plot
# set up base plot
p <- ggplot(data = results_5A, aes(x = logFC, y = -log10(PValue))) +
geom_point(aes(colour = sig)) +
xlim(-12, +12) +
scale_color_manual(name = "|log2FC| > 2, FDR < 0.05", values = volcano_colors) +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
axis.line = element_line(colour = "black"),
legend.key = element_blank(),
legend.position = "bottom",
# increase text sizes
axis.text = element_text(size = 18),
axis.title = element_text(size = 22),
plot.title = element_text(size = 26, hjust = 0.5),
legend.title = element_text(size = 20),
legend.text = element_text(size = 20)
) +
xlab('log2 FC') +
ylab('-log10 p-value') +
guides(color = guide_legend(override.aes = list(size=5))) +
# add gene labels
geom_text_repel(data = filter(results_5A, labels != ""),
aes(label = labels),
min.segment.length = 0,
max.overlaps = Inf,
size = 7,
show.legend = FALSE) +
ggtitle('Post-breakdown 24h (IVMC vs B_NH)')
print(p)file_name <- paste(plot_dir,"/EDFigure_5_a.pdf", sep = "")
pdf(file_name,height = 8,width = 10)
p
dev.off()png
2
Extended Data Fig 5 b
Biological processes enriched in Post-breakdown 24h phase of the IVMC protocol using genes upregulated in A.
# all upregulated genes
genes_5A_up <- results_5A$genes[results_5A$sig == "Up"]
gostres <- gost(query = genes_5A_up, custom_bg = results_5A$genes, organism = "hsapiens", correction_method = "fdr")Detected custom background input, domain scope is set to 'custom'.
head(gostres$result) query significant p_value term_size query_size intersection_size
1 query_1 TRUE 0.01054274 1 121 1
2 query_1 TRUE 0.01054274 1 121 1
3 query_1 TRUE 0.01054274 1 121 1
4 query_1 TRUE 0.01054274 1 121 1
5 query_1 TRUE 0.01054274 1 121 1
6 query_1 TRUE 0.01054274 1 121 1
precision recall term_id source term_name
1 0.008264463 1 CORUM:7507 CORUM Potassium channel complex (KCNB1, KCNE1)
2 0.008264463 1 CORUM:426 CORUM Meprin A
3 0.008264463 1 CORUM:845 CORUM PCI-PSA-SCG2 complex
4 0.008264463 1 CORUM:1439 CORUM PTGS2 homodimer complex
5 0.008264463 1 CORUM:5389 CORUM SERPINA3-CTSG complex
6 0.008264463 1 CORUM:7506 CORUM Potassium channel complex (KCNB1, KCNE2)
effective_domain_size source_order parents
1 15781 2863 CORUM:00....
2 15781 244 CORUM:00....
3 15781 481 CORUM:00....
4 15781 752 CORUM:00....
5 15781 1666 CORUM:00....
6 15781 2862 CORUM:00....
unique(gostres$result$source) [1] "CORUM" "GO:BP" "GO:CC" "GO:MF" "HP" "HPA" "KEGG" "REAC" "TF"
[10] "WP"
gostres_results.BP <- gostres$result[gostres$result$source == "GO:BP",]
unique(gostres_results.BP$source)[1] "GO:BP"
gostres_results.BP$neg_log10Pvalue <- ( -log10(gostres_results.BP$p_value) )
gostres_results.BP <- gostres_results.BP[order(gostres_results.BP$neg_log10Pvalue, decreasing = TRUE),]
head(gostres_results.BP[,c("term_name","neg_log10Pvalue")]) term_name neg_log10Pvalue
10 cilium movement 8.691102
11 cilium movement involved in cell motility 8.317602
12 cilium-dependent cell motility 8.317602
13 cilium or flagellum-dependent cell motility 8.317602
14 cell motility 6.706811
15 acute inflammatory response 6.473471
file_name.table <- paste(table_dir,"/EDFigure_5_b.tab",sep = "")
write.table(gostres_results.BP[,c("term_name","neg_log10Pvalue")], file_name.table,
quote = FALSE, row.names = FALSE, sep = "\t")
representative_GO_terms <- c("cilium movement",
"cell motility",
"acute inflammatory response",
"acute-phase response",
"defense response",
"sperm motility",
"response to bacterium",
"leukocyte migration",
"neutrophil chemotaxis",
"extracellular transport")
head(gostres_results.BP[,c("term_name","neg_log10Pvalue")]) term_name neg_log10Pvalue
10 cilium movement 8.691102
11 cilium movement involved in cell motility 8.317602
12 cilium-dependent cell motility 8.317602
13 cilium or flagellum-dependent cell motility 8.317602
14 cell motility 6.706811
15 acute inflammatory response 6.473471
gostres_results.BP$term_name[gostres_results.BP$term_name %in% representative_GO_terms] [1] "cilium movement" "cell motility"
[3] "acute inflammatory response" "acute-phase response"
[5] "defense response" "sperm motility"
[7] "response to bacterium" "leukocyte migration"
[9] "neutrophil chemotaxis" "extracellular transport"
data2plot <- gostres_results.BP[gostres_results.BP$term_name %in% representative_GO_terms,]
data2plot$term_name <- factor(data2plot$term_name, levels = rev(data2plot$term_name))
p <- ggplot(data2plot, aes(x=neg_log10Pvalue,y=term_name)) +
geom_point(size = 10, color = "#00C5CD") +
xlab('-log10 p-value') +
xlim(0, 10) +
ylab('') +
theme_bw() +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
theme(axis.text.y = element_text(size=22)) +
theme(axis.text.x = element_text(size=18)) +
theme(axis.title.x =element_text(size=18)) +
theme(panel.border = element_blank(), axis.line = element_line())
print(p) file_name <- paste(plot_dir,"/EDFigure_5_b.pdf", sep = "")
pdf(file_name,height = 8,width = 7)
print(p)
dev.off() png
2
Extended Data Fig 9 a
Heatmap depicting centered and scaled counts per million (cpm) of ligands involved in interactions between luminal early epithelial cells and other cell types, across batch corrected samples of EOs in the IVMC protocol.
genes <- c("WNT7A", "IL6", "CXCL1", "CXCL2", "CXCL3", "CXCL6", "CXCL8", "CXCL16", "ANXA1", "EDN1", "PDGFA",
"PDGFB", "AREG", "C3", "CSF1", "CSF3", "NAMPT", "VEGFA", "TNF", "LIF", "ADM", "GDF15", "SEMA3A")
# define data to plot
data2plot.bc <- data.frame()
for (gene in genes) {
# use counts.bc.cpm.sel_IVMC from figure 1e
data2plot.bc <- rbind(data2plot.bc, counts.bc.cpm.sel_IVMC[rownames(counts.bc.cpm.sel_IVMC) == gene,])
}
rownames(data2plot.bc) <- genes
colnames(data2plot.bc) <- colnames(counts.bc.cpm.sel_IVMC)
# center and scale
data2plot.bc.sc <- t(scale(t(data2plot.bc)))
heatmap_selected_genes<- Heatmap(data2plot.bc.sc, cluster_columns=FALSE, cluster_rows=TRUE,
heatmap_legend_param = list(legend_direction = "horizontal",
title = "cpm (centered \nand scaled)",
color_bar = "continuous"),
column_split = c(rep(0, 6),rep(1, 6),rep(2,6),rep(3, 6),rep(4,6),rep(5, 6),rep(6,6)),
column_title = "",
bottom_annotation = HeatmapAnnotation(foo = anno_block(gp = gpar(fill = timepoint_colors,
col = timepoint_colors),
labels = unique(metadata.sel_IVMC$description.alt),
labels_gp = gpar(col = heatmap_labels_colors, fontsize = 8))),
show_column_names = FALSE)
draw(heatmap_selected_genes, heatmap_legend_side="bottom")file_name <- paste(plot_dir,"/EDFigure_9_a.pdf", sep = "")
pdf(file_name,height = 10,width = 8)
draw(heatmap_selected_genes, heatmap_legend_side="bottom")
dev.off()png
2
Extended Data Fig 10 a
Bar plot showing the average transcripts per million (TPM) of WNT ligands at Post-breakdown 24h in EOs undergoing the IVMC protocol, analysed with bulk RNAseq.
using TPM for B0xx_tp4_H_B
metadata.sel3 <- mutate(as.data.frame(metadata), sel3 = case_when(
timepoint == "tp4" & hormones == "H" & breakdown == "B" ~ TRUE,
TRUE ~ FALSE))
sel3 <- metadata$sample[metadata.sel3$sel3]
sel3[1] "B032_tp4_B_H" "B044_tp4_B_H" "B050_tp4_B_H" "B066_tp4_B_H" "B078_tp4_B_H"
[6] "B080_tp4_B_H"
tpm.selected <- all_tpm[,sel3]
paralogues.WNT <- rownames(tpm.selected)[grep("WNT",rownames(tpm.selected))]
# exclude ncRNA and order
paralogues.WNT <- paralogues.WNT[c(10,5,11,6,14,17,8,7,9,15,19,2,3,13,16,12,18,4,1)]
paralogues.WNT [1] "WNT1" "WNT2" "WNT2B" "WNT3" "WNT3A" "WNT4" "WNT5A" "WNT5B"
[9] "WNT6" "WNT7A" "WNT7B" "WNT8A" "WNT8B" "WNT9A" "WNT9B" "WNT10A"
[17] "WNT10B" "WNT11" "WNT16"
tpm.selected.WNT <- data.frame(WNT = as.vector(as.matrix(tpm.selected[paralogues.WNT,])),
paralogue = rep(paralogues.WNT,6),
donor = (c(rep("B032",19), rep("B044",19), rep("B050",19), rep("B066",19), rep("B078",19), rep("B080",19))))
tpm.selected.WNT$paralogue <- factor(tpm.selected.WNT$paralogue, levels = tpm.selected.WNT$paralogue[1:19])
# and plot
ggplot(tpm.selected.WNT, aes(y=WNT)) +
geom_bar(aes(x=paralogue, fill = donor), data = tpm.selected.WNT, stat="identity", position = "dodge") +
ylab('TPM') +
ggtitle("Post-breakdown 24h") +
scale_fill_manual(values = donor_colors) +
theme_bw() +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
theme(axis.line = element_line(colour = "black")) +
theme(axis.text.x = element_text(angle = 90)) +
theme(panel.border = element_blank()) p <- ggplot(tpm.selected.WNT, aes(x=paralogue,y=WNT)) +
stat_summary(fun=mean, geom="bar", fill="orange", ) +
geom_point(size = 4) +
labs(y='TPM',x=NULL) +
ggtitle("Post-breakdown 24h") +
theme_bw() +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
theme(axis.line = element_line(colour = "black")) +
theme(axis.text.x = element_text(angle = 90, size=24, hjust=0.90,vjust=0.5)) +
theme(axis.text.y = element_text(size=18)) +
theme(axis.title.y =element_text(size=18)) +
theme(axis.ticks = element_blank()) +
theme(panel.border = element_blank())
print(p) file_name <- paste(plot_dir,"/EDFigure_10_a.pdf", sep = "")
pdf(file_name,height = 6,width = 8)
print(p)
dev.off() png
2
Extended Data Fig 10 b
Bar plot showing the average transcripts per million (TPM) of FZD receptors during at Post-breakdown 24h in EOs undergoing the IVMC protocol, analysed with bulk RNAseq.
using TPM for B0xx_tp4_H_B
# use tpm.selected from 10 a
paralogues.FZD <- rownames(tpm.selected)[grep("FZD",rownames(tpm.selected))]
# exclude ncRNA and order
paralogues.FZD <- paralogues.FZD[c(4,9,1,7,5,6,3,8,10,2)]
paralogues.FZD [1] "FZD1" "FZD2" "FZD3" "FZD4" "FZD5" "FZD6" "FZD7" "FZD8" "FZD9"
[10] "FZD10"
tpm.selected.FZD <- data.frame(FZD = as.vector(as.matrix(tpm.selected[paralogues.FZD,])),
paralogue = rep(paralogues.FZD,6),
donor = (c(rep("B032",10), rep("B044",10), rep("B050",10), rep("B066",10), rep("B078",10), rep("B080",10))))
tpm.selected.FZD$paralogue <- factor(tpm.selected.FZD$paralogue, levels = tpm.selected.FZD$paralogue[1:10])
# and plot
ggplot(tpm.selected.FZD, aes(y=FZD)) +
geom_bar(aes(x=paralogue, fill = donor), data = tpm.selected.FZD, stat="identity", position = "dodge") +
ylab('TPM') +
ggtitle("Post-breakdown 24h") +
scale_fill_manual(values = donor_colors) +
theme_bw() +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
theme(axis.line = element_line(colour = "black")) +
theme(axis.text.x = element_text(angle = 90)) +
theme(panel.border = element_blank()) p <- ggplot(tpm.selected.FZD, aes(x=paralogue,y=FZD)) +
stat_summary(fun=mean, geom="bar", fill="magenta4", ) +
geom_point(size = 4) +
labs(y='TPM',x=NULL) +
ggtitle("Post-breakdown 24h") +
theme_bw() +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
theme(axis.line = element_line(colour = "black")) +
theme(axis.text.x = element_text(angle = 90, size=24, hjust=0.90,vjust=0.5)) +
theme(axis.text.y = element_text(size=18)) +
theme(axis.title.y =element_text(size=18)) +
theme(axis.ticks = element_blank()) +
theme(panel.border = element_blank())
print(p) file_name <- paste(plot_dir,"/EDFigure_10_b.pdf", sep = "")
pdf(file_name,height = 6,width = 8)
print(p)
dev.off() png
2
sessionInfo
sessionInfo()R version 4.5.0 (2025-04-11)
Platform: x86_64-pc-linux-gnu
Running under: Rocky Linux 8.10 (Green Obsidian)
Matrix products: default
BLAS/LAPACK: FlexiBLAS OPENBLAS; LAPACK version 3.12.0
locale:
[1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
[3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
[5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
[7] LC_PAPER=en_US.UTF-8 LC_NAME=C
[9] LC_ADDRESS=C LC_TELEPHONE=C
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
time zone: Europe/Zurich
tzcode source: system (glibc)
attached base packages:
[1] grid stats graphics grDevices utils datasets methods
[8] base
other attached packages:
[1] gprofiler2_0.2.3 ComplexHeatmap_2.24.0 PCAtools_2.20.0
[4] ggrepel_0.9.6 ggplot2_3.5.2 dplyr_1.1.4
[7] edgeR_4.6.2 limma_3.64.1 sva_3.56.0
[10] BiocParallel_1.42.1 genefilter_1.90.0 mgcv_1.9-3
[13] nlme_3.1-168
loaded via a namespace (and not attached):
[1] bitops_1.0-9 DBI_1.2.3
[3] rlang_1.1.6 magrittr_2.0.3
[5] clue_0.3-66 GetoptLong_1.0.5
[7] matrixStats_1.5.0 compiler_4.5.0
[9] RSQLite_2.4.1 DelayedMatrixStats_1.30.0
[11] png_0.1-8 vctrs_0.6.5
[13] reshape2_1.4.4 stringr_1.5.1
[15] shape_1.4.6.1 pkgconfig_2.0.3
[17] crayon_1.5.3 fastmap_1.2.0
[19] magick_2.8.7 XVector_0.48.0
[21] labeling_0.4.3 rmarkdown_2.29
[23] UCSC.utils_1.4.0 purrr_1.0.4
[25] bit_4.6.0 xfun_0.52
[27] cachem_1.1.0 beachmat_2.24.0
[29] GenomeInfoDb_1.44.0 jsonlite_2.0.0
[31] blob_1.2.4 DelayedArray_0.34.1
[33] cluster_2.1.8.1 irlba_2.3.5.1
[35] parallel_4.5.0 R6_2.6.1
[37] stringi_1.8.7 RColorBrewer_1.1-3
[39] Rcpp_1.0.14 iterators_1.0.14
[41] knitr_1.50 IRanges_2.42.0
[43] Matrix_1.7-3 splines_4.5.0
[45] tidyselect_1.2.1 dichromat_2.0-0.1
[47] abind_1.4-8 yaml_2.3.10
[49] doParallel_1.0.17 codetools_0.2-20
[51] lattice_0.22-7 tibble_3.3.0
[53] plyr_1.8.9 Biobase_2.68.0
[55] withr_3.0.2 KEGGREST_1.48.1
[57] evaluate_1.0.4 survival_3.8-3
[59] circlize_0.4.16 Biostrings_2.76.0
[61] pillar_1.10.2 MatrixGenerics_1.20.0
[63] foreach_1.5.2 stats4_4.5.0
[65] plotly_4.11.0 generics_0.1.4
[67] RCurl_1.98-1.17 S4Vectors_0.46.0
[69] sparseMatrixStats_1.20.0 scales_1.4.0
[71] xtable_1.8-4 glue_1.8.0
[73] lazyeval_0.2.2 tools_4.5.0
[75] data.table_1.17.6 ScaledMatrix_1.16.0
[77] annotate_1.86.1 locfit_1.5-9.12
[79] XML_3.99-0.18 Cairo_1.6-2
[81] cowplot_1.1.3 tidyr_1.3.1
[83] colorspace_2.1-1 AnnotationDbi_1.70.0
[85] GenomeInfoDbData_1.2.14 BiocSingular_1.24.0
[87] cli_3.6.5 rsvd_1.0.5
[89] viridisLite_0.4.2 S4Arrays_1.8.1
[91] gtable_0.3.6 digest_0.6.37
[93] BiocGenerics_0.54.0 SparseArray_1.8.0
[95] dqrng_0.4.1 rjson_0.2.23
[97] htmlwidgets_1.6.4 farver_2.1.2
[99] memoise_2.0.1 htmltools_0.5.8.1
[101] lifecycle_1.0.4 httr_1.4.7
[103] GlobalOptions_0.1.2 statmod_1.5.0
[105] bit64_4.6.0-1