params
## $gtf
## [1] "reference/Schizosaccharomyces_pombe.ASM294v2.55.gtf"
##
## $idmap
## [1] "data/id_mapping_table.txt"
##
## $rds500
## [1] "data/ipms_500_sce.rds"
##
## $rdsrnaseq_del
## [1] "data/rnaseq_del.rds"
##
## $rdsrnaseq_ko
## [1] "data/rnaseq_ko.rds"
##
## $rnatracks_ko
## [1] "data/genome_browser_tracks_rnaseq_ko.csv.gz"
##
## $chiptracks_ko
## [1] "data/genome_browser_tracks_chipseq_ko.csv.gz"
##
## $dist_from_ne
## [1] "data/Fig7_lacR_quantification_distance_radius.txt"
##
## $dist_from_ne_doubleko
## [1] "data/Fig7_lacR_quantification_distance_radius_doubleKO.txt"
suppressPackageStartupMessages({
library(SingleCellExperiment)
library(ggplot2)
library(cowplot)
library(dplyr)
library(ggrepel)
library(colorspace)
library(DESeq2)
library(ggh4x)
library(ggtext)
library(patchwork)
library(ggsignif)
library(kableExtra)
library(scales)
library(broom)
})
source("params/plot_settings.R")
source("params/get_testres_function.R")
source("params/mapping_functions.R")
We load a SingleCellExperiment object, containing data and results from the high-salt IP-MS experiment.
sce500 <- readRDS(params$rds500)
Next, we load two SummarizedExperiment objects with quantifications from RNA-seq experiments.
rnaseq_del <- readRDS(params$rdsrnaseq_del)
rnaseq_ko <- readRDS(params$rdsrnaseq_ko)
idmap <- read.delim(params$idmap)
## Read data with distances from nuclear envelope + cell radius and
## calculate relative distance from NE
dist_ne <- bind_rows(
read.delim(params$dist_from_ne),
read.delim(params$dist_from_ne_doubleko)) |>
group_by(Condition, Replicate) |>
filter(Condition != "wt_dKO") |>
mutate(rel_distance_from_ne = Distance_from_nuc_envelope / Radius_of_cell) |>
mutate(Condition = ifelse(Condition == "ntu1D", "ntu1\U141E",
ifelse(Condition == "ntu2D", "ntu2\U141E",
ifelse(Condition == "dKO", "ntu1\U141Entu2\U141E", Condition)))) |>
mutate(SampleID = factor(paste0(Condition, "\n", Replicate),
levels = c("wt\nrep1", "wt\nrep2", "ntu1\U141E\nrep1",
"ntu1\U141E\nrep2", "ntu2\U141E\nrep1",
"ntu2\U141E\nrep2","ntu1\U141Entu2\U141E\nrep1",
"ntu1\U141Entu2\U141E\nrep2"))) |>
dplyr::ungroup()
## Remove one cell where Distance was slightly larger than the Radius
# table(dist_ne$rel_distance_from_ne > 1)
dist_ne <- dist_ne |>
filter(rel_distance_from_ne >= 0 & rel_distance_from_ne <= 1) |>
dplyr::mutate(Condition = factor(Condition,
levels = c("wt", "ntu1\U141E", "ntu2\U141E",
"ntu1\U141Entu2\U141E")),
Replicate = factor(Replicate,
levels = c("rep1", "rep2")))
## Get zone classification
zone_thresholds <- 1 - c(1, sqrt(2), sqrt(3)) / sqrt(3)
zone_byrep <- dist_ne |>
mutate(Zone = ifelse(rel_distance_from_ne > zone_thresholds[1], "Zone III",
ifelse(rel_distance_from_ne > zone_thresholds[2],
"Zone II", "Zone I"))) |>
group_by(Condition, Replicate, Zone) |>
tally() |>
group_by(Condition, Replicate) |>
mutate(fraction_by_zone = n / sum(n)) |>
mutate(Zone = factor(Zone, levels = c("Zone I", "Zone II", "Zone III"))) |>
ungroup()
zone_byrep |>
mutate(fraction_by_zone = label_percent(accuracy = 0.01)(fraction_by_zone)) |>
rename(`Number of cells` = n,
`Percentage of cells` = fraction_by_zone) |>
kbl() |>
kable_styling()
| Condition | Replicate | Zone | Number of cells | Percentage of cells |
|---|---|---|---|---|
| wt | rep1 | Zone I | 86 | 56.95% |
| wt | rep1 | Zone II | 59 | 39.07% |
| wt | rep1 | Zone III | 6 | 3.97% |
| wt | rep2 | Zone I | 98 | 53.26% |
| wt | rep2 | Zone II | 70 | 38.04% |
| wt | rep2 | Zone III | 16 | 8.70% |
| ntu1ᐞ | rep1 | Zone I | 51 | 36.17% |
| ntu1ᐞ | rep1 | Zone II | 75 | 53.19% |
| ntu1ᐞ | rep1 | Zone III | 15 | 10.64% |
| ntu1ᐞ | rep2 | Zone I | 92 | 41.07% |
| ntu1ᐞ | rep2 | Zone II | 88 | 39.29% |
| ntu1ᐞ | rep2 | Zone III | 44 | 19.64% |
| ntu2ᐞ | rep1 | Zone I | 47 | 33.33% |
| ntu2ᐞ | rep1 | Zone II | 75 | 53.19% |
| ntu2ᐞ | rep1 | Zone III | 19 | 13.48% |
| ntu2ᐞ | rep2 | Zone I | 73 | 38.02% |
| ntu2ᐞ | rep2 | Zone II | 81 | 42.19% |
| ntu2ᐞ | rep2 | Zone III | 38 | 19.79% |
| ntu1ᐞntu2ᐞ | rep1 | Zone I | 85 | 41.46% |
| ntu1ᐞntu2ᐞ | rep1 | Zone II | 90 | 43.90% |
| ntu1ᐞntu2ᐞ | rep1 | Zone III | 30 | 14.63% |
| ntu1ᐞntu2ᐞ | rep2 | Zone I | 64 | 32.82% |
| ntu1ᐞntu2ᐞ | rep2 | Zone II | 89 | 45.64% |
| ntu1ᐞntu2ᐞ | rep2 | Zone III | 42 | 21.54% |
## Average zone distribution over replicates
zone_avg <- zone_byrep |>
dplyr::group_by(Zone, Condition) |>
dplyr::summarize(avg_fraction_by_zone = mean(fraction_by_zone),
sd_fraction_by_zone = sd(fraction_by_zone),
.groups = "drop")
## Fraction of cells in Zone I
zone_byrep |> filter(Zone == "Zone I") |>
group_by(Zone, Condition) |>
summarize(average_percentage = label_percent(accuracy = 0.01)(mean(fraction_by_zone)),
individual_percentages = paste(label_percent(accuracy = 0.01)(fraction_by_zone),
collapse = ","),
.groups = "drop") |>
rename(`Average percentage` = average_percentage,
`Percentages for replicates` = individual_percentages) |>
kbl() |>
kable_styling()
| Zone | Condition | Average percentage | Percentages for replicates |
|---|---|---|---|
| Zone I | wt | 55.11% | 56.95%,53.26% |
| Zone I | ntu1ᐞ | 38.62% | 36.17%,41.07% |
| Zone I | ntu2ᐞ | 35.68% | 33.33%,38.02% |
| Zone I | ntu1ᐞntu2ᐞ | 37.14% | 41.46%,32.82% |
## Fraction of cells in Zone III
zone_byrep |> filter(Zone == "Zone III") |>
group_by(Zone, Condition) |>
summarize(average_percentage = label_percent(accuracy = 0.01)(mean(fraction_by_zone)),
individual_percentages = paste(label_percent(accuracy = 0.01)(fraction_by_zone),
collapse = ","),
.groups = "drop") |>
rename(`Average percentage` = average_percentage,
`Percentages for replicates` = individual_percentages) |>
kbl() |>
kable_styling()
| Zone | Condition | Average percentage | Percentages for replicates |
|---|---|---|---|
| Zone III | wt | 6.33% | 3.97%,8.70% |
| Zone III | ntu1ᐞ | 15.14% | 10.64%,19.64% |
| Zone III | ntu2ᐞ | 16.63% | 13.48%,19.79% |
| Zone III | ntu1ᐞntu2ᐞ | 18.09% | 14.63%,21.54% |
## Number of cells per condition
table(dist_ne$Replicate, dist_ne$Condition)
##
## wt ntu1ᐞ ntu2ᐞ ntu1ᐞntu2ᐞ
## rep1 151 141 141 205
## rep2 184 224 192 195
get_xlabs <- function(x) {
vapply(x, function(xx) sub("\n", "<br>", paste0("*", xx, "*", "<br>(n = ",
length(which(paste0(dist_ne$Condition, "\n",
dist_ne$Replicate) == xx)), ")"), fixed = TRUE), "")
}
## Calculate p-values (average values per sample followed by linear model)
dps <- dist_ne |>
group_by(SampleID, Condition, Replicate) |>
summarize(mean_dist = mean(rel_distance_from_ne),
median_dist = median(rel_distance_from_ne),
.groups = "drop")
(aggreg <- summary(lm(median_dist ~ Condition, data = dps)) |> tidy())
## # A tibble: 4 × 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 0.171 0.0120 14.3 0.000138
## 2 Conditionntu1ᐞ 0.0467 0.0169 2.76 0.0508
## 3 Conditionntu2ᐞ 0.0486 0.0169 2.87 0.0453
## 4 Conditionntu1ᐞntu2ᐞ 0.0581 0.0169 3.43 0.0265
## wt vs ntu1D
p_ntu1 <- paste0("p==", signif(aggreg$p.value[aggreg$term == "Conditionntu1\U141E"], 3))
## wt vs ntu2D
p_ntu2 <- paste0("p==", signif(aggreg$p.value[aggreg$term == "Conditionntu2\U141E"], 3))
## wt vs ntu1Dntu2D
p_ntu12 <- paste0("p==", signif(aggreg$p.value[aggreg$term == "Conditionntu1\U141Entu2\U141E"], 3))
(p_rel_dis <- ggplot(dist_ne,
aes(x = SampleID, y = rel_distance_from_ne,
fill = Condition)) +
geom_violin(trim = TRUE, bounds = c(0, 1)) +
geom_signif(annotations = c(p_ntu1, p_ntu2, p_ntu12),
xmin = c(1.5, 1.5, 1.5), xmax = c(3.5, 5.5, 7.5),
y = c(1.05, 1.15, 1.25),
tip_length = 0, step_increase = 0.1, parse = TRUE) +
geom_boxplot(width = 0.1, fill = "white") +
labs(x = "", y = "Distance from NE relative to radius") +
scale_fill_manual(values = c(na_color, main_colors[5], main_colors[1],
main_colors[3]),
name = "Condition") +
scale_x_discrete(labels = get_xlabs) +
scale_y_continuous(breaks = c(0, 0.5, 1), limits = c(0, 1.3)) +
theme_cowplot() +
theme(axis.text.x = element_markdown(size = 8),
legend.text = element_text(face = "italic")))
get_xlabs_bar <- function(x) {
vapply(x, function(xx) sub("\n", "<br>", paste0("*", xx, "*", "<br>(n = ",
length(which(dist_ne$Condition == xx)), ")"), fixed = TRUE), "")
}
(p_zone <- ggplot(zone_avg, aes(fill = Zone, y = 100 * avg_fraction_by_zone,
x = Condition)) +
geom_bar(position = "stack", stat = "identity") +
geom_text(aes(label = paste0(round(100 * avg_fraction_by_zone, 2), "%")),
position = position_stack(vjust = 0.5), size = 4) +
labs(x = "", y = "Relative zone distribution [%]") +
scale_fill_manual(values = c(main_colors[1], main_colors[3], main_colors[5]),
name = "") +
scale_x_discrete(labels = get_xlabs_bar) +
theme_cowplot() +
theme(axis.text.x = element_markdown()))
Here we extract and visualize the t-statistics for all proteins in the ntu1 and ntu2 high-salt IP-MS experiments. In each case the condition of interest is compared to a broad complement group consisting of all other high-salt IP-MS samples.
## Extract t-statistics for the two comparisons
rdhigh <- rowData(sce500)
contrasts_high <- c(
"Ntu1_500_plate_vs_compl_Ntu1_500_plate.t",
"Ntu2_500_plate_vs_compl_Ntu2_500_plate.t")
high <- as.data.frame(rdhigh[, c("einprotId", contrasts_high)]) |>
setNames(c("einprotId", "Ntu1", "Ntu2"))
## Plot t-statistics
(gg_ipdimer <- ggplot(high, aes(x = Ntu1, y = Ntu2)) +
geom_abline(data = high |>
dplyr::filter(einprotId %in% c("Ntu1", "Ntu2")),
aes(slope = mean(Ntu2 / Ntu1), intercept = 0),
linetype = "dashed", color = "grey50") +
geom_text_repel(data = high |>
dplyr::filter(einprotId %in% c("Ntu1", "Ntu2") |
Ntu2 > 5),
aes(label = einprotId), min.segment.length = 0) +
geom_point(alpha = 0.25) +
geom_point(data = high |>
dplyr::filter(einprotId %in% c("Ntu1", "Ntu2")) |>
dplyr::mutate(type = "Baits"),
mapping = aes(fill = type), color = main_colors[3],
shape = 21, alpha = 1, size = 2) +
scale_fill_manual(values = c(Baits = main_colors[3])) +
theme_cowplot() +
labs(x = "500 mM NaCl mod. t-stat. (Ntu1)",
y = "500 mM NaCl mod. t-stat. (Ntu2)") +
theme(strip.text = element_blank(),
legend.position = "inside",
legend.position.inside = c(0.8, 0.1)) +
guides(fill = guide_legend(nrow = 1, byrow = TRUE, title = "")))
wt_tpm <- assay(rnaseq_del[, rnaseq_del$Group_name == "wt_WT_mRNA"], "abundance")
genepos <- rowRanges(rnaseq_del)
mcols(genepos) <- NULL
genepos <- as.data.frame(genepos)
stopifnot(rownames(wt_tpm) == rownames(genepos))
genepos$ave_tpm <- rowMeans(wt_tpm)
genepos$id <- rownames(genepos)
chrom <- genepos["tna1", "seqnames"]
startpos <- genepos["tna1", "start"]
endpos <- genepos["tna1", "end"]
windowsize <- 30000
(genomeplot_tpm <-
ggplot(genepos |>
dplyr::filter(seqnames == chrom &
start > startpos - windowsize &
end < endpos + windowsize),
aes(x = start / 1e6, y = ave_tpm, label = id)) +
geom_hline(yintercept = 0, color = na_color) +
geom_point(size = 3, alpha = 0.8, aes(color = id == "tna1")) +
scale_color_manual(values = c(`TRUE` = main_colors[4], `FALSE` = pt_color)) +
geom_text_repel(min.segment.length = 0, max.overlaps = Inf, size = 3,
force = 3) +
labs(x = "Genome position (Mb)", y = "Gene expression (TPM)") +
scale_y_continuous(expand = expansion(mult = c(0, 0), add = c(200, 50))) +
theme_cowplot() +
theme(legend.position = "none"))
## Filter lowly expressed genes and create DESeqDataSet
rnaseqfilt_ko <- rnaseq_ko[rowSums(assay(rnaseq_ko, "counts") >= 5) >= 2, ]
dds <- DESeq2::DESeqDataSet(rnaseqfilt_ko, design = ~ MappedGene)
dds <- DESeq2::estimateSizeFactors(dds)
## Filter genes with high inferential variance
highInfRV <- 10^(-0.75)
dds <- dds[rowData(dds)$meanInfRV < highInfRV, ]
## Perform differential expression analysis
dds <- DESeq2::DESeq(dds)
groups <- setdiff(dds$MappedGene, "wt")
names(groups) <- groups
resList <- lapply(groups, function(gr) {
DESeq2::results(dds, contrast = c("MappedGene", gr, "wt"),
alpha = 0.05)
})
## Create MA plots
rngY <- range(sapply(resList, function(rl) rl$log2FoldChange))
rngX <- range(sapply(resList, function(rl) log10(rl$baseMean)))
maplots <- lapply(structure(names(resList), names = names(resList)), function(nm) {
ggplot(as.data.frame(resList[[nm]]),
aes(x = log10(baseMean), y = log2FoldChange)) +
geom_point(mapping = aes(color = abs(log2FoldChange) > 3 &
padj < 0.001,
alpha = abs(log2FoldChange) > 3 &
padj < 0.001)) +
scale_color_manual(values = c(`TRUE` = main_colors[4], `FALSE` = pt_color)) +
scale_alpha_manual(values = c(`TRUE` = 1, `FALSE` = 0.25)) +
coord_cartesian(xlim = rngX, ylim = rngY) +
geom_text_repel(data = as.data.frame(resList[[nm]]) |>
tibble::rownames_to_column("gene") |>
dplyr::filter(gene == nm |
(log2FoldChange > 4.5 & padj < 0.05)),
aes(label = gene),
min.segment.length = 0, max.overlaps = Inf) +
theme_cowplot() +
labs(x = "Mean expression", y = "log2(fold-change)",
title = paste0(nm, "\U141E")) +
theme(legend.position = "none",
plot.title = element_text(face = "italic"))
})
cowplot::plot_grid(
plotlist = maplots, nrow = 1, align = "hv", axis = "b"
)
## Correlation between log2 fold changes for ntu1/ntu2
stopifnot(all(rownames(resList$ntu1) == rownames(resList$ntu2)))
dfcorr <- data.frame(gene = rownames(resList$ntu1),
ntu1 = resList$ntu1$log2FoldChange,
ntu2 = resList$ntu2$log2FoldChange)
(corrplot <- list(
ggplot(dfcorr, aes(x = ntu1, y = ntu2)) +
geom_abline(slope = 1, intercept = 0, linetype = "dashed",
color = na_color) +
geom_point(alpha = 0.25) +
geom_text_repel(data = dfcorr |>
dplyr::filter(gene %in% c("tna1", "SPCC576.17c")),
aes(label = gene),
min.segment.length = 0, max.overlaps = Inf) +
labs(x = "*ntu1\U141E* log2(fold-change)",
y = "*ntu2\U141E* log2(fold-change)") +
theme_cowplot() +
theme(axis.title = element_markdown())
))
## [[1]]
## Add information about gene location to result lists
genepos <- rowRanges(dds)
mcols(genepos) <- NULL
genepos <- as.data.frame(genepos)
resList <- lapply(resList, function(rl) {
stopifnot(rownames(rl) == rownames(genepos))
cbind(rl, genepos) |>
as.data.frame() |>
tibble::rownames_to_column("gene")
})
## Subset to region of interest
chrom <- genepos["tna1", "seqnames"]
startpos <- genepos["tna1", "start"]
endpos <- genepos["tna1", "end"]
windowsize <- 30000
resListToPlot <- lapply(resList, function(rl) {
rl |> dplyr::filter(seqnames == chrom &
start > startpos - windowsize &
end < endpos + windowsize)
})
plotList <- lapply(names(resListToPlot), function(nm) {
ggplot(resListToPlot[[nm]],
aes(x = start, y = log2FoldChange,
color = abs(log2FoldChange) > 3 & padj < 0.001)) +
geom_rect(aes(xmin = endpos, xmax = startpos, ymin = -Inf, ymax = Inf),
color = lighten(main_colors[4], amount = 0.6), fill = NA,
alpha = 0.1, show.legend = FALSE) +
geom_point(alpha = 0.6, size = 2) +
scale_color_manual(values = c(`FALSE` = na_color,
`TRUE` = main_colors[4])) +
geom_text_repel(aes(label = ifelse(abs(log2FoldChange) > 3 & padj < 0.001,
as.character(gene), "")),
size = 2.5, max.overlaps = 30, force = 10) +
scale_x_continuous(labels = scales::label_number(scale = 1e-6)) +
scale_y_continuous(limits = c(-7, 7)) +
labs(x = "Genome position (Mb)", y = "log2(fold-change)",
title = paste0(nm, "\U141E")) +
theme_cowplot() +
theme(legend.position = "none",
plot.title = element_text(face = "italic"))
})
genomeplot <- cowplot::plot_grid(plotlist = plotList[c(2, 1)], ncol = 1)
covs_rna <- read.csv(params$rnatracks_ko) |>
left_join(data.frame(sample = colnames(rnaseq_ko),
libsize = colSums(assay(rnaseq_ko, "counts")))) |>
mutate(cpm = score / libsize * 1e6)
covs_chip <- read.csv(params$chiptracks_ko)
gtf <- rtracklayer::import(params$gtf)
goi <- c("med6", "SPAC1002.16c", "urg2", "urg3", "SPCC576.17c", "SPNCRNA.1263")
gtf <- subset(gtf, (gene_id %in% goi | gene_name %in% goi) & type == "gene") |>
as.data.frame() |>
left_join(idmap, by = join_by("gene_id" == "gene_stable_id")) |>
dplyr::select(seqnames, start, end, strand, unique_einprot_id) |>
mutate(y = ifelse(unique_einprot_id == "SPCC576.17c", 0, 1)) |>
mutate(start2 = ifelse(strand == "+", start, end),
end2 = ifelse(strand == "+", end, start)) |>
mutate(unique_einprot_id_label = ifelse(grepl("^SP", unique_einprot_id),
unique_einprot_id,
paste0("*", unique_einprot_id, "\U207A*")))
## Create data frames for plotting
plotdf_chip1 <- covs_chip |>
mutate(sample = sub("_rep[0-9]", "", sample)) |>
mutate(exptype = stringr::str_extract(sample, "ChIP|IN")) |>
mutate(pair = sub("_ChIP|_IN", "", sample)) |>
mutate(flag = .capitalize(sub("_*TAG", "", sub(".*_KO_", "", pair)))) |>
mutate(label = ifelse(pair == "ntu1_KO_ntu2TAG", "*ntu1\U141E*",
ifelse(pair == "ntu1_TAG", "*ntu2\U207A*",
ifelse(pair == "ntu2_TAG", "*ntu1\U207A*",
ifelse(pair == "ntu2_KO_ntu1TAG", "*ntu2\U141E*",
""))))) |>
mutate(cpm = score) |>
mutate(pos = start) |>
group_by(flag, seqnames) |>
mutate(cpmnorm = cpm / max(abs(cpm))) |>
ungroup() |>
filter(pair %in% c("ntu2_TAG", "ntu2_KO_ntu1TAG",
"ntu1_TAG", "ntu1_KO_ntu2TAG")) |>
mutate(pair = factor(pair, levels = c("ntu1_TAG", "ntu2_KO_ntu1TAG",
"ntu2_TAG", "ntu1_KO_ntu2TAG")))
table(plotdf_chip1$sample, plotdf_chip1$flag)
##
## Ntu1 Ntu2
## ntu1_KO_ntu2TAG_ChIP 0 14643
## ntu1_KO_ntu2TAG_IN 0 14643
## ntu1_TAG_ChIP 14643 0
## ntu1_TAG_IN 14643 0
## ntu2_KO_ntu1TAG_ChIP 14643 0
## ntu2_KO_ntu1TAG_IN 14643 0
## ntu2_TAG_ChIP 0 14643
## ntu2_TAG_IN 0 14643
table(plotdf_chip1$sample, plotdf_chip1$label)
##
## *ntu1⁺* *ntu1ᐞ* *ntu2⁺* *ntu2ᐞ*
## ntu1_KO_ntu2TAG_ChIP 0 14643 0 0
## ntu1_KO_ntu2TAG_IN 0 14643 0 0
## ntu1_TAG_ChIP 0 0 14643 0
## ntu1_TAG_IN 0 0 14643 0
## ntu2_KO_ntu1TAG_ChIP 0 0 0 14643
## ntu2_KO_ntu1TAG_IN 0 0 0 14643
## ntu2_TAG_ChIP 14643 0 0 0
## ntu2_TAG_IN 14643 0 0 0
plotdf_rna <- covs_rna |>
filter(sample %in% c("SPBC530.08_KO_mRNA_rep1",
"SPBC16G5.16_KO_mRNA_rep1",
"wt_KO_mRNA_rep1")) |>
mutate(sample = sub("_KO_mRNA_rep[0-9]", "", sample)) |>
mutate(sample = replace(sample, sample == "SPBC16G5.16", "ntu1\U141E")) |>
mutate(sample = replace(sample, sample == "SPBC530.08", "ntu2\U141E")) |>
mutate(gene = sample) |>
group_by(seqnames) |>
mutate(cpmnorm = cpm / max(abs(cpm))) |>
ungroup() |>
mutate(sample = factor(sample,
levels = c("wt", "ntu1\U141E", "ntu2\U141E")))
plotdf_label <- covs_rna |>
filter(sample == "wt_KO_mRNA_rep1") |>
mutate(cpmnorm = 0)
.get_empty_label <- function(x) {
c("ntu1_TAG" = "", "ntu2_KO_ntu1TAG" = "",
"ntu2_TAG" = "", "ntu1_KO_ntu2TAG" = "")
}
.add_space_to_label <- function(x) {
paste(x, " ")
}
maxIII <- max(plotdf_label$pos[plotdf_label$seqnames == "III"])
g1c <- ggplot(plotdf_label,
aes(x = pos, y = cpmnorm)) +
geom_line(color = "white") +
geom_segment(x = maxIII - 2000, xend = maxIII,
y = 0, yend = 0, color = "black") +
geom_text(data = data.frame(seqnames = "III",
x = maxIII - 1000,
y = 0,
label = "2kb"),
aes(x = x, y = y, label = label),
vjust = -1) +
facet_grid(~ seqnames, scales = "free_x", space = "free_x")
g1a <- ggplot(plotdf_chip1 |>
mutate(seqnames = paste0("chr", seqnames)),
aes(x = pos, y = cpmnorm)) +
geom_area(aes(group = exptype, fill = interaction(exptype, flag))) +
geom_richtext(data = plotdf_chip1 |>
mutate(seqnames = paste0("chr", seqnames)) |>
select(flag, pair, seqnames, pos, label) |>
filter(seqnames == "chrI") |>
filter(pos == min(pos)),
aes(x = pos, y = 0.65, label = label),
fill = NA, label.color = NA,
label.padding = grid::unit(rep(0, 4), "pt"),
hjust = 0, size = 4) +
scale_fill_manual(values = c(`ChIP.Ntu1` = main_colors[5],
`IN.Ntu1` = na_color,
`ChIP.Ntu2` = main_colors[1],
`IN.Ntu2` = na_color)) +
facet_nested(flag + pair ~ seqnames, scale = "free_x", space = "free_x",
switch = "y",
labeller = labeller(pair = .get_empty_label,
flag = .add_space_to_label),
nest_line = element_line(),
strip = strip_nested(size = "variable")) +
scale_y_continuous(limits = c(0, 0.9), expand = c(0, 0)) +
labs(y = "ChIP-seq")
g1b <- ggplot(plotdf_rna,
aes(x = pos, y = cpmnorm)) +
geom_area(aes(group = strand, fill = gene)) +
scale_fill_manual(values = c(wt = "black",
"ntu1\U141E" = main_colors[5],
"ntu2\U141E" = main_colors[1])) +
facet_grid(sample ~ seqnames, scale = "free_x", space = "free_x",
switch = "y") +
scale_y_continuous(limits = c(-0.9, 0.3), expand = c(0, 0)) +
labs(y = "RNA-seq")
g2 <- ggplot(gtf, aes(x = start2, xend = end2, y = y, yend = y,
color = unique_einprot_id %in% c("tna1", "SPCC576.17c"),
label = unique_einprot_id_label)) +
geom_segment(arrow = grid::arrow(length = unit(0.05, "inches")),
linewidth = 2) +
geom_richtext(
aes(x = (start2 + end2) / 2, y = y - 0.5),
fill = NA, label.color = NA,
label.padding = grid::unit(rep(0, 4), "pt"),
size = 4
) +
scale_color_manual(values = c(`FALSE` = "black", `TRUE` = main_colors[4])) +
facet_grid(~ seqnames, scales = "free_x", space = "free_x") +
ylim(-0.75, 1.5)
gcov <- cowplot::plot_grid(
g1c + theme_void() +
theme(strip.text = element_blank()),
g1a + theme_void() +
theme(legend.position = "none",
strip.text.y = element_text(size = 12),
strip.text.x = element_text(size = 12, hjust = 0.95),
axis.title.y = element_text(size = 12, angle = 90,
margin = margin(t = 0, r = 10, b = 0, l = 0))),
NULL,
g1b + theme_void() +
theme(strip.text.y = element_text(size = 12,
face = "italic"),
legend.position = "none",
strip.text.x = element_blank(),
axis.title.y = element_text(size = 12, angle = 90,
margin = margin(t = 0, r = 15, b = 0, l = 0))),
g2 + theme_void() +
theme(legend.position = "none",
strip.text = element_blank()),
ncol = 1, rel_heights = c(0.75, 2, 0.5, 2, 1),
align = "v", axis = "lr")
(gcov2 <- cowplot::plot_grid(
g1c + theme_bw() +
theme(axis.text.x = element_text(size = 5)),
g1a + theme_bw() +
theme(legend.position = "none",
axis.text.x = element_text(size = 5)),
g1b + theme_bw() +
theme(legend.position = "none",
axis.text.x = element_text(size = 5)),
g2 + theme_bw() +
theme(legend.position = "none",
axis.text.x = element_text(size = 5)),
ncol = 1, rel_heights = c(0.75, 2, 2, 1),
align = "v", axis = "lr"))
fig7b <- ggdraw() +
draw_image("schematics/Fig7B_Nattou_protein_domains_cartoon_Arial.png")
fig7e <- ggdraw() +
draw_image("schematics/Fig7E_Nattou_lacR_cartoon_images_Arial.png")
fig7f <- ggdraw() +
draw_image("schematics/Fig7F_Nattou_lacR_microscopy_images_Arial.png")
fig7c <- wrap_plots(maplots[c(2, 1)], ncol = 2)
cowplot::plot_grid(
cowplot::plot_grid(
gg_ipdimer,
fig7c,
nrow = 1,
rel_widths = c(1, 2),
labels = c("A", "C"),
align = "h", axis = "bt"
),
NULL,
cowplot::plot_grid(
fig7b,
gcov,
rel_widths = c(1, 1),
labels = c("B", "D")
),
NULL,
cowplot::plot_grid(
fig7e,
NULL,
fig7f,
NULL,
p_zone + theme(legend.position = "top"),
labels = c("E", "", "F", "", "G"), vjust = 2,
rel_widths = c(6, 0.75, 1.2, 0.75, 10.4), nrow = 1
),
ncol = 1, rel_heights = c(1, 0.1, 1, 0.1, 1)
)
plotdf_chip2 <- covs_chip |>
mutate(sample = sub("_rep[0-9]", "", sample)) |>
mutate(exptype = stringr::str_extract(sample, "ChIP|IN")) |>
mutate(pair = sub("_ChIP|_IN", "", sample)) |>
mutate(flag = sub("_*TAG", "", sub(".*_KO_", "", pair))) |>
mutate(label = ifelse(pair == "ntu2_delTMD", "delTMD",
ifelse(pair == "ntu2_TAG", "ntu2+", ""))) |>
mutate(cpm = score) |>
mutate(pos = start) |>
group_by(seqnames) |>
mutate(cpmnorm = cpm / max(abs(cpm))) |>
ungroup() |>
filter(pair %in% c("ntu2_TAG", "ntu2_delTMD")) |>
mutate(pair = ifelse(pair == "ntu2_delTMD", "Ntu2\U141E\U1D40\U1D39\U1D30",
ifelse(pair == "ntu2_TAG", "Ntu2", pair))) |>
mutate(pair = factor(pair, levels = c("Ntu2", "Ntu2\U141E\U1D40\U1D39\U1D30")))
table(plotdf_chip2$sample, plotdf_chip2$flag)
##
## ntu2 ntu2_delTMD
## ntu2_delTMD_ChIP 0 14643
## ntu2_delTMD_IN 0 14643
## ntu2_TAG_ChIP 14643 0
## ntu2_TAG_IN 14643 0
table(plotdf_chip2$sample, plotdf_chip2$pair)
##
## Ntu2 Ntu2ᐞᵀᴹᴰ
## ntu2_delTMD_ChIP 0 14643
## ntu2_delTMD_IN 0 14643
## ntu2_TAG_ChIP 14643 0
## ntu2_TAG_IN 14643 0
gtr <- ggplot(plotdf_chip2 |>
mutate(seqnames = paste0("chr", seqnames)),
aes(x = pos, y = cpmnorm)) +
geom_area(aes(group = exptype, fill = interaction(exptype, flag))) +
scale_fill_manual(values = c(ChIP.ntu2_delTMD = main_colors[3],
IN.ntu2_delTMD = na_color,
ChIP.ntu2 = main_colors[1],
IN.ntu2 = na_color)) +
facet_grid(pair ~ seqnames, scale = "free_x",
space = "free_x", switch = "y") +
scale_y_continuous(limits = c(0, 0.9), expand = c(0, 0))
gcovdel <- cowplot::plot_grid(
gtr + theme_void() +
theme(strip.text.y = element_text(size = 12, vjust = 0.05),
legend.position = "none",
strip.text.x = element_text(size = 12, hjust = 0.95)),
g2 + theme_void() +
theme(legend.position = "none",
strip.text = element_blank()),
ncol = 1, rel_heights = c(2, 1),
align = "v", axis = "lr")
figs7a <- ggdraw() +
draw_image("schematics/FigS7A_Nattou_AF2_fl_Arial.png")
cowplot::plot_grid(
figs7a,
NULL,
cowplot::plot_grid(
p_rel_dis,
cowplot::plot_grid(genomeplot_tpm, NULL,
ncol = 1, rel_heights = c(1, 0.075)),
nrow = 1,
rel_widths = c(1, 1),
labels = c("B", "C"),
vjust = 0.5
),
NULL,
cowplot::plot_grid(
genomeplot,
gcovdel,
nrow = 1,
rel_widths = c(1, 1),
labels = c("D", "E"),
vjust = 0.5
),
labels = c("A", "", ""),
ncol = 1, rel_heights = c(1, 0.1, 1, 0.05, 1)
)
sessionInfo()
## R version 4.3.2 (2023-10-31)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: CentOS Linux 7 (Core)
##
## Matrix products: default
## BLAS/LAPACK: FlexiBLAS OPENBLAS; LAPACK version 3.10.1
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## time zone: Europe/Zurich
## tzcode source: system (glibc)
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] broom_1.0.5 scales_1.3.0
## [3] kableExtra_1.4.0 ggsignif_0.6.4
## [5] patchwork_1.2.0 ggtext_0.1.2
## [7] ggh4x_0.2.8 DESeq2_1.42.1
## [9] colorspace_2.1-0 ggrepel_0.9.5
## [11] dplyr_1.1.4 cowplot_1.1.3
## [13] ggplot2_3.5.0 SingleCellExperiment_1.24.0
## [15] SummarizedExperiment_1.32.0 Biobase_2.62.0
## [17] GenomicRanges_1.54.1 GenomeInfoDb_1.38.8
## [19] IRanges_2.36.0 S4Vectors_0.40.2
## [21] BiocGenerics_0.48.1 MatrixGenerics_1.14.0
## [23] matrixStats_1.3.0
##
## loaded via a namespace (and not attached):
## [1] tidyselect_1.2.1 viridisLite_0.4.2 farver_2.1.1
## [4] Biostrings_2.70.3 bitops_1.0-7 fastmap_1.1.1
## [7] RCurl_1.98-1.14 GenomicAlignments_1.38.2 XML_3.99-0.16.1
## [10] digest_0.6.35 lifecycle_1.0.4 magrittr_2.0.3
## [13] compiler_4.3.2 rlang_1.1.3 sass_0.4.9
## [16] tools_4.3.2 utf8_1.2.4 yaml_2.3.8
## [19] rtracklayer_1.62.0 knitr_1.46 labeling_0.4.3
## [22] S4Arrays_1.2.1 DelayedArray_0.28.0 xml2_1.3.6
## [25] abind_1.4-5 BiocParallel_1.36.0 withr_3.0.0
## [28] purrr_1.0.2 grid_4.3.2 fansi_1.0.6
## [31] cli_3.6.2 rmarkdown_2.26 crayon_1.5.2
## [34] generics_0.1.3 rstudioapi_0.16.0 rjson_0.2.21
## [37] commonmark_1.9.1 cachem_1.0.8 stringr_1.5.1
## [40] zlibbioc_1.48.2 parallel_4.3.2 restfulr_0.0.15
## [43] XVector_0.42.0 vctrs_0.6.5 Matrix_1.6-5
## [46] jsonlite_1.8.8 magick_2.8.3 systemfonts_1.0.6
## [49] locfit_1.5-9.9 jquerylib_0.1.4 tidyr_1.3.1
## [52] glue_1.7.0 codetools_0.2-19 stringi_1.8.3
## [55] gtable_0.3.5 BiocIO_1.12.0 munsell_0.5.1
## [58] tibble_3.2.1 pillar_1.9.0 htmltools_0.5.8.1
## [61] GenomeInfoDbData_1.2.11 R6_2.5.1 evaluate_0.23
## [64] lattice_0.22-6 markdown_1.12 highr_0.10
## [67] Rsamtools_2.18.0 backports_1.4.1 gridtext_0.1.5
## [70] bslib_0.7.0 Rcpp_1.0.12 svglite_2.1.3
## [73] SparseArray_1.2.4 xfun_0.43 pkgconfig_2.0.3