Parameter values

params

## $gtf
## [1] "reference/Schizosaccharomyces_pombe.ASM294v2.55.gtf"
## 
## $idmap
## [1] "data/id_mapping_table.txt"
## 
## $rds500
## [1] "data/ipms_500_sce.rds"
## 
## $rdsrnaseq_del
## [1] "data/rnaseq_del.rds"
## 
## $rdsrnaseq_ko
## [1] "data/rnaseq_ko.rds"
## 
## $rnatracks_ko
## [1] "data/genome_browser_tracks_rnaseq_ko.csv.gz"
## 
## $chiptracks_ko
## [1] "data/genome_browser_tracks_chipseq_ko.csv.gz"
## 
## $dist_from_ne
## [1] "data/Fig7_lacR_quantification_distance_radius.txt"
## 
## $dist_from_ne_doubleko
## [1] "data/Fig7_lacR_quantification_distance_radius_doubleKO.txt"

Load required packages and helper functions

suppressPackageStartupMessages({
    library(SingleCellExperiment)
    library(ggplot2)
    library(cowplot)
    library(dplyr)
    library(ggrepel)
    library(colorspace)
    library(DESeq2)
    library(ggh4x)
    library(ggtext)
    library(patchwork)
    library(ggsignif)
    library(kableExtra)
    library(scales)
    library(broom)
})

source("params/plot_settings.R")
source("params/get_testres_function.R")
source("params/mapping_functions.R")

Read data

We load a SingleCellExperiment object, containing data and results from the high-salt IP-MS experiment.

sce500 <- readRDS(params$rds500)

Next, we load two SummarizedExperiment objects with quantifications from RNA-seq experiments.

rnaseq_del <- readRDS(params$rdsrnaseq_del)
rnaseq_ko <- readRDS(params$rdsrnaseq_ko)
idmap <- read.delim(params$idmap)

Figure 7

Zone quantification

## Read data with distances from nuclear envelope + cell radius and 
## calculate relative distance from NE
dist_ne <- bind_rows(
    read.delim(params$dist_from_ne),
    read.delim(params$dist_from_ne_doubleko)) |>
    group_by(Condition, Replicate) |>
    filter(Condition != "wt_dKO") |>
    mutate(rel_distance_from_ne = Distance_from_nuc_envelope / Radius_of_cell) |>
    mutate(Condition = ifelse(Condition == "ntu1D", "ntu1\U141E", 
                              ifelse(Condition == "ntu2D", "ntu2\U141E", 
                                     ifelse(Condition == "dKO", "ntu1\U141Entu2\U141E", Condition)))) |>
    mutate(SampleID = factor(paste0(Condition, "\n", Replicate),
                             levels = c("wt\nrep1", "wt\nrep2", "ntu1\U141E\nrep1",
                                        "ntu1\U141E\nrep2", "ntu2\U141E\nrep1",
                                        "ntu2\U141E\nrep2","ntu1\U141Entu2\U141E\nrep1",
                                        "ntu1\U141Entu2\U141E\nrep2"))) |>
    dplyr::ungroup()

## Remove one cell where Distance was slightly larger than the Radius
# table(dist_ne$rel_distance_from_ne > 1)
dist_ne <- dist_ne |>
    filter(rel_distance_from_ne >= 0 & rel_distance_from_ne <= 1) |>
    dplyr::mutate(Condition = factor(Condition, 
                                     levels = c("wt", "ntu1\U141E", "ntu2\U141E",
                                                "ntu1\U141Entu2\U141E")),
                  Replicate = factor(Replicate, 
                                     levels = c("rep1", "rep2")))

## Get zone classification
zone_thresholds <- 1 - c(1, sqrt(2), sqrt(3)) / sqrt(3)
zone_byrep <- dist_ne |>
    mutate(Zone = ifelse(rel_distance_from_ne > zone_thresholds[1], "Zone III", 
                         ifelse(rel_distance_from_ne > zone_thresholds[2], 
                                "Zone II", "Zone I"))) |>
    group_by(Condition, Replicate, Zone) |>
    tally() |>
    group_by(Condition, Replicate) |>
    mutate(fraction_by_zone = n / sum(n)) |>
    mutate(Zone = factor(Zone, levels = c("Zone I", "Zone II", "Zone III"))) |>
    ungroup()
zone_byrep |>
    mutate(fraction_by_zone = label_percent(accuracy = 0.01)(fraction_by_zone)) |>
    rename(`Number of cells` = n, 
           `Percentage of cells` = fraction_by_zone) |>
    kbl() |>
    kable_styling()

Condition	Replicate	Zone	Number of cells	Percentage of cells
wt	rep1	Zone I	86	56.95%
wt	rep1	Zone II	59	39.07%
wt	rep1	Zone III	6	3.97%
wt	rep2	Zone I	98	53.26%
wt	rep2	Zone II	70	38.04%
wt	rep2	Zone III	16	8.70%
ntu1ᐞ	rep1	Zone I	51	36.17%
ntu1ᐞ	rep1	Zone II	75	53.19%
ntu1ᐞ	rep1	Zone III	15	10.64%
ntu1ᐞ	rep2	Zone I	92	41.07%
ntu1ᐞ	rep2	Zone II	88	39.29%
ntu1ᐞ	rep2	Zone III	44	19.64%
ntu2ᐞ	rep1	Zone I	47	33.33%
ntu2ᐞ	rep1	Zone II	75	53.19%
ntu2ᐞ	rep1	Zone III	19	13.48%
ntu2ᐞ	rep2	Zone I	73	38.02%
ntu2ᐞ	rep2	Zone II	81	42.19%
ntu2ᐞ	rep2	Zone III	38	19.79%
ntu1ᐞntu2ᐞ	rep1	Zone I	85	41.46%
ntu1ᐞntu2ᐞ	rep1	Zone II	90	43.90%
ntu1ᐞntu2ᐞ	rep1	Zone III	30	14.63%
ntu1ᐞntu2ᐞ	rep2	Zone I	64	32.82%
ntu1ᐞntu2ᐞ	rep2	Zone II	89	45.64%
ntu1ᐞntu2ᐞ	rep2	Zone III	42	21.54%

## Average zone distribution over replicates
zone_avg <- zone_byrep |> 
    dplyr::group_by(Zone, Condition) |> 
    dplyr::summarize(avg_fraction_by_zone = mean(fraction_by_zone), 
                     sd_fraction_by_zone = sd(fraction_by_zone), 
                     .groups = "drop")

## Fraction of cells in Zone I
zone_byrep |> filter(Zone == "Zone I") |>
    group_by(Zone, Condition) |>
    summarize(average_percentage = label_percent(accuracy = 0.01)(mean(fraction_by_zone)), 
              individual_percentages = paste(label_percent(accuracy = 0.01)(fraction_by_zone), 
                                             collapse = ","), 
              .groups = "drop") |>
    rename(`Average percentage` = average_percentage,
           `Percentages for replicates` = individual_percentages) |>
    kbl() |>
    kable_styling()

Zone	Condition	Average percentage	Percentages for replicates
Zone I	wt	55.11%	56.95%,53.26%
Zone I	ntu1ᐞ	38.62%	36.17%,41.07%
Zone I	ntu2ᐞ	35.68%	33.33%,38.02%
Zone I	ntu1ᐞntu2ᐞ	37.14%	41.46%,32.82%

## Fraction of cells in Zone III
zone_byrep |> filter(Zone == "Zone III") |>
    group_by(Zone, Condition) |>
    summarize(average_percentage = label_percent(accuracy = 0.01)(mean(fraction_by_zone)), 
              individual_percentages = paste(label_percent(accuracy = 0.01)(fraction_by_zone), 
                                             collapse = ","), 
              .groups = "drop") |>
    rename(`Average percentage` = average_percentage,
           `Percentages for replicates` = individual_percentages) |>
    kbl() |>
    kable_styling()

Zone	Condition	Average percentage	Percentages for replicates
Zone III	wt	6.33%	3.97%,8.70%
Zone III	ntu1ᐞ	15.14%	10.64%,19.64%
Zone III	ntu2ᐞ	16.63%	13.48%,19.79%
Zone III	ntu1ᐞntu2ᐞ	18.09%	14.63%,21.54%

Plot relative distance to perimeter in all conditions

## Number of cells per condition
table(dist_ne$Replicate, dist_ne$Condition)

##       
##         wt ntu1ᐞ ntu2ᐞ ntu1ᐞntu2ᐞ
##   rep1 151   141   141        205
##   rep2 184   224   192        195

get_xlabs <- function(x) {
    vapply(x, function(xx) sub("\n", "<br>", paste0("*", xx, "*", "<br>(n = ", 
                                                    length(which(paste0(dist_ne$Condition, "\n", 
                                                                        dist_ne$Replicate) == xx)), ")"), fixed = TRUE), "")
}

## Calculate p-values (average values per sample followed by linear model)
dps <- dist_ne |>
    group_by(SampleID, Condition, Replicate) |>
    summarize(mean_dist = mean(rel_distance_from_ne),
              median_dist = median(rel_distance_from_ne),
              .groups = "drop")
(aggreg <- summary(lm(median_dist ~ Condition, data = dps)) |> tidy())

## # A tibble: 4 × 5
##   term                estimate std.error statistic  p.value
##   <chr>                  <dbl>     <dbl>     <dbl>    <dbl>
## 1 (Intercept)           0.171     0.0120     14.3  0.000138
## 2 Conditionntu1ᐞ        0.0467    0.0169      2.76 0.0508  
## 3 Conditionntu2ᐞ        0.0486    0.0169      2.87 0.0453  
## 4 Conditionntu1ᐞntu2ᐞ   0.0581    0.0169      3.43 0.0265

## wt vs ntu1D
p_ntu1 <- paste0("p==", signif(aggreg$p.value[aggreg$term == "Conditionntu1\U141E"], 3))

## wt vs ntu2D
p_ntu2 <- paste0("p==", signif(aggreg$p.value[aggreg$term == "Conditionntu2\U141E"], 3))

## wt vs ntu1Dntu2D
p_ntu12 <- paste0("p==", signif(aggreg$p.value[aggreg$term == "Conditionntu1\U141Entu2\U141E"], 3))

(p_rel_dis <- ggplot(dist_ne, 
                     aes(x = SampleID, y = rel_distance_from_ne, 
                         fill = Condition)) + 
        geom_violin(trim = TRUE, bounds = c(0, 1)) +
        geom_signif(annotations = c(p_ntu1, p_ntu2, p_ntu12),
                    xmin = c(1.5, 1.5, 1.5), xmax = c(3.5, 5.5, 7.5), 
                    y = c(1.05, 1.15, 1.25),
                    tip_length = 0, step_increase = 0.1, parse = TRUE) + 
        geom_boxplot(width = 0.1, fill = "white") +
        labs(x = "", y = "Distance from NE relative to radius") + 
        scale_fill_manual(values = c(na_color, main_colors[5], main_colors[1],
                                     main_colors[3]),
                          name = "Condition") + 
        scale_x_discrete(labels = get_xlabs) +
        scale_y_continuous(breaks = c(0, 0.5, 1), limits = c(0, 1.3)) + 
        theme_cowplot() + 
        theme(axis.text.x = element_markdown(size = 8),
              legend.text = element_text(face = "italic")))

Plot zone distribution by condition

get_xlabs_bar <- function(x) {
    vapply(x, function(xx) sub("\n", "<br>", paste0("*", xx, "*", "<br>(n = ", 
                                                    length(which(dist_ne$Condition == xx)), ")"), fixed = TRUE), "")
}

(p_zone <- ggplot(zone_avg, aes(fill = Zone, y = 100 * avg_fraction_by_zone, 
                                x = Condition)) + 
     geom_bar(position = "stack", stat = "identity") +
     geom_text(aes(label = paste0(round(100 * avg_fraction_by_zone, 2), "%")), 
                   position = position_stack(vjust = 0.5), size = 4) +
     labs(x = "", y = "Relative zone distribution [%]") +
     scale_fill_manual(values = c(main_colors[1], main_colors[3], main_colors[5]),
                       name = "") + 
     scale_x_discrete(labels = get_xlabs_bar) + 
     theme_cowplot() + 
     theme(axis.text.x = element_markdown()))

Ntu1/Ntu2 500 mM NaCl IP-MS

Here we extract and visualize the t-statistics for all proteins in the ntu1 and ntu2 high-salt IP-MS experiments. In each case the condition of interest is compared to a broad complement group consisting of all other high-salt IP-MS samples.

## Extract t-statistics for the two comparisons
rdhigh <- rowData(sce500)
contrasts_high <- c(
    "Ntu1_500_plate_vs_compl_Ntu1_500_plate.t", 
    "Ntu2_500_plate_vs_compl_Ntu2_500_plate.t")
high <- as.data.frame(rdhigh[, c("einprotId", contrasts_high)]) |>
    setNames(c("einprotId", "Ntu1", "Ntu2"))

## Plot t-statistics
(gg_ipdimer <- ggplot(high, aes(x = Ntu1, y = Ntu2)) +
        geom_abline(data = high |> 
                        dplyr::filter(einprotId %in% c("Ntu1", "Ntu2")), 
                    aes(slope = mean(Ntu2 / Ntu1), intercept = 0), 
                    linetype = "dashed", color = "grey50") + 
        geom_text_repel(data = high |> 
                            dplyr::filter(einprotId %in% c("Ntu1", "Ntu2") | 
                                              Ntu2 > 5),
                        aes(label = einprotId), min.segment.length = 0) + 
        geom_point(alpha = 0.25) + 
        geom_point(data = high |> 
                       dplyr::filter(einprotId %in% c("Ntu1", "Ntu2")) |>
                       dplyr::mutate(type = "Baits"),
                   mapping = aes(fill = type), color = main_colors[3],
                   shape = 21, alpha = 1, size = 2) + 
        scale_fill_manual(values = c(Baits = main_colors[3])) + 
        theme_cowplot() + 
        labs(x = "500 mM NaCl mod. t-stat. (Ntu1)", 
             y = "500 mM NaCl mod. t-stat. (Ntu2)") + 
        theme(strip.text = element_blank(),
              legend.position = "inside",
              legend.position.inside = c(0.8, 0.1)) + 
        guides(fill = guide_legend(nrow = 1, byrow = TRUE, title = "")))

RNA-seq expression levels for genes around the tna1 locus

wt_tpm <- assay(rnaseq_del[, rnaseq_del$Group_name == "wt_WT_mRNA"], "abundance")
genepos <- rowRanges(rnaseq_del)
mcols(genepos) <- NULL
genepos <- as.data.frame(genepos)
stopifnot(rownames(wt_tpm) == rownames(genepos))
genepos$ave_tpm <- rowMeans(wt_tpm)
genepos$id <- rownames(genepos)

chrom <- genepos["tna1", "seqnames"]
startpos <- genepos["tna1", "start"]
endpos <- genepos["tna1", "end"]
windowsize <- 30000

(genomeplot_tpm <- 
        ggplot(genepos |> 
                   dplyr::filter(seqnames == chrom & 
                                     start > startpos - windowsize & 
                                     end < endpos + windowsize), 
               aes(x = start / 1e6, y = ave_tpm, label = id)) +
        geom_hline(yintercept = 0, color = na_color) + 
        geom_point(size = 3, alpha = 0.8, aes(color = id == "tna1")) +
        scale_color_manual(values = c(`TRUE` = main_colors[4], `FALSE` = pt_color)) + 
        geom_text_repel(min.segment.length = 0, max.overlaps = Inf, size = 3, 
                        force = 3) + 
        labs(x = "Genome position (Mb)", y = "Gene expression (TPM)") + 
        scale_y_continuous(expand = expansion(mult = c(0, 0), add = c(200, 50))) + 
        theme_cowplot() + 
        theme(legend.position = "none"))

RNA-seq differential expression analysis - KO experiments

## Filter lowly expressed genes and create DESeqDataSet
rnaseqfilt_ko <- rnaseq_ko[rowSums(assay(rnaseq_ko, "counts") >= 5) >= 2, ]
dds <- DESeq2::DESeqDataSet(rnaseqfilt_ko, design = ~ MappedGene)
dds <- DESeq2::estimateSizeFactors(dds)

## Filter genes with high inferential variance
highInfRV <- 10^(-0.75)
dds <- dds[rowData(dds)$meanInfRV < highInfRV, ]

## Perform differential expression analysis
dds <- DESeq2::DESeq(dds)
groups <- setdiff(dds$MappedGene, "wt")
names(groups) <- groups
resList <- lapply(groups, function(gr) {
    DESeq2::results(dds, contrast = c("MappedGene", gr, "wt"), 
                    alpha = 0.05)
})

## Create MA plots
rngY <- range(sapply(resList, function(rl) rl$log2FoldChange))
rngX <- range(sapply(resList, function(rl) log10(rl$baseMean)))
maplots <- lapply(structure(names(resList), names = names(resList)), function(nm) {
    ggplot(as.data.frame(resList[[nm]]), 
           aes(x = log10(baseMean), y = log2FoldChange)) + 
        geom_point(mapping = aes(color = abs(log2FoldChange) > 3 & 
                                     padj < 0.001, 
                                 alpha = abs(log2FoldChange) > 3 & 
                                     padj < 0.001)) + 
        scale_color_manual(values = c(`TRUE` = main_colors[4], `FALSE` = pt_color)) + 
        scale_alpha_manual(values = c(`TRUE` = 1, `FALSE` = 0.25)) + 
        coord_cartesian(xlim = rngX, ylim = rngY) + 
        geom_text_repel(data = as.data.frame(resList[[nm]]) |>
                            tibble::rownames_to_column("gene") |>
                            dplyr::filter(gene == nm | 
                                              (log2FoldChange > 4.5 & padj < 0.05)), 
                        aes(label = gene), 
                        min.segment.length = 0, max.overlaps = Inf) + 
        theme_cowplot() + 
        labs(x = "Mean expression", y = "log2(fold-change)", 
             title = paste0(nm, "\U141E")) + 
        theme(legend.position = "none",
              plot.title = element_text(face = "italic"))
})

cowplot::plot_grid(
    plotlist = maplots, nrow = 1, align = "hv", axis = "b"
)

## Correlation between log2 fold changes for ntu1/ntu2
stopifnot(all(rownames(resList$ntu1) == rownames(resList$ntu2)))
dfcorr <- data.frame(gene = rownames(resList$ntu1), 
                     ntu1 = resList$ntu1$log2FoldChange,
                     ntu2 = resList$ntu2$log2FoldChange)
(corrplot <- list(
    ggplot(dfcorr, aes(x = ntu1, y = ntu2)) + 
        geom_abline(slope = 1, intercept = 0, linetype = "dashed", 
                    color = na_color) + 
        geom_point(alpha = 0.25) + 
        geom_text_repel(data = dfcorr |>
                            dplyr::filter(gene %in% c("tna1", "SPCC576.17c")), 
                        aes(label = gene), 
                        min.segment.length = 0, max.overlaps = Inf) + 
        labs(x = "*ntu1\U141E* log2(fold-change)",
             y = "*ntu2\U141E* log2(fold-change)") + 
        theme_cowplot() + 
        theme(axis.title = element_markdown())
))

## [[1]]

## Add information about gene location to result lists
genepos <- rowRanges(dds)
mcols(genepos) <- NULL
genepos <- as.data.frame(genepos)
resList <- lapply(resList, function(rl) {
    stopifnot(rownames(rl) == rownames(genepos))
    cbind(rl, genepos) |>
        as.data.frame() |>
        tibble::rownames_to_column("gene")
})

## Subset to region of interest
chrom <- genepos["tna1", "seqnames"]
startpos <- genepos["tna1", "start"]
endpos <- genepos["tna1", "end"]
windowsize <- 30000

resListToPlot <- lapply(resList, function(rl) {
    rl |> dplyr::filter(seqnames == chrom & 
                            start > startpos - windowsize & 
                            end < endpos + windowsize)
})
plotList <- lapply(names(resListToPlot), function(nm) {
    ggplot(resListToPlot[[nm]], 
           aes(x = start, y = log2FoldChange, 
               color = abs(log2FoldChange) > 3 & padj < 0.001)) + 
        geom_rect(aes(xmin = endpos, xmax = startpos, ymin = -Inf, ymax = Inf),
                  color = lighten(main_colors[4], amount = 0.6), fill = NA,
                  alpha = 0.1, show.legend = FALSE) + 
        geom_point(alpha = 0.6, size = 2) + 
        scale_color_manual(values = c(`FALSE` = na_color, 
                                      `TRUE` = main_colors[4])) + 
        geom_text_repel(aes(label = ifelse(abs(log2FoldChange) > 3 & padj < 0.001,
                                           as.character(gene), "")),
                        size = 2.5, max.overlaps = 30, force = 10) + 
        scale_x_continuous(labels = scales::label_number(scale = 1e-6)) + 
        scale_y_continuous(limits = c(-7, 7)) + 
        labs(x = "Genome position (Mb)", y = "log2(fold-change)",
             title = paste0(nm, "\U141E")) + 
        theme_cowplot() + 
        theme(legend.position = "none",
              plot.title = element_text(face = "italic"))
})
genomeplot <- cowplot::plot_grid(plotlist = plotList[c(2, 1)], ncol = 1)

Coverage tracks

covs_rna <- read.csv(params$rnatracks_ko) |>
    left_join(data.frame(sample = colnames(rnaseq_ko),
                         libsize = colSums(assay(rnaseq_ko, "counts")))) |>
    mutate(cpm = score / libsize * 1e6)
covs_chip <- read.csv(params$chiptracks_ko)
gtf <- rtracklayer::import(params$gtf)

goi <- c("med6", "SPAC1002.16c", "urg2", "urg3", "SPCC576.17c", "SPNCRNA.1263")
gtf <- subset(gtf, (gene_id %in% goi | gene_name %in% goi) & type == "gene") |>
    as.data.frame() |>
    left_join(idmap, by = join_by("gene_id" == "gene_stable_id")) |>
    dplyr::select(seqnames, start, end, strand, unique_einprot_id) |>
    mutate(y = ifelse(unique_einprot_id == "SPCC576.17c", 0, 1)) |>
    mutate(start2 = ifelse(strand == "+", start, end),
           end2 = ifelse(strand == "+", end, start)) |>
    mutate(unique_einprot_id_label = ifelse(grepl("^SP", unique_einprot_id), 
                                            unique_einprot_id, 
                                            paste0("*", unique_einprot_id, "\U207A*")))

## Create data frames for plotting
plotdf_chip1 <- covs_chip |>
    mutate(sample = sub("_rep[0-9]", "", sample)) |>
    mutate(exptype = stringr::str_extract(sample, "ChIP|IN")) |>
    mutate(pair = sub("_ChIP|_IN", "", sample)) |>
    mutate(flag = .capitalize(sub("_*TAG", "", sub(".*_KO_", "", pair)))) |>
    mutate(label = ifelse(pair == "ntu1_KO_ntu2TAG", "*ntu1\U141E*",
                          ifelse(pair == "ntu1_TAG", "*ntu2\U207A*",
                                 ifelse(pair == "ntu2_TAG", "*ntu1\U207A*",
                                        ifelse(pair == "ntu2_KO_ntu1TAG", "*ntu2\U141E*", 
                                               ""))))) |>
    mutate(cpm = score) |>
    mutate(pos = start) |>
    group_by(flag, seqnames) |>
    mutate(cpmnorm = cpm / max(abs(cpm))) |>
    ungroup() |>
    filter(pair %in% c("ntu2_TAG", "ntu2_KO_ntu1TAG", 
                       "ntu1_TAG", "ntu1_KO_ntu2TAG")) |>
    mutate(pair = factor(pair, levels = c("ntu1_TAG", "ntu2_KO_ntu1TAG",
                                          "ntu2_TAG", "ntu1_KO_ntu2TAG")))
table(plotdf_chip1$sample, plotdf_chip1$flag)

##                       
##                         Ntu1  Ntu2
##   ntu1_KO_ntu2TAG_ChIP     0 14643
##   ntu1_KO_ntu2TAG_IN       0 14643
##   ntu1_TAG_ChIP        14643     0
##   ntu1_TAG_IN          14643     0
##   ntu2_KO_ntu1TAG_ChIP 14643     0
##   ntu2_KO_ntu1TAG_IN   14643     0
##   ntu2_TAG_ChIP            0 14643
##   ntu2_TAG_IN              0 14643

table(plotdf_chip1$sample, plotdf_chip1$label)

##                       
##                        *ntu1⁺* *ntu1ᐞ* *ntu2⁺* *ntu2ᐞ*
##   ntu1_KO_ntu2TAG_ChIP       0   14643       0       0
##   ntu1_KO_ntu2TAG_IN         0   14643       0       0
##   ntu1_TAG_ChIP              0       0   14643       0
##   ntu1_TAG_IN                0       0   14643       0
##   ntu2_KO_ntu1TAG_ChIP       0       0       0   14643
##   ntu2_KO_ntu1TAG_IN         0       0       0   14643
##   ntu2_TAG_ChIP          14643       0       0       0
##   ntu2_TAG_IN            14643       0       0       0

plotdf_rna <- covs_rna |>
    filter(sample %in% c("SPBC530.08_KO_mRNA_rep1",
                         "SPBC16G5.16_KO_mRNA_rep1",
                         "wt_KO_mRNA_rep1")) |>
    mutate(sample = sub("_KO_mRNA_rep[0-9]", "", sample)) |>
    mutate(sample = replace(sample, sample == "SPBC16G5.16", "ntu1\U141E")) |>
    mutate(sample = replace(sample, sample == "SPBC530.08", "ntu2\U141E")) |>
    mutate(gene = sample) |>
    group_by(seqnames) |>
    mutate(cpmnorm = cpm / max(abs(cpm))) |>
    ungroup() |>
    mutate(sample = factor(sample, 
                           levels = c("wt", "ntu1\U141E", "ntu2\U141E")))
plotdf_label <- covs_rna |>
    filter(sample == "wt_KO_mRNA_rep1") |>
    mutate(cpmnorm = 0)

.get_empty_label <- function(x) {
    c("ntu1_TAG" = "", "ntu2_KO_ntu1TAG" = "",
      "ntu2_TAG" = "", "ntu1_KO_ntu2TAG" = "")
}
.add_space_to_label <- function(x) {
    paste(x, "    ")
}

maxIII <- max(plotdf_label$pos[plotdf_label$seqnames == "III"])
g1c <- ggplot(plotdf_label,
              aes(x = pos, y = cpmnorm)) + 
    geom_line(color = "white") + 
    geom_segment(x = maxIII - 2000, xend = maxIII,
                 y = 0, yend = 0, color = "black") + 
    geom_text(data = data.frame(seqnames = "III",
                                x = maxIII - 1000,
                                y = 0,
                                label = "2kb"),
              aes(x = x, y = y, label = label),
              vjust = -1) + 
    facet_grid(~ seqnames, scales = "free_x", space = "free_x")
g1a <- ggplot(plotdf_chip1 |>
                  mutate(seqnames = paste0("chr", seqnames)), 
              aes(x = pos, y = cpmnorm)) + 
    geom_area(aes(group = exptype, fill = interaction(exptype, flag))) + 
    geom_richtext(data = plotdf_chip1 |> 
                      mutate(seqnames = paste0("chr", seqnames)) |>
                      select(flag, pair, seqnames, pos, label) |>
                      filter(seqnames == "chrI") |>
                      filter(pos == min(pos)),
                  aes(x = pos, y = 0.65, label = label),
                  fill = NA, label.color = NA,
                  label.padding = grid::unit(rep(0, 4), "pt"),
                  hjust = 0, size = 4) + 
    scale_fill_manual(values = c(`ChIP.Ntu1` = main_colors[5], 
                                 `IN.Ntu1` = na_color,
                                 `ChIP.Ntu2` = main_colors[1], 
                                 `IN.Ntu2` = na_color)) + 
    facet_nested(flag + pair ~ seqnames, scale = "free_x", space = "free_x",
                 switch = "y",
                 labeller = labeller(pair = .get_empty_label, 
                                     flag = .add_space_to_label),
                 nest_line = element_line(),
                 strip = strip_nested(size = "variable")) + 
    scale_y_continuous(limits = c(0, 0.9), expand = c(0, 0)) + 
    labs(y = "ChIP-seq")
g1b <- ggplot(plotdf_rna, 
              aes(x = pos, y = cpmnorm)) + 
    geom_area(aes(group = strand, fill = gene)) + 
    scale_fill_manual(values = c(wt = "black", 
                                 "ntu1\U141E" = main_colors[5], 
                                 "ntu2\U141E" = main_colors[1])) + 
    facet_grid(sample ~ seqnames, scale = "free_x", space = "free_x",
               switch = "y") + 
    scale_y_continuous(limits = c(-0.9, 0.3), expand = c(0, 0)) + 
    labs(y = "RNA-seq")
g2 <- ggplot(gtf, aes(x = start2, xend = end2, y = y, yend = y, 
                      color = unique_einprot_id %in% c("tna1", "SPCC576.17c"), 
                      label = unique_einprot_id_label)) + 
    geom_segment(arrow = grid::arrow(length = unit(0.05, "inches")), 
                 linewidth = 2) + 
    geom_richtext(
        aes(x = (start2 + end2) / 2, y = y - 0.5),
        fill = NA, label.color = NA,
        label.padding = grid::unit(rep(0, 4), "pt"),
        size = 4
    ) + 
    scale_color_manual(values = c(`FALSE` = "black", `TRUE` = main_colors[4])) + 
    facet_grid(~ seqnames, scales = "free_x", space = "free_x") + 
    ylim(-0.75, 1.5)
gcov <- cowplot::plot_grid(
    g1c + theme_void() + 
        theme(strip.text = element_blank()),
    g1a + theme_void() + 
        theme(legend.position = "none", 
              strip.text.y = element_text(size = 12),
              strip.text.x = element_text(size = 12, hjust = 0.95),
              axis.title.y = element_text(size = 12, angle = 90,
                                          margin = margin(t = 0, r = 10, b = 0, l = 0))), 
    NULL,
    g1b + theme_void() + 
        theme(strip.text.y = element_text(size = 12, 
                                          face = "italic"), 
              legend.position = "none",
              strip.text.x = element_blank(),
              axis.title.y = element_text(size = 12, angle = 90,
                                          margin = margin(t = 0, r = 15, b = 0, l = 0))), 
    g2 + theme_void() + 
        theme(legend.position = "none",
              strip.text = element_blank()), 
    ncol = 1, rel_heights = c(0.75, 2, 0.5, 2, 1),
    align = "v", axis = "lr")
(gcov2 <- cowplot::plot_grid(
    g1c + theme_bw() + 
        theme(axis.text.x = element_text(size = 5)),
    g1a + theme_bw() + 
        theme(legend.position = "none",
              axis.text.x = element_text(size = 5)), 
    g1b + theme_bw() + 
        theme(legend.position = "none",
              axis.text.x = element_text(size = 5)), 
    g2 + theme_bw() + 
        theme(legend.position = "none",
              axis.text.x = element_text(size = 5)), 
    ncol = 1, rel_heights = c(0.75, 2, 2, 1),
    align = "v", axis = "lr"))

Put together

fig7b <- ggdraw() + 
    draw_image("schematics/Fig7B_Nattou_protein_domains_cartoon_Arial.png")
fig7e <- ggdraw() + 
    draw_image("schematics/Fig7E_Nattou_lacR_cartoon_images_Arial.png")
fig7f <- ggdraw() + 
    draw_image("schematics/Fig7F_Nattou_lacR_microscopy_images_Arial.png")

fig7c <- wrap_plots(maplots[c(2, 1)], ncol = 2)

cowplot::plot_grid(
    cowplot::plot_grid(
        gg_ipdimer, 
        fig7c,
        nrow = 1, 
        rel_widths = c(1, 2), 
        labels = c("A", "C"),
        align = "h", axis = "bt"
    ),
    NULL,
    cowplot::plot_grid(
        fig7b, 
        gcov,
        rel_widths = c(1, 1),
        labels = c("B", "D")
    ),
    NULL,
    cowplot::plot_grid(
        fig7e,
        NULL,
        fig7f,
        NULL,
        p_zone + theme(legend.position = "top"), 
        labels = c("E", "", "F", "", "G"), vjust = 2,
        rel_widths = c(6, 0.75, 1.2, 0.75, 10.4), nrow = 1
    ), 
    ncol = 1, rel_heights = c(1, 0.1, 1, 0.1, 1)
)

Supplementary Figure

plotdf_chip2 <- covs_chip |>
    mutate(sample = sub("_rep[0-9]", "", sample)) |>
    mutate(exptype = stringr::str_extract(sample, "ChIP|IN")) |>
    mutate(pair = sub("_ChIP|_IN", "", sample)) |>
    mutate(flag = sub("_*TAG", "", sub(".*_KO_", "", pair))) |>
    mutate(label = ifelse(pair == "ntu2_delTMD", "delTMD",
                          ifelse(pair == "ntu2_TAG", "ntu2+", ""))) |>
    mutate(cpm = score) |>
    mutate(pos = start) |>
    group_by(seqnames) |>
    mutate(cpmnorm = cpm / max(abs(cpm))) |>
    ungroup() |>
    filter(pair %in% c("ntu2_TAG", "ntu2_delTMD")) |>
    mutate(pair = ifelse(pair == "ntu2_delTMD", "Ntu2\U141E\U1D40\U1D39\U1D30", 
                         ifelse(pair == "ntu2_TAG", "Ntu2", pair))) |>
    mutate(pair = factor(pair, levels = c("Ntu2", "Ntu2\U141E\U1D40\U1D39\U1D30")))
table(plotdf_chip2$sample, plotdf_chip2$flag)

##                   
##                     ntu2 ntu2_delTMD
##   ntu2_delTMD_ChIP     0       14643
##   ntu2_delTMD_IN       0       14643
##   ntu2_TAG_ChIP    14643           0
##   ntu2_TAG_IN      14643           0

table(plotdf_chip2$sample, plotdf_chip2$pair)

##                   
##                     Ntu2 Ntu2ᐞᵀᴹᴰ
##   ntu2_delTMD_ChIP     0    14643
##   ntu2_delTMD_IN       0    14643
##   ntu2_TAG_ChIP    14643        0
##   ntu2_TAG_IN      14643        0

gtr <- ggplot(plotdf_chip2 |>
                  mutate(seqnames = paste0("chr", seqnames)), 
              aes(x = pos, y = cpmnorm)) + 
    geom_area(aes(group = exptype, fill = interaction(exptype, flag))) + 
    scale_fill_manual(values = c(ChIP.ntu2_delTMD = main_colors[3], 
                                 IN.ntu2_delTMD = na_color,
                                 ChIP.ntu2 = main_colors[1], 
                                 IN.ntu2 = na_color)) + 
    facet_grid(pair ~ seqnames, scale = "free_x",
               space = "free_x", switch = "y") + 
    scale_y_continuous(limits = c(0, 0.9), expand = c(0, 0))
gcovdel <- cowplot::plot_grid(
    gtr + theme_void() +
        theme(strip.text.y = element_text(size = 12, vjust = 0.05),
              legend.position = "none", 
              strip.text.x = element_text(size = 12, hjust = 0.95)), 
    g2 + theme_void() + 
        theme(legend.position = "none",
              strip.text = element_blank()), 
    ncol = 1, rel_heights = c(2, 1),
    align = "v", axis = "lr")

figs7a <- ggdraw() + 
    draw_image("schematics/FigS7A_Nattou_AF2_fl_Arial.png")

cowplot::plot_grid(
    figs7a,
    NULL,
    cowplot::plot_grid(
        p_rel_dis,
        cowplot::plot_grid(genomeplot_tpm, NULL,
                           ncol = 1, rel_heights = c(1, 0.075)),
        nrow = 1, 
        rel_widths = c(1, 1), 
        labels = c("B", "C"),
        vjust = 0.5
    ),
    NULL,
    cowplot::plot_grid(
        genomeplot,
        gcovdel,
        nrow = 1, 
        rel_widths = c(1, 1),
        labels = c("D", "E"),
        vjust = 0.5
    ),
    labels = c("A", "", ""), 
    ncol = 1, rel_heights = c(1, 0.1, 1, 0.05, 1)
)

Session info

Session info

sessionInfo()

## R version 4.3.2 (2023-10-31)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: CentOS Linux 7 (Core)
## 
## Matrix products: default
## BLAS/LAPACK: FlexiBLAS OPENBLAS;  LAPACK version 3.10.1
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: Europe/Zurich
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] broom_1.0.5                 scales_1.3.0               
##  [3] kableExtra_1.4.0            ggsignif_0.6.4             
##  [5] patchwork_1.2.0             ggtext_0.1.2               
##  [7] ggh4x_0.2.8                 DESeq2_1.42.1              
##  [9] colorspace_2.1-0            ggrepel_0.9.5              
## [11] dplyr_1.1.4                 cowplot_1.1.3              
## [13] ggplot2_3.5.0               SingleCellExperiment_1.24.0
## [15] SummarizedExperiment_1.32.0 Biobase_2.62.0             
## [17] GenomicRanges_1.54.1        GenomeInfoDb_1.38.8        
## [19] IRanges_2.36.0              S4Vectors_0.40.2           
## [21] BiocGenerics_0.48.1         MatrixGenerics_1.14.0      
## [23] matrixStats_1.3.0          
## 
## loaded via a namespace (and not attached):
##  [1] tidyselect_1.2.1         viridisLite_0.4.2        farver_2.1.1            
##  [4] Biostrings_2.70.3        bitops_1.0-7             fastmap_1.1.1           
##  [7] RCurl_1.98-1.14          GenomicAlignments_1.38.2 XML_3.99-0.16.1         
## [10] digest_0.6.35            lifecycle_1.0.4          magrittr_2.0.3          
## [13] compiler_4.3.2           rlang_1.1.3              sass_0.4.9              
## [16] tools_4.3.2              utf8_1.2.4               yaml_2.3.8              
## [19] rtracklayer_1.62.0       knitr_1.46               labeling_0.4.3          
## [22] S4Arrays_1.2.1           DelayedArray_0.28.0      xml2_1.3.6              
## [25] abind_1.4-5              BiocParallel_1.36.0      withr_3.0.0             
## [28] purrr_1.0.2              grid_4.3.2               fansi_1.0.6             
## [31] cli_3.6.2                rmarkdown_2.26           crayon_1.5.2            
## [34] generics_0.1.3           rstudioapi_0.16.0        rjson_0.2.21            
## [37] commonmark_1.9.1         cachem_1.0.8             stringr_1.5.1           
## [40] zlibbioc_1.48.2          parallel_4.3.2           restfulr_0.0.15         
## [43] XVector_0.42.0           vctrs_0.6.5              Matrix_1.6-5            
## [46] jsonlite_1.8.8           magick_2.8.3             systemfonts_1.0.6       
## [49] locfit_1.5-9.9           jquerylib_0.1.4          tidyr_1.3.1             
## [52] glue_1.7.0               codetools_0.2-19         stringi_1.8.3           
## [55] gtable_0.3.5             BiocIO_1.12.0            munsell_0.5.1           
## [58] tibble_3.2.1             pillar_1.9.0             htmltools_0.5.8.1       
## [61] GenomeInfoDbData_1.2.11  R6_2.5.1                 evaluate_0.23           
## [64] lattice_0.22-6           markdown_1.12            highr_0.10              
## [67] Rsamtools_2.18.0         backports_1.4.1          gridtext_0.1.5          
## [70] bslib_0.7.0              Rcpp_1.0.12              svglite_2.1.3           
## [73] SparseArray_1.2.4        xfun_0.43                pkgconfig_2.0.3

Figure 7 - heterodimer

Charlotte Soneson, Michael Stadler, Merle Skribbe

2025-01-18