Parameter values

Load required packages and helper functions

Helper functions

Read data

Extract data for heatmaps

Export data for TFexplorer

Facts and figures

Identification of baits in IP-MS
Summary tables

Figure 5

Summary heatmaps using results from the statistical tests (150 mM NaCl)

Bait-vs-bait heatmap
Bait-vs-bait, less stringent adjusted p-value threshold
SAGA/NuA4 heatmap
RNA / DNA binding proteins

Summary heatmaps using results from the statistical tests (500 mM NaCl)

Bait-vs-bait heatmap

Pie charts for bait-vs-bait interactions (within/between DBD families)
Compare t-statistics from low-salt and high-salt conditions
Tube vs plate t-statistics for Atf1 pull-down
Adn2 vs complement volcano plot
Histone proteins t-stats
Put together
Supplementary figure

Session info

Parameter values

params

## $rds150
## [1] "data/ipms_150_sce.rds"
## 
## $rds500
## [1] "data/ipms_500_sce.rds"
## 
## $rdsdal2
## [1] "data/ipms_dal2_sce.rds"
## 
## $complexes
## [1] "data/complexes.json"
## 
## $baitclass
## [1] "data/ipms_bait_class.txt"
## 
## $dnabinders
## [1] "data/Gene_list_DNAbinders_proteincoding.txt"
## 
## $rnabinders
## [1] "data/Gene_list_RNAbinders_proteincoding.txt"
## 
## $dbdtxt
## [1] "data/TF_DBD_annotation.txt"
## 
## $idmap
## [1] "data/id_mapping_table.txt"

Load required packages and helper functions

suppressPackageStartupMessages({
    library(SingleCellExperiment)
    library(ggplot2)
    library(cowplot)
    library(einprot)
    library(dplyr)
    library(forcats)
    library(jsonlite)
    library(ComplexHeatmap)
    library(colorspace)
    library(ggrepel)
    library(DT)
    library(stringr)
    library(tidyr)
    library(kableExtra)
    library(patchwork)
})

## Source scripts with required helper functions and settings
source("params/plot_settings.R")
source("params/get_testres_function.R")
source("params/ipms_params.R")
source("params/mapping_functions.R")
source("params/ipms_heatmap_functions.R")

## Heatmap sizes
w_ipmsHm <-  84 # heatmap body width (mm)
h_ipmsHm <- 178 # total heatmap height (mm)

fs <- 8  # small font size
fl <- 10 # large font size
ft <- 12 # title font size

Helper functions

## Overwrite the get_legend function from cowplot temporarily, 
## since the cowplot one doesn't work with ggplot2 3.5.0
## see https://github.com/wilkelab/cowplot/issues/202

get_legend <- function(plot, legend = NULL) {
    gt <- ggplotGrob(plot)
    
    pattern <- "guide-box"
    if (!is.null(legend)) {
        pattern <- paste0(pattern, "-", legend)
    }
    
    indices <- grep(pattern, gt$layout$name)
    
    not_empty <- !vapply(
        gt$grobs[indices], 
        inherits, what = "zeroGrob", 
        FUN.VALUE = logical(1)
    )
    indices <- indices[not_empty]
    
    if (length(indices) > 0) {
        return(gt$grobs[[indices[1]]])
    }
    return(NULL)
}

makePie <- function(baitdata, title) {
    ## Expected fraction of 'same family' pairs
    famtbl <- table(baitdata$nbrIntMat |>
                        filter(dbdfam != "Other" & 
                                   bait != "Untagged") |>
                        pull(dbdfam))
    nbait <- sum(baitdata$nbrIntMat$bait != "Untagged")
    exp_same <- sum(famtbl * (famtbl - 1)) / (nbait * (nbait - 1))
    print(paste0("Expected fraction of 'same family' pairs: ", 
                 round(exp_same, digits = 5)))
    
    ## Pie chart, within-vs-between DBD family interactions
    pie_data <- baitdata$nbrIntMat |>
        filter(bait != "Untagged") |>
        select(nbrTFs_samefam, nbrTFs_difffam) |>
        pivot_longer(everything(), names_to = "class", values_to = "nbrTF") |>
        group_by(class) |>
        summarize(nbrTF = sum(nbrTF), .groups = "drop") |>
        mutate(ypos = sum(nbrTF) - cumsum(nbrTF) + 0.5 * nbrTF)
    
    ggplot(
        pie_data,
        aes(x = "", y = nbrTF, fill = class)) + 
        geom_col(width = 1) + 
        geom_segment(x = 1.55, xend = 1.55, 
                     y = 0, yend = exp_same * sum(pie_data$nbrTF),
                     linewidth = 1.5) + 
        geom_text(aes(y = ypos, label = nbrTF), size = 7, color = "black") + 
        coord_polar("y", start = 0) + 
        scale_fill_manual(values = c(nbrTFs_samefam = main_colors[3],
                                     nbrTFs_difffam = main_colors[5]),
                          labels = c(nbrTFs_samefam = "Same DBD family",
                                     nbrTFs_difffam = "Different DBD family"),
                          name = "") + 
        labs(title = title) + 
        theme_void()
}

Read data

We load two SingleCellExperiment objects, containing data and results from the low- and high-salt experiments, respectively, as well as one object with results from a separate run of Dal2.

sce150 <- readRDS(params$rds150)
sce500 <- readRDS(params$rds500)
scedal2 <- readRDS(params$rdsdal2)

We then extract the full list of baits, as well as a classification according to the experiment(s) where a certain bait was pulled down.

idmap <- read.delim(params$idmap)
baitclass <- read.delim(params$baitclass)
DT::datatable(baitclass, rownames = FALSE)

Show entries

Search:

Gene_name	class	class500
Ace2	150and500	Lost in 500 mM
Cdc10	150and500	Retained in 500 mM
Cha4	nobait
Cuf1	150only
Cuf2	nobait
Deb1	150and500	Lost in 500 mM
Esc1	150only
Fep1	150only
Fhl1	150only
Fil1	150only

Showing 1 to 10 of 90 entries

Previous1 2 3 4 5…9Next

In the following set of IPs, we identified the bait as not pulled down.

(nobait_150 <- baitclass$Gene_name[baitclass$class == "nobait"])

## [1] "Cha4"        "Cuf2"        "Atf21"       "Cbf12"       "Sre2"       
## [6] "SPBC1348.12" "Untagged"

We also read a list of known complexes, which will be used for annotation purposes.

complexes <- jsonlite::read_json(params$complexes, simplifyVector = TRUE)
complexes$`MBF transcription complex`$color <- main_colors[5]
complexes$`CCAAT-binding factor complex`$color <- main_colors[1]
complexes$`Atf1-Pcr1`$color <- main_colors[3]
names(complexes)

##  [1] "NuA4"                            "SAGA"                           
##  [3] "19S proteasome - base"           "Clr6 HDAC - complex I'"         
##  [5] "SREBP-SCAP"                      "SPBC16G5.16-SPBC530.08"         
##  [7] "Atf1-Pcr1"                       "chaperonin-containing T-complex"
##  [9] "CCAAT-binding factor complex"    "Dal2"                           
## [11] "Cyc8(Ssn6)-Tup1"                 "Rad24-Rad25"                    
## [13] "Ste11-Cdc2-Cig2"                 "Pap1-Prr1"                      
## [15] "MBF transcription complex"

Finally we load a table classifying each TF to its DNA-binding domain family.

dbd <- read.delim(params$dbdtxt) |>
    dplyr::mutate(Gene_name = .capitalize(Gene_name))
DT::datatable(dbd, extensions = "FixedColumns", rownames = FALSE, 
              filter = list(position = "top", clear = FALSE),
              options = list(scrollX = TRUE, 
                             fixedColumns = list(leftColumns = 1),
                             search = list(regex = FALSE, caseInsensitive = TRUE),
                             pageLength = 10))

Show entries

Search:

TFID	PomBaseID	Gene_name	UniprotID	Taxonomic_distribution	DBD_class	DBD_feature	Conserved_human	Predicted_conserved_mouse	DBD_class_name

TFID	PomBaseID	Gene_name	UniprotID	Taxonomic_distribution	DBD_class	DBD_feature	Conserved_human	Predicted_conserved_mouse	DBD_class_name

ID1	SPAC6G10.12c	Ace2	O14258	other	COG5048	Zinc-coordinated	false	false	COG5048
ID2	SPBC336.12c	Cdc10	P01129	fungi only	KilA-N	TF homology	false	false	KilA-N
ID3	SPBC1683.13c	Cha4	Q9P6I9	fungi only	Zn(II)2Cys6	Zinc-coordinated	false	false	Zn(II)2Cys6
ID4	SPAC31A2.11c	Cuf1	Q09728	fungi only	Copper-fist	TF homology	false	false	Copper fist
ID5	SPCC584.02	Cuf2	O94588	fungi only	Copper-fist	TF homology	false	false	Copper fist
ID6	SPAC1B1.01	Deb1	O13862	other	zf-C2H2	Zinc-coordinated	false	false	C2H2
ID7	SPAC56F8.16	Esc1	Q04635	other	bHLH_SF	Basic helix-loop-helix	false	false	bHLH
ID8	SPAC23E2.01	Fep1	Q10134	other	ZnF_GATA	Zinc-coordinated	false	true	GATA
ID9	SPAC1142.08	Fhl1	O14270	other	COG5025 (FH)	Forkhead	false	false	Forkhead
ID10	SPCC1393.08	Fil1	O94720	other	ZnF_GATA	Zinc-coordinated	false	true	GATA

Showing 1 to 10 of 89 entries

Previous1 2 3 4 5…9Next

Extract data for heatmaps

hmdata_150 <- makeHeatmapData(sce = sce150, adjpthr = adjpThreshold, 
                              log2fcthr = log2fcThreshold, conc = 150,
                              idmap = idmap, baitclass = baitclass)
hmdata_150_loose <- makeHeatmapData(sce = sce150, adjpthr = adjpThresholdLoose, 
                                    log2fcthr = log2fcThreshold, conc = 150,
                                    idmap = idmap, baitclass = baitclass)
hmdata_500 <- makeHeatmapData(sce = sce500, adjpthr = adjpThreshold, 
                              log2fcthr = log2fcThreshold, conc = 500,
                              idmap = idmap, baitclass = baitclass)
hmdata_500_loose <- makeHeatmapData(sce = sce500, adjpthr = adjpThresholdLoose, 
                                    log2fcthr = log2fcThreshold, conc = 500,
                                    idmap = idmap, baitclass = baitclass)


baitdata_150 <- makeBaitHeatmapData(sce = sce150, adjpthr = adjpThreshold, 
                                    log2fcthr = log2fcThreshold, 
                                    nbrIntMat = hmdata_150$nbrIntMat, 
                                    nobait = nobait_150, idmap = idmap, 
                                    complexes = complexes)
baitdata_150_loose <- makeBaitHeatmapData(sce = sce150, adjpthr = adjpThresholdLoose, 
                                          log2fcthr = log2fcThreshold, 
                                          nbrIntMat = hmdata_150_loose$nbrIntMat, 
                                          nobait = nobait_150, idmap = idmap, 
                                          complexes = complexes)
baitdata_500 <- makeBaitHeatmapData(sce = sce500, adjpthr = adjpThreshold, 
                                    log2fcthr = log2fcThreshold, 
                                    nbrIntMat = hmdata_500$nbrIntMat, 
                                    nobait = nobait_150, idmap = idmap, 
                                    complexes = complexes)
baitdata_500_loose <- makeBaitHeatmapData(sce = sce500, adjpthr = adjpThresholdLoose, 
                                          log2fcthr = log2fcThreshold, 
                                          nbrIntMat = hmdata_500_loose$nbrIntMat, 
                                          nobait = nobait_150, idmap = idmap, 
                                          complexes = complexes)

Export data for TFexplorer

Here, we export data for use with TFexplorer, for both the low- and high-salt conditions.

## Heatmap data
.prepDataForTFexplorer <- function(sce, outfileHeatmap, outfileVolcano, idmap) {
    ## Heatmap
    ## -------------------------------------------------------------------------
    testres <- .getTestCols(sce = sce)
    testres$tstat <- testres$tstat[, colnames(testres$tstat) !=
                                       "Untagged_150_tube_vs_compl_Untagged_150_tube.t"]
    tstat <- testres$tstat
    colnames(tstat) <- .getProteinNameFromComparison(colnames(tstat))
    TFs <- colnames(tstat)
    
    ## set NAs to minimum value
    tstat[is.na(tstat)] <- min(na.omit(tstat))
    ## set all values below 0 to 0
    tstat[tstat < 0] = 0
    
    ## convert it back to a matrix
    heatmap_data_mean <- as.matrix(tstat)
    
    ## cluster the rows (to avoid slow clustering in LinkedCharts)
    dist_mat1 <- dist(heatmap_data_mean, method = "euclidean")
    hclust_avg1 <- hclust(dist_mat1, method = "average")
    heatmap_data_mean_clustered <- heatmap_data_mean[hclust_avg1$order, ]
    
    ## calculate row max 
    heatmapRowMax <- apply(heatmap_data_mean_clustered, 1, max)
    
    ## add Pombase IDs and names and uniprot IDs 
    PomIds2 <- left_join(data.frame(row.names(heatmap_data_mean_clustered)),
                         idmap |> dplyr::mutate(unique_einprot_id = .capitalize(unique_einprot_id)),
                         by = c("row.names.heatmap_data_mean_clustered." =
                                    "unique_einprot_id"))
    PomIds1 <- ifelse(is.na(PomIds2$gene_stable_id),
                      PomIds2$row.names.heatmap_data_mean_clustered., 
                      PomIds2$gene_stable_id)
    UniprotIDs <- PomIds2$xref
    
    ## convert the gene names to protein names
    colnames(heatmap_data_mean_clustered) <- 
        .capitalize(colnames(heatmap_data_mean_clustered))
    rownames(heatmap_data_mean_clustered) <- 
        .capitalize(rownames(heatmap_data_mean_clustered))
    
    ## save heatmap and related data as JSON
    heatmap_data_mean2 <- toJSON(list(
        heatmapMatrix = heatmap_data_mean_clustered, 
        TFs = colnames(heatmap_data_mean_clustered),
        proteins = rownames(heatmap_data_mean_clustered),
        MaxTSTAT = heatmapRowMax,
        PomIds = PomIds1,
        UniprotIDs = UniprotIDs),
        pretty = TRUE)
    write(heatmap_data_mean2, file = outfileHeatmap)
    
    ## Volcano plots
    ## -------------------------------------------------------------------------
    ## extract comparisons (include only plate format for Atf1 and untagged)
    volcano_list <- metadata(sce)$testres$tests
    volcano_list <- volcano_list[sub("\\.t", "", colnames(testres$tstat))]
    TFIDs <- names(volcano_list)
    TFnames <- .getProteinNameFromComparison(TFIDs)
    
    ## using sce object
    vp_list <- lapply(
        structure(colnames(heatmap_data_mean_clustered), 
                  names = colnames(heatmap_data_mean_clustered)), 
        function(selTF) {
            VP <- volcano_list[[which(TFnames == selTF)]][, c("pid", "logFC", "adj.P.Val",
                                                              "mlog10p", "showInVolcano")]
            VP$DE <- ifelse(VP$showInVolcano, "significant", "not significant")
            
            ## replace NAs with 0
            VP <- replace_na(data = VP, 
                             replace = list(pid = "unknown",
                                            logFC = 0,
                                            adj.P.Val = 1,
                                            mlog10p = 0,
                                            DE = "not significant"))
            
            ## add to list
            VP[, c("pid", "logFC", "adj.P.Val", "mlog10p", "DE")]
        })
    
    ## convert the gene names to protein names
    for (i in seq_along(vp_list)) {
        rownames(vp_list[[i]]) <- .capitalize(rownames(vp_list[[i]]))
        vp_list[[i]]$pid <- rownames(vp_list[[i]])
    }
    
    ## save volcano plot data as JSON
    vp_list2 <- toJSON(vp_list, dataframe = "columns", pretty = TRUE)
    write(vp_list2, file = outfileVolcano)
}

.prepDataForTFexplorer(sce = sce150, 
                       outfileHeatmap = "data/TSTAT_matrix4heatmap_150mM.json", 
                       outfileVolcano = "data/volcanoData_150mM.json", 
                       idmap = idmap)
.prepDataForTFexplorer(sce = sce500, 
                       outfileHeatmap = "data/TSTAT_matrix4heatmap_500mM.json", 
                       outfileVolcano = "data/volcanoData_500mM.json", 
                       idmap = idmap)

Facts and figures

Identification of baits in IP-MS

table(baitclass$class)

## 
## 150and500   150only    nobait 
##        40        43         7

Summary tables

The ‘strict cutoff’ thresholds correspond to an adjusted p-value cutoff of 0.0002 and a log2 fold change cutoff of 1. The ‘lenient cutoff’ thresholds correspond to an adjusted p-value cutoff of 0.01 and a log2 fold change cutoff of 1.

makeInteractorSummary <- function(sce, baitdata, adjp_cutoff, logfc_cutoff, idmap, 
                                  baits_to_keep, tbl_title) {
    ni_tc <- .getTestCols(sce = sce, adjp_cutoff = adjp_cutoff, 
                          logfc_cutoff = logfc_cutoff)
    ni_idx <- .getOrigBaitNameFromComparison(colnames(ni_tc$tstat), idmap = idmap) %in%
        baits_to_keep
    ni_int <- ni_tc$interactor[, ni_idx]
    ni_baits <- cbind(match(.getProteinNameFromComparison(colnames(ni_int)), 
                            rownames(ni_int)), 
                      seq_len(ncol(ni_int)))
    ni_int[ni_baits] <- NA
    
    ## TF-TF interactions
    tmp <- baitdata$nbrIntMat |>
        filter(nbrTFs_wobait > 0)
    
    ## Bilateral TF-TF interactions
    nbr_bilateral <- tmp |>
        select(bait, TFs) |>
        separate_longer_delim(TFs, delim = ",") |>
        mutate(o1 = paste(bait, TFs, sep = "-"),
               o2 = paste(TFs, bait, sep = "-")) |>
        filter(o1 %in% o2) |>
        nrow()
    
    data.frame(`Total number of TFs for which bait was pulled down` = ncol(ni_int),
               `Total number of interactions (excluding bait itself)` = length(which(ni_int > 0)),
               `Number of TFs with interactions` = length(which(colSums(ni_int, na.rm = TRUE) > 0)),
               `Number of TFs without observed interactions` = length(which(colSums(ni_int, na.rm = TRUE) == 0)),
               `Total number of TF-TF interactions (excluding bait itself)` = sum(baitdata$nbrIntMat$nbrTFs_wobait),
               `Number of TFs involved in these TF-TF interactions` = length(unique(c(tmp$bait, unlist(strsplit(tmp$TFs, ","))))),
               `Number of bilateral TF-TF interaction` = nbr_bilateral,
               check.names = FALSE) |>
        pivot_longer(cols = everything(), names_to = "Aspect", values_to = tbl_title)
}

makeInteractorSummary(sce = sce150, baitdata = baitdata_150, adjp_cutoff = adjpThreshold,
                      logfc_cutoff = log2fcThreshold, idmap = idmap, 
                      baits_to_keep = baitclass$Gene_name[baitclass$class != "nobait"],
                      tbl_title = "150 mM, strict cutoff") |>
    dplyr::full_join(
        makeInteractorSummary(sce = sce500, baitdata = baitdata_500, adjp_cutoff = adjpThreshold,
                              logfc_cutoff = log2fcThreshold, idmap = idmap, 
                              baits_to_keep = baitclass$Gene_name[baitclass$class != "nobait"],
                              tbl_title = "500 mM, strict cutoff"),
        by = join_by(Aspect)
    ) |> 
    dplyr::full_join(
        makeInteractorSummary(sce = sce150, baitdata = baitdata_150_loose, adjp_cutoff = adjpThresholdLoose,
                              logfc_cutoff = log2fcThreshold, idmap = idmap, 
                              baits_to_keep = baitclass$Gene_name[baitclass$class != "nobait"],
                              tbl_title = "150 mM, lenient cutoff"),
        by = join_by(Aspect)
    ) |>
    dplyr::full_join(
        makeInteractorSummary(sce = sce500, baitdata = baitdata_500_loose, adjp_cutoff = adjpThresholdLoose,
                              logfc_cutoff = log2fcThreshold, idmap = idmap, 
                              baits_to_keep = baitclass$Gene_name[baitclass$class != "nobait"],
                              tbl_title = "500 mM, lenient cutoff"),
        by = join_by(Aspect)
    ) |>
    kbl() |>
    kable_styling()

Aspect	150 mM, strict cutoff	500 mM, strict cutoff	150 mM, lenient cutoff	500 mM, lenient cutoff
Total number of TFs for which bait was pulled down	83	40	83	40
Total number of interactions (excluding bait itself)	352	110	881	202
Number of TFs with interactions	43	24	60	29
Number of TFs without observed interactions	40	16	23	11
Total number of TF-TF interactions (excluding bait itself)	21	15	39	17
Number of TFs involved in these TF-TF interactions	17	14	30	14
Number of bilateral TF-TF interaction	16	12	18	14

Figure 5

Summary heatmaps using results from the statistical tests (150 mM NaCl)

Bait-vs-bait heatmap

hm1b <- Heatmap(baitdata_150$mat, 
                cluster_rows = FALSE, 
                cluster_columns = FALSE, 
                col = makeHeatmapCol(stringency = "high"), 
                border = TRUE, 
                border_gp = gpar(lwd = 0.5),
                name = "Mod.\nt-stat.", 
                na_col = binary_heatmap_colors["FALSE"],
                column_title = paste(c("150 mM NaCl IP-MS", rep(" ", 25)), collapse = ""),
                row_title = "Transcription factors",
                column_title_gp = gpar(fontsize = fl + 3, fontface = "bold"),
                row_title_gp = gpar(fontsize = fl),
                column_names_gp = gpar(fontsize = fs),
                row_names_gp = gpar(fontsize = fs),
                show_row_names = FALSE, show_column_names = FALSE,
                use_raster = FALSE, show_heatmap_legend = TRUE, 
                left_annotation = makeComplexAndDBDAnnotation(
                    baitdata_150$mat, 
                    complexes[c("MBF transcription complex", 
                                "CCAAT-binding factor complex",
                                "Atf1-Pcr1")],
                    dbd = dbd, idmap = idmap, 
                    main_colors = main_colors, bg_color = bg_color, 
                    fontsize = fl),
                top_annotation = makeTFIntAnnotation(
                    baitdata_150, na_color = na_color,
                    fontsize_small = fs, fontsize_large = fl), 
                bottom_annotation = makeColLabels(baitdata_150$mat, 
                                                  c("Ntu1_plate", "Ntu2_plate", 
                                                    "Esc1_tube", "SPAC3F10.12c_plate"), fl),
                width = unit(w_ipmsHm, "mm") / 1.05, # heatmap body width
                heatmap_height = unit(h_ipmsHm / 1.6, "mm"),
                heatmap_legend_param = list(title_gp = gpar(fontsize = fl), 
                                            border = "gray10",
                                            border_gp = gpar(lwd = 0.5)))

set.seed(42L)
hm_150_bait <- grid.grabExpr(
    hm1b <- draw(hm1b, merge_legend = TRUE,
                 heatmap_legend_side = "right",
                 annotation_legend_side = "right"),
    width = w_ipmsHm / 1.05, height = h_ipmsHm / 1.6
)

plot_grid(hm_150_bait)

## Separate legends
use_complexes <- c("MBF transcription complex", 
                   "CCAAT-binding factor complex",
                   "Atf1-Pcr1")
lgd_complex <- Legend(labels = use_complexes,
                      legend_gp = gpar(fill = vapply(complexes[use_complexes],
                                                     function(x) x$color, NA_character_)),
                      title = "Complex", nrow = 2)

dbd_colors <- c(`KilA-N` = main_colors[5], 
                `Zn(II)2Cys6` = main_colors[4],
                bZIP = main_colors[3], 
                bHLH = main_colors[2],
                `Other (combined)` = bg_color)
lgd_dbd <- Legend(labels = names(dbd_colors), 
                  legend_gp = gpar(fill = dbd_colors), 
                  title = "DBD family", nrow = 2)

legend_bait_1 <- grid.grabExpr(
    hm1bl1 <- ComplexHeatmap::draw(lgd_complex),
    width = w_ipmsHm, height = h_ipmsHm / 8
)
legend_bait_2 <- grid.grabExpr(
    hm1bl2 <- ComplexHeatmap::draw(lgd_dbd),
    width = w_ipmsHm, height = h_ipmsHm / 8
)

Bait-vs-bait, less stringent adjusted p-value threshold

hm1b_loose <- Heatmap(baitdata_150_loose$mat, 
                      cluster_rows = FALSE, 
                      cluster_columns = FALSE, 
                      col = makeHeatmapCol(stringency = "low"), 
                      border = TRUE, 
                      border_gp = gpar(lwd = 0.5),
                      name = "Mod.\nt-stat.", 
                      na_col = binary_heatmap_colors["FALSE"],
                      column_title = paste(c("150 mM NaCl IP-MS", rep(" ", 25)), collapse = ""),
                      row_title = "Transcription factors",
                      column_title_gp = gpar(fontsize = fl + 3, fontface = "bold"),
                      row_title_gp = gpar(fontsize = fl),
                      column_names_gp = gpar(fontsize = fs),
                      row_names_gp = gpar(fontsize = fs),
                      show_row_names = FALSE, show_column_names = FALSE,
                      use_raster = FALSE, show_heatmap_legend = TRUE, 
                      left_annotation = makeComplexAndDBDAnnotation(
                          baitdata_150_loose$mat, 
                          complexes[c("MBF transcription complex", 
                                      "CCAAT-binding factor complex",
                                      "Atf1-Pcr1")],
                          dbd = dbd, idmap = idmap, 
                          main_colors = main_colors, bg_color = bg_color, 
                          fontsize = fl),
                      top_annotation = makeTFIntAnnotation(
                          baitdata_150_loose, na_color = na_color,
                          fontsize_small = fs, fontsize_large = fl), 
                      bottom_annotation = makeColLabels(baitdata_150_loose$mat, 
                                                        c("Ntu1_plate", "Ntu2_plate", 
                                                          "Esc1_tube", "SPAC3F10.12c_plate"), fl),
                      width = unit(w_ipmsHm, "mm") / 1.05, # heatmap body width
                      heatmap_height = unit(h_ipmsHm / 1.6, "mm"), 
                      heatmap_legend_param = list(title_gp = gpar(fontsize = fl), 
                                                  border = "gray10",
                                                  border_gp = gpar(lwd = 0.5)))

hm_150_0.05_bait <- grid.grabExpr(
    hm1b <- draw(hm1b_loose, merge_legend = TRUE,
                 heatmap_legend_side = "right",
                 annotation_legend_side = "right"),
    width = w_ipmsHm / 1.05, height = h_ipmsHm / 1.6
)

plot_grid(hm_150_0.05_bait)

SAGA/NuA4 heatmap

tc150 <- .getTestCols(sce150, adjp_cutoff = 0.05,
                      logfc_cutoff = log2fcThreshold)
keep_proteins <- .capitalize(c(complexes$SAGA$gene_names, complexes$NuA4$gene_names))
keep_exp <- which(.getProteinNameFromComparison(colnames(tc150$adjp)) %in% 
                      c("Ace2", "Pap1", "Rst2"))
binary_mat <- tc150$adjp[keep_proteins, keep_exp] < 0.05 &
    tc150$logfc[keep_proteins, keep_exp] > 1
binary_mat[is.na(binary_mat)] <- FALSE
colnames(binary_mat) <- .getProteinNameFromComparison(colnames(binary_mat))
ordr <- order(rownames(binary_mat) %in% .capitalize(complexes$SAGA$gene_names), 
              binary_mat[, "Ace2"], binary_mat[, "Pap1"], binary_mat[, "Rst2"], 
              decreasing = TRUE)
binary_mat <- binary_mat[ordr, ]

binary_annot <- data.frame(NuA4 = rownames(binary_mat) %in% 
                               .capitalize(complexes$NuA4$gene_names),
                           SAGA = rownames(binary_mat) %in% 
                               .capitalize(complexes$SAGA$gene_names), 
                           row.names = rownames(binary_mat))

mode(binary_mat) <- "character"
ht_opt(HEATMAP_LEGEND_PADDING = unit(0, "mm"))
hm1saga_nua4 <- Heatmap(
    binary_mat, 
    cluster_rows = FALSE, 
    cluster_columns = FALSE, 
    col = c(`FALSE` = "#11111100", `TRUE` = binary_heatmap_colors[["TRUE"]]), 
    border = TRUE, 
    border_gp = gpar(lwd = 0.5),
    name = " ", 
    na_col = binary_heatmap_colors["FALSE"],
    column_names_gp = gpar(fontsize = ft),
    row_names_gp = gpar(fontsize = fl),
    show_row_names = TRUE, show_column_names = TRUE, 
    row_names_side = "left",
    use_raster = FALSE, show_heatmap_legend = TRUE,
    left_annotation = rowAnnotation(
        df = binary_annot, 
        col = list(SAGA = c(`TRUE` = complex_colors[["SAGA"]],
                            `FALSE` = binary_heatmap_colors[["FALSE"]]),
                   NuA4 = c(`TRUE` = complex_colors[["NuA4"]],
                            `FALSE` = binary_heatmap_colors[["FALSE"]])),
        show_legend = FALSE),
    heatmap_height = unit(1.125 * h_ipmsHm, "mm"), # whole heatmap height
    column_title_gp = gpar(fontsize = ft),
    heatmap_legend_param = list(title_gp = gpar(fontsize = ft),
                                at = c("FALSE", "TRUE"),
                                labels = c("", "adj. p < 0.05 &\nlog2FC > 1"), 
                                border = "#11111100",
                                legend_gap = unit(0, "mm"))
)

hm_150_saga_nua4 <- grid.grabExpr(
    hm1sn <- draw(hm1saga_nua4, merge_legend = TRUE,
                  heatmap_legend_side = "bottom",
                  annotation_legend_side = "right",
                  align_heatmap_legend = "global_center",
                  padding = unit(c(3, 1, 1, 1), "mm")),
    width = w_ipmsHm, height = 1.125 * h_ipmsHm
)

plot_grid(hm_150_saga_nua4)

ht_opt(RESET = TRUE)

RNA / DNA binding proteins

rnabinding <- read.delim(params$rnabinders)
keep <- rownames(hmdata_150$tstat) %in% c(rnabinding$PomBaseID, .capitalize(rnabinding$Gene_name))
sum(keep)

## [1] 29

hm1rna <- Heatmap(hmdata_150$tstat[keep, ], 
                  cluster_rows = FALSE, 
                  cluster_columns = TRUE, 
                  col = makeHeatmapCol(stringency = "high"), 
                  border = TRUE, 
                  border_gp = gpar(lwd = 0.5),
                  name = "Mod.\nt-stat.", 
                  na_col = binary_heatmap_colors["FALSE"],
                  column_names_gp = gpar(fontsize = fs),
                  row_names_gp = gpar(fontsize = fs),
                  show_row_names = TRUE, show_column_names = FALSE,
                  use_raster = FALSE, show_heatmap_legend = TRUE,
                  width = unit(w_ipmsHm, "mm"), # heatmap body width
                  heatmap_height = unit(h_ipmsHm/2, "mm"), # whole heatmap height
                  heatmap_legend_param = list(title_gp = gpar(fontsize = fl), 
                                border = "gray10",
                                border_gp = gpar(lwd = 0.5)))

set.seed(42L)
hm_150_rna <- grid.grabExpr(
    hm1b <- draw(hm1rna, merge_legend = TRUE,
                heatmap_legend_side = "right",
                annotation_legend_side = "right"),
    width = w_ipmsHm, height = h_ipmsHm/2
)

plot_grid(hm_150_rna)

dnabinding <- read.delim(params$dnabinders)
keep <- rownames(hmdata_150$tstat) %in% c(dnabinding$PomBaseID, .capitalize(dnabinding$Gene_name))
sum(keep)

## [1] 127

hm1dna <- Heatmap(hmdata_150$tstat[keep, ], 
                  cluster_rows = FALSE, 
                  cluster_columns = TRUE, 
                  col = makeHeatmapCol(stringency = "high"), 
                  border = TRUE, 
                  border_gp = gpar(lwd = 0.5),
                  name = "Mod.\nt-stat.", 
                  na_col = binary_heatmap_colors["FALSE"],
                  column_names_gp = gpar(fontsize = fs),
                  row_names_gp = gpar(fontsize = fs),
                  show_row_names = TRUE, show_column_names = FALSE,
                  use_raster = FALSE, show_heatmap_legend = TRUE,
                  width = unit(w_ipmsHm, "mm"), # heatmap body width
                  heatmap_height = unit(1.5 * h_ipmsHm, "mm"), # whole heatmap height
                  heatmap_legend_param = list(title_gp = gpar(fontsize = fl), 
                                border = "gray10",
                                border_gp = gpar(lwd = 0.5)))

set.seed(42L)
hm_150_dna <- grid.grabExpr(
    hm1b2 <- draw(hm1dna, merge_legend = TRUE,
                  heatmap_legend_side = "right",
                  annotation_legend_side = "right"),
    width = w_ipmsHm, height = 1.5 * h_ipmsHm
)
plot_grid(hm_150_dna)

Summary heatmaps using results from the statistical tests (500 mM NaCl)

Bait-vs-bait heatmap

hm1b2 <- Heatmap(baitdata_500$mat, 
                 cluster_rows = FALSE, 
                 cluster_columns = FALSE, 
                 col = makeHeatmapCol(stringency = "high"), 
                 border = TRUE, 
                 border_gp = gpar(lwd = 0.5),
                 name = "Mod.\nt-stat.", 
                 na_col = binary_heatmap_colors["FALSE"],
                 column_title = paste(c("500 mM NaCl IP-MS", rep(" ", 25)), collapse = ""),
                 row_title = "Transcription factors",
                 column_title_gp = gpar(fontsize = fl + 3, fontface = "bold"),
                 row_title_gp = gpar(fontsize = fl),
                 column_names_gp = gpar(fontsize = fs),
                 row_names_gp = gpar(fontsize = fs),
                 show_row_names = FALSE, show_column_names = FALSE,
                 use_raster = FALSE, show_heatmap_legend = TRUE,
                 left_annotation = makeComplexAndDBDAnnotation(
                     baitdata_500$mat, 
                     complexes[c("MBF transcription complex", 
                                 "CCAAT-binding factor complex", "Atf1-Pcr1")], 
                     dbd = dbd, idmap = idmap, 
                     main_colors = main_colors, bg_color = bg_color, 
                     fontsize = fl),
                 top_annotation = makeTFIntAnnotation(
                     baitdata_500, na_color = na_color,
                     fontsize_small = fs, fontsize_large = fl), 
                 bottom_annotation = makeColLabels(baitdata_500$mat, 
                                                   c("Ntu1_plate", "Ntu2_plate",
                                                     "Esc1_plate", "SPAC3F10.12c_plate"), fl),
                 width = unit(w_ipmsHm, "mm") / 1.05, # heatmap body width
                 heatmap_height = unit(h_ipmsHm / 1.6, "mm"), # whole heatmap height
                 heatmap_legend_param = list(title_gp = gpar(fontsize = fl), 
                                            border = "gray10",
                                            border_gp = gpar(lwd = 0.5)))

hm_500_bait <- grid.grabExpr(
    hm1b2 <- draw(hm1b2, merge_legend = TRUE,
                  heatmap_legend_side = "right",
                  annotation_legend_side = "right"),
    width = w_ipmsHm / 1.05, height = h_ipmsHm / 1.6
)

plot_grid(hm_500_bait)

Pie charts for bait-vs-bait interactions (within/between DBD families)

The pie charts below display the number of observed within- and between-DBD family interactions under different salt conditions and using different thresholds for identifying interactions. The black arc indicate the
fraction of within-DBD family interactions expected by chance, given the observed number of elements in each of the families.

pie_bait_150 <- makePie(baitdata = baitdata_150,
                        title = "150 mM NaCl\nstrict cutoff")

## [1] "Expected fraction of 'same family' pairs: 0.14837"

pie_bait_150_loose <- makePie(baitdata = baitdata_150_loose, 
                              title = "150 mM NaCl\nlenient cutoff")

## [1] "Expected fraction of 'same family' pairs: 0.14837"

pie_bait_500 <- makePie(baitdata = baitdata_500, 
                        title = "500 mM NaCl\nstrict cutoff")

## [1] "Expected fraction of 'same family' pairs: 0.06795"

pie_bait_500_loose <- makePie(baitdata = baitdata_500_loose, 
                              title = "500 mM NaCl\nlenient cutoff")

## [1] "Expected fraction of 'same family' pairs: 0.06795"

(pies <- cowplot::plot_grid(
    cowplot::plot_grid(pie_bait_150 + theme(legend.position = "none"), 
                       pie_bait_150_loose + theme(legend.position = "none"), 
                       pie_bait_500 + theme(legend.position = "none"),
                       pie_bait_500_loose + theme(legend.position = "none"), nrow = 2),
    get_legend(pie_bait_150 + theme_cowplot(ft) + theme(legend.position = "bottom",
                                                        legend.justification = "center")),
    ncol = 1, rel_heights = c(2, 0.25)
))

Compare t-statistics from low-salt and high-salt conditions

The figures below show scatter plots of moderated t-statistics for selected bait pull-downs compared to their complements, in the high-salt vs low-salt conditions.

## Extract results from statistical tests
rdlow <- rowData(sce150)
rdhigh <- rowData(sce500)

## Get t-statistics
contrasts_to_compare <- grep("\\.t$", colnames(rdhigh), value = TRUE)
contrasts_low <- gsub("_compl", "_highsaltcompl", 
                      gsub("_500_", "_150_", contrasts_to_compare))
idx <- which(!contrasts_low %in% colnames(rdlow))
contrasts_low[idx] <- gsub("_plate", "_tube", contrasts_low[idx])
stopifnot(all(contrasts_low %in% colnames(rdlow)))

## t-statistics, logFCs and adjusted p-values, low salt
low <- as.data.frame(rdlow[, c("einprotId", contrasts_low)]) |>
    tidyr::pivot_longer(names_to = "contrast_low", values_to = "tstat_low", -einprotId) |>
    dplyr::mutate(contrast_low = sub("\\.t$", "", contrast_low)) |>
    dplyr::left_join(
        as.data.frame(rdlow[, c("einprotId", sub("\\.t$", ".adj.P.Val",
                                                 contrasts_low))]) |>
            tidyr::pivot_longer(names_to = "contrast_low", 
                                values_to = "adjp_low", -einprotId) |>
            dplyr::mutate(contrast_low = sub("\\.adj.P.Val$", "", contrast_low))) |>
    dplyr::left_join(
        as.data.frame(rdlow[, c("einprotId", sub("\\.t$", ".logFC",
                                                 contrasts_low))]) |>
            tidyr::pivot_longer(names_to = "contrast_low", 
                                values_to = "logfc_low", -einprotId) |>
            dplyr::mutate(contrast_low = sub("\\.logFC$", "", contrast_low)))

## t-statistics, logFCs and adjusted p-values, high salt
high <- as.data.frame(rdhigh[, c("einprotId", contrasts_to_compare)]) |>
    tidyr::pivot_longer(names_to = "contrast_high", 
                        values_to = "tstat_high", -einprotId) |>
    dplyr::mutate(contrast_high = sub("\\.t$", "", contrast_high)) |>
    dplyr::left_join(
        as.data.frame(rdhigh[, c("einprotId", sub("\\.t$", ".adj.P.Val",
                                                  contrasts_to_compare))]) |>
            tidyr::pivot_longer(names_to = "contrast_high", 
                                values_to = "adjp_high", -einprotId) |>
            dplyr::mutate(contrast_high = sub("\\.adj.P.Val$", "", 
                                              contrast_high))) |>
    dplyr::left_join(
        as.data.frame(rdhigh[, c("einprotId", sub("\\.t$", ".logFC",
                                                  contrasts_to_compare))]) |>
            tidyr::pivot_longer(names_to = "contrast_high", 
                                values_to = "logfc_high", -einprotId) |>
            dplyr::mutate(contrast_high = sub("\\.logFC$", "", 
                                              contrast_high))) |>
    dplyr::mutate(contrast_low = sub("\\.t$", "", 
                                     contrasts_low[match(paste0(contrast_high, ".t"),
                                                         contrasts_to_compare)]))

## Merge data.frames
lowhigh <- low |> dplyr::full_join(high) |>
    dplyr::mutate(tstat_low = replace(tstat_low, is.na(tstat_low), 0),
                  tstat_high = replace(tstat_high, is.na(tstat_high), 0),
                  logfc_low = replace(logfc_low, is.na(logfc_low), 0),
                  logfc_high = replace(logfc_high, is.na(logfc_high), 0),
                  adjp_low = replace(adjp_low, is.na(adjp_low), 1),
                  adjp_high = replace(adjp_high, is.na(adjp_high), 1)) |>
    dplyr::mutate(bait = .getProteinNameFromComparison(contrast_low)) |>
    dplyr::mutate(splitvar = .getProteinNameFromComparison(contrast_low))

## Add dal2
rddal2 <- rowData(scedal2)
contrasts_low_dal2 <- c("Dal2_150_plate_vs_Untagged_150_plate.t")
low_dal2 <- as.data.frame(rddal2[, c("einprotId", contrasts_low_dal2)]) |>
    tidyr::pivot_longer(names_to = "contrast_low", 
                        values_to = "tstat_low", -einprotId) |>
    dplyr::mutate(contrast_low = sub("\\.t$", "", contrast_low)) |>
    dplyr::left_join(
        as.data.frame(rddal2[, c("einprotId", sub("\\.t$", ".adj.P.Val",
                                                  contrasts_low_dal2))]) |>
            tidyr::pivot_longer(names_to = "contrast_low", 
                                values_to = "adjp_low", -einprotId) |>
            dplyr::mutate(contrast_low = sub("\\.adj.P.Val$", "", contrast_low))) |>
    dplyr::left_join(
        as.data.frame(rddal2[, c("einprotId", sub("\\.t$", ".logFC",
                                                  contrasts_low_dal2))]) |>
            tidyr::pivot_longer(names_to = "contrast_low", 
                                values_to = "logfc_low", -einprotId) |>
            dplyr::mutate(contrast_low = sub("\\.logFC$", "", contrast_low)))

contrasts_high_dal2 <- c("Dal2_500_plate_vs_Untagged_500_plate.t")
high_dal2 <- as.data.frame(rddal2[, c("einprotId", contrasts_high_dal2)]) |>
    tidyr::pivot_longer(names_to = "contrast_high", 
                        values_to = "tstat_high", -einprotId) |>
    dplyr::mutate(contrast_high = sub("\\.t$", "", contrast_high)) |>
    dplyr::left_join(
        as.data.frame(rddal2[, c("einprotId", sub("\\.t$", ".adj.P.Val",
                                                  contrasts_high_dal2))]) |>
            tidyr::pivot_longer(names_to = "contrast_high", 
                                values_to = "adjp_high", -einprotId) |>
            dplyr::mutate(contrast_high = sub("\\.adj.P.Val$", "", contrast_high))) |>
    dplyr::left_join(
        as.data.frame(rddal2[, c("einprotId", sub("\\.t$", ".logFC",
                                                  contrasts_high_dal2))]) |>
            tidyr::pivot_longer(names_to = "contrast_high", 
                                values_to = "logfc_high", -einprotId) |>
            dplyr::mutate(contrast_high = sub("\\.logFC$", "", contrast_high)))

lowhigh_dal2 <- low_dal2 |> dplyr::full_join(high_dal2) |>
    dplyr::mutate(tstat_low = replace(tstat_low, is.na(tstat_low), 0),
                  tstat_high = replace(tstat_high, is.na(tstat_high), 0),
                  logfc_low = replace(logfc_low, is.na(logfc_low), 0),
                  logfc_high = replace(logfc_high, is.na(logfc_high), 0),
                  adjp_low = replace(adjp_low, is.na(adjp_low), 1),
                  adjp_high = replace(adjp_high, is.na(adjp_high), 1)) |>
    dplyr::mutate(bait = .getProteinNameFromComparison(sub("SPB4775_", "", 
                                                           contrast_low))) |>
    dplyr::mutate(splitvar = .getProteinNameFromComparison(sub("SPB4775_", "",
                                                               contrast_low)))

lowhigh <- lowhigh |>
    dplyr::bind_rows(lowhigh_dal2)

## Subset 1
adjp150vs500 <- 1e-3
logfc150vs500 <- 1

lowhighsub1 <- lowhigh |> 
    dplyr::filter(splitvar %in% c("Ace2", "Rst2", "Pap1")) |>
    dplyr::mutate(splitvar = factor(splitvar, levels = c("Ace2", "Rst2", "Pap1"))) |>
    dplyr::mutate(significant = (adjp_high < adjp150vs500 & logfc_high > logfc150vs500)) |>
    dplyr::mutate(complex = ifelse(einprotId %in% .capitalize(complexes$NuA4$gene_names), 
                                   "NuA4", ifelse(einprotId %in% .capitalize(complexes$SAGA$gene_names),
                                                  "SAGA", NA_character_)))

## Subset 2
lowhighsub2 <- lowhigh |> 
    dplyr::filter(splitvar %in% c("Atf1", "Pcr1", "Moc3", 
                                  "Dal2", "Ntu1", "Ntu2")) |>
    dplyr::mutate(splitvar = factor(splitvar, levels = c("Ntu1", "Ntu2", 
                                                         "Atf1", "Pcr1", 
                                                         "Moc3", "Dal2"))) |>
    dplyr::mutate(significant = (adjp_high < adjp150vs500 & logfc_high > logfc150vs500))
    
(gg_retain1 <- ggplot(lowhighsub1, 
                      aes(x = tstat_high, y = tstat_low)) +
        geom_abline(data = lowhighsub1 |>
                        dplyr::filter(einprotId == bait), 
                    aes(slope = tstat_low / tstat_high, intercept = 0), 
                    linetype = "dashed", color = "grey50") + 
        geom_text_repel(data = lowhighsub1 |>
                            dplyr::mutate(
                                einprotId = ifelse(significant, einprotId, "")),
                        aes(label = einprotId), min.segment.length = 0,
                        max.overlaps = Inf) + 
        geom_point(aes(alpha = !is.na(complex), color = complex), 
                   show.legend = c(alpha = FALSE, color = TRUE)) + 
        scale_color_manual(values = complex_colors, na.value = "black",
                           breaks = c("SAGA", "NuA4")) + 
        scale_alpha_manual(values = c(`TRUE` = 1, `FALSE` = 0.25)) + 
        geom_point(data = lowhighsub1 |> dplyr::filter(einprotId == bait) |>
                       dplyr::mutate(type = "Bait"), 
                   mapping = aes(fill = type), color = main_colors[3], 
                   shape = 21, alpha = 1, size = 2) + 
        scale_fill_manual(values = c(Bait = main_colors[3])) + 
        facet_wrap(~ splitvar, scales = "free", ncol = 1, 
                   labeller = labeller(splitvar = structure(
                       paste0(unique(lowhighsub1$splitvar), " IP-MS"), 
                       names = as.character(unique(lowhighsub1$splitvar))))) + 
        labs(x = "500 mM NaCl\nmod. t-stat.", y = "150 mM NaCl mod. t-stat.") + 
        theme_cowplot(ft) + 
        theme(legend.position = "bottom", 
              strip.background = element_rect(fill = NA, colour = NA), 
              strip.text = element_text(hjust = 0, face = "bold")) + 
        guides(color = guide_legend(nrow = 1, byrow = TRUE, title = ""),
               fill = guide_legend(nrow = 1, byrow = TRUE, title = "")))

(gg_retain2 <- ggplot(lowhighsub2, 
                      aes(x = tstat_high, y = tstat_low)) +
        geom_abline(data = lowhighsub2 |> 
                        dplyr::filter(einprotId == bait), 
                    aes(slope = tstat_low / tstat_high, intercept = 0), 
                    linetype = "dashed", color = "grey50") + 
        geom_text_repel(data = lowhighsub2 |>
                            dplyr::mutate(einprotId = ifelse(significant, einprotId, "")),
                        aes(label = einprotId), min.segment.length = 0,
                        max.overlaps = Inf) + 
        geom_point(aes(alpha = significant, color = significant, size = significant), 
                   show.legend = c(alpha = FALSE, color = TRUE, size = FALSE)) + 
        scale_color_manual(
            values = c(`TRUE` = lighten(main_colors[4], 0.2), `FALSE` = "black"), 
            labels = c(`TRUE` = paste0("adj. p<", adjp150vs500, 
                                       " and logFC>", logfc150vs500, " (500 mM NaCl)"),
                       `FALSE` = paste0("adj. p\U2265", adjp150vs500, 
                                        " or logFC\U2264", logfc150vs500, " (500 mM NaCl)"))) + 
        scale_size_manual(values = c(`TRUE` = 2, `FALSE` = 1)) +
        scale_alpha_manual(values = c(`TRUE` = 1, `FALSE` = 0.25)) + 
        geom_point(data = lowhighsub2 |> 
                       dplyr::filter(einprotId == bait) |>
                       dplyr::mutate(type = "Bait"), 
                   mapping = aes(fill = type), color = main_colors[3], 
                   shape = 21, alpha = 1, size = 2) + 
        scale_fill_manual(values = c(Bait = main_colors[3])) + 
        facet_wrap(~ splitvar, scales = "free", ncol = 2, 
                   labeller = labeller(splitvar = structure(
                       paste0(unique(lowhighsub2$splitvar), " IP-MS"), 
                       names = as.character(unique(lowhighsub2$splitvar))))) + 
        labs(x = "500 mM NaCl mod. t-stat.", y = "150 mM NaCl mod. t-stat.") + 
        theme_cowplot(ft) + 
        theme(legend.position = "bottom", 
              strip.background = element_rect(fill = NA, colour = NA), 
              strip.text = element_text(hjust = 0, face = "bold")) + 
        guides(color = guide_legend(nrow = 2, byrow = TRUE, title = ""),
               fill = guide_legend(nrow = 1, byrow = TRUE, title = "")))

Tube vs plate t-statistics for Atf1 pull-down

The low-salt experiments were done using two different protocols (tube or plate). The Atf1 pull-down was performed with both protocols, and the figure below compares the moderated t-statistics (each obtained by comparing to the complement consisting of all non-Atf1 pull-down samples obtained with the same protocol).

df <- as.data.frame(rowData(sce150)) |>
    dplyr::select(einprotId, Atf1_150_tube_vs_compl_Atf1_150_tube.t, 
                  Atf1_150_plate_vs_compl_Atf1_150_plate.t) |>
    dplyr::mutate(col = ifelse(einprotId == "Atf1", "bait", 
                               ifelse(einprotId == "Pcr1", "Pcr1", "other")))
(ggatf1 <- ggplot(df, aes(x = Atf1_150_tube_vs_compl_Atf1_150_tube.t, 
                          y = Atf1_150_plate_vs_compl_Atf1_150_plate.t)) + 
        geom_abline(slope = 1, intercept = 0) + 
        geom_point(size = 3, aes(color = col, alpha = col)) + 
        scale_color_manual(values = c(bait = main_colors[3], pcr1 = main_colors[4], 
                                      other = "black")) + 
        scale_alpha_manual(values = c(bait = 1, pcr1 = 1, other = 0.5)) + 
        geom_text_repel(data = df |> filter(einprotId %in% c("Atf1", "Pcr1")), 
                        aes(label = einprotId)) + 
        labs(x = "Mod. t-stat. Atf1 vs complement\n(tube, 150 mM NaCl)", 
             y = "Mod. t-stat. Atf1 vs complement\n(plate, 150 mM NaCl)") + 
        coord_fixed() + 
        theme_cowplot() + 
        theme(legend.position = "none"))

Adn2 vs complement volcano plot

md <- metadata(sce500)
volc <- md$testres$tests$Adn2_500_plate_vs_compl_Adn2_500_plate |>
    mutate(signif = adj.P.Val < adjpThreshold & logFC > log2fcThreshold)
(adn2volc <- ggplot(volc, aes(x = logFC, y = mlog10p)) + 
        geom_text_repel(data = volc |> filter(signif),
                        aes(label = pid), min.segment.length = 0,
                        max.overlaps = Inf) + 
        geom_point(aes(alpha = signif, color = signif), 
                   show.legend = c(alpha = FALSE, color = TRUE)) + 
        scale_color_manual(values = c(`TRUE` = main_colors[5], `FALSE` = na_color), 
                           labels = c(`TRUE` = paste0("adj. p < ", adjpThreshold, 
                                                      " and logFC > ", log2fcThreshold)),
                           na.value = "black", breaks = "TRUE") + 
        scale_alpha_manual(values = c(`TRUE` = 1, `FALSE` = 0.5)) + 
        geom_point(data = volc |> dplyr::filter(pid == "Adn2") |>
                       dplyr::mutate(type = "Bait"), 
                   mapping = aes(fill = type), color = main_colors[3], 
                   shape = 21, alpha = 1, size = 2) + 
        scale_fill_manual(values = c(Bait = main_colors[3])) + 
        labs(x = "log2(fold-change)", y = "-log10(p-value)", 
             title = "Adn2 vs complement (500 mM NaCl IP-MS)") + 
        theme_cowplot(ft) + 
        theme(legend.position = "bottom") + 
        guides(color = guide_legend(nrow = 1, byrow = TRUE, title = ""),
               fill = guide_legend(nrow = 1, byrow = TRUE, title = "")))

Histone proteins t-stats

## Get all t-statistics and visualize
res500 <- .getTestCols(sce500, adjp_cutoff = 0.01, logfc_cutoff = 1)
histones <- res500$tstat[c("Hta1", "Htb1", "Hht2", "Hhf2"), ]
colnames(histones) <- .getProteinNameFromComparison(colnames(histones))
histones <- histones[, colnames(histones) != "Untagged"]
histones <- t(histones)
hmhistones <- Heatmap(histones, 
                      cluster_rows = TRUE, 
                      cluster_columns = FALSE, 
                      col = makeHeatmapCol(stringency = "low"), 
                      border = TRUE, 
                      border_gp = gpar(lwd = 0.5),
                      name = "Mod.\nt-stat.",
                      na_col = binary_heatmap_colors["FALSE"],
                      column_names_gp = gpar(fontsize = fl),
                      row_names_gp = gpar(fontsize = fs),
                      show_row_names = TRUE, 
                      show_column_names = TRUE, 
                      use_raster = FALSE,
                      show_heatmap_legend = TRUE, 
                      width = unit(w_ipmsHm / 1.5, "mm"), 
                      heatmap_height = unit(1.3 * h_ipmsHm, "mm"),
                      column_title_gp = gpar(fontsize = ft),
                      heatmap_legend_param = list(
                          title_gp = gpar(fontsize = fl),
                          border = "gray10",
                          border_gp = gpar(lwd = 0.5)
                      ))
set.seed(42L)
hm_histones <- grid.grabExpr(
    hmrad <- draw(hmhistones, merge_legend = TRUE,
                 heatmap_legend_side = "right",
                 annotation_legend_side = "right"),
    width = w_ipmsHm / 1.5, height = 1.3 * h_ipmsHm
)
plot_grid(hm_histones)

Put together

cowplot::plot_grid(
    cowplot::plot_grid(
        hm_150_bait, hm_500_bait,
        nrow = 1, labels = c("A", "B")
    ),
    cowplot::plot_grid(
        NULL,
        legend_bait_1, 
        legend_bait_2,
        NULL,
        nrow = 1,
        rel_widths = c(1, 2, 2, 1)
    ),
    NULL,
    cowplot::plot_grid(
        gg_retain2,
        plot_grid(NULL, hm_150_saga_nua4, gg_retain1,
                  nrow = 1, rel_widths = c(0.05, 0.75, 1)),
        nrow = 1, labels = c("C", "D"), vjust = -0.5
    ),
    ncol = 1,
    rel_heights = c(3.5, 0.5, 0.2, 5.88)
)

Supplementary figure

cowplot::plot_grid(
    cowplot::plot_grid(
        pies, 
        adn2volc, 
        ncol = 1,
        labels = c("A", "B")
    ),
    hm_histones,
    nrow = 1,
    labels = c("", "C")
)

Session info

Session info

sessionInfo()

## R version 4.3.2 (2023-10-31)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: CentOS Linux 7 (Core)
## 
## Matrix products: default
## BLAS/LAPACK: FlexiBLAS OPENBLAS;  LAPACK version 3.10.1
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: Europe/Zurich
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] grid      stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] patchwork_1.2.0             kableExtra_1.4.0           
##  [3] tidyr_1.3.1                 stringr_1.5.1              
##  [5] DT_0.33                     ggrepel_0.9.5              
##  [7] colorspace_2.1-0            ComplexHeatmap_2.18.0      
##  [9] jsonlite_1.8.8              forcats_1.0.0              
## [11] dplyr_1.1.4                 einprot_0.9.4              
## [13] cowplot_1.1.3               ggplot2_3.5.0              
## [15] SingleCellExperiment_1.24.0 SummarizedExperiment_1.32.0
## [17] Biobase_2.62.0              GenomicRanges_1.54.1       
## [19] GenomeInfoDb_1.38.8         IRanges_2.36.0             
## [21] S4Vectors_0.40.2            BiocGenerics_0.48.1        
## [23] MatrixGenerics_1.14.0       matrixStats_1.3.0          
## 
## loaded via a namespace (and not attached):
##   [1] R.methodsS3_1.8.2           ggalt_0.4.0                
##   [3] poweRlaw_0.80.0             Biostrings_2.70.3          
##   [5] vctrs_0.6.5                 digest_0.6.35              
##   [7] png_0.1-8                   shape_1.4.6.1              
##   [9] pcaPP_2.0-4                 iSEE_2.14.0                
##  [11] magick_2.8.3                MASS_7.3-60                
##  [13] reshape2_1.4.4              httpuv_1.6.15              
##  [15] foreach_1.5.2               withr_3.0.0                
##  [17] xfun_0.43                   survival_3.5-8             
##  [19] memoise_2.0.1               hexbin_1.28.3              
##  [21] ggbeeswarm_0.7.2            gmm_1.8                    
##  [23] systemfonts_1.0.6           zoo_1.8-12                 
##  [25] GlobalOptions_0.1.2         gtools_3.9.5               
##  [27] R.oo_1.26.0                 DEoptimR_1.1-3             
##  [29] GGally_2.2.1                KEGGREST_1.42.0            
##  [31] promises_1.3.0              httr_1.4.7                 
##  [33] restfulr_0.0.15             hash_2.2.6.3               
##  [35] rstudioapi_0.16.0           shinyAce_0.4.2             
##  [37] rrcovNA_0.5-1               miniUI_0.1.1.1             
##  [39] generics_0.1.3              babelgene_22.9             
##  [41] zlibbioc_1.48.2             ScaledMatrix_1.10.0        
##  [43] GenomeInfoDbData_1.2.11     SparseArray_1.2.4          
##  [45] xtable_1.8-4                ade4_1.7-22                
##  [47] pracma_2.4.4                doParallel_1.0.17          
##  [49] evaluate_0.23               ExploreModelMatrix_1.14.0  
##  [51] S4Arrays_1.2.1              proDA_1.16.0               
##  [53] hms_1.1.3                   irlba_2.3.5.1              
##  [55] iSEEhex_1.4.0               shinyWidgets_0.8.5         
##  [57] magrittr_2.0.3              readr_2.1.5                
##  [59] later_1.3.2                 viridis_0.6.5              
##  [61] lattice_0.22-6              MsCoreUtils_1.14.1         
##  [63] genefilter_1.84.0           robustbase_0.99-2          
##  [65] XML_3.99-0.16.1             scuttle_1.12.0             
##  [67] pillar_1.9.0                nlme_3.1-164               
##  [69] iterators_1.0.14            caTools_1.18.2             
##  [71] compiler_4.3.2              beachmat_2.18.1            
##  [73] stringi_1.8.3               GenomicAlignments_1.38.2   
##  [75] tmvtnorm_1.6                plyr_1.8.9                 
##  [77] msigdbr_7.5.1               crayon_1.5.2               
##  [79] abind_1.4-5                 BiocIO_1.12.0              
##  [81] scater_1.30.1               chron_2.3-61               
##  [83] bit_4.0.5                   sandwich_3.1-0             
##  [85] pcaMethods_1.94.0           codetools_0.2-19           
##  [87] BiocSingular_1.18.0         crosstalk_1.2.1            
##  [89] bslib_0.7.0                 GetoptLong_1.0.5           
##  [91] plotly_4.10.4               mime_0.12                  
##  [93] MultiAssayExperiment_1.28.0 splines_4.3.2              
##  [95] circlize_0.4.16             Rcpp_1.0.12                
##  [97] sparseMatrixStats_1.14.0    Rttf2pt1_1.3.12            
##  [99] knitr_1.46                  blob_1.2.4                 
## [101] utf8_1.2.4                  clue_0.3-65                
## [103] seqLogo_1.68.0              AnnotationFilter_1.26.0    
## [105] QFeatures_1.12.0            DelayedMatrixStats_1.24.0  
## [107] tibble_3.2.1                sqldf_0.4-11               
## [109] iSEEu_1.14.0                Matrix_1.6-5               
## [111] statmod_1.5.0               tzdb_0.4.0                 
## [113] svglite_2.1.3               pkgconfig_2.0.3            
## [115] tools_4.3.2                 cachem_1.0.8               
## [117] RSQLite_2.3.6               viridisLite_0.4.2          
## [119] DBI_1.2.2                   impute_1.76.0              
## [121] fastmap_1.1.1               rmarkdown_2.26             
## [123] scales_1.3.0                shinydashboard_0.7.2       
## [125] Rsamtools_2.18.0            sass_0.4.9                 
## [127] ggstats_0.6.0               farver_2.1.1               
## [129] mgcv_1.9-1                  gsubfn_0.7                 
## [131] yaml_2.3.8                  rtracklayer_1.62.0         
## [133] cli_3.6.2                   purrr_1.0.2                
## [135] writexl_1.5.0               lifecycle_1.0.4            
## [137] mvtnorm_1.2-4               rintrojs_0.3.4             
## [139] BiocParallel_1.36.0         annotate_1.80.0            
## [141] gtable_0.3.5                rjson_0.2.21               
## [143] parallel_4.3.2              limma_3.58.1               
## [145] colourpicker_1.3.0          TFBSTools_1.40.0           
## [147] bitops_1.0-7                bit64_4.0.5                
## [149] BiocNeighbors_1.20.2        proto_1.0.0                
## [151] CNEr_1.38.0                 jquerylib_0.1.4            
## [153] highr_0.10                  shinyjs_2.1.0              
## [155] imputeLCMD_2.1              R.utils_2.12.3             
## [157] rrcov_1.7-5                 lazyeval_0.2.2             
## [159] shiny_1.8.1.1               htmltools_0.5.8.1          
## [161] proj4_1.0-14                GO.db_3.18.0               
## [163] glue_1.7.0                  STRINGdb_2.14.3            
## [165] TFMPvalue_0.0.9             XVector_0.42.0             
## [167] RCurl_1.98-1.14             ComplexUpset_1.3.3         
## [169] mclust_6.1                  BSgenome_1.70.2            
## [171] motifStack_1.46.0           gridExtra_2.3              
## [173] igraph_2.0.3                extrafontdb_1.0            
## [175] R6_2.5.1                    ggiraph_0.8.9              
## [177] gplots_3.1.3.1              labeling_0.4.3             
## [179] cluster_2.1.4               stringdist_0.9.12          
## [181] DirichletMultinomial_1.44.0 DelayedArray_0.28.0        
## [183] tidyselect_1.2.1            vipor_0.4.7                
## [185] plotrix_3.8-4               ProtGenerics_1.34.0        
## [187] maps_3.4.2                  xml2_1.3.6                 
## [189] ash_1.0-15                  AnnotationDbi_1.64.1       
## [191] rsvd_1.0.5                  munsell_0.5.1              
## [193] KernSmooth_2.23-22          data.table_1.15.4          
## [195] htmlwidgets_1.6.4           RColorBrewer_1.1-3         
## [197] rlang_1.1.3                 extrafont_0.19             
## [199] uuid_1.2-0                  norm_1.0-11.1              
## [201] fansi_1.0.6                 Cairo_1.6-2                
## [203] beeswarm_0.4.0

Figure 5 - IP-MS

Charlotte Soneson, Michael Stadler, Merle Skribbe

2024-08-18

Parameter values

Load required packages and helper functions

Helper functions

Read data

Extract data for heatmaps

Export data for TFexplorer

Facts and figures

Identification of baits in IP-MS

Summary tables

Figure 5

Summary heatmaps using results from the statistical tests (150 mM NaCl)

Bait-vs-bait heatmap

Bait-vs-bait, less stringent adjusted p-value threshold

SAGA/NuA4 heatmap

RNA / DNA binding proteins

Summary heatmaps using results from the statistical tests (500 mM NaCl)

Bait-vs-bait heatmap

Pie charts for bait-vs-bait interactions (within/between DBD families)

Compare t-statistics from low-salt and high-salt conditions

Tube vs plate t-statistics for Atf1 pull-down

Adn2 vs complement volcano plot

Histone proteins t-stats

Put together

Supplementary figure

Session info