• Parameter values
  • Load required packages
  • Helper functions
  • Read data
  • Facts and numbers - ChIP-seq
  • Export peak ChIP data for use in TFexplorer
  • Calculate peak distances to nearest TSS
  • Calculate peak distances to tRNA or rRNA genes
  • Calculate motif similarities
  • Figure 3
    • Fraction of motif genomic hits bound
    • Genome browser tracks of ChIP experiments for selected TFs
    • Motif similarity heatmap
    • Motif alignments
    • TF peak overlaps from TFs in motif clusters
    • TF Autoregulation
      • Heatmap
      • Heatmap (not showing the non-bound TF promoters)
      • Network
      • Network around Atf1
    • Peak types and presence of nearby ncRNAs
    • Put together
    • Supplementary figure
  • Session info

Parameter values

params
## $genomefasta
## [1] "reference/Schizosaccharomyces_pombe.ASM294v2.dna.toplevel.fa"
## 
## $gtf
## [1] "reference/Schizosaccharomyces_pombe.ASM294v2.55.gtf"
## 
## $chiptrackfile
## [1] "data/genome_browser_tracks_chipseq.csv.gz"
## 
## $peakcsv
## [1] "data/fused_peaks_filtered.csv.gz"
## 
## $peakenr
## [1] "data/fused_peaks_filtered_enrichments.csv.gz"
## 
## $peakchipfile
## [1] "data/fused_peaks_filtered_counts-external_chip.csv.gz"
## 
## $dbdtxt
## [1] "data/TF_DBD_annotation.txt"
## 
## $motifrds
## [1] "data/motifs_annotated.rds"
## 
## $txcsv
## [1] "data/promoters-1kb_tes-1kb_annotated.csv.gz"
## 
## $coordsatf1
## [1] "data/visnetwork_coords_atf1.csv"

Load required packages

suppressPackageStartupMessages({
    library(circlize)
    library(ComplexHeatmap)
    library(grid)
    library(ggplot2)
    library(cowplot)
    library(Biostrings)
    library(BSgenome)
    library(GenomicRanges)
    library(rtracklayer)
    library(tibble)
    library(tidyr)
    library(tidygraph)
    library(universalmotif)
    library(parallel)
    library(colorspace)
    library(scattermore)
    library(ggupset)
    library(dplyr)
    library(viridisLite)
    library(SummarizedExperiment)
    library(S4Vectors)
    library(jsonlite)
    library(dendextend)
    library(kableExtra)
    library(forcats)
    library(igraph)
    library(ggraph)
    library(visNetwork)
    library(shiny)
})

source("params/plot_settings.R")
source("params/plottracks_function.R")

# define heatmap dimensions and font sizes
w_peaksHm <-  84 # heatmap body width (mm)
h_peaksHm <- 178 # total heatmap height (mm)
fs <- 8  # small font size
fm <- 9  # medium font size
fl <- 10 # large font size

Helper functions

Here we define a few helper functions for using below:

# make first letter of a string lowercase
.lowercase <- function(x) {
    paste0(tolower(substr(x, 1, 1)), substr(x, 2, nchar(x)))
}

# make first letter of a string uppercase
.capitalize <- function(x) {
    paste0(toupper(substr(x, 1, 1)), substr(x, 2, nchar(x)))
}

# for each region in `from`, calculate the distance to its nearest element in `to`
# if there are no element in `to` on the sequence of `from`, set distance to NA
calc_distance_to_nearest <- function(from, to) {
    stopifnot(exprs = {
        is(from, "GRanges")
        is(to, "GRanges")
    })
    tmp <- distanceToNearest(x = from, subject = to)
    d <- rep(NA, length(from))
    d[queryHits(tmp)] <- mcols(tmp)$distance
    return(d)
}

# create a network (directed graph) from an adjacency matrix
createNetwork <- function(adjmatrix, chippedTFs = colnames(mprom), settings = "igraph") {
    # create igraph graph object
    gr <- graph_from_adjacency_matrix(adjmatrix, mode = "directed")

    # remove singletons
    comps <- components(gr)
    too_small <- which(table(comps$membership) < 2)
    keep <- V(gr)[!(comps$membership %in% too_small)]
    gr <- induced_subgraph(gr, keep)

    # set attributes for vertices
    V(gr)$vertexclass <- ifelse(adjmatrix[cbind(names(V(gr)), names(V(gr)))] > 0,
                                "Autoregulated TF",
                                ifelse(names(V(gr)) %in% chippedTFs,
                                       "ChIP'ed TF", "Other TF"))
    if (settings == "visNetwork") {
        V(gr)$shape <- "triangle"
        V(gr)$size <- 15
        V(gr)$label.cex = 1.0
    } else if (settings == "igraph") {
        V(gr)$shape <- "circle"
    } else {
        stop("Unknown 'settings' parameter")
    }
    
    # highlight edges for reciprocal interactions
    eL <- as_edgelist(gr)
    recipr <- paste0(eL[,1], "-", eL[,2]) %in% paste0(eL[,2], "-", eL[,1]) & eL[,1] != eL[,2]
    E(gr)$weight <- ifelse(recipr, 2, 1)
    E(gr)$edgeclass <- ifelse(recipr, "Reciprocal", "Other")
    E(gr)$color <- ifelse(recipr, gplots::col2hex(main_colors[3]), gplots::col2hex(na_color))

    # return object
    return(gr)
}

Read data

We start by reading all tables and objects needed for this figure. Count tables are in addition normalized, and we also calculate averages over replicates or all samples where needed:

# genome sequence
gnm <- readDNAStringSet(params$genomefasta)
names(gnm) <- sub(" .+$", "", names(gnm))

# `peakgr`: ChIP-seq peaks (as a GRanges object)
peakgr <- as(read.csv(params$peakcsv, row.names = 1), "GRanges")
peakgr$peaktype <- factor(peakgr$peaktype,
                          levels = c("common peaks (ubiquitous)",
                                     "common peaks (frequent)",
                                     "specific peaks"))
peakseq <- getSeq(gnm, peakgr)

# `peakenr`: peak IP-enrichments log2(IP/input)
peakenr <- as.matrix(read.csv(params$peakenr, row.names = 1))


# `chip_genome_tab`: data.frame with alignment densities in selected
#                    genomic regions for selected samples
chip_genome_tab <- read.csv(params$chiptrackfile)

# `genegr`: chromosomal ranges of genes (as GRanges object)
genegr <- rtracklayer::import(params$gtf, feature.type = "gene")

# `motifs`: data.frame with annotated motifs
motifs <- readRDS(params$motifrds)

# `txannot`: data.frame with transcript start site (TSS) and
#            transcript end site (TES) annotation
txannot <- read.csv(params$txcsv, row.names = 1)

# `dbd`: data.frame with information of transcription factors used as
#        IP-MS baits and their DNA binding domains
dbd <- read.delim(params$dbdtxt)

Facts and numbers - ChIP-seq

# Percent of TF-bound gene promoters
round(100 * mean(txannot$prom.number.overlapping.highconf.peaks > 0), 0)
## [1] 32
# Percent of TF-bound gene promoters, by gene biotype
txannot |>
    group_by(gene_biotype) |>
    summarise("Number of genes" = n(),
              "Percent promoters with TF enrichments" = round(
        100 * mean(prom.number.overlapping.highconf.peaks > 0), 0),
        .groups = "drop") |>
    knitr::kable()
gene_biotype Number of genes Percent promoters with TF enrichments
RNase_MRP_RNA 1 100
RNase_P_RNA 1 100
SRP_RNA 1 100
ncRNA 1534 36
protein_coding 5146 27
pseudogene 29 28
rRNA 88 78
snRNA 9 100
snoRNA 81 73
tRNA 379 65
# TF enrichments at promoters of TF genes
txannot |>
    filter(gene_id %in% dbd$PomBaseID) |>
    summarise("Number of TF genes" = n(),
              "Number of TF promoters with at least one enriched TF" =
                  sum(prom.number.distinct.enriched.TFs > 0),
              "Number of TF promoters with multiple enriched TFs" =
                  sum(prom.number.distinct.enriched.TFs > 1),
              "Number of TF promoters without detected TF enrichment" =
                  sum(prom.number.distinct.enriched.TFs == 0),
              "Number of TFs enriched at any promoter" = length(unique(unlist(strsplit(prom.enriched.TFs, ";")))),
              "Number of TFs enriched at any promoter (w/o zas1\U207A promoter)" = length(unique(unlist(strsplit(prom.enriched.TFs[gene_name != "zas1"], ";")))),
              "Number of TFs enriched at zas1\U207A promoter" = length(unique(unlist(strsplit(prom.enriched.TFs[gene_name == "zas1"], ";")))),
              "Number of TFs enriched at their own promoter" = sum(binds.own.promoter),
              "Number of TFs enriched at atf1\U207A promoter" = length(unique(unlist(strsplit(prom.enriched.TFs[gene_name == "atf1"], ";")))),
              "Number of TF promoters with Atf1 enrichment" = length(grep("Atf1", prom.enriched.TFs)),
              .groups = "drop") |>
    pivot_longer(cols = starts_with("Number"),
                 values_to = "Number", names_to = "Set") |>
    knitr::kable()
Set Number
Number of TF genes 89
Number of TF promoters with at least one enriched TF 43
Number of TF promoters with multiple enriched TFs 26
Number of TF promoters without detected TF enrichment 46
Number of TFs enriched at any promoter 59
Number of TFs enriched at any promoter (w/o zas1⁺ promoter) 48
Number of TFs enriched at zas1⁺ promoter 48
Number of TFs enriched at their own promoter 11
Number of TFs enriched at atf1⁺ promoter 11
Number of TF promoters with Atf1 enrichment 11
# Total number of identified DNA-binding motifs
nrow(motifs)
## [1] 67
# Number of unique motifs (based on manual classification)
sum(motifs$unique_version_of_motif)
## [1] 45
# Number of TFs for which at least one motif was identified
length(unique(motifs$tf_name))
## [1] 38
# Number (percent) of common frequent peaks with at least on Atf1.m1 DNA-binding motif (CRE motif)
hits <- universalmotif::scan_sequences(motifs = motifs$motif[motifs$name == "Atf1.m1"],
                                       sequences = peakseq[peakgr$peaktype == "common peaks (frequent)"], 
                                       threshold = 1e-4,
                                       threshold.type = "pvalue",
                                       RC = TRUE,
                                       verbose = 0,
                                       nthreads = 1,
                                       return.granges = TRUE)
length(unique(hits$sequence.i))
## [1] 33
round(100 * length(unique(hits$sequence.i)) / sum(peakgr$peaktype == "common peaks (frequent)"))
## [1] 35

Export peak ChIP data for use in TFexplorer

Starting from the peaks (peakgr) and ChIP enrichments in peaks (peakenr), prepare and export the matrix in json format for use in TFexplorer.

# average replicate ChIP enrichments
grp <- sub("_rep[12]$", "", colnames(peakenr))
peakenrExport <- do.call(cbind,
                         lapply(structure(unique(grp), names = unique(grp)),
                                function(nm) {
                                    rowMeans(peakenr[, grp == nm])
                                }))

# replace all enrichment values below 0 with 0
peakenrExport[peakenrExport < 0] <- 0

# cluster
dist_mat2 <- dist(peakenrExport, method = "euclidian")
hclust_avg2 <- hclust(dist_mat2, method = "average")
peakenrExportClustered <- peakenrExport[hclust_avg2$order, ]

# calculate extend peak coordinates and IGV position
IGVpos <- paste0(seqnames(peakgr), ":",
                 start(peakgr) - 100, "-",
                 end(peakgr) + 100)
IGVposClustered <- IGVpos[hclust_avg2$order]

# create peak position strings
peakPos <- paste0(seqnames(peakgr), ":", start(peakgr), "-", end(peakgr))
peakPosClustered <- peakPos[hclust_avg2$order]

# export peaks and enrichments as json
# ... calculate maximum enrichment value in each row
ChipMax <- apply(peakenrExportClustered, 1, max)
range(ChipMax)
## [1] 0.000000 6.719137
# ... convert to JSON
heatmapDataChip <- toJSON(
    list(
        heatmapMatrix = peakenrExportClustered,
        TFs = colnames(peakenrExportClustered),
        peaks = rownames(peakenrExportClustered),
        peakPos = peakPosClustered,
        IGVpos = IGVposClustered,
        ChipMax = ChipMax
    ), pretty = TRUE)

# ... write into a file for TFexplorer
write(heatmapDataChip, "data/heatmapDataChIP.json")

Calculate peak distances to nearest TSS

Using bound peaks for each TF (without “common peaks (ubiquitous)”), we want to measure the distance of peak mid-points to the nearest TSS, using negative distances to indicate TF binding upstream of the TSS.

tss <- GRanges(seqnames = txannot$seqnames, ranges = IRanges(
    start = txannot$TSS.coordinate, width = 1, names = txannot$tx_id),
    strand = txannot$strand)
peakSel <- peakgr[peakgr$peaktype != "common peaks (ubiquitous)"]
is_enr <- as.matrix(mcols(peakSel)[, grep("^is_enr_in[.]", colnames(mcols(peakSel)))])
colnames(is_enr) <- sub("^is_enr_in.", "", colnames(is_enr))
inclTFs <- colnames(is_enr) != "Untagged" & colSums(is_enr) > 0
peakSelAll <- peakSel[unlist(apply(is_enr[, inclTFs], 2, which))]
d <- distanceToNearest(x = resize(peakSelAll, width = 1, fix = "center"), subject = tss)
isUpstream <- ifelse(strand(tss)[subjectHits(d)] == "+", -1, 1) * sign(start(resize(peakSelAll, width = 1, fix = "center"))[queryHits(d)] - start(tss)[subjectHits(d)]) > 0 
signedDistToTSS <- mcols(d)$distance * (2 * (!isUpstream) - 1)

ggplot(data.frame(d = signedDistToTSS), aes(d)) +
    geom_density() +
    labs(x = "Position of peak mid relative to nearest TSS (bp)",
         y = "Density of peaks",
         caption = "Dashed line: TSS, dotted lines: promoter boundaries") +
    geom_vline(xintercept = 0, linetype = "dashed") +
    geom_vline(xintercept = c(-500, 500), linetype = "dotted", color = main_colors[3]) +
    theme_cowplot()

Calculate peak distances to tRNA or rRNA genes

We want to identify ChIP-seq peaks that are near a tRNA or rRNA gene. We can obtain the coordinates of these genes from genegr and the coordinates of peaks from peakgr, and calculate the distance between nearest peak-gene pairs using distanceToNearest(). Any pair with a distance below maxdist will be classified as “near” in the plots below.

Remark: Nearest distances can be NA in cases where for example a peak resides on a sequence (chromosome) that does not contain any tRNA or rRNA gene, or vice versa, such as the sequence AB325691 (which contains gap-filling sequence between SPBPB21E7.09 and SPBPB10D8.01 in chromosome 2) or the mitochondrial sequence MT.

# distance less than `maxdist` are defined as "near"
maxdist <- 100

# tRNA
# ... PomBase and Ensembl_Fungi annotate 196 and 183 tRNAs, respectively
#     we combine the two and obtain 198 annotated tRNAs
table(genegr$gene_biotype == "tRNA", genegr$source)
##        
##         PomBase Ensembl_Fungi
##   FALSE    6818            71
##   TRUE      196           183
is_tRNA_pombase <- genegr$source == "PomBase" & genegr$gene_biotype == "tRNA"
is_tRNA_ensembl <- genegr$source == "Ensembl_Fungi" & genegr$gene_biotype == "tRNA"
is_tRNA <- is_tRNA_pombase | (is_tRNA_ensembl & !genegr %in% genegr[is_tRNA_pombase])
sum(is_tRNA)
## [1] 198
# ... now we measure distances between nearest pairs
#     either from peak to nearest tRNA
dist.peak2tRNA <- calc_distance_to_nearest(from = peakgr, to = genegr[is_tRNA])
#     or from tRNA to nearest peak
dist.tRNA2peak <- calc_distance_to_nearest(from = genegr[is_tRNA], to = peakgr)

# rRNA
# ... all 36 5S_rRNA genes in our annotation stem from Ensembl_Fungi
table(genegr$gene_name == "5S_rRNA", genegr$source)
##        
##         PomBase Ensembl_Fungi
##   FALSE    4524           218
##   TRUE        0            36
is_rRNA <- !is.na(genegr$gene_name) & genegr$gene_name == "5S_rRNA"
sum(is_rRNA)
## [1] 36
# ... now we measure distances between nearest pairs
#     either from peak to nearest rRNA
dist.peak2rRNA <- calc_distance_to_nearest(from = peakgr, to = genegr[is_rRNA])
#     or from rRNA to nearest peak
dist.rRNA2peak <- calc_distance_to_nearest(from = genegr[is_rRNA], to = peakgr)

Calculate motif similarities

We calculate the similarities between any pair of motifs using the compare_motifs() function with method = "PCC" (Pearson correlation coefficient). We allow for reverse-complements (tryRC = TRUE) and require a minimal overlap between the two motifs of four nucleotides (min.overlap = 4).

motifsim <- compare_motifs(motifs = motifs$motif,
                           method = "PCC", tryRC = TRUE,
                           min.overlap = 4, min.mean.ic = 0.25,
                           normalise.scores = TRUE)

Figure 3

Fraction of motif genomic hits bound

For each motif, we visualize the fraction of sequence hits (sites in the genome that score highly for the motif) which are bound by the corresponding transcription factor (overlap with an enriched peak of that transcription factor). Fractions are shown separately for each peak class.

pd <- motifs |>
    select(name, total_hits, starts_with("bound_hits")) |>
    mutate(frac_bound_hits_any = (bound_hits_common.peaks.ubiquitous +
               bound_hits_common.peaks.frequent + bound_hits_specific.peaks) / total_hits,
           `Common (ubiquitous)` = bound_hits_common.peaks.ubiquitous / total_hits,
           `Common (frequent)` = bound_hits_common.peaks.frequent / total_hits,
           `Specific` = bound_hits_specific.peaks / total_hits) |>
    arrange(desc(frac_bound_hits_any)) |>
    mutate(name = factor(name, levels = name)) |>
    pivot_longer(cols = c("Common (ubiquitous)", "Common (frequent)",
                          "Specific"), names_to = "peak_set", values_to = "fraction_bound") |>
    mutate(peak_set = factor(peak_set, levels = c("Common (ubiquitous)", "Common (frequent)",
                          "Specific")))

(p_fracbound <- ggplot(pd, aes(name, fraction_bound, fill = peak_set)) +
        geom_col() +
        scale_y_continuous(expand = expansion(mult = 0),
                           labels = scales::label_percent()) +
        scale_fill_manual(values = peakset_colors) +
        labs(x = element_blank(),
             y = "Percent of motif sites",
             fill = element_blank()) +
        theme_cowplot(12) +
        theme(legend.position = "inside",
              legend.position.inside = c(0.95, 0.75),
              legend.justification.inside = c(1, 1),
              axis.text.x = element_text(angle = 90, hjust = 1, size = 8)) +
        guides(fill = guide_legend(ncol = 1)))

Genome browser tracks of ChIP experiments for selected TFs

We want to illustrate ChIP and Input read densities for selected TFs around their gene locus.

gnmtracks1 <- plottracks(df = chip_genome_tab,
                         gns = c("atf1", "pcr1", "cdc10", "thi5", "reb1", "toe3"),
                         cols = c(atf1 = main_colors[3], pcr1 = main_colors[2],
                                  cdc10 = main_colors[1], thi5 = "#66A61E",
                                  reb1 = main_colors[5], toe3 = main_colors[4]))
gnmtracks1

gnmtracks2 <- plottracks(df = chip_genome_tab,
                         gns = c("atf1", "pcr1", "cdc10", "thi5", "reb1", "toe3",
                                 "fkh2", "zas1", "SPCC320.03", "SPCC777.02", "prz1"),
                         cols = c(atf1 = main_colors[3], pcr1 = main_colors[2],
                                  cdc10 = main_colors[1], thi5 = "#66A61E",
                                  reb1 = main_colors[5], toe3 = main_colors[4],
                                  fkh2 = main_colors[3], zas1 = main_colors[2],
                                  prz1 = main_colors[5], `SPCC320.03` = "#66A61E",
                                  `SPCC777.02` = main_colors[1]),
                         font_size = 9)
gnmtracks2

Motif similarity heatmap

The following creates a heatmap of pairwise motif similarities (motifsim), hierarchically clustered and grouped into n_split clusters. Specific clusters are are highlighted on the right and motifs in these clusters are labeled.

# define heatmap colors
cols <- hcl.colors(64, "viridis")

# use squared pairwise motif similarities without diagonal entries (self)
motifsim2 <- motifsim^2
diag(motifsim2) <- NA

# value range for color scale
rng <- range(motifsim2, na.rm = TRUE)

# hierarchical clustering of motifs (cut tree to produce `n_split` clusters)
motifcl <- hclust(dist(motifsim2, method = "euclidean"), method = "complete")
n_split <- 17
motifcl_num <- cutree(motifcl, k = n_split)
motifcl <- dendextend::rotate(as.dendrogram(motifcl),
                              c(motifcl_num[motifcl_num != 3],
                                motifcl_num[motifcl_num == 3]))

# prepare heatmap annotation data
#   `selMotifCol`: colors for highlighted motif clusters
#   `selMotifIndex`: row index of motifs in highlighted clusters
#   `nMotifs`: number of motifs identified per transcription factor
selMotifCol <- c("0" = bg_color, "3" = "black", "13" = "black", "16" = "black")
selMotifIndex <- which(motifcl_num[rownames(motifsim2)] %in% names(selMotifCol))
nMotifs <- unclass(table(sub("[.]m[0-9]+$", "", rownames(motifsim2))))[
    sub("[.]m[0-9]+$", "", rownames(motifsim))
]

# prepare heatmap annotations
#   `motifAnnot` for motifs (columns), top
#   `selMotifAnnot` for motif (rows), right side
motifAnnot <- HeatmapAnnotation(
    "Number of\nmotifs per TF" = nMotifs,
    col = list(
        "Number of\nmotifs per TF" = circlize::colorRamp2(
            colors = colorRampPalette(c("white", main_colors[4]))(max(nMotifs)),
            breaks = seq.int(max(nMotifs)))
    ),
    show_legend = TRUE, show_annotation_name = TRUE,
    annotation_name_side = "right",
    annotation_name_gp = gpar(fontsize = fl),
    annotation_legend_param = list(at = seq.int(max(nMotifs)),
                                   labels_gp = gpar(fontsize = fl),
                                   border = "gray10",
                                   legend_direction = "horizontal",
                                   legend_width = unit(18, "mm"),
                                   title_position = "lefttop",
                                   title_gp = gpar(fontsize = fl)))
selMotifAnnot <- rowAnnotation(
    highlight = ifelse(motifcl_num %in% names(selMotifCol),
                       as.character(motifcl_num), "0"),
    motifnames = anno_mark(which = "row", side = "right",
                           at = selMotifIndex, 
                           labels = rownames(motifsim2)[selMotifIndex],
                           labels_gp = gpar(fontsize = fl)),
    col = list(highlight = selMotifCol),
    show_legend = FALSE, show_annotation_name = FALSE)

# create main heatmap with annotations
hm4 <- Heatmap(matrix = motifsim2, name = "Motif\nsimilarity",
               col = circlize::colorRamp2(breaks = seq(rng[1], rng[2],
                                                       length.out = 64),
                                          colors = cols),
               cluster_rows = motifcl,
               show_row_dend = TRUE, row_dend_width = unit(10, "mm"),
               cluster_columns = motifcl, show_column_dend = FALSE,
               row_split = n_split, column_split = n_split,
               column_title = " ",
               row_gap = unit(0.2, "mm"), column_gap = unit(0.2, "mm"),
               show_row_names = FALSE,
               column_title_gp = gpar(fontsize = fl),
               row_title = " ",
               show_column_names = FALSE,
               top_annotation = motifAnnot,
               right_annotation = selMotifAnnot,
               heatmap_legend_param = list(
                   at = round(c(rng[1], mean(rng), rng[2]), 2),
                   labels_gp = gpar(fontsize = fl),
                   border = "gray10",
                   legend_direction = "horizontal",
                   legend_width = unit(18, "mm"),
                   title_position = "lefttop",
                   title_gp = gpar(fontsize = fl)),
               width = unit(w_peaksHm, "mm"), # heatmap body width
               height = unit(w_peaksHm, "mm")) # heatmap body height

# draw the heatmap and grab as a grid graphics `grob`
# (needed for combining figure panels)
# use defined random number seed to make clustering deterministic
set.seed(42L)
hm_motifs_small <- grid.grabExpr(
    hm4 <- draw(hm4, merge_legend = TRUE,
                heatmap_legend_side = "bottom",
                annotation_legend_side = "bottom"),
    width = w_peaksHm, height = w_peaksHm
)

plot_grid(hm_motifs_small)

We create the same heatmap in long format, allowing to label all rows:

hm4_long <- Heatmap(matrix = motifsim2, name = "Motif\nsimilarity",
               col = circlize::colorRamp2(breaks = seq(rng[1], rng[2], length.out = 64),
                                          colors = cols),
               cluster_rows = motifcl,
               show_row_dend = TRUE, row_dend_width = unit(10, "mm"),
               cluster_columns = motifcl, show_column_dend = FALSE,
               row_split = n_split, column_split = n_split,
               column_title = " ",
               row_gap = unit(0.2, "mm"), column_gap = unit(0.2, "mm"),
               show_row_names = TRUE, row_names_side = "right",
               column_title_gp = gpar(fontsize = fl),
               row_title = " ",
               row_names_gp = gpar(fontsize = fs),
               show_column_names = FALSE,
               heatmap_legend_param = list(
                   at = round(c(rng[1], mean(rng), rng[2]), 2),
                   labels_gp = gpar(fontsize = fl),
                   border = "gray10",
                   legend_direction = "horizontal",
                   legend_width = unit(18, "mm"),
                   title_position = "lefttop",
                   title_gp = gpar(fontsize = fl)),
               width = unit(w_peaksHm * 0.5, "mm"), # heatmap body width
               height = unit(h_peaksHm * 1.15, "mm")) # heatmap body height

set.seed(42L)
hm_motifs_long <- grid.grabExpr(
    hm4_long <- draw(hm4_long, merge_legend = FALSE,
                     heatmap_legend_side = "bottom",
                     annotation_legend_side = "bottom"),
    width = w_peaksHm * 0.5, height = h_peaksHm * 1.15
)

plot_grid(hm_motifs_long)

Here is the complete list of motifs for each of the motif clusters, in the order as they appear in the heatmap (from top to bottom):

# get the row-order of the clustered heatmap
ro <- row_order(hm4)

# print the motif names in each cluster
motifs_in_cluster_list <- lapply(seq.int(n_split), function(i) {
    motifs_in_cluster <- rownames(motifsim2)[ro[[i]]]
    cat("Motif cluster", i, ":",
        paste(motifs_in_cluster, collapse = ", "), "\n")
    motifs_in_cluster
})
## Motif cluster 1 : Adn2.m2, Adn3.m1 
## Motif cluster 2 : Ace2.m2, Reb1.m1, Fhl1.m2, Teb1.m1 
## Motif cluster 3 : Sak1.m1, Ste11.m4 
## Motif cluster 4 : Php3.m1, Fhl1.m1, Fkh2.m2, Pcr1.m3, SPAC3F10.12c.m1, Mca1.m1, Mca1.m3, Deb1.m1, Fep1.m1, Hsf1.m1, Hsf1.m3 
## Motif cluster 5 : Atf1.m1, Pcr1.m1 
## Motif cluster 6 : Cdc10.m3, Res1.m3 
## Motif cluster 7 : Ace2.m1, Sep1.m2 
## Motif cluster 8 : Esc1.m1, Reb1.m5 
## Motif cluster 9 : SPBC16G5.17.m2, Toe3.m1, Toe1.m1, SPAC3H8.08c.m2 
## Motif cluster 10 : Zip1.m1, Zip1.m5, Zip1.m4, SPBC56F2.05c.m4 
## Motif cluster 11 : SPAC11D3.17.m1, SPAC11D3.17.m3, Loz1.m1, Prt1.m3, Adn2.m4, Toe2.m1, Toe3.m4 
## Motif cluster 12 : Prr1.m1, Prr1.m2 
## Motif cluster 13 : Pho7.m2, Prz1.m1, Adn3.m2, Adn3.m3, Rsv2.m1 
## Motif cluster 14 : Ste11.m2, Sep1.m1, Fkh2.m1, Sak1.m2, Sak1.m3 
## Motif cluster 15 : Zas1.m4, Ams2.m2, Zas1.m2 
## Motif cluster 16 : Zas1.m3, Ams2.m3, Ams2.m4, Zas1.m1, Ams2.m1 
## Motif cluster 17 : Res1.m2, Res2.m2, Cdc10.m1, Res1.m1, Res2.m1

Motif alignments

The following creates aligned sequence logos for the motifs in the highlighted clusters from the motif similarity heatmap.

# `pL`: list of plots
pL <- list()

# for each highlighted motif cluster...
for (cl in setdiff(names(selMotifCol), c("0", "3"))) {
    # exctract the motif matrices
    selmotifs <- motifs$motif[match(rownames(motifsim2)[motifcl_num == cl], motifs$name)]
    # align and visualize as sequence logos
    pL[[cl]] <- universalmotif::view_motifs(
        motifs = selmotifs,
        method = "PCC", tryRC = TRUE,
        min.overlap = 4,
        min.mean.ic = 0.25,
        normalise.scores = TRUE,
        show.positions = FALSE,
        names.pos = "right",
        text.size = 10)
}

# for each motif row index, get the corresponding cluster
indexToClustername <- structure(rep(seq.int(n_split),
                                    lengths(row_order(hm4))),
                                names = unlist(row_order(hm4)))

# `pL2`: list of plots similar to `pL` but with added empty plots for white space 
pL2 <- rep(list(NULL), 2 * length(pL) + 1)
midx <- seq(2, length(pL2), by = 2)
pL2[midx] <- pL
relh_motifs <- c(0.4, rep(0.8, length(pL2) - 1))
relh_motifs[midx] <- lengths(
    split(selMotifIndex, motifcl_num[selMotifIndex])[
        setdiff(names(selMotifCol), c("0", "3"))
    ])

# combine the individual plots
motifs_aligned <- plot_grid(plotlist = pL2, align = "none", ncol = 1,
                            hjust = -0.1, vjust = 1.1,
                            label_size = 10,
                            rel_heights = relh_motifs)

print(motifs_aligned)

TF peak overlaps from TFs in motif clusters

Create “upset” plots showing the overlap of transcription factor ChIP-seq peaks among the factors in a highlighted motif cluster.

# select specific and common-frequent peaks only
levels(peakgr$peaktype) <- c("Common (ubiquitous)", # use shorter names
                             "Common (frequent)",
                             "Specific peaks")
peakgrSel <- peakgr[peakgr$peaktype %in% c("Specific peaks", "Common (frequent)")]

# `pL3`: list of individual upset plots
pL3 <- list()
# for each highlighted motif cluster...
for (cl in setdiff(names(selMotifCol), c("0", "3"))) {
    motifnms <- rownames(motifsim2)[motifcl_num == cl]
    cltfs <- unique(sub("[.]m[0-9]+$", "", motifnms))
    ovL <- as.list(mcols(peakgrSel)[, paste0("is_enr_in.", cltfs)])
    ovL <- lapply(ovL, function(i) names(peakgrSel)[i])
    names(ovL) <- sub("^is_enr_in.", "", names(ovL))
    nm <- unique(indexToClustername[as.character(which(motifcl_num == cl))])

    pd <- stack(ovL) |>
        left_join(y = data.frame(values = names(peakgr),
                                 type = peakgr$peaktype),
                  join_by(values)) |>
        group_by(values) |>
        summarise(ind = list(ind),
                  type = unique(type))
    pL3[[paste0("cluster ", nm)]] <-
        ggplot(pd, aes(x = ind, fill = type)) +
        geom_bar() +
        scale_x_upset() +
        scale_fill_manual(values = peakset_colors) +
        labs(x = element_blank(),
             y = "Number of\npeaks",
             fill = element_blank()) +
        theme_cowplot(fm) +
        theme(axis.ticks.x = element_blank(),
              legend.position = ifelse(cl == "16", "inside", "none"),
              legend.position.inside = c(0.95, 0.95),
              legend.justification.inside = c(1, 1)) +
        theme_combmatrix(combmatrix.label.text = element_text(colour = "black",
                                                              size = fm),
                         combmatrix.panel.point.size = 2.0,
                         combmatrix.panel.line.size = 0.8,
                         combmatrix.label.extra_spacing = 5,
                         combmatrix.label.make_space = TRUE,
                         combmatrix.label.width = unit(12, "mm"),
                         plot.margin = unit(c(7, 7, 1, 7), "points")) # top, right, bottom, left

}

# `pL4`: list of plots similar to `pL3` but with added empty plots for white space 
pL4 <- rep(list(NULL), 2 * length(pL) + 1)
pL4[midx] <- pL3
relh_upsets <- relh_motifs
relh_upsets[midx] <- relh_upsets[midx] + relh_upsets[midx + 1] - 0.01
relh_upsets[midx + 1] <- 0.01

# combine the individual plots
upsets_aligned <- plot_grid(plotlist = pL4, align = "hv", axis = "l", ncol = 1,
                            rel_heights = relh_upsets)
print(upsets_aligned)

TF Autoregulation

We want to know which transcription factors are binding their own promoter regions (1kb windows centered on transcription start sites) and thus are possibly regulating their own transcription.

Heatmap

For visualization, we classify known transcription factors (rows) into three groups (the ones that do or do not bind their own promoter, and the ones that were not ChIP’ed in this study) and indicate in a binary heatmap the ChIP-seq experiments (columns) that showed enrichment at the promoter.

# prepare data
# ... transcription factor genes in ChIP-seq experiments (columns)
tfnms <- colnames(mcols(peakgr))[grep("^is_enr_in[.]", colnames(mcols(peakgr)))]
tfnms <- sub("^is_enr_in[.]", "", tfnms)
tfnms <- ifelse(grepl("^SP", tfnms), tfnms, .lowercase(tfnms))

# ... transcription factors in `txannot` (rows)
tfnms_all <- txannot$unique_einprot_id[match(dbd$PomBaseID, txannot$gene_id)]
tfnms_all <- tfnms_all[order(match(tfnms_all, tfnms))] # order as in `tfnms`
tx_sel <- ifelse(tfnms_all %in% txannot$gene_id,
                 match(tfnms_all, txannot$gene_id),
                 match(tfnms_all, txannot$gene_name))
stopifnot(all(!duplicated(txannot[tx_sel, "gene_id"]))) # exactly one transcript per gene

# ... transcription factors that were ChIP'ed in this study
tfnms_chip <- tfnms_all[txannot[tx_sel, "ChIP.this.study"]]

# ... create promoter binding matrix
mprom <- do.call(rbind, lapply(tx_sel, function(i) {
    tfnms_i <- strsplit(x = txannot[i, "prom.enriched.TFs"], split = ";")[[1]]
    as.numeric(tfnms %in% ifelse(grepl("^SP", tfnms_i), tfnms_i, .lowercase(tfnms_i)))
}))
dimnames(mprom) <- list(tfnms_all, tfnms)

stopifnot(all.equal(sum(txannot$binds.own.promoter),
                    sum(diag(mprom) > 0)))

diag(mprom[tfnms_chip, tfnms_chip]) <- ifelse(diag(mprom[tfnms_chip, tfnms_chip]) > 0,
                                              2, diag(mprom[tfnms_chip, tfnms_chip]))

# generate binary promoter binding heatmap
# ... label autoregulatory TFs
selLabels <- tfnms_chip[diag(mprom[tfnms_chip, tfnms_chip]) > 0]
stopifnot(all(selLabels %in% rownames(mprom)))
selLabelsPlot <- selLabels
selLabelsPlot[match("zas1", selLabels)] <- "zas1*"
rowLabels <- rowAnnotation(
    TFnames = anno_mark(which = "row", side = "right",
                        at = match(selLabels, rownames(mprom)), 
                        labels = selLabelsPlot,
                        labels_gp = gpar(fontsize = fl)))

# ... group TFs by promoter type
promtype <- factor(ifelse(txannot[tx_sel, "ChIP.this.study"],
                          ifelse(rowSums(mprom) > 0,
                                 "Bound\nTF promoters", "Non-bound\nTF promoters"),
                          "Not\nChIP'ed"),
                   levels = c("Bound\nTF promoters", "Non-bound\nTF promoters",
                              "Not\nChIP'ed"))

# ... order rows according to `promtype` and auto-regulation
is_auto <- function(nm, x = mprom) {
    d <- diag(x[match(nm, rownames(x)),
                match(nm, colnames(x))])
    !is.na(d) & d > 0
}
o <- order(c(3, 2, 1)[as.numeric(promtype)] * 100 +
               is_auto(rownames(mprom), mprom) * 20 +
               rowSums(mprom), decreasing = TRUE)
oc <- match(intersect(rownames(mprom)[o], colnames(mprom)), colnames(mprom))

# ... create main heatmap with annotations
hm5 <- Heatmap(mprom[o, oc], name = "hm5",
               col = circlize::colorRamp2(
                   breaks = seq(0, 2, length.out = 64),
                   colors = colorRampPalette(c(binary_heatmap_colors["FALSE"],
                                               main_colors[1],
                                               binary_heatmap_colors["TRUE"]))(64)),
               border = TRUE, border_gp = gpar(lwd = 0.5),
               cluster_rows = FALSE, show_row_dend = FALSE,
               cluster_columns = FALSE, show_column_dend = FALSE,
               row_split = promtype[o], row_title_gp = gpar(fontsize = fl),
               column_title = "TF ChIP-seq experiments",
               column_title_gp = gpar(fontsize = fl),
               show_row_names = FALSE, show_column_names = FALSE,
               right_annotation = rowLabels[o],
               use_raster = TRUE, show_heatmap_legend = FALSE,
               width = unit(w_peaksHm, "mm"), # heatmap body width
               height = unit(w_peaksHm, "mm"))

hm_prom <- grid.grabExpr({
        hm5 <- draw(hm5, merge_legend = TRUE,
                    heatmap_legend_side = "bottom",
                    annotation_legend_side = "bottom")
        decorate_heatmap_body("hm5", {
            grid.lines(c(mean(grepl("^Bound", promtype[match(tfnms, tfnms_all)])), 0),
                       c(0, 1), gp = gpar(lty = 2, lwd = 0.5))
        }, slice = 1)
        decorate_heatmap_body("hm5", {
            grid.lines(c(1, mean(grepl("^Bound", promtype[match(tfnms, tfnms_all)]))),
                       c(0, 1), gp = gpar(lty = 2, lwd = 0.5))
        }, slice = 2)
    },
    width = w_peaksHm, height = w_peaksHm
)
plot_grid(hm_prom)

Heatmap (not showing the non-bound TF promoters)

We can also create the same heatmap, but without showing TF promoters (rows) from the group “Non-bound TF promoters”.

# ... remove "Non-bound"
keep <- promtype[o] != "Non-bound\nTF promoters"
o2 <- o[keep]

# ... create main heatmap with annotations
hm6 <- Heatmap(mprom[o2, oc], name = "hm6",
               col = circlize::colorRamp2(
                   breaks = seq(0, 2, length.out = 64),
                   colors = colorRampPalette(c(binary_heatmap_colors["FALSE"],
                                               main_colors[1],
                                               binary_heatmap_colors["TRUE"]))(64)),
               border = TRUE, border_gp = gpar(lwd = 0.5),
               cluster_rows = FALSE, show_row_dend = FALSE,
               cluster_columns = FALSE, show_column_dend = FALSE,
               row_split = droplevels(promtype[o2]), row_title_gp = gpar(fontsize = fl),
               column_title = "TF ChIP-seq experiments",
               column_title_gp = gpar(fontsize = fl),
               show_row_names = FALSE, show_column_names = FALSE,
               right_annotation = rowLabels[o2],
               use_raster = TRUE, show_heatmap_legend = FALSE,
               width = unit(w_peaksHm, "mm"), # heatmap body width
               height = unit(w_peaksHm * mean(keep), "mm"))

# manually create a legend
hm_legend <- Legend(labels = c("TFs binding to another TF promoter",
                               "TFs binding to their own promoter"),
                    title = " ",
                    legend_gp = gpar(fill = c(main_colors[1],
                                              binary_heatmap_colors["TRUE"])))

hm_prom_bound <- grid.grabExpr({
        hm6 <- draw(hm6,
                    annotation_legend_side = "bottom",
                    annotation_legend_list = packLegend(hm_legend))
        decorate_heatmap_body("hm6", {
            grid.lines(c(mean(grepl("^Bound", promtype[match(tfnms, tfnms_all)])), 0),
                       c(0, 1), gp = gpar(lty = 2, lwd = 0.5))
        }, slice = 1)
    },
    width = w_peaksHm, height = w_peaksHm * mean(keep)
)
plot_grid(hm_prom_bound)

Network

We construct a network in which the nodes represent TFs (the ones that were ChIP’ed in this study are indicated by color), and the edges represent detected binding events. An edge starting at a TF A and ending at a TF B means that A targets the promoter of B.

We exclude the untagged experiment, TFs without targets and for the overall network also the TFs zas1 and SPCC320.03. zas1 has 48 bound TFs in its promoter (likely an artifact), and SPCC320.03 has 35, and if these were included, the network would be hard to visualize clearly.

## Create graph
## ... select TFs
to_remove <- c("zas1", "SPCC320.03", "untagged")
sel_tfs <- setdiff(unique(rownames(mprom), colnames(mprom)), to_remove)
cat("Number of TFs in network:", length(sel_tfs), "\n")
## Number of TFs in network: 87
## ... create adjacency matrix (row: from TF, column: to TF promoter)
adjm <- matrix(0, nrow = length(sel_tfs), ncol = length(sel_tfs),
               dimnames = list(sel_tfs, sel_tfs))
adjm[setdiff(colnames(mprom), to_remove),
     setdiff(rownames(mprom), to_remove)] <- t(mprom[setdiff(rownames(mprom), to_remove),
                                                     setdiff(colnames(mprom), to_remove)] > 0)
gr <- createNetwork(adjm)

## Visualize
set.seed(123)
node_padding <- margin(.2, .2, .2, .2, "mm")
ggr <- ggraph(as_tbl_graph(gr), 
              layout = "igraph", algorithm = "fr") + 
    geom_edge_link(aes(start_cap = label_rect(node1.name, padding = node_padding),
                       end_cap = label_rect(node2.name, padding = node_padding),
                       color = edgeclass),
                   arrow = arrow(length = unit(2, 'mm'))) +
    geom_edge_loop(aes(start_cap = label_rect(node1.name, padding = node_padding),
                       end_cap = label_rect(node2.name, padding = node_padding)),
                   arrow = arrow(length = unit(2, 'mm'))) +  # Handle self-loops
    scale_edge_color_manual(values = c(Reciprocal = main_colors[3], 
                                       Other = gplots::col2hex(na_color)),
                            name = "") + 
    geom_node_point(size = 4, aes(fill = vertexclass, shape = vertexclass), 
                    color = "black", stroke = 0.1) + 
    scale_shape_manual(values = c("Other TF" = 21, "ChIP'ed TF" = 21,
                                  "Autoregulated TF" = 22), name = "") +
    scale_fill_manual(values = c("Other TF" = gplots::col2hex(bg_color),
                                 "ChIP'ed TF" = gplots::col2hex(main_colors[1]),
                                 "Autoregulated TF" = gplots::col2hex(binary_heatmap_colors["TRUE"])),
                      name = "") + 
    geom_node_text(aes(label = name), size = 3.5, repel = TRUE, vjust = 1.8) + 
    theme_graph(base_family = "sans", plot_margin = margin(30, 10, 40, 10)) + 
    guides(fill = guide_legend(override.aes = list(size = 4, alpha = 1))) + 
    coord_cartesian(clip = "off") 
print(ggr)

Network around Atf1

For clearer visualization, we create a sub-network including only the connections to and from Atf1 (but now also including SPCC320.03):

## Create graph
## ... select TFs
to_remove <- c("zas1", "untagged")
sel_tfs <- setdiff(unique(rownames(mprom), colnames(mprom)), to_remove)

## ... create adjacency matrix (row: from TF, column: to TF promoter)
adjm <- matrix(0, nrow = length(sel_tfs), ncol = length(sel_tfs),
               dimnames = list(sel_tfs, sel_tfs))
adjm[setdiff(colnames(mprom), to_remove),
     setdiff(rownames(mprom), to_remove)] <- t(mprom[setdiff(rownames(mprom), to_remove),
                                                     setdiff(colnames(mprom), to_remove)] > 0)
## ... identify TFs connected to atf1
to_keep <- rownames(adjm)[adjm["atf1", ] > 0 | adjm[, "atf1"] > 0]
cat("Number of TFs in network:", length(to_keep), "\n")
## Number of TFs in network: 19
## ... create adjacency matrix (row: from TF, column: to TF promoter)
adjm2 <- adjm[to_keep, to_keep]
gr2 <- createNetwork(adjm2)

## Visualize
if (file.exists(params$coordsatf1)) {
    # create graph with manually optimized coordinates
    coordsAtf1 <- read.csv(params$coordsatf1)
    coordsAtf1$y <- -coordsAtf1$y

    ggr2 <- ggraph(
        as_tbl_graph(gr2), 
        layout = as.matrix(coordsAtf1[match(names(V(gr2)), coordsAtf1$id), c("x", "y")]))
} else {
    # create graph using automated layout
    set.seed(234)
    ggr2 <- ggraph(as_tbl_graph(gr2), 
                   layout = "igraph", algorithm = "fr")

}

node_padding <- margin(.2, .2, .2, .2, "mm")
ggr2 <- ggr2 +
    geom_edge_link(aes(start_cap = label_rect(node1.name, padding = node_padding),
                       end_cap = label_rect(node2.name, padding = node_padding),
                       color = edgeclass),
                   arrow = arrow(length = unit(2, 'mm'))) +
    geom_edge_loop(aes(start_cap = label_rect(node1.name, padding = node_padding),
                       end_cap = label_rect(node2.name, padding = node_padding)),
                   arrow = arrow(length = unit(2, 'mm'))) +  # Handle self-loops
    scale_edge_color_manual(values = c(Reciprocal = main_colors[3], 
                                       Other = gplots::col2hex(na_color)),
                            name = "") + 
    geom_node_point(size = 4, aes(fill = vertexclass, shape = vertexclass), 
                    color = "black", stroke = 0.1) + 
    scale_shape_manual(values = c("Other TF" = 21, "ChIP'ed TF" = 21,
                                  "Autoregulated TF" = 22), name = "") +
    scale_fill_manual(values = c("Other TF" = gplots::col2hex(bg_color),
                                 "ChIP'ed TF" = gplots::col2hex(main_colors[1]),
                                 "Autoregulated TF" = gplots::col2hex(binary_heatmap_colors["TRUE"])),
                      name = "") + 
    geom_node_text(aes(label = name), size = 3.5, repel = TRUE, vjust = 1.8) + 
    theme_graph(base_family = "sans", plot_margin = margin(30, 10, 40, 10)) +
    theme(legend.position = "bottom") +
    guides(fill = guide_legend(override.aes = list(size = 4, alpha = 1))) + 
    coord_cartesian(clip = "off")

print(ggr2)

Below here is code that can be run manually and was used to manually optimize the network display and export a csv file with node coordinates (data/visnetwork_coords_atf1.csv):

Peak types and presence of nearby ncRNAs

The following barplots show the numbers of ChIP-seq peaks that are near a tRNA or 5S rRNA gene (top panel), or the numbers of tRNA/5S rRNA genes that are distal or near a ChIP-seq peak (bottom panel).

# prepare `data.frame`s with plotting data
peaktype <- factor(gsub("[()]", "", sub(" ", "\n", peakgr$peaktype)),
                   levels = c("Common\nubiquitous",
                              "Common\nfrequent",
                              "Specific\npeaks"))
sel_peaksEnr <- rowSums(as.matrix(mcols(peakgr)[, grep("^is_enr_in[.]", colnames(mcols(peakgr)))])) > 0
near_ncRNA <- factor(ifelse(!is.na(dist.peak2tRNA) & dist.peak2tRNA < maxdist,
                            "tRNA",
                            ifelse(!is.na(dist.peak2rRNA) & dist.peak2rRNA < maxdist,
                                   "5S rRNA", "none")),
                         levels = c("tRNA", "5S rRNA", "none"))
df_peaks <- data.frame(peaktype = peaktype[sel_peaksEnr],
                       near_ncRNA = near_ncRNA[sel_peaksEnr])
df_ncRNA <- data.frame(ncRNAtype = factor(
    rep(c("tRNA", "5S rRNA"), c(sum(is_tRNA), sum(is_rRNA))),
    levels = c("tRNA", "5S rRNA")),
                       near_peak = !is.na(c(dist.tRNA2peak, dist.rRNA2peak)) &
                           c(dist.tRNA2peak, dist.rRNA2peak) < maxdist)
df_ncRNA$group <- factor(paste0(df_ncRNA$ncRNAtype, " ", 
                                c("TRUE" = "near", "FALSE" = "distal")[as.character(df_ncRNA$near_peak)]),
                         levels = c("tRNA distal","tRNA near",
                                    "5S rRNA distal", "5S rRNA near"))

# generate plots
cols_ncRNA <- c("tRNA" = main_colors[5], "5S rRNA" = main_colors[3],
                "none" = bg_color)
# ... numbers of peaks near genes
p_ncRNA1 <- ggplot(df_peaks, aes(peaktype, fill = near_ncRNA)) +
    geom_bar() +
    scale_fill_manual(values = cols_ncRNA) +
    labs(x = element_blank(), y = "Number of peaks", fill = "Near ncRNA:") +
    theme_cowplot(8) +
    theme(legend.position = "inside",
          legend.position.inside = c(0.05, 0.95),
          legend.justification = c(0, 1))

# ... numbers of genes near peaks
p_ncRNA2 <- ggplot(df_ncRNA, aes(ncRNAtype, fill = group)) +
    geom_bar() +
    scale_fill_manual(values = c(`tRNA near` = cols_ncRNA[["tRNA"]],
                                 `tRNA distal` = lighten(cols_ncRNA["tRNA"], 0.2),
                                 `5S rRNA near` = cols_ncRNA[["5S rRNA"]],
                                 `5S rRNA distal` = lighten(cols_ncRNA["5S rRNA"], 0.2))) +
    labs(x = element_blank(), y = "Number of genes", fill = "Relative\npeak location:") +
    theme_cowplot(8) +
    theme(legend.position = "inside",
          legend.position.inside = c(0.95, 0.95),
          legend.justification = c(1, 1))

# ... combine the two panels
(p_ncRNA <- plot_grid(p_ncRNA1, p_ncRNA2, align = "v", nrow = 2))

Put together

Assemble the panels into Figure 3:

cowplot::plot_grid(
    cowplot::plot_grid(
        cowplot::plot_grid(
            hm_prom_bound,
            cowplot::plot_grid(gnmtracks1, scale = 0.9),
            nrow = 2,
            labels = c("A", "C"),
            rel_heights = c(0.75, 1)
        ),
        ggr2,
        nrow = 1,
        labels = c("", "B")
    ),
    cowplot::plot_grid(
        hm_motifs_small,
        cowplot::plot_grid(
            motifs_aligned, upsets_aligned,
            nrow = 1, rel_widths = c(1.25, 1)),
        nrow = 1,
        labels = c("D", "E")
    ),
    nrow = 2,
    labels = c("", ""),
    rel_heights = c(1.75 * (w_peaksHm + 10), w_peaksHm + 20)
)

Supplementary figure

Assemble the panels into Supplementary Figure 3:

cowplot::plot_grid(
    cowplot::plot_grid(NULL, NULL, # space for labels
                       nrow = 1, labels = c("A", "B"),
                       rel_widths = c(1, 1)),
    cowplot::plot_grid(
        cowplot::plot_grid(gnmtracks2, NULL, ncol = 1, rel_heights = c(20, 1)),
        hm_motifs_long,
        nrow = 1
    ),
    p_fracbound,
    nrow = 3,
    labels = c("", "", "C"),
    rel_heights = c(0.3, 9.7, 5.0)
)

Session info

Session info
sessionInfo()
## R version 4.3.2 (2023-10-31)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: CentOS Linux 7 (Core)
## 
## Matrix products: default
## BLAS/LAPACK: FlexiBLAS OPENBLAS;  LAPACK version 3.10.1
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: Europe/Zurich
## tzcode source: system (glibc)
## 
## attached base packages:
##  [1] parallel  stats4    grid      stats     graphics  grDevices utils    
##  [8] datasets  methods   base     
## 
## other attached packages:
##  [1] lubridate_1.9.3             stringr_1.5.1              
##  [3] purrr_1.0.2                 readr_2.1.5                
##  [5] tidyverse_2.0.0             shiny_1.8.1.1              
##  [7] visNetwork_2.1.2            ggraph_2.2.1               
##  [9] igraph_2.0.3                forcats_1.0.0              
## [11] kableExtra_1.4.0            dendextend_1.17.1          
## [13] jsonlite_1.8.8              SummarizedExperiment_1.32.0
## [15] Biobase_2.62.0              MatrixGenerics_1.14.0      
## [17] matrixStats_1.3.0           viridisLite_0.4.2          
## [19] dplyr_1.1.4                 ggupset_0.3.0              
## [21] scattermore_1.2             colorspace_2.1-0           
## [23] universalmotif_1.20.0       tidygraph_1.3.1            
## [25] tidyr_1.3.1                 tibble_3.2.1               
## [27] BSgenome_1.70.2             rtracklayer_1.62.0         
## [29] BiocIO_1.12.0               GenomicRanges_1.54.1       
## [31] Biostrings_2.70.3           GenomeInfoDb_1.38.8        
## [33] XVector_0.42.0              IRanges_2.36.0             
## [35] S4Vectors_0.40.2            BiocGenerics_0.48.1        
## [37] cowplot_1.1.3               ggplot2_3.5.0              
## [39] ComplexHeatmap_2.18.0       circlize_0.4.16            
## 
## loaded via a namespace (and not attached):
##  [1] RColorBrewer_1.1-3       rstudioapi_0.16.0        shape_1.4.6.1           
##  [4] magrittr_2.0.3           magick_2.8.3             farver_2.1.1            
##  [7] rmarkdown_2.26           GlobalOptions_0.1.2      zlibbioc_1.48.2         
## [10] vctrs_0.6.5              memoise_2.0.1            Cairo_1.6-2             
## [13] Rsamtools_2.18.0         RCurl_1.98-1.14          htmltools_0.5.8.1       
## [16] S4Arrays_1.2.1           SparseArray_1.2.4        sass_0.4.9              
## [19] KernSmooth_2.23-22       bslib_0.7.0              htmlwidgets_1.6.4       
## [22] cachem_1.0.8             GenomicAlignments_1.38.2 mime_0.12               
## [25] lifecycle_1.0.4          iterators_1.0.14         pkgconfig_2.0.3         
## [28] Matrix_1.6-5             R6_2.5.1                 fastmap_1.1.1           
## [31] GenomeInfoDbData_1.2.11  clue_0.3-65              digest_0.6.35           
## [34] labeling_0.4.3           fansi_1.0.6              timechange_0.3.0        
## [37] polyclip_1.10-6          abind_1.4-5              compiler_4.3.2          
## [40] withr_3.0.0              doParallel_1.0.17        BiocParallel_1.36.0     
## [43] viridis_0.6.5            highr_0.10               gplots_3.1.3.1          
## [46] ggforce_0.4.2            MASS_7.3-60              DelayedArray_0.28.0     
## [49] rjson_0.2.21             caTools_1.18.2           gtools_3.9.5            
## [52] tools_4.3.2              httpuv_1.6.15            glue_1.7.0              
## [55] restfulr_0.0.15          promises_1.3.0           cluster_2.1.4           
## [58] generics_0.1.3           gtable_0.3.5             tzdb_0.4.0              
## [61] hms_1.1.3                xml2_1.3.6               utf8_1.2.4              
## [64] ggrepel_0.9.5            foreach_1.5.2            pillar_1.9.0            
## [67] later_1.3.2              tweenr_2.0.3             lattice_0.22-6          
## [70] tidyselect_1.2.1         knitr_1.46               gridExtra_2.3           
## [73] svglite_2.1.3            xfun_0.43                graphlayouts_1.1.1      
## [76] stringi_1.8.3            yaml_2.3.8               evaluate_0.23           
## [79] codetools_0.2-19         cli_3.6.2                xtable_1.8-4            
## [82] systemfonts_1.0.6        munsell_0.5.1            jquerylib_0.1.4         
## [85] Rcpp_1.0.12              png_0.1-8                XML_3.99-0.16.1         
## [88] bitops_1.0-7             scales_1.3.0             crayon_1.5.2            
## [91] GetoptLong_1.0.5         rlang_1.1.3