params
## $genomefasta
## [1] "reference/Schizosaccharomyces_pombe.ASM294v2.dna.toplevel.fa"
##
## $motifrds
## [1] "data/motifs_annotated.rds"
##
## $peakcsv
## [1] "data/fused_peaks_filtered.csv.gz"
##
## $ncpu
## [1] 24
suppressPackageStartupMessages({
library(circlize)
library(ComplexHeatmap)
library(grid)
library(ggplot2)
library(cowplot)
library(dplyr)
library(tidyr)
library(universalmotif)
library(TFBSTools)
library(GenomicRanges)
library(parallel)
library(colorspace)
library(forcats)
library(monaLisa)
library(ggrepel)
library(Biostrings)
library(ggupset)
})
source("params/plot_settings.R")
# capitalize the first letter of a string
.capitalize <- function(x) {
paste0(toupper(substr(x, 1, 1)), substr(x, 2, nchar(x)))
}
We start by reading all tables and objects needed for this figure. Factor levels are defined in the order they should appear in the figure panels, and peak sequences are extracted from the genome.
Remark: Motifs are read from the data/motifs_annotated.rds file, which is in .rds format that can be used by R. The motifs are also available in the file data/motifs.meme, which is a plain text file with the motifs in the MEME format: https://meme-suite.org/meme/doc/meme-format.html.
# `motifs`: table with annotated motif
motifs <- readRDS(params$motifrds)
motifs$altname <- motifs$name
# ... extract motifs as TFBSTools::PFMatrixList
umL <- universalmotif::to_list(motifs)
pfmL <- do.call(TFBSTools::PFMatrixList,
lapply(seq.int(nrow(motifs)), function(i) {
universalmotif::convert_motifs(motifs = umL[[i]],
class = "TFBSTools-PFMatrix")
}))
# `gnm`: S. pombe reference genome sequence
gnm <- readDNAStringSet(params$genomefasta)
names(gnm) <- sub(" .*$", "", names(gnm))
# `peakgr`: ChIP-seq peaks as GRanges object
peakgr <- as(read.csv(params$peakcsv, row.names = 1), "GRanges")
peakgr$peaktype <- factor(.capitalize(peakgr$peaktype),
levels = c("Common peaks (ubiquitous)",
"Common peaks (frequent)",
"Specific peaks"))
peakgr_specific <- peakgr[peakgr$peaktype == "Specific peaks"]
# ... extract peak enrichment matrix
is_enr <- as.matrix(mcols(peakgr)[, grep("^is_enr_in[.]", colnames(mcols(peakgr)))])
colnames(is_enr) <- sub("^is_enr_in.", "", colnames(is_enr))
is_enr_specific <- as.matrix(mcols(peakgr_specific)[, grep("^is_enr_in[.]", colnames(mcols(peakgr_specific)))])
colnames(is_enr_specific) <- sub("^is_enr_in.", "", colnames(is_enr_specific))
# ... extract peak sequences from genome
peakseqs <- getSeq(gnm, peakgr)
peakseqs_specific <- getSeq(gnm, peakgr_specific)
Here we define some helper functions that are used to generate this figure:
calcKmerEnr: Counts the frequencies of k-mer words of length k in a set of sequences (seqs) that are grouped into foreground/background sets by fg, and calculates enrichment values for each k-mer word comparing the foreground frequencies to either the background sequences (bg_type = "other") or to a Markov model of order Markov_order that is estimated from the foreground sequences.kmerEnrPlot: Calls calcKmerEnr to obtain k-mer enrichments and visualizes them in a scatter plot (each point corresponding to a k-mer), with x- and y-axis coordinates corresponding to the log k-mer frequency in foreground sequences and the log k-mer enrichment, respectively.create_cobound_peaks_plot: Upset plot showing co-bound peaks for a set of TFs.calcKmerEnr <- function(k = 5, # k-mer length
seqs, # DNAStringSet
fg, # logical vector of foreground in `seqs`
bg_type = c("other", "Markov"),
Markov_order = 1,
include_revcomp = TRUE,
... # forwarded to monaLisa::getKmerFreq
) {
# digest arguments
bg_type <- match.arg(bg_type)
stopifnot(exprs = {
is.numeric(k) && length(k) == 1L
is(seqs, "DNAStringSet")
is.logical(fg) && length(fg) == length(seqs)
is.numeric(Markov_order) && length(Markov_order) == 1L
})
# fg
fg_kmers <- monaLisa::getKmerFreq(seqs[fg], kmerLen = k,
MMorder = Markov_order,
includeRevComp = include_revcomp,
...)
fg_freq <- fg_kmers$freq.obs
kmers <- names(fg_freq)
# bg
if (identical(bg_type, "other")) {
bg_kmers <- monaLisa::getKmerFreq(seqs[!fg], kmerLen = k,
MMorder = Markov_order,
includeRevComp = include_revcomp,
...)
bg_freq <- bg_kmers$freq.obs
} else if (bg_type == "Markov") {
bg_freq <- fg_kmers$freq.exp
}
# norm
N <- min(sum(fg_freq), sum(bg_freq))
fg_freq_norm <- fg_freq / sum(fg_freq) * N
bg_freq_norm <- bg_freq / sum(bg_freq) * N
# return results
return(list(kmers = names(fg_freq),
fg_kmers = fg_kmers,
fg_freq = fg_freq,
fg_freq_norm = fg_freq_norm,
bg_kmers = bg_kmers,
bg_freq = bg_freq,
bg_freq_norm = bg_freq_norm,
pearsonresid = (fg_freq_norm - bg_freq_norm) /
sqrt(bg_freq_norm),
nCG = vcountPattern("C", kmers) + vcountPattern("G", kmers)))
}
kmerEnrPlot <- function(k = 5, # k-mer length
seqs, # DNAStringSet
fg, # logical vector of foreground in `seqs`
bg_type = c("other", "Markov"),
Markov_order = 1,
colorByMotif = NULL,
nlabel = 0,
title = NULL,
auto_subtitle = FALSE,
jitter_x = FALSE,
include_revcomp = TRUE,
font_size = 14,
... # forwarded to monaLisa::getKmerFreq
) {
# digest arguments
stopifnot(exprs = {
is.null(colorByMotif) || is(colorByMotif, "PFMatrixList")
is.numeric(nlabel) && length(nlabel) == 1L
is.null(title) || (is.character(title) && length(title) == 1L)
})
# calculate k-mer enrichments
enr <- calcKmerEnr(k = k,
seqs = seqs,
fg = fg,
bg_type = bg_type,
Markov_order = Markov_order,
include_revcomp = include_revcomp)
# enrichment plot
jitter_amount <- if (jitter_x) 0.5 else 0
pd <- data.frame(kmer = enr$kmers,
fg_freq = enr$fg_freq,
bg_freq = enr$bg_freq,
fg_freq_norm = enr$fg_freq_norm,
bg_freq_norm = enr$bg_freq_norm,
pearsonresid = enr$pearsonresid,
nCG = enr$nCG,
jitter_x = runif(length(enr$kmers),
min = -jitter_amount, max = jitter_amount))
if (!is.null(colorByMotif)) {
pd$score <- monaLisa::motifKmerSimilarity(x = colorByMotif, kmerLen = k,
includeRevComp = include_revcomp)[1, ]
}
# ... base plot
p <- ggplot(pd, aes(fg_freq + 1 + jitter_x, pearsonresid)) +
scale_x_log10() +
labs(title = if (is.null(title)) element_blank() else title,
x = paste0(k, "-mer frequency in peaks + 1"),
y = paste0(k, "-mer enrichment")) +
theme_cowplot(font_size) +
theme(legend.position = "none")
# ... subtitle
if (auto_subtitle) {
if (identical(bg_type, "Markov")) {
p <- p + labs(subtitle = paste0(k, "-mer enrichments (background: ",
Markov_order,
"th-order Markov model)"))
} else if (identical(bg_type, "other")) {
p <- p + labs(subtitle = paste0(k, "-mer enrichments (background: other peaks)"))
}
}
# ... points
if (is.null(colorByMotif)) {
p <- p + geom_point(color = "gray62", alpha = 0.5, size = 1.5 * font_size / 14) +
geom_hline(yintercept = 0, linetype = "dashed")
} else {
p <- p +
geom_point(mapping = aes(color = score^0.4), alpha = 0.5) +
scale_colour_gradientn(colors = hcl.colors(11, "Oslo")[-1]) +
geom_hline(yintercept = 0, linetype = "dashed")
}
# ... point labels
if (nlabel > 0) {
p <- p + geom_text_repel(
# data = pd[order(pd$pearsonresid,
# decreasing = TRUE)[seq.int(nlabel)], ],
data = pd |>
arrange(desc(pearsonresid)) |>
mutate(label_i = seq_along(pearsonresid),
kmerrc = as.character(reverseComplement(DNAStringSet(kmer))),
labelrc_i = match(kmer, kmerrc),
label_str = ifelse(label_i <= nlabel & label_i <= labelrc_i, kmer, "")),
mapping = aes(label = label_str), min.segment.length = 0,
size = font_size / ggplot2::.pt, color = "black",
max.overlaps = Inf)
}
# return plot
return(p)
}
create_cobound_peaks_plot <- function(sel_tfs) {
stopifnot(all(sel_tfs %in% colnames(is_enr)))
ovL <- as.list(as.data.frame(is_enr[, sel_tfs]))
ovL <- lapply(ovL, function(i) rownames(is_enr)[i])
pd <- stack(ovL) |>
group_by(values) |>
summarise(ind = list(ind)) |>
mutate(peaktype = peakgr[values]$peaktype)
ggplot(pd, aes(x = ind)) +
geom_bar(aes(fill = peaktype)) +
scale_fill_manual(values = peakset_colors) +
scale_x_upset() +
labs(x = element_blank(), y = "Number of peaks", fill = "Peak type: ") +
theme_cowplot(12) +
theme(axis.ticks.x = element_blank(),
legend.position = "bottom") +
theme_combmatrix(combmatrix.label.extra_spacing = 5,
combmatrix.label.make_space = TRUE,
combmatrix.label.width = unit(18, "mm")) +
guides(fill = guide_legend(nrow = 3))
}
Frequency and enrichments of 6-mer sequence words in ChIP-seq peaks from transcription factors of the Zn(II)2Cys6 (GAL4) family:
# select motifs of TFs from the Zn(II)2Cys6 family
sel_motifs <- motifs[motifs$family == "Zn(II)2Cys6", "name"]
tf_names <- sub("[.]m[0-9]+$", "", sel_motifs)
# select a single TF and motif of interest ("Toe3.m1")
sel_motif_idx_1 <- 7
motif_name1 <- sel_motifs[sel_motif_idx_1]
# plot 6-mer enrichments in enriched peaks from which the motif was identified
ZnII2Cys6_kmer_1 <- kmerEnrPlot(k = 6,
seqs = peakseqs_specific,
fg = is_enr_specific[, tf_names[sel_motif_idx_1]],
bg_type = "other",
title = sel_motifs[sel_motif_idx_1],
nlabel = 9,
colorByMotif = pfmL[match(sel_motifs[sel_motif_idx_1], name(pfmL))],
font_size = 10)
print(ZnII2Cys6_kmer_1)
CCG, CGG enrichments (TF peaks from 11 selected motifs)# select Zn(II)2Cys6 TFs
tf_names <- c("Cha4", "Mca1", "Moc3", "Pho7", "Prt1",
"Grt1", "Gsf1", "Thi1", "Thi5", "Toe1",
"Toe2", "Toe3", "Toe4", "SPAC11D3.11c", "SPAC1327.01c",
"SPAC1F7.11c", "SPAC25B8.11", "SPAC2H10.01", "SPAC3C7.04",
"SPAC3H8.08c", "SPBC1348.12", "Ntu1", "SPBC16G5.17",
"SPBC1773.12", "SPBC1773.16c", "Ntu2", "SPBC56F2.05c",
"SPCC320.03", "SPCC417.09c", "SPCC757.04", "SPCC777.02",
"SPCC965.10")
length(tf_names)
## [1] 32
# filter out not ChIP'ed TFs or TFs with less than `min_peaks_specific` specific peaks
min_peaks_specific <- 3
n_peaks_specific <- colSums(
is_enr_specific[, intersect(colnames(is_enr_specific), tf_names)]
)[tf_names]
tf_names <- tf_names[!is.na(n_peaks_specific) & n_peaks_specific >= min_peaks_specific]
length(tf_names)
## [1] 20
# calculate 3-mer enrichments
enr_list_3mers <- list()
for (i in seq_along(tf_names)) {
enr_list_3mers[[i]] <- calcKmerEnr(k = 3,
seqs = peakseqs_specific,
fg = is_enr_specific[, tf_names[i]],
bg_type = "other")
}
enr_3mers <- do.call(rbind, lapply(enr_list_3mers, "[[", "pearsonresid"))
rownames(enr_3mers) <- tf_names
# plot heatmap
# ... define heatmap font sizes
fs <- 5.5 # small
fl <- 8 # large
ft <- 10 # title
# ... calculate data range for mapping to colors
mx <- max(abs(enr_3mers))
qs <- quantile(abs(enr_3mers), .99)
# ... prepare heatmap annotations:
# nCG: number of G+C bases in the 3-mer word
# highlight: emphasize CCG, CGG, GGC and GCC
kmer_annot <- columnAnnotation(
nCG = vcountPattern("S", DNAStringSet(colnames(enr_3mers)), fixed = FALSE),
highlight = ifelse(colnames(enr_3mers) %in% c("CCG", "CGG", "GGC", "GCC"), "1", "0"),
col = list(highlight = c("0" = bg_color, "1" = "black"),
nCG = structure(hcl.colors(5, "Greens")[-5], names = 3:0)),
show_legend = c(TRUE, FALSE), show_annotation_name = c(FALSE, FALSE),
annotation_name_gp = gpar(fontsize = fl),
annotation_legend_param = list(labels_gp = gpar(fontsize = fl),
ncol = 4,
title = "Number of G+C ",
title_position = "lefttop",
title_gp = gpar(fontsize = fl)),
simple_anno_size = unit(3, "mm"))
# ... generate main heatmap with annotations
hm_3mers <- Heatmap(enr_3mers, name = "Pearson's resid.",
col = circlize::colorRamp2(
breaks = c(-mx, seq(-qs, qs, length.out = 62), mx),
colors = colorRampPalette(enrichment_heatmap_colors)(64)),
cluster_rows = TRUE, show_row_dend = FALSE,
column_dend_reorder = match(colnames(enr_3mers),
c("GCG", "CCG", "GGC", "GGG"),
nomatch = 100),
cluster_columns = TRUE, column_dend_height = unit(10, "mm"),
row_names_gp = gpar(fontsize = fl, fontfamily = "sans"),
column_names_gp = gpar(fontsize = fs, fontfamily = "mono"),
row_title = "Binuclear zinc cluster TFs",
row_title_side = "left", column_title_gp = gpar(fontsize = ft),
border = TRUE, border_gp = gpar(lwd = 0.5),
show_row_names = TRUE, show_column_names = TRUE,
use_raster = FALSE, show_heatmap_legend = TRUE,
bottom_annotation = kmer_annot,
heatmap_legend_param = list(at = round(c(-mx, 0, mx), 1),
labels_gp = gpar(fontsize = fl),
border = "gray10",
legend_direction = "horizontal",
legend_width = unit(25, "mm"),
title = "3-mer enrichment ",
title_position = "lefttop",
title_gp = gpar(fontsize = fl)),
width = unit(130, "mm"), # heatmap body width
height = unit(3 * nrow(enr_3mers), "mm"))
# draw the heatmap and grab as a grid graphics `grob`
# (needed for combining figure panels)
# use defined random number seed to make clustering deterministic
set.seed(42L)
hm_ZnII2Cys6_sel_3mers <- grid.grabExpr(
hm_3mers <- draw(hm_3mers, merge_legend = TRUE,
heatmap_legend_side = "bottom",
annotation_legend_side = "right"),
width = 40, height = 140
)
plot_grid(hm_ZnII2Cys6_sel_3mers)
# get enrichment calls for specific peaks that are
# bound by at least one of the binuclear zinc cluster TFs used above
pd <- as.data.frame(is_enr_specific[, tf_names]) |>
tibble::rownames_to_column("peak_name") |>
pivot_longer(cols = all_of(tf_names),
values_to = "enriched",
names_to = "tf_name") |>
filter(enriched) |>
group_by(peak_name) |>
summarise(tfs_enr = list(tf_name))
# create upset plot
upset_ZnII2Cys6_sel <- ggplot(pd, aes(x = tfs_enr)) +
geom_bar() +
scale_x_upset() +
labs(x = element_blank(),
y = "Number of enriched peaks") +
theme_cowplot()
upset_ZnII2Cys6_sel
Display information for selected motifs (sel_motifs, containing examples for both previously known and novel motifs):
# select motifs for detailed visualization
sel_motifs <- c( "Toe2.m1", "Esc1.m1", "SPAC3F10.12c.m1",
"Ams2.m1", "Zip1.m1", "Adn2.m4")
tf_names <- sub("[.]m[0-9]+$", "", sel_motifs)
# create motif visualizations
# ... setup plotting parameters
fgbg_col <- c(main_colors[1], main_colors[5]) # colors for foreground and background
gnm_col <- bg_color # color for NA or "other"
w <- 2500 # width in bases around peak-center for meta-profiles
fs <- 9 # font size
nlabel <- 1 # number of k-mer words to label in enrichment plot
# ... iterate over motifs
motif_characterization_plots <- list()
for (i in seq_along(sel_motifs)) {
# get motif info
m1 <- motifs$motif[match(sel_motifs[i], motifs$name)]
is_fg_specific <- is_enr_specific[, tf_names[i]]
fg_peak_names <- names(peakgr)[is_enr[, tf_names[i]]]
bg_peak_names <- setdiff(names(peakgr), fg_peak_names)
fgbg_num <- c(length(fg_peak_names), length(bg_peak_names))
fgbg_names <- sprintf("%s (%d)", c("foreground", "background"), fgbg_num)
fgbg_names_short <- sprintf("%d", fgbg_num)
# scan genome sequence with motif
hits_genome <- universalmotif::scan_sequences(motifs = m1,
sequences = gnm,
threshold = 1e-4,
threshold.type = "pvalue",
RC = TRUE,
verbose = if (interactive()) 3 else 0,
nthreads = params$ncpu,
return.granges = TRUE)
# seqlogo
gg1_seqlogo <- universalmotif::view_motifs(
motifs = m1,
show.positions = FALSE) +
labs(title = sel_motifs[i],
subtitle = paste0(sum(is_fg_specific), " specific peaks")) +
theme_cowplot(fs) +
theme(axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
axis.line.x = element_blank(),
legend.position = "none")
# k-mer enrichments
gg1_kmers <- kmerEnrPlot(k = 6,
seqs = peakseqs_specific,
fg = is_enr_specific[, tf_names[i]],
bg_type = "other",
nlabel = nlabel,
colorByMotif = pfmL[match(sel_motifs[i], name(pfmL))],
font_size = fs) +
labs(x = "6-mer freq. + 1",
y = "6-mer enr.") +
theme(legend.position = "none")
# motif location relative to peak mid
nrst <- distanceToNearest(x = resize(hits_genome, width = 1L, fix = "center"),
subject = resize(peakgr, width = 1L, fix = "center"))
nrst_fg <- nrst[subjectHits(nrst) %in% match(fg_peak_names, names(peakgr))]
nrst_bg <- nrst[subjectHits(nrst) %in% match(bg_peak_names, names(peakgr))]
relpos_fg <- mid(ranges(hits_genome))[queryHits(nrst_fg)] - mid(ranges(peakgr))[subjectHits(nrst_fg)]
relpos_bg <- mid(ranges(hits_genome))[queryHits(nrst_bg)] - mid(ranges(peakgr))[subjectHits(nrst_bg)]
pd_loc <- data.frame(motif = rep(sel_motifs[i], length(nrst_fg) + length(nrst_bg)),
relpos = c(relpos_fg, relpos_bg),
type = factor(rep(c(fgbg_names[1], fgbg_names[2]),
c(length(nrst_fg), length(nrst_bg))),
levels = fgbg_names))
gg1_loc <- ggplot(pd_loc |> filter(abs(relpos) < w + 500),
aes(relpos / 1000, color = type)) +
geom_density(linewidth = 1) +
geom_vline(xintercept = 0, linetype = "dotted") +
scale_y_continuous(n.breaks = 3) +
scale_x_continuous(limits = c(-w, w) / 1000, expand = expansion(mult = 0)) +
scale_color_manual(values = structure(fgbg_col, names = fgbg_names)) +
annotate("text", x = w / 1000, y = Inf, hjust = 1, vjust = 1,
label = paste0(c("", "\n"), fgbg_names_short),
colour = fgbg_col, fontface = "bold", size = fs / ggplot2::.pt) +
labs(x = "Position relative to peak center (kb)",
y = "Density of motifs") +
theme_cowplot(fs) +
theme(legend.position = "none",
legend.justification = c(1, 1))
# motif hit enrichment in genomic regions
ov <- findOverlaps(query = hits_genome, subject = peakgr)
ov_type <- rep("non-peak", length(hits_genome))
ov_type[queryHits(ov)] <-
ifelse(subjectHits(ov) %in% match(fg_peak_names, names(peakgr)),
fgbg_names[1], fgbg_names[2])
ov_type <- factor(ov_type,
levels = c("non-peak", fgbg_names[2], fgbg_names[1]))
n_ov_obs <- unclass(table(hits_genome$motif, ov_type))
n_ov_exp <- unclass(table(hits_genome$motif)) %*%
t(structure(c(sum(seqlengths(hits_genome)) - sum(width(peakgr)),
sum(width(peakgr[bg_peak_names])),
sum(width(peakgr[fg_peak_names]))),
names = colnames(n_ov_obs)) / sum(seqlengths(hits_genome)))
pd_counts <- full_join(as.data.frame(n_ov_obs) |>
mutate(motif = "motif") |>
pivot_longer(cols = all_of(levels(ov_type)),
names_to = "type",
values_to = "observed"),
as.data.frame(n_ov_exp) |>
mutate(motif = "motif") |>
pivot_longer(cols = all_of(levels(ov_type)),
names_to = "type",
values_to = "expected"),
by = join_by(motif, type)) |>
dplyr::mutate(enr = observed / expected) |>
pivot_longer(cols = all_of(c("observed", "expected")),
names_to = "obsexp", values_to = "val") |>
dplyr::mutate(type = factor(type, levels = c("non-peak", fgbg_names[2:1]))) |>
group_by(motif, obsexp) |>
mutate(tot = sum(val),
percentage = val / tot) |>
ungroup()
pd_counts <- rbind(pd_counts |>
filter(obsexp == "observed") |>
dplyr::select(motif, type, enr, percentage),
pd_counts |>
filter(obsexp == "expected" & motif == pd_counts[[1,"motif"]]) |>
mutate(motif = "expected") |>
dplyr::select(motif, type, enr, percentage)
) |> mutate(motif = relevel(factor(motif), "expected"))
gg_ov_counts <- ggplot(pd_counts, aes(percentage, motif, fill = type)) +
geom_col(position = position_stack()) +
scale_x_continuous(labels = scales::label_percent(),
expand = expansion(mult = c(0, 0.15))) +
scale_fill_manual(values = structure(c(gnm_col, fgbg_col[2:1]),
names = levels(pd_counts$type))) +
geom_text(data = pd_counts |> filter(grepl("foreground", type)),
aes(x = 1, label = sprintf("%.1f%%", percentage * 100)),
colour = fgbg_col[1], hjust = -0.1, vjust = 0.5,
size = fs / ggplot2::.pt,
fontface = "bold") +
labs(x = "Percent of motif sites",
y = element_blank(),
fill = "Genomic region: ") +
theme_cowplot(fs) +
theme(legend.position = "none") +
guides(fill = guide_legend(nrow = 3))
pd_enr <- pd_counts |> filter(motif != "expected") |>
dplyr::select(c("motif","type","enr")) |> unique()
levels(pd_enr$type) <- c("non-peak", "backgr.", "foregr.")
gg_ov_enr <- ggplot(pd_enr, aes(enr, type, fill = type)) +
geom_col(position = position_dodge2()) +
scale_x_continuous(expand = expansion(mult = c(0, 0.15))) +
geom_vline(xintercept = 1, linetype = "dotted") +
scale_fill_manual(values = structure(c(gnm_col, fgbg_col[2:1]),
names = levels(pd_enr$type))) +
# facet_wrap(~ motif, nrow = 1) +
labs(x = "Motif site enrichment (obs/exp)",
y = element_blank(),
fill = "Genomic region: ") +
theme_cowplot(fs) +
theme(legend.position = "bottom") +
guides(fill = guide_legend(nrow = 3))
gg_ov_enr_legend <- cowplot::get_legend(gg_ov_enr)
gg_ov_enr <- gg_ov_enr + theme(legend.position = "none")
# put panels together
motif_characterization_plots[[i]] <- cowplot::plot_grid(
cowplot::plot_grid(gg1_seqlogo, NULL, ncol = 1, rel_heights = c(0.87, 0.13)),
gg1_kmers,
gg1_loc,
cowplot::plot_grid(gg_ov_counts,
gg_ov_enr,
align = "v", axis = "l",
nrow = 2,
rel_heights = c(1, 1.2)),
# align = "h", axis = "tb",
nrow = 1,
rel_widths = c(1, 1, 1, 1.4)
)
print(motif_characterization_plots[[i]])
}
# add legend
motif_characterization_plots[[length(motif_characterization_plots) + 1]] <-
cowplot::plot_grid(NULL, NULL, NULL, gg_ov_enr_legend,
nrow = 1, rel_widths = c(1, 1, 1, 1.4))
(p_cobound_1 <- create_cobound_peaks_plot(sel_tfs = c("Ams2", "Zas1", "Teb1")))
(p_cobound_2 <- create_cobound_peaks_plot(sel_tfs = c("Esc1", "SPAC3F10.12c")))
Assemble the panels into Figure 4:
cowplot::plot_grid(
cowplot::plot_grid(
cowplot::plot_grid(ZnII2Cys6_kmer_1, scale = 0.85),
hm_ZnII2Cys6_sel_3mers,
labels = c("A", "B"),
rel_widths = c(1.9, 3.3),
nrow = 1
),
cowplot::plot_grid(
plotlist = motif_characterization_plots,
ncol = 1,
rel_heights = c(rep(1, length(motif_characterization_plots) - 1), 0.4)),
nrow = 2,
labels = c("", "C"),
align = "vh",
axis = "b",
rel_heights = c(1.4, 3.2)
)
Assemble the panels into Supplementary Figure 4:
cowplot::plot_grid(
p_cobound_2,
nrow = 1,
labels = c("A")
)
sessionInfo()
## R version 4.3.2 (2023-10-31)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: CentOS Linux 7 (Core)
##
## Matrix products: default
## BLAS/LAPACK: FlexiBLAS OPENBLAS; LAPACK version 3.10.1
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8 LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## time zone: Europe/Zurich
## tzcode source: system (glibc)
##
## attached base packages:
## [1] parallel stats4 grid stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] ggupset_0.3.0 Biostrings_2.70.3 XVector_0.42.0 ggrepel_0.9.5 monaLisa_1.8.1
## [6] forcats_1.0.0 colorspace_2.1-0 GenomicRanges_1.54.1 GenomeInfoDb_1.38.8 IRanges_2.36.0
## [11] S4Vectors_0.40.2 BiocGenerics_0.48.1 TFBSTools_1.40.0 universalmotif_1.20.0 tidyr_1.3.1
## [16] dplyr_1.1.4 cowplot_1.1.3 ggplot2_3.5.0 ComplexHeatmap_2.18.0 circlize_0.4.16
##
## loaded via a namespace (and not attached):
## [1] RColorBrewer_1.1-3 jsonlite_1.8.8 shape_1.4.6.1 magrittr_2.0.3
## [5] magick_2.8.3 farver_2.1.1 rmarkdown_2.26 GlobalOptions_0.1.2
## [9] BiocIO_1.12.0 zlibbioc_1.48.2 vctrs_0.6.5 Cairo_1.6-2
## [13] memoise_2.0.1 Rsamtools_2.18.0 RCurl_1.98-1.14 htmltools_0.5.8.1
## [17] S4Arrays_1.2.1 CNEr_1.38.0 SparseArray_1.2.4 sass_0.4.9
## [21] pracma_2.4.4 bslib_0.7.0 plyr_1.8.9 zoo_1.8-12
## [25] cachem_1.0.8 GenomicAlignments_1.38.2 lifecycle_1.0.4 iterators_1.0.14
## [29] pkgconfig_2.0.3 Matrix_1.6-5 R6_2.5.1 fastmap_1.1.1
## [33] GenomeInfoDbData_1.2.11 MatrixGenerics_1.14.0 clue_0.3-65 digest_0.6.35
## [37] TFMPvalue_0.0.9 AnnotationDbi_1.64.1 stabs_0.6-4 RSQLite_2.3.6
## [41] seqLogo_1.68.0 labeling_0.4.3 fansi_1.0.6 httr_1.4.7
## [45] abind_1.4-5 compiler_4.3.2 bit64_4.0.5 withr_3.0.0
## [49] doParallel_1.0.17 BiocParallel_1.36.0 DBI_1.2.2 highr_0.10
## [53] R.utils_2.12.3 MASS_7.3-60 poweRlaw_0.80.0 DelayedArray_0.28.0
## [57] rjson_0.2.21 gtools_3.9.5 caTools_1.18.2 tools_4.3.2
## [61] R.oo_1.26.0 glue_1.7.0 restfulr_0.0.15 cluster_2.1.4
## [65] reshape2_1.4.4 generics_0.1.3 gtable_0.3.5 BSgenome_1.70.2
## [69] tzdb_0.4.0 R.methodsS3_1.8.2 sm_2.2-6.0 hms_1.1.3
## [73] utf8_1.2.4 foreach_1.5.2 pillar_1.9.0 stringr_1.5.1
## [77] splines_4.3.2 lattice_0.22-6 survival_3.5-8 rtracklayer_1.62.0
## [81] bit_4.0.5 annotate_1.80.0 tidyselect_1.2.1 DirichletMultinomial_1.44.0
## [85] GO.db_3.18.0 vioplot_0.4.0 knitr_1.46 SummarizedExperiment_1.32.0
## [89] xfun_0.43 Biobase_2.62.0 matrixStats_1.3.0 stringi_1.8.3
## [93] yaml_2.3.8 evaluate_0.23 codetools_0.2-19 tibble_3.2.1
## [97] cli_3.6.2 xtable_1.8-4 munsell_0.5.1 jquerylib_0.1.4
## [101] Rcpp_1.0.12 png_0.1-8 XML_3.99-0.16.1 readr_2.1.5
## [105] blob_1.2.4 bitops_1.0-7 glmnet_4.1-8 scales_1.3.0
## [109] purrr_1.0.2 crayon_1.5.2 GetoptLong_1.0.5 rlang_1.1.3
## [113] KEGGREST_1.42.0