Workaround for Plotting Dendrograms
I think I have a solution to get decent enough dendrograms without fussing with base graphics.
The overview of my workaround is to cluster with pvclust
, extract $hclust
, plot it as a dendrogram, coerce into a ggplot. This makes it easy enough to replicate the functionality of the colored_bars()
function by making additional plots. The function below makes a few plots in addition to the dendrogram. If you end up working with base graphics anyway, dendextend
is still worth a look.
Here’s an example:
# needed
library(pvclust)
library(tidyverse)
library(dendextend) # for color_labels
library(ggnewscale) # to accommodate two fill scales https://github.com/eliocamp/ggnewscale
# recommended
# install.packages("palmerpenguins")
library(palmerpenguins)
library(patchwork)
library(scales) # for overiding scientific notation on dendrogram y axis
# Make a demo dataset
<- select(palmerpenguins::penguins,
tux
species,
bill_length_mm, bill_depth_mm,
flipper_length_mm, body_mass_g)
<- tux[complete.cases(tux), ]
tux
set.seed(54646)
<- tux[sample(1:nrow(tux), 30), ] # for faster demo clustering
tux
# Example use
<-
o mk_hclust_plts(
df = mutate(tux, uid = paste(seq(1, nrow(tux)), species, sep = "-")),
cluster_by = c("bill_length_mm", "bill_depth_mm",
"flipper_length_mm", "body_mass_g"),
uid_col = "uid",
n_clusters = 3,
true_groups = "species",
true_colors = RColorBrewer::brewer.pal(3, "Set2"),
cluster_colors = RColorBrewer::brewer.pal(3, "Set1")
)
# Patchwork to arrange the output plots
$dendrogram_both+
(oscale_y_continuous(limits = c(-.0001, 0.00022), labels = scales::comma)
/
)$group_compare_tile+
(o# lims(y = c(2, -2))+ # y axis can be flipped like so
theme(legend.position = "")
/
) $heatmap_raw + theme(legend.position = "right")) /
(o$heatmap_z + theme(legend.position = "right")) +
(o::plot_layout(heights = c(5, .3, 1.25, 1.25))
patchwork
# example 2
# o <-
# mk_hclust_plts(
# df = mutate(iris, uid = paste(seq(1, nrow(iris)), Species, sep = "-")),
# cluster_by = c("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width"),
# uid_col = "uid",
# n_clusters = 3,
# true_groups = "Species",
# true_colors = RColorBrewer::brewer.pal(3, "Set2"),
# cluster_colors = RColorBrewer::brewer.pal(3, "Set1")
# )
ps, it’s worth checking your code on sample datasets (penguins
,iris
, mpg
, etc. ). That’ll help iron out weird behavior sooner rather than later.
(2021-2-21) Edit: The original function here depended on the factor levels of the clusters and true groups to make the color’s consistent between plots (e.g. factors ordered abcdABCD not aAbBcCdD). This breaks down with some cases (e.g. if you start group names with a number (e.g. 0hours)). The below edit uses ggnewscale
to fix this.
<- function(
mk_hclust_plts df = unite(M_winxiqr, uid, Experiment, Cell, sep = "-"),
cluster_by = c("vrest", "r11", "r1", "Ihtk.0", "Ihtk.Slope", "Ia.0", "Ia.Slope"),
uid_col = "uid",
n_clusters = 3,
true_groups = "Condition",
true_colors = RColorBrewer::brewer.pal(3, "Set2"),
cluster_colors = RColorBrewer::brewer.pal(3, "Set1")
){<- as.data.frame(df)
df
if (!exists("cluster_colors")){
= rainbow(n_clusters)
cluster_colors
}## prep
# move uid to rowname
row.names(df) <- df[[uid_col]]
<- select(df, all_of(true_groups))
df_groups <- df[, cluster_by]
df
## Cluster
<- pvclust(t(df),
cluster method.hclust = "ward.D2",
method.dist = "correlation",
use.cor = "pairwise.complete.obs")
## make dendrogram ####
<- cluster$hclust %>%
dend as.dendrogram()
# iteratively coloring the labels is a workaround to get the "true" groups shown
<- rownames_to_column(df_groups, var = "rownames")[, ]
dend_labs <- full_join(data.frame(rownames = labels(dend)), dend_labs)
dend_labs for(i in seq_along(unique(df_groups[[true_groups]]))){
<- unique(df_groups[[true_groups]])[i]
true_group <- true_colors[i]
true_color
<- dend %>%
dend ::color_labels(
dendextendcol = true_color,
labels = dend_labs[dend_labs[[true_groups]] == true_group, "rownames"])
}
<- dend %>%
dend set("branches_k_color",
k = n_clusters,
value = cluster_colors
%>%
) set("branches_lwd", 0.7) %>%
set("labels_cex", 0.6)
<- dend %>%
dend_cluster_only set("labels_colors",
k = n_clusters,
value = cluster_colors) %>%
as.ggdend()
<- dend %>%
dend as.ggdend()
<- ggplot(dend_cluster_only)+
plt_dend_cluster_only theme(axis.ticks.y = element_line(),
axis.text.y = element_text(),
axis.line.y = element_line())
<- ggplot(dend)+
plt_dend theme(axis.ticks.y = element_line(),
axis.text.y = element_text(),
axis.line.y = element_line())
## Add reality ribbon with or without clustering result ####
<- full_join(
groups_to_plt as.data.frame(dend$labels),
rownames_to_column(var = "label", df_groups))
<- groups_to_plt %>%
plt_grouping ggplot(aes_string(x="x", y="0", fill = true_groups))+
geom_tile()+
scale_fill_manual(values = true_colors)+
theme_void()+
labs(x = "", y = "")+
theme(legend.position = "left")
<- ggplot()+
plt_grouping_contrast geom_tile(data = groups_to_plt, aes_string(x="x", y="0.5", fill = true_groups))+
scale_fill_manual(values = true_colors)+
::new_scale("fill") +
ggnewscalegeom_tile(data = data.frame(x = seq_along(dend_cluster_only$labels$col),
cluster_groups = as.character(as.numeric(as.factor(dend_cluster_only$labels$col)))
),aes_string(x="x", y= "-0.5", fill = "cluster_groups"),
+
)scale_fill_manual(values = cluster_colors)+
theme_void()+
labs(x = "", y = "")+
theme(legend.position = "left")
## Add heatmap ####
<- full_join(
data_to_plt as.data.frame(dend$labels),
rownames_to_column(var = "label", df))
<-
data_to_plt %>%
data_to_plt gather("key", "value",
names(data_to_plt)[
!(names(data_to_plt) %in% c("x", "y",
"label", "col", "cex",
true_groups))
])
<- data_to_plt %>%
plt_heatmap_raw ggplot(aes(x,
y = key,
fill = value))+
geom_tile()+
scale_fill_viridis_c()+
labs(x = "", y = "")+
theme(panel.background = element_blank(),
axis.ticks.x = element_blank(),
axis.text.x = element_blank(),
legend.position = "left")
<- data_to_plt %>%
plt_heatmap_z group_by(key) %>%
mutate(mean = mean(value, na.rm = T),
sd = sd(value, na.rm = T)) %>%
mutate(value = ((value - mean)/sd)) %>% # Now Z scores
ggplot(aes(x,
y = key,
fill = value))+
geom_tile()+
scale_fill_viridis_c()+
labs(x = "", y = "")+
theme(panel.background = element_blank(),
axis.ticks.x = element_blank(),
axis.text.x = element_blank(),
legend.position = "left")
## Return plots, manually tweak layout ####
return(
list(
pvclust_out = cluster,
dendrogram_clusters = plt_dend_cluster_only,
dendrogram_both = plt_dend,
group_tile = plt_grouping,
group_compare_tile = plt_grouping_contrast,
heatmap_raw = plt_heatmap_raw,
heatmap_z = plt_heatmap_z
)
) }