I am trying to create subdendrograms using dendextend. This works until I change the labels of the leaves in the dendrogram. I want each label to be 'Y' or 'B', based on a column in my data frame, but once I do this several of the subdendrograms come back as NULL.
Here is a reproducible example:
library(dplyr)
library(dendextend)
# Leaf labels for the 150 observations: "Y" everywhere, except for the seven
# observations (given by position) that are marked "B".
new_labels <- replace(
  rep("Y", 150),
  c(10, 18, 33, 49, 53, 62, 126),
  "B"
)
# Label colors for the 150 observations: color 1 everywhere, except for the
# seven observations (given by position) that get color 8.
new_colors <- replace(
  rep(1, 150),
  c(10, 18, 33, 49, 53, 62, 126),
  8
)
# Reproduction case: build a dendrogram from the iris data, recolor and relabel
# its leaves, split it into 5 sub-dendrograms, and plot the full tree together
# with the sub-dendrograms.
# define dendrogram object to play with:
# (all iris columns except the fifth are used)
dend <- iris[, -5] %>%
# pairwise distances between rows, then hierarchical clustering
dist() %>%
hclust() %>%
as.dendrogram() %>%
# change the color of the labels
# new_colors is indexed by order.dendrogram(.) so the values line up with the
# leaves in their dendrogram order
set('labels_colors', new_colors[order.dendrogram(.)]) %>%
# change the labels to 'Y' or 'B'
# NOTE(review): this is the step that triggers the problem described in the
# text; after it the leaf labels are no longer unique. Presumably
# get_subdendrograms() relies on unique labels -- confirm against its docs.
set('labels', new_labels[order.dendrogram(.)]) %>%
# color the branches according to a cut into k = 5 clusters
color_branches(k = 5)
# extract the 5 sub-dendrograms as a list
dend_list <- get_subdendrograms(dend, 5)
# Plotting the result
# 2 x 3 grid of panels: one for the full tree plus one per sub-dendrogram
par(mfrow = c(2, 3))
plot(dend, main = "Original dendrogram")
# plot() is applied to every element of dend_list for its drawing side effect
sapply(dend_list, plot)
If I run this without the line
set('labels', new_labels[order.dendrogram(.)]) %>%
then it works fine, in that I get a plot with the main dendrogram and 5 subdendrograms, and the labels are colored (black or grey).
However, if I run with this line
set('labels', new_labels[order.dendrogram(.)]) %>%
then I get NULL entries in dend_list, and only 1 of the 5 subdendrograms is added to the plot. The one that is plotted is formatted correctly, with 'Y' and 'B' labels.
I have tried lots of things but I'm not sure how to fix this. Does anybody know why this might be happening?
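I now have the answer (with some help from Bing Chat).
The solution was to extract the subdendrograms first, and only afterwards change the labels of both the main dendrogram and each subdendrogram (presumably get_subdendrograms needs unique leaf labels to locate each subtree, and the 'Y'/'B' labels are not unique). Updated code: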
library(dplyr)
library(dendextend)
# Leaf labels for the 150 observations: "Y" everywhere, except for the seven
# observations (given by position) that are marked "B".
new_labels <- replace(
  rep("Y", 150),
  c(10, 18, 33, 49, 53, 62, 126),
  "B"
)
# Label colors for the 150 observations: color 1 everywhere, except for the
# seven observations (given by position) that get color 8.
new_colors <- replace(
  rep(1, 150),
  c(10, 18, 33, 49, 53, 62, 126),
  8
)
# Build the dendrogram: distances between the rows of iris (fifth column
# dropped), hierarchical clustering, then conversion to a dendrogram.
dend <- as.dendrogram(hclust(dist(iris[, -5])))
# Color the leaf labels; new_colors is reordered by order.dendrogram() so the
# values line up with the leaves in their dendrogram order.
dend <- set(dend, 'labels_colors', new_colors[order.dendrogram(dend)])
# Color the branches according to a cut into 5 clusters.
dend <- color_branches(dend, k = 5)
# Extract the 5 sub-dendrograms before any relabeling takes place.
dend_list <- get_subdendrograms(dend, 5)
# Relabeling happens only now, after the sub-dendrograms have been extracted.
# Full tree first: new_labels reordered to the leaf order of dend
# (new_labels is already a character vector, so it is assigned directly).
labels(dend) <- new_labels[order.dendrogram(dend)]
# Then every sub-dendrogram, each indexed by its own order.dendrogram().
dend_list <- lapply(dend_list, function(sub_dend) {
  set(sub_dend, 'labels', new_labels[order.dendrogram(sub_dend)])
})
# Plotting the result
# Use a 2 x 3 grid of panels (full tree plus the 5 sub-dendrograms) and keep
# the previous graphics settings so they can be restored afterwards.
old_par <- par(mfrow = c(2, 3))
plot(dend, main = "Original dendrogram")
# plot() is called only for its drawing side effect; lapply() wrapped in
# invisible() avoids printing the list of NULL return values that
# sapply(dend_list, plot) would echo at top level.
invisible(lapply(dend_list, plot))
# Restore the graphics parameters that were in effect before this section.
par(old_par)