r ggplot2 legend dendrogram ggdendro

Remove one entry from the legend of a dendrogram


I am experiencing issues with the legend entries of a dendrogram. I am utilizing the code example provided here.

library(ggplot2)
library(ggdendro)
library(plyr)
library(zoo)

# Work on a copy so the built-in USArrests dataset itself is never modified.
df <- USArrests
# Generic labels, one per row (derived from the data instead of a fixed 1:50).
labs <- paste0("sta_", seq_len(nrow(df)))
rownames(df) <- labs

cut <- 4    # Number of clusters
# Hierarchical clustering; the method name is spelled out rather than relying
# on partial matching of "ave".
hc <- hclust(dist(df), method = "average")
dendr <- dendro_data(hc, type = "rectangle")
clust <- cutree(hc, k = cut)               # assign each observation to one of 'cut' clusters
clust.df <- data.frame(label = names(clust), cluster = clust)

# Split dendrogram into upper grey section and lower coloured section.
# 'height' holds the distinct segment heights, largest first; the cut is placed
# halfway between the cut-th and (cut - 1)-th largest of them.
height <- sort(unique(dendr$segments$y), decreasing = TRUE)
cut.height <- mean(c(height[cut], height[cut - 1]))
# line == 1 marks segments above the cut height, line == 2 those below it:
# first the horizontal segments (y == yend) lying above the cut ...
dendr$segments$line <- ifelse(dendr$segments$y == dendr$segments$yend &
                                dendr$segments$y > cut.height, 1, 2)
# ... then every segment whose end point is still above the cut.
dendr$segments$line <- ifelse(dendr$segments$yend > cut.height, 1,
                              dendr$segments$line)

# Number the clusters.
# diff(line) == 1 marks a row where the segments switch from the upper section
# (line 1) to the lower section (line 2); the first 'cut' such rows are given
# the ids 2 .. cut + 1.
dendr$segments$cluster <- c(-1, diff(dendr$segments$line))
change <- which(dendr$segments$cluster == 1)
for (i in seq_len(cut)) {
  dendr$segments$cluster[change[i]] <- i + 1
}
# Upper-section segments get id 1; lower-section rows that are not a switch
# point (diff == 0) are set to NA and then filled with the last id seen above
# them (last observation carried forward).
dendr$segments$cluster <- ifelse(
  dendr$segments$line == 1, 1,
  ifelse(dendr$segments$cluster == 0, NA, dendr$segments$cluster)
)
dendr$segments$cluster <- na.locf(dendr$segments$cluster)

# Consistent numbering between segment$cluster and label$cluster:
# order the cluster table by the dendrogram's label levels, relabel the
# clusters 2 .. cut + 1 in order of first appearance, then attach them to the
# label data by 'label'.
clust.df$label <- factor(clust.df$label, levels = levels(dendr$labels$label))
clust.df <- arrange(clust.df, label)
clust.df$cluster <- factor(clust.df$cluster,
                           levels = unique(clust.df$cluster),
                           labels = seq_len(cut) + 1)
dendr[["labels"]] <- merge(dendr[["labels"]], clust.df, by = "label")

# Plot the dendrogram
# Draw the tree: branch segments first, then the leaf labels, both coloured by
# cluster. Only the text layer contributes to the legend (as filled rectangles).
ggplot() +
  geom_segment(
    data = segment(dendr),
    mapping = aes(
      x = x, y = y, xend = xend, yend = yend,
      size = factor(line),
      colour = factor(cluster)
    ),
    lineend = "square",
    show.legend = FALSE
  ) +
  geom_text(
    data = label(dendr),
    mapping = aes(x = x, y = y, label = label, colour = factor(cluster)),
    hjust = -0.2,
    size = 3,
    key_glyph = "rect"
  ) +
  # One grey for the upper section followed by one rainbow colour per cluster.
  scale_colour_manual(
    values = c("grey60", rainbow(cut)),
    labels = c("remove this entry", paste("cluster", 1:cut))
  ) +
  # Both line classes are drawn at the same width.
  scale_size_manual(values = c(0.5, 0.5)) +
  scale_y_reverse(expand = c(0.2, 0)) +
  coord_flip() +
  labs(x = NULL, y = "Height") +
  theme(
    panel.background = element_rect(fill = "white"),
    panel.grid = element_blank(),
    axis.line.y = element_blank(),
    axis.ticks.y = element_blank(),
    axis.text.y = element_blank(),
    axis.title.y = element_blank()
  )

too many entries

The legend shows one more entry than there are clusters. The line scale_colour_manual(values = c("grey60", rainbow(cut))) is only meant to colour the clusters.

Adding

  # Attempted fix: name only the cluster levels as legend breaks.
  # `1:cut+1` is parsed as `(1:cut) + 1`, i.e. the values 2 .. cut + 1.
  scale_colour_manual(values = c("grey60", rainbow(cut)),
                      breaks = c(factor(1:cut+1)),
                      labels = c(paste("cluster", (1:cut))))

does not work as intended.

still wrong entries

Is there a way to remove the grey entry?


Solution

  • Yes, just pass the values you want to keep to the limits argument of scale_color_manual:

    # Plot the dendrogram with a legend that lists only the clusters.
    ggplot() + 
      geom_segment(data = segment(dendr), 
                   aes(x, y, xend = xend, yend = yend, size = factor(line), 
                       colour = factor(cluster)), 
                   lineend = "square", show.legend = FALSE) + 
      # Restrict the colour scale to the cluster ids 2 .. cut + 1 (derived from
      # `cut` rather than hard-coded for four clusters). Segments with id 1
      # (the upper section) fall outside the limits, so they get no legend
      # entry and are drawn in `na.value`, set here to the intended grey.
      scale_colour_manual(name = "Cluster",
                          limits = factor(seq_len(cut) + 1),
                          values = rainbow(cut),
                          labels = paste("cluster", seq_len(cut)),
                          na.value = "grey60") +
      scale_size_manual(values = c(0.5, 0.5)) +
      geom_text(data = label(dendr), aes(x, y, label = label, 
                                         colour = factor(cluster)), 
                hjust = -0.2, size = 3, key_glyph = "rect") +
      scale_y_reverse(expand = c(0.2, 0)) + 
      labs(x = NULL, y = "Height") +
      coord_flip() +
      theme(axis.line.y = element_blank(),
            axis.ticks.y = element_blank(),
            axis.text.y = element_blank(),
            axis.title.y = element_blank(),
            panel.background = element_rect(fill = "white"),
            panel.grid = element_blank())
    

    enter image description here