r hierarchical-clustering dendextend

How to set a label for a cluster in a dendrogram in R?


I want to add a boxed label to each cluster, as shown here:

enter image description here

But what I obtained is:

enter image description here

Here the name of the cluster appears in each specie's leaf label, rather than on the corresponding cluster branch as shown in the first figure.

My data is:

Specie;Cluster;A;B
1;4;0.003594057;0.003594057
2;4;0.004548265;0.004548265
3;4;0.004551177;0.004551177
4;4;0.004889955;0.004889955
5;4;0.004645822;0.004645822
6;3;0.158136546;0.005911143
7;4;0.004827345;0.004827345
8;3;0.181887896;0.006387762
9;3;0.155627259;0.005777185
10;4;0.005248148;0.005248148
11;4;0.004480801;0.004480801
12;4;0.004781236;0.004781236
13;1;1;0.001377924
14;3;0.164309781;0.007946238
15;2;0.437294513;0.003118894
16;3;0.181133169;0.010087141
17;4;0.028744201;0.028744201
18;4;0.004867629;0.004867629
19;4;0.006461536;0.006461536
20;3;0.172580237;0.010015308
21;3;0.189448277;0.008443727
22;4;0.004569135;0.004569135
23;4;0.009534806;0.009534806
24;3;0.183054691;0.0126338
25;3;0.199946514;0.01640938
26;4;0.01003181;0.01003181
27;4;0.009964831;0.009964831
28;3;0.177578426;0.011115123
29;3;0.203733742;0.017803806
30;4;0.009477534;0.009477534
31;3;0.186869099;0.013588009
32;3;0.194106718;0.015397414
33;4;0.009655174;0.009655174
34;3;0.195003655;0.015621648
35;4;0.012924528;0.012924528
36;4;0.009473652;0.009473652
37;4;0.01203293;0.01203293
38;5;0.048146768;0.048146768
39;4;0.009969199;0.009969199
40;2;0.470963129;0.009127108
41;4;0.009641584;0.009641584
42;4;0.008821819;0.008821819
43;3;0.211170842;0.013712745
44;4;0.00940376;0.00940376
45;3;0.20177873;0.017315053
46;4;0.0041726;0.0041726
47;4;0.004170173;0.004170173
48;4;0.009588681;0.009588681
49;4;0.005260282;0.005260282
50;3;0.179906191;0.005831545
51;4;0.009565384;0.009565384
52;4;0.019591855;0.019591855
53;4;0.010158973;0.010158973
54;4;0.011264613;0.011264613
55;3;0.192672008;0.015038251
56;5;0.081005326;0.081005326
57;5;0.058880887;0.058880887
58;4;0.006573653;0.006573653
59;4;0.005261738;0.005261738
60;4;0.004243947;0.004243947
61;4;0.009413953;0.009413953
62;4;0;0
63;4;0.00824279;0.00824279
64;4;0.013707406;0.013707406
65;4;0.02907036;0.02907036

My code is (Adapted):

# Build and plot a dendrogram from the semicolon-separated table: cluster the
# measurement columns, color the branches into 5 groups, prefix every leaf
# label with its cluster id, then draw the tree.
# Requires dendextend and colorspace to be attached (not loaded in this snippet).

# NOTE(review): absolute, machine-specific path; the script only runs where
# this directory exists.
setwd("C:/Users/david/OneDrive/Documents/R_Carpeta/2022_mid/2022_end")
# NOTE(review): the name `iris` masks the built-in `iris` dataset.
iris<-read.csv(file="Dendrograma_Ds_2022_Nor-Grupos.csv", header=TRUE, sep=";")
# Drop the first two columns (Specie and Cluster in the data shown) so only the
# measurement columns go into the distance computation.
Dg <- iris[,-1:-2]
# Use the first column (Specie id) as row names; these become the leaf labels.
row.names(Dg) <- iris[,1]
# Minkowski distance with the default power p = 2, then average linkage.
rest.dist <-dist(Dg,method="minkowski")
hc1_5 <- hclust(rest.dist, method = "average")
dend1_5 <- as.dendrogram(hc1_5)
# Branch colors and (intended) branch labels
# NOTE(review): column 2 holds integer cluster ids in the data shown, so
# read.csv would not make it a factor; levels() then returns NULL and
# groupLabels below receives NULL, i.e. no branch labels are requested.
# Confirm against the actual file.
iris_species <- rev(levels(iris[,2]))
dend1_5_c <- color_branches(dend1_5,k=5, groupLabels=iris_species)
# Top-level check whose result is only printed, not used.
is.character(labels(dend1_5_c)) # labels are no longer "integer"
# Color each leaf label by the cluster id of the row sitting at that leaf.
# NOTE(review): the palette has 3 colors while the data shown has cluster ids
# up to 5; indices above 3 would select NA -- TODO confirm.
labels_colors(dend1_5_c) <-
  rainbow_hcl(3)[sort_levels_values(
    as.numeric(iris[,2])[order.dendrogram(dend1_5_c)]
  )]
# Leaf labels: rewrite each one as "<cluster id>(<current label>)". This puts
# the cluster id into the leaf text; it does not label the cluster branches.
labels(dend1_5_c) <- paste(as.character(iris[,2])[order.dendrogram(dend1_5_c)],
                           "(",labels(dend1_5_c),")", 
                           sep = "")
# Hang the leaves and set the leaf-label text size ("lab.cex") to 0.5.
dend1_5_c_L <- hang.dendrogram(dend1_5_c,hang_height=0.1)
dend1_5_c_L <- assign_values_to_leaves_nodePar(dend1_5_c_L, 0.5, "lab.cex")
# Margins are c(bottom, left, top, right); the previous par() value is not
# restored afterwards.
par(mar = c(3,3,3,7))
plot(dend1_5_c_L, horiz =  FALSE,  nodePar = list(cex = .007))

Solution

  • To obtain a plot with branch labels, pass a character vector as the groupLabels argument of color_branches; its length must equal the number of clusters (in this case k = 5).

    library(dendextend)
    library(colorspace)

    ## `tab` must already hold the imported dataset

    ## leaf labels take the form "<Cluster> (<Specie>)"
    rownames(tab) <- sprintf("%d (%d)", tab[["Cluster"]], tab[["Specie"]])

    ## average-linkage clustering on everything except the first two columns
    pairwise_dist <- dist(tab[, -(1:2)], method = "minkowski")
    tree <- as.dendrogram(hclust(pairwise_dist, method = "average"))

    ## groupLabels carries one entry per cluster (k = 5)
    tree <- color_branches(tree, k = 5, groupLabels = LETTERS[1:5])
    tree <- assign_values_to_leaves_nodePar(
        tree,
        value = 0.5,
        nodePar = "lab.cex"
    )
    hc <- hang.dendrogram(tree, 0.1)

    plot(hc)
    
    

    cluster