r plot cluster-analysis dendrogram dendextend

Changing the Tip Labels on a Dendrogram "Type = Fan" Based on a Defined Group in R Using the Ape Package


Issue:

I have a data frame called Sub_Whistle_Count (see below). I'm trying to do a hierarchical analysis using hclust(), but I can't customise the right label names Whistle_Type_Sub (28 different whistle subtypes produced by dolphins) to appear in the dendrogram, only the numerical identifier (see below).

 #Data frame structure

'data.frame':   62 obs. of  3 variables:
 $ Country         : Factor w/ 3 levels "Italy","Turkey",..: 1 1 1 1 1 1 1 1 1 1 ...
 $ Whistle_Type_Sub: Factor w/ 28 levels "A","AA","AA1",..: 1 24 25 11 2 18 7 5 9 13 ...
 $ N               : int  25 64 31 5 4 5 3 10 2 1 ...

I'm trying to change the labels in the dendrogram (type fan) created with the ape package to the values in Whistle_Type_Sub, and to cluster N by Country. I'm also attempting to add colour, using the RColorBrewer package and the Dark2 palette, to the edges (lines) of a phylogeny-type plot produced with plot(as.phylo(), type = "fan").

#Desired labels
# One label per row of Sub_Whistle_Count (62 values), listed in row order:
# the first 18 rows are Italy, the next 17 Turkey and the last 27 Montenegro.
# For the data posted below, this vector spells out
# as.character(Sub_Whistle_Count$Whistle_Type_Sub), so the same whistle
# subtype appears more than once (once per country that produced it).

mylabels<- c("A", "E", "EA", "BE", "AA","D", "B", "AD", "BC", "CA", "AA1", "DD1", "ED", "DC", "C", "AC", "ADC", "DE", 
             "EA",  "A", "E", "DE", "F", "BE", "D", "EE1", "B", "CA", "DB",  "BB", "AA1", "ED", "AD",  "DD1", "AA", 
             "A","ED", "E","DD", "DD1","CDC", "C", "AC", "D",  "F", "EE1","BCB", "DC", "ADC", "DE",  "CA", "AA",  "BE",  
             "CBC", "B", "EA",  "AA1", "AD",  "BB",  "CD", "CB",  "DB")

I'm also trying to add colour to the edges (lines), branches and leaf tips to highlight the different clusters of whistle subtypes between countries (Country). I want to colour the `Whistle_Type_Sub` names in the dendrogram according to the group they cluster with, i.e. Turkey, Montenegro or Italy, and for the edges to match the terminal tip colours when that branch of the dendrogram is associated with a given group.

Adding colour to the tips (labels) based on a set of groups (Whistle Sub Type and Country) appears to not be a big problem using the tip.color command, although, I am not one hundred per cent sure that I've done this part correctly.

However, when I try to customise the labels (Whistle_Type_Sub) for the dendrogram, I cannot solve the problem, because the show.tip.label argument only accepts TRUE or FALSE rather than a vector of labels (please see my desired output below).

Would anyone be able to lend a hand?

Any help is always appreciated

I tried:

#Attempt 1
#Create labels 
labels <- def(Sub_Whistle_Count$tip.label, Sub_Whistle_Count$Whistle_Type_Sub)
(mylabels<-brewer.pal(3, "Dark2")[labels])
character(0)

#Attempt 2
labels <- def(dend$tip.label, Sub_Whistle_Count$Whistle_Type_Sub)
Warning messages:
1: In get(results[[i]], packages[[i]]) :
  restarting interrupted promise evaluation
2: In get(results[[i]], packages[[i]]) :
  internal error -3 in R_decompress1

R-Code:

library(data.table)
library(cluster) #agnes function
library(usedist) #change label names
library(ape) #create fan data frame
library('dendextend')
library(RColorBrewer) #Customise the colour palette
library(phytools)

# DENDROGRAM
# Question code: builds a distance matrix, clusters it with hclust() and draws
# the result as a fan-type tree. Kept exactly as posted; the notes below only
# mark the spots that relate to the problems described in the question.
#
# NOTE(review): Sub_Whistle_Count[1, 3] selects a single cell (row 1 of the
# third column, N), not the whole N column - confirm this is what was intended.
# No labels are attached to the dist object here, which is presumably why the
# tips show numerical identifiers instead of the Whistle_Type_Sub names.
Cluster.Country.Dist<-dist(cbind(Sub_Whistle_Count[1, 3], 2*(as.numeric(Sub_Whistle_Count$Whistle_Type_Sub)-2)))
Cluster.Country.Dist

# Hierarchical clustering of the distance matrix (hclust() default settings)
Cluster.Country.hcl<-hclust(Cluster.Country.Dist)
Cluster.Country.hcl

# Convert the hclust result to a dendrogram object
dend = as.dendrogram(Cluster.Country.hcl) 
dend 
# Connecting label colour with the country label
# NOTE(review): geo holds only three values, so mycol contains three colours;
# it is not one colour per row of Sub_Whistle_Count.
geo <- factor(c("Montenegro", "Turkey", "Italy"))
(mycol<-brewer.pal(3, "Dark2")[geo])

# Select the colour of the branch (the three Dark2 colours)
X <- brewer.pal(3, "Dark2")

# Open a new graphics window
dev.new()

# Plot the dendrogram
# edge.color and edge.width are random draws (sample()), so they change on
# every run and are not tied to Country.
# show.tip.label is given a vector of the integers 2 and 3 here; as stated in
# the question, that argument expects TRUE or FALSE.
plot(as.phylo(dend), type="fan", cex=0.9, label.offset = 0.8,
     edge.color = sample(X, length(Sub_Whistle_Count$Country)/2, replace = TRUE),
     edge.width = sample(2:3, length(Sub_Whistle_Count$Country)/2, replace = TRUE),
     show.tip.label = sample(2:3, length(Sub_Whistle_Count$Whistle_Type_Sub)/2, replace = TRUE), 
     show.node.label = TRUE, 
     tip.color=mycol, lwd=1)

Output from R Code

enter image description here

Desired Output

enter image description here

Data

structure(list(Country = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("Italy", "Turkey", 
"Montenegro"), class = "factor"), Whistle_Type_Sub = structure(c(1L, 
24L, 25L, 11L, 2L, 18L, 7L, 5L, 9L, 13L, 3L, 22L, 26L, 20L, 12L, 
4L, 6L, 23L, 25L, 1L, 24L, 23L, 28L, 11L, 18L, 27L, 7L, 13L, 
19L, 8L, 3L, 26L, 5L, 22L, 2L, 1L, 26L, 24L, 21L, 22L, 17L, 12L, 
4L, 18L, 28L, 27L, 10L, 20L, 6L, 23L, 13L, 2L, 11L, 15L, 7L, 
25L, 3L, 5L, 8L, 16L, 14L, 19L), .Label = c("A", "AA", "AA1", 
"AC", "AD", "ADC", "B", "BB", "BC", "BCB", "BE", "C", "CA", "CB", 
"CBC", "CD", "CDC", "D", "DB", "DC", "DD", "DD1", "DE", "E", 
"EA", "ED", "EE1", "F"), class = "factor"), N = c(25L, 64L, 31L, 
5L, 4L, 5L, 3L, 10L, 2L, 1L, 2L, 3L, 2L, 1L, 1L, 3L, 2L, 4L, 
26L, 54L, 20L, 10L, 18L, 7L, 7L, 10L, 2L, 3L, 2L, 2L, 2L, 1L, 
1L, 1L, 1L, 23L, 1L, 13L, 10L, 5L, 4L, 8L, 9L, 9L, 20L, 1L, 1L, 
9L, 1L, 9L, 2L, 6L, 3L, 1L, 10L, 9L, 2L, 3L, 1L, 2L, 2L, 3L)), row.names = c(NA, 
-62L), class = "data.frame")

Solution

  • R Code

    library(data.table)
    library(cluster) #agnes function
    library(usedist) #change label names
    library(ape) #create fan data frame
    library('dendextend')
    library(RColorBrewer) #Customise the colour palette
    library(phytools)
    
    # Create a data frame object from the raw whistle counts
    Sub_Whistle_Count <- as.data.frame(Yeo.Whistle.Count_Reorder)
    Sub_Whistle_Count

    # Check the structure of Sub_Whistle_Count
    str(Sub_Whistle_Count)

    # DENDROGRAM ----
    # Pairwise distances between the rows of Sub_Whistle_Count.
    # NOTE(review): columns 1:2 are the factor columns Country and
    # Whistle_Type_Sub (see the str() output), while dist() works on numeric
    # input - confirm that these are the intended clustering features (the
    # count column N is not part of this matrix).
    Cluster.Country.Dist <- dist(
      cbind(
        Sub_Whistle_Count[1:2],
        2 * (as.numeric(Sub_Whistle_Count$Whistle_Type_Sub) - 2)
      )
    )
    Cluster.Country.Dist

    # Tip labels: one whistle subtype name per row, in row order. Deriving
    # them from the data (instead of a hand-typed vector of 62 strings) keeps
    # the labels aligned with the rows if the data are re-ordered or extended.
    mylabels <- as.character(Sub_Whistle_Count$Whistle_Type_Sub)

    # dist_setNames() needs exactly one name per observation in the dist object
    stopifnot(length(mylabels) == attr(Cluster.Country.Dist, "Size"))

    # Change the row and column label names in the dist object
    NewLabels <- dist_setNames(Cluster.Country.Dist, mylabels)
    NewLabels

    # Hierarchical clustering on the labelled distances using hclust()
    Cluster.Country.hcl <- hclust(NewLabels)
    Cluster.Country.hcl

    # Create a dendrogram object
    dend <- as.dendrogram(Cluster.Country.hcl)
    dend
    
    # Colour palette: the three Dark2 colours, one per country
    X <- brewer.pal(3, "Dark2")

    # Connect the label colour with the country of EACH observation.
    # geo has one entry per row of Sub_Whistle_Count, so mycol holds one
    # colour per tip instead of three colours recycled around the tree.
    geo <- factor(Sub_Whistle_Count$Country)
    stopifnot(!anyNA(geo), nlevels(geo) <= length(X))
    (mycol <- X[as.integer(geo)])

    # Convert the clustering to a phylo object. as.phylo() on an hclust
    # result numbers the tips by observation, so tip i is row i of the data.
    phy <- as.phylo(Cluster.Country.hcl)
    stopifnot(length(phy$tip.label) == length(geo))

    # Work out, for every node, whether all tips below it share one country.
    # State per node: NA = not visited yet, 0 = mixed countries,
    # k > 0 = every descendant tip belongs to country level k.
    node_state <- c(as.integer(geo), rep(NA_integer_, phy$Nnode))

    # In postorder, every edge below a node is listed before the edge that
    # leads to that node, so a child's state is final when it is read here.
    # reorder() only permutes the edge rows; node numbers stay the same.
    post_edge <- reorder(phy, order = "postorder")$edge
    for (i in seq_len(nrow(post_edge))) {
      parent <- post_edge[i, 1]
      child <- post_edge[i, 2]
      if (is.na(node_state[parent])) {
        node_state[parent] <- node_state[child]
      } else if (node_state[parent] != node_state[child]) {
        node_state[parent] <- 0L
      }
    }

    # An edge takes the colour of the node it leads to: the country colour
    # when the whole branch belongs to one country, grey when it is mixed.
    edge_col <- c("grey60", X)[node_state[phy$edge[, 2]] + 1L]

    # Open a new graphics window
    dev.new()

    # Plot the dendrogram as a fan with country-coloured tips and edges
    plot(phy, type = "fan", cex = 0.9, label.offset = 0.8,
         edge.color = edge_col,
         edge.width = 2,
         show.tip.label = TRUE,
         tip.color = mycol)

    # Key linking each colour to its country
    legend("topleft", legend = levels(geo), fill = X[seq_len(nlevels(geo))],
           title = "Country", bty = "n")
    

    Diagram

    enter image description here