Issue:
I have a data frame called Sub_Whistle_Count
(see below). I'm trying to do a hierarchical analysis using hclust()
, but I can't customise the right label names Whistle_Type_Sub
(28 different whistle subtypes produced by dolphins) to appear in the dendrogram, only the numerical identifier (see below).
#Data frame structure
'data.frame': 62 obs. of 3 variables:
$ Country : Factor w/ 3 levels "Italy","Turkey",..: 1 1 1 1 1 1 1 1 1 1 ...
$ Whistle_Type_Sub: Factor w/ 28 levels "A","AA","AA1",..: 1 24 25 11 2 18 7 5 9 13 ...
$ N
: int 25 64 31 5 4 5 3 10 2 1 ...
I'm trying to change the labels in the dendrogram (type fan
) created from the Ape package
with the rows Whistle_Type_Sub
and to cluster N
by Country -
. In this case, I'm attempting to incorporate colour
using the RColorBrewer
package and the Dark2
palette for the edges (lines) of a phylogeny-type plot using the plot(as.phylo(), type = "fan") plot
.
#Desired labels
mylabels<- c("A", "E", "EA", "BE", "AA","D", "B", "AD", "BC", "CA", "AA1", "DD1", "ED", "DC", "C", "AC", "ADC", "DE",
"EA", "A", "E", "DE", "F", "BE", "D", "EE1", "B", "CA", "DB", "BB", "AA1", "ED", "AD", "DD1", "AA",
"A","ED", "E","DD", "DD1","CDC", "C", "AC", "D", "F", "EE1","BCB", "DC", "ADC", "DE", "CA", "AA", "BE",
"CBC", "B", "EA", "AA1", "AD", "BB", "CD", "CB", "DB")
I'm also trying to add colour to the edges (lines)
, branches
and leaf
tips to highlight the different clusters of whistle subtypes between countries (Country
). I want to colour the 'Whistle_Type_Sub` names in the dendrogram in accordance with the group they cluster with i.e Turkey, Montenegro or Italy and for the edges to match the terminal tip colours when that branch within the dendrogram is associated with a given group.
Adding colour to the tips (labels)
based on a set of groups (Whistle Sub Type and Country)
appears to not be a big problem using the tip.color
command, although, I am not one hundred per cent sure that I've done this part correctly.
However, when I try to customise the labels Whistle_Type_Sub
for the dendrogram, I cannot solve the problem as the command show.tip.label
literally requires a true and false
response (please see my desired output below).
Would anyone be able to lend a hand?
Any help is always appreciated
I tried:
#Attempt 1
#Create labels
labels <- def(Sub_Whistle_Count$tip.label, Sub_Whistle_Count$Whistle_Type_Sub)
(mylabels<-brewer.pal(3, "Dark2")[labels])
character(0)
#Attempt 2
labels <- def(dend$tip.label, Sub_Whistle_Count$Whistle_Type_Sub)
Warning messages:
1: In get(results[[i]], packages[[i]]) :
restarting interrupted promise evaluation
2: In get(results[[i]], packages[[i]]) :
internal error -3 in R_decompress1
R-Code:
library(data.table)
library(cluster) #agnes function
library(usedist) #change label names
library(ape) #create fan data frame
library('dendextend')
library(RColorBrewer) #Customise the colour palette
library(phytools)
#DENDROGRAM
Cluster.Country.Dist<-dist(cbind(Sub_Whistle_Count[1, 3], 2*(as.numeric(Sub_Whistle_Count$Whistle_Type_Sub)-2)))
Cluster.Country.Dist
Cluster.Country.hcl<-hclust(Cluster.Country.Dist)
Cluster.Country.hcl
dend = as.dendrogram(Cluster.Country.hcl)
dend
#Connecting label color with the country label
geo <- factor(c("Montenegro", "Turkey", "Italy"))
(mycol<-brewer.pal(3, "Dark2")[geo])
#Select the colour of the branch
X <- brewer.pal(3, "Dark2")
#Open a new graphics window
dev.new()
#Plot the dednrogram
plot(as.phylo(dend), type="fan", cex=0.9, label.offset = 0.8,
edge.color = sample(X, length(Sub_Whistle_Count$Country)/2, replace = TRUE),
edge.width = sample(2:3, length(Sub_Whistle_Count$Country)/2, replace = TRUE),
show.tip.label = sample(2:3, length(Sub_Whistle_Count$Whistle_Type_Sub)/2, replace = TRUE),
show.node.label = TRUE,
tip.color=mycol, lwd=1)
Output from R Code
Desired Output
Data
structure(list(Country = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("Italy", "Turkey",
"Montenegro"), class = "factor"), Whistle_Type_Sub = structure(c(1L,
24L, 25L, 11L, 2L, 18L, 7L, 5L, 9L, 13L, 3L, 22L, 26L, 20L, 12L,
4L, 6L, 23L, 25L, 1L, 24L, 23L, 28L, 11L, 18L, 27L, 7L, 13L,
19L, 8L, 3L, 26L, 5L, 22L, 2L, 1L, 26L, 24L, 21L, 22L, 17L, 12L,
4L, 18L, 28L, 27L, 10L, 20L, 6L, 23L, 13L, 2L, 11L, 15L, 7L,
25L, 3L, 5L, 8L, 16L, 14L, 19L), .Label = c("A", "AA", "AA1",
"AC", "AD", "ADC", "B", "BB", "BC", "BCB", "BE", "C", "CA", "CB",
"CBC", "CD", "CDC", "D", "DB", "DC", "DD", "DD1", "DE", "E",
"EA", "ED", "EE1", "F"), class = "factor"), N = c(25L, 64L, 31L,
5L, 4L, 5L, 3L, 10L, 2L, 1L, 2L, 3L, 2L, 1L, 1L, 3L, 2L, 4L,
26L, 54L, 20L, 10L, 18L, 7L, 7L, 10L, 2L, 3L, 2L, 2L, 2L, 1L,
1L, 1L, 1L, 23L, 1L, 13L, 10L, 5L, 4L, 8L, 9L, 9L, 20L, 1L, 1L,
9L, 1L, 9L, 2L, 6L, 3L, 1L, 10L, 9L, 2L, 3L, 1L, 2L, 2L, 3L)), row.names = c(NA,
-62L), class = "data.frame")
R Code
library(data.table)
library(cluster) #agnes function
library(usedist) #change label names
library(ape) #create fan data frame
library('dendextend')
library(RColorBrewer) #Customise the colour palette
library(phytools)
#Create a data frame object
Sub_Whistle_Count<-as.data.frame(Yeo.Whistle.Count_Reorder)
Sub_Whistle_Count
#Check the structure of Yeo.Whistle.Count
str(Sub_Whistle_Count)
#DENDROGRAM
Cluster.Country.Dist<-dist(cbind(Sub_Whistle_Count[1:2], 2*(as.numeric(Sub_Whistle_Count$Whistle_Type_Sub)-2)))
Cluster.Country.Dist
##tip labels
mylabels<- c("A", "E", "EA", "BE", "AA","D", "B", "AD", "BC", "CA", "AA1", "DD1", "ED", "DC", "C", "AC", "ADC", "DE",
"EA", "A", "E", "DE", "F", "BE", "D", "EE1", "B", "CA", "DB", "BB", "AA1", "ED", "AD", "DD1", "AA",
"A","ED", "E","DD", "DD1","CDC", "C", "AC", "D", "F", "EE1","BCB", "DC", "ADC", "DE", "CA", "AA", "BE",
"CBC", "B", "EA", "AA1", "AD", "BB", "CD", "CB", "DB")
#Change the rows and column label names in the dist object
NewLabels<-dist_setNames(Cluster.Country.Dist, mylabels[1:62])
NewLabels
#Hierarchical Clustering algorithm on the dataset using hclust()
Cluster.Country.hcl<-hclust(NewLabels)
Cluster.Country.hcl
#Create a dendrogram object
dend = as.dendrogram(Cluster.Country.hcl)
dend
#Connecting label color with the country label
geo <- factor(c("Montenegro", "Turkey", "Italy"))
(mycol<-brewer.pal(3, "Dark2")[geo])
#Select the color of the branch
X <- brewer.pal(3, "Dark2")
#Open a new graphics window
dev.new()
#Plot the dendrogram
plot(as.phylo(dend), type="fan", cex=0.9, label.offset = 0.8,
edge.color = sample(X, length(Sub_Whistle_Count$Country)/2, replace = TRUE),
edge.width = sample(1:2, length(Sub_Whistle_Count$Country)/2, replace = TRUE),
show.tip.label = TRUE,
tip.color=mycol, lwd=1)
Diagram