Issue
I have produced a PCA biplot using the packages `ggbiplot`/`ggplot2`. I have rescaled and lengthened the loadings (arrows) using the function `geom_segment()`. I used the argument `var.axes = FALSE` to remove the shorter default loadings (arrows) because my original labels were overlapping and unreadable (see here); however, this has also removed the text labels for each variable from the PCA biplot.
I am struggling to add the loadings labels to the biplot manually: for nearly a week I have been trying to place the labels neatly at the ends of the arrowheads with `geom_text()`. Afterwards, I want to give the labels a grey background using the function `geom_label()`. I would like my plot to look like Diagram 1. I keep getting this error message:
Error
Error in `check_aesthetics()`:
! Aesthetics must be either length 1 or the same as the data (8): x and y
---
Backtrace:
1. base (local) `<fn>`(x)
2. ggplot2:::print.ggplot(x)
4. ggplot2:::ggplot_build.ggplot(x)
5. ggplot2 (local) by_layer(function(l, d) l$compute_aesthetics(d, plot))
6. ggplot2 (local) f(l = layers[[i]], d = data[[i]])
7. l$compute_aesthetics(d, plot)
8. ggplot2 (local) f(..., self = self)
9. ggplot2:::check_aesthetics(evaled, n)
Run `rlang::last_trace()` to see the full context.
What am I missing? I'm sure that I'm making silly mistakes, but I can't figure this conundrum out. If anyone can help, I would be really grateful.
Many thanks in advance
R-code
#PCA biplots
# One-off setup: install the remotes package, then ggbiplot from GitHub.
install.packages("remotes")
remotes::install_github("vqv/ggbiplot")
# NOTE(review): unqualified repeat of the install on the previous line; at this
# point in the script neither devtools nor remotes has been attached yet.
install_github("vqv/ggbiplot")
#install.packages("devtools")
library(devtools)
library(ggbiplot)
library(remotes)
#You can do a PCA to visualize the difference between the groups using the standardised box cox data
# Runs the PCA on columns 2 to 18 of Whistle_Parameters with centring and
# scaling requested; `T` is shorthand for TRUE.
# NOTE(review): the sample data posted with this script has only 8 numeric
# columns after Country, so 2:18 may not match it -- confirm the column range.
PCA=prcomp(Whistle_Parameters[2:18], center = TRUE, scale=TRUE, retx = T)
# Print the fitted PCA object.
PCA
#Labels
# Extract loadings of the variables
# One row per variable: its name in `Variables` plus the columns of PCA$rotation.
PCAloadings <- data.frame(Variables = rownames(PCA$rotation), PCA$rotation)
#We also need to extract the data for the variable contributions to each of the pc axes.
# NOTE(review): the values below come from PCA$x (one value per row of the
# input data), not from PCA$rotation (one value per variable).
#PCA1
# First column of PCA$x, stored on the data frame and as a standalone vector.
Whistle_Parameters$PCA1<-PCA$x[,1]
PCA1<-Whistle_Parameters$PCA1
#PCA2
# Second column of PCA$x, stored the same way.
Whistle_Parameters$PCA2<-PCA$x[,2]
PCA2<-Whistle_Parameters$PCA2
#We also need to extract the data for the variable contributions to each of the pc axes
# Rotation matrix as a data frame (pca.vars1) and its row names as a plain
# character vector (pca.vars2).
PCA_Vars<- PCA$rotation
pca.vars1<- as.data.frame(PCA_Vars)
pca.vars2<-rownames(pca.vars1)
#Produce Biplot
# Base biplot from ggbiplot, grouped by Country, with ggbiplot's own variable
# arrows switched off (var.axes = FALSE); further ggplot2 components are added
# to it with `+`.
PCA_plot2<-ggbiplot::ggbiplot(PCA, ellipse=TRUE, circle=TRUE, varname.adjust = 2.5, groups=Country,
var.axes=FALSE, var.scale = 1) +
ggtitle("PCA of Acoustic Parameters") +
# NOTE(review): this title setting is added before theme_minimal() below;
# confirm the centring survives the later complete theme.
theme(plot.title = element_text(hjust = 0.5)) +
theme_minimal() +
# Blank out the panel background, grid lines and border.
theme(panel.background = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank()) +
# Dashed reference lines through the origin.
geom_vline(xintercept = 0, lty = 2) +
geom_hline(yintercept = 0, lty = 2) +
# Solid black axis lines.
theme(axis.line.x = element_line(color="black", size = 0.8),
axis.line.y = element_line(color="black", size = 0.8)) +
# Manually drawn loading arrows: from the origin to (PC1, PC2) of each
# variable, stretched by a factor of 4.6.
geom_segment(data = PCAloadings, aes(x = 0, y = 0, xend = (PC1*4.6),
yend = (PC2*4.6)), arrow = arrow(length = unit(1/2, "picas")),
color = "black", alpha=0.60) +
# Two manual colours and two manual point shapes, sharing a legend titled "Country".
scale_color_manual(values=c('#E69F00', '#56B4E9')) +
guides(color = guide_legend(title = "Country"), shape = guide_legend(title = "Country")) +
scale_shape_manual(values = c(17, 16)) +
geom_point(size = 1.0) +
# NOTE(review): this layer is presumably the source of the reported
# aesthetics-length error -- confirm. `data` is pca.vars2, a character vector
# of row names rather than a data frame; x and y use PCA1/PCA2, which were
# taken from PCA$x (one value per observation); `label` is a fixed vector of
# 8 names.
geom_text(data = pca.vars2, aes(x = PCA1*1.15, y = PCA2*1.15,
label = c("Low.Freq", "High.Freq", "Start.Freq", "End.Freq", "Peak.Freq", "Center.Freq",
"Delta.Freq", "Delta.Time")),
check_overlap = F, size = 0.8)
# Print the finished plot.
PCA_plot2
Diagram 1
Data 'Whistle Parameters'
structure(list(Country = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("France", "Germany"
), class = "factor"), Low.Freq = c(1229.098358827, 759.408910773596,
627.156561121131, 857.914227798394, 816.020512657709, 726.252107003186,
603.388640229802, 1120.43591201848, 521.916239230762, 66.4277073927702,
842.827028732445, 1548.00685289626, 743.586603639966, 982.298813187027,
938.898554484786, 908.067281510105, 1650.51840217509, 435.837013213028,
905.64518713548, 621.693057238002, 408.874626715846, 763.284854056395,
1163.36397892984, 1267.90558781017, 1561.05494859439, 269.110242829792,
1628.20258277437, 1381.52801863709, 1259.64885050619, 582.429604337893
), High.Freq = c(270.037998321385, 673.100410969792, 1354.51641087434,
582.818682820139, 1949.42791374982, 533.072062804075, 1462.73353623344,
1475.85981044777, 1672.72713391206, 1360.85064740235, 1027.62671423916,
1637.72929840934, 555.708652550379, 683.537132648398, 1714.01010661954,
267.117743854174, 738.883902818488, 842.919932827166, 124.511854388999,
1940.70836004547, 991.37814311059, 1959.73951887933, 435.882938574683,
223.944759894009, 827.050231552967, 1929.9835959516, 731.983627515309,
934.515637669084, 1381.80407878684, 1735.12129509753), Start.Freq = c(209.223178720873,
1243.93824398519, 714.942866646311, 1230.88587487336, 1133.38920481274,
655.640254812419, 176.783487591076, 566.793710992312, 1259.4101411541,
135.19626803044, 1188.65745695622, 1055.06564740433, 145.269654935287,
994.102001940972, 611.97046714505, 1239.25416627405, 273.254811174704,
1187.0983873612, 532.369927415851, 730.144132713145, 306.959091815357,
761.432150933258, 833.35964575595, 633.492439842001, 1011.91529244509,
1002.6837164403, 874.223664731894, 1039.77329580107, 571.716041690428,
358.734914494325), End.Freq = c(3100.50977989246, 2865.99128764993,
3749.07057886566, 1662.75251781181, 3469.5453928947, 1223.29004528624,
1571.69393815622, 3877.90064918956, 3467.9046701139, 2812.46639335828,
3344.48917919081, 823.479192696172, 3587.72640978872, 1943.42597579601,
3726.46938122543, 904.270327650973, 2199.36865174236, 1608.75061469279,
1233.86988042306, 3573.54022639883, 3840.13648049746, 2696.92512488242,
2152.58952962537, 2225.74334558365, 2836.8576276391, 3909.86535579565,
2642.3514330105, 3830.39875611625, 2532.59589574087, 2047.68204963624
), Peak.Freq = c(615.103200058515, 781.386010343022, 1254.22250479111,
1042.32081012698, 1785.4136370848, 609.196990586287, 936.529532621528,
628.617072934145, 1138.00887772997, 965.752651960148, 331.381776986669,
831.243197072226, 1396.09323352817, 796.57855885715, 1434.02692184993,
581.91826512844, 1482.84787412806, 712.229765737013, 711.849861782499,
379.432018940052, 1495.87429192735, 1375.38825516007, 1568.51147252198,
39.6849748542959, 254.973241980045, 526.048000326837, 1599.41223732841,
1723.40465012645, 461.157566614546, 847.403323972557), Center.Freq = c(-0.00396318509300687,
0.377462792184857, 1.66350671788962, -0.152573382048654, 0.438259482923988,
1.62149800844459, 0.501892326424285, -0.166579179714419, 1.06081611813746,
1.39199162769052, 1.52164843383928, -0.389958351497529, 0.00261034688899059,
0.0726410215179534, 1.00473421813784, 1.27072495569536, 1.41569796343226,
0.737375815997266, 0.412628778604207, 0.51099123600198, 1.65512836540775,
-1.12408230668747, 0.438260531725931, -1.11347230908714, 1.09021071848368,
1.26465014876586, -0.663254496003035, 0.64384027394782, 1.29816899903361,
0.0302328674903059), Delta.Freq = c(2374.48934930825, 2535.28648042237,
930.363518659463, 2372.94461226817, 2578.50041236941, 1652.93682378145,
2412.64071270543, 1643.35808756239, 1597.6988634255, 2347.87731769764,
1545.35983248752, 417.894712991398, 676.404759114593, 2717.74464723351,
2750.52013318133, 1387.50061490775, 1088.18301844773, 208.885548316239,
982.856603814324, 1304.55461743298, 2064.83914948351, 1454.17493801179,
1975.72909682146, 1340.40119652782, 1358.81720189322, 398.974468430338,
1807.83210129773, 197.995771350184, 1458.91300578134, 2459.54002342707
), Delta.Time = c(1.52332103330495, -0.729369599299347, 0.5446606158259,
-0.0806278952890181, -1.03355982391612, 0.381391555011319, -0.710006011318096,
0.184876103317229, -0.0939796220798944, 0.878826387745255, 0.889598364118577,
0.929698941247702, 0.734996499853458, -0.43364546563554, -0.176575903721404,
0.556057576098353, -0.31543237357059, 1.31950129257089, 1.08676447814548,
-1.08756351145615, -0.163851619861579, -0.945982375537661, 0.473134073749239,
-0.231569591521918, -0.565159893817776, 1.14721196081124, -1.14555651287826,
1.60486934195338, -1.00704726744845, 1.14020903183312)), row.names = c(NA,
30L), class = "data.frame")
Your example data has fewer variables than your code indicates (8 numeric columns after Country, not 17), so the call to prcomp() should be changed to:
# PCA on the eight acoustic measurements (columns 2 to 9 of the example data),
# centred and scaled to unit variance. Arguments are spelled out in full
# (`scale.`, `TRUE`) so the call does not rely on partial argument matching or
# on the reassignable shorthand `T`.
PCA <- prcomp(
  Whistle_Parameters[, 2:9],
  center = TRUE, scale. = TRUE, retx = TRUE
)
Your code is also missing the definition of Country, which ggbiplot(groups = Country) needs:
# Grouping factor handed to ggbiplot(groups = Country).
Country <- Whistle_Parameters[["Country"]]
The next lines are ok:
# Labels
# Loadings table: one row per variable, with the variable name as an explicit
# column next to its loading on every principal component.
PCAloadings <- with(PCA, data.frame(Variables = rownames(rotation), rotation))
# Scores of every observation on the first two principal components, kept both
# as columns of the data and as standalone vectors.
# PCA1
Whistle_Parameters$PCA1 <- PCA$x[, "PC1"]
PCA1 <- Whistle_Parameters$PCA1
# PCA2
Whistle_Parameters$PCA2 <- PCA$x[, "PC2"]
PCA2 <- Whistle_Parameters$PCA2
Because you're scaling the arrows by a factor of 4.6 in the plot, the x and y coordinates for the labels should be multiplied by a similar, slightly larger factor so that the labels sit just beyond the arrowheads, say 4.8:
# Label positions: the loadings on PC1 and PC2 stretched by 4.8, i.e. slightly
# further out than the arrows, which are stretched by 4.6 in the plot.
pca.vars1 <- transform(
  as.data.frame(PCA$rotation),
  PC1 = PC1 * 4.8,
  PC2 = PC2 * 4.8
)
# Variable names to print, taken from the row names of the loadings.
pca.vars1[["labels"]] <- rownames(pca.vars1)
You will need to play around with the exact values for pca.vars1$PC1 and pca.vars1$PC2 a bit to avoid any overlap between arrows and labels, but in general that should work.
Then build the plot; most of your code stays the same:
# Build the biplot. ggbiplot draws the observations, the group ellipses and the
# circle; its own variable arrows are switched off (var.axes = FALSE) so that
# longer, rescaled arrows can be drawn manually with geom_segment().
PCA_plot2 <- ggbiplot::ggbiplot(
  PCA,
  ellipse = TRUE, circle = TRUE, varname.adjust = 2.5, groups = Country,
  var.axes = FALSE, var.scale = 1
) +
  ggtitle("PCA of Acoustic Parameters") +
  # The complete theme has to come first: adding theme_minimal() after a
  # theme() call resets that call's settings, which would undo the centred
  # title.
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5),
    panel.background = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_blank(),
    # `linewidth` is the current name of the line-thickness argument; `size`
    # is deprecated for lines (ggplot2 >= 3.4.0).
    axis.line.x = element_line(color = "black", linewidth = 0.8),
    axis.line.y = element_line(color = "black", linewidth = 0.8)
  ) +
  # Dashed reference lines through the origin.
  geom_vline(xintercept = 0, lty = 2) +
  geom_hline(yintercept = 0, lty = 2) +
  # Loading arrows from the origin, stretched by 4.6 so they are readable.
  geom_segment(
    data = PCAloadings,
    aes(x = 0, y = 0, xend = PC1 * 4.6, yend = PC2 * 4.6),
    arrow = arrow(length = unit(1 / 2, "picas")),
    color = "black", alpha = 0.60
  ) +
  # One colour and one point shape per country, sharing a single legend.
  scale_color_manual(values = c("#E69F00", "#56B4E9")) +
  guides(
    color = guide_legend(title = "Country"),
    shape = guide_legend(title = "Country")
  ) +
  scale_shape_manual(values = c(17, 16)) +
  geom_point(size = 1.0)
Except for the call to geom_text:
# Add the variable names to the plot. The layer gets its own data frame with
# one row per variable, so x, y and label all have the same length.
PCA_plot2 +
  geom_text(
    data = pca.vars1, # pca.vars1 here instead of the vector pca.vars2
    aes(x = PC1, y = PC2, label = labels), # label = labels because that column was added to pca.vars1
    check_overlap = FALSE, size = 2 # A bit bigger, 0.8 is very small.
  )