Can you please help me with my PCA? I would like to change the plot so that each species has a different color and the 2-3 organisms within each species each have a different symbol. It should look like this: enter image description here
So far I tried the following code:
# Asker's attempt: run a PCA on the measured compound classes and draw it with
# ggbiplot, colouring by species.
# NOTE(review): setwd() to a user-specific path makes the script non-portable.
setwd("~/Schwarze Johannisbeeren/SJ Wein mit nicht Sc/PCA/stackoverflow frage")
# Semicolon-separated input; check.names = FALSE keeps the header names as written.
results = read.csv("results.csv", sep = ";", encoding = "UTF-8", header=TRUE, check.names=FALSE)
# PCA on columns 3 to 7 (lactones ... acids in the data printed below),
# centred and scaled to unit variance.
results.pca <- prcomp(results[,c(3:7)],
center = TRUE,
scale. = TRUE)
# Organism label (column 1), one entry per observation.
results.organism <- results[, 1]
# Species label (column 2), one entry per observation.
results.species <- results[, 2]
summary(results.pca)
library(ggplot2)
library(ggbiplot)
# alpha = 0 presumably hides ggbiplot's own points so that only the
# geom_point() layer added below is visible; the ellipses are grouped by species.
ggbiplot(results.pca, alpha=0, obs.scale = 1, var.scale = 1 ,ellipse = TRUE,ellipse.prob=0.68, circle = F, varname.size=0,
var.axes = F, groups=results$species) +
theme_bw()+
# Only colour is mapped here; no shape aesthetic is mapped, so the
# scale_shape_manual() below has nothing to act on.
geom_point(aes( colour=factor(results.species)), size=2)+
# Intended mapping: organism name -> point shape (1, 2 or 3 within each species).
scale_shape_manual(values= c("Mt1"= 1, "Mt2" =2, "Al1"= 1, "Al2" =2, "Bg1" =1, "Bg2"=2, "Bg3" =3, "Cs1"= 1, "Cs2" =2, "Cs3" =3, "Df1"= 1, "Df2" =2, "Df3" =3))+
#scale_color_brewer(name= "organism", type = "qual", palette = 2)+
#scale_x_continuous (limits = c (-1, 9))+
# Bold, black, size-12 axis text and titles.
theme(axis.text.x = element_text(size = 12, colour = "black", vjust = 0.5, hjust = 1, face= "bold"),
axis.title.y = element_text(size = 12, face = "bold"),
axis.title.x = element_text(size = 12, face = "bold"),
axis.text.y = element_text(colour = "black", size = 12, face = "bold"))
And this is my data:
> results
organism species lactones cyanides alcohols ethers acids
1 Mt1 Mt 23435.167 166.4 137653.9 4040.1 1131.52
2 Mt1 Mt 23303.111 168.9 153511.0 4529.1 1148.52
3 Mt1 Mt 22340.556 176.6 150719.9 3255.8 1200.88
4 Mt2 Mt 51519.222 175.9 173401.1 3890.1 1196.12
5 Mt2 Mt 48824.500 166.5 171614.4 3694.1 1132.20
6 Mt2 Mt 50427.278 165.4 168865.1 3693.2 1124.72
7 Al1 Al 25260.222 162.0 211737.4 9563.9 1101.60
8 Al1 Al 23177.556 161.5 199886.7 10403.3 1098.20
9 Al1 Al 27903.000 156.2 240088.4 11897.1 1062.16
10 Al2 Al 5993.722 180.4 289334.9 6673.3 1226.72
11 Al2 Al 7307.389 169.7 275631.1 8333.4 1153.96
12 Al2 Al 9419.167 147.5 277924.5 9622.2 1003.00
13 Bg1 Bg 58216.944 132.4 92275.3 4099.5 900.32
14 Bg1 Bg 69860.222 147.4 105654.9 4080.6 1002.32
15 Bg1 Bg 72809.333 145.8 111731.3 4014.6 991.44
16 Bg2 Bg 51584.611 142.9 105548.2 6450.1 971.72
17 Bg2 Bg 57738.056 141.2 117728.9 6332.4 960.16
18 Bg2 Bg 53356.056 142.7 110260.2 6506.2 970.36
19 Bg3 Bg 41983.389 130.8 103799.4 4781.8 889.44
20 Bg3 Bg 46930.722 148.3 113944.6 5151.6 1008.44
21 Bg3 Bg 49487.611 139.4 121976.5 5318.3 947.92
22 Cs1 Cs 7155.056 161.6 221538.8 8356.0 1098.88
23 Cs1 Cs 8153.611 151.0 179823.0 7961.2 1026.80
24 Cs1 Cs 7445.722 168.6 176978.0 8196.5 1146.48
25 Cs2 Cs 10771.556 126.4 144314.1 8634.6 859.52
26 Cs2 Cs 12239.556 142.6 142913.7 9471.9 969.68
27 Cs2 Cs 13788.611 136.1 131506.7 9390.4 925.48
28 Cs3 Cs 12082.111 152.0 171730.0 6259.6 1033.60
29 Cs3 Cs 14331.556 143.3 141748.7 7532.8 974.44
30 Cs3 Cs 14123.056 158.2 150303.0 7755.8 1075.76
31 Df1 Df 26906.778 156.2 310203.9 5505.5 1062.16
32 Df1 Df 20689.111 163.5 214322.9 5315.6 1111.80
33 Df1 Df 22872.722 154.1 197572.9 4627.7 1047.88
34 Df2 Df 18838.222 159.2 125167.6 12372.9 1082.56
35 Df2 Df 18218.667 155.8 127077.2 11182.0 1059.44
36 Df2 Df 18545.389 156.2 154400.4 10543.6 1062.16
37 Df3 Df 19924.111 156.4 199472.6 4452.3 1063.52
38 Df3 Df 22504.056 158.0 196343.0 3994.1 1074.40
39 Df3 Df 16907.278 151.5 185052.9 4084.6 1030.20
>
By the way, is it possible to label the axes with only "PC1 (x %)" instead of "PC1 (x % explained var.)"?
One approach to achieve your desired result would be to first create shape and color palettes which map organism names to shapes and colors. Second, inside your `geom_point`, extend the data by adding a column with the `organism`, for which I use `dplyr::bind_cols`. Doing so allows you to map `organism` on the `shape` and the `color` aes. Finally, get rid of the color legend for the groups using `scale_color_discrete(guide = "none")` and add a second color scale via `ggnewscale::new_scale_color` and a `scale_color_manual`:
Note: An easy fix for the axis titles would be to set them manually using `+ labs(x = ..., y = ...)`.
library(ggplot2)
library(ggbiplot)
# Build the palettes with one entry per distinct organism (e.g. "Bg2") rather
# than one per row, so the named vectors passed to the manual scales have
# unique names.
organisms <- unique(results$organism)
n_char <- nchar(organisms)
# The last character of an organism code is its replicate number within the
# species ("Bg2" -> 2); everything before it is the species code ("Bg").
replicate_idx <- as.integer(substr(organisms, n_char, n_char))
species_code <- substr(organisms, 1L, n_char - 1L)
species_levels <- sort(unique(results$species))

# Shapes: the n-th organism of every species gets the same shape. The number
# of shapes is derived from the data instead of being hard-coded.
pal_shape <- setNames(
  scales::shape_pal()(max(replicate_idx))[replicate_idx],
  organisms
)

# Colours: one hue per species, assigned in sorted species order, then
# repeated for every organism belonging to that species.
species_colors <- setNames(
  scales::hue_pal()(length(species_levels)),
  species_levels
)
pal_color <- setNames(unname(species_colors[species_code]), organisms)

ggbiplot(results.pca,
  alpha = 0, obs.scale = 1, var.scale = 1, ellipse = TRUE,
  ellipse.prob = 0.68, circle = FALSE, varname.size = 0,
  var.axes = FALSE, groups = results$species
) +
  # Drop the legend of the species colour scale used for the ellipses ...
  scale_color_discrete(guide = "none") +
  # ... and start a second, independent colour scale for the points.
  ggnewscale::new_scale_color() +
  # Add the organism column to the layer data so it can be mapped to both
  # shape and colour; identical name and mapping let the two legends merge.
  geom_point(
    data = ~ dplyr::bind_cols(.x, organism = results$organism),
    aes(shape = organism, colour = organism),
    size = 2
  ) +
  scale_shape_manual(values = pal_shape) +
  scale_color_manual(values = pal_color) +
  theme_bw() +
  theme(
    axis.text.x = element_text(size = 12, colour = "black", vjust = 0.5, hjust = 1, face = "bold"),
    axis.title.y = element_text(size = 12, face = "bold"),
    axis.title.x = element_text(size = 12, face = "bold"),
    axis.text.y = element_text(colour = "black", size = 12, face = "bold")
  )
DATA
# Example data as a self-contained data.frame literal (appears to be dput()
# output): 39 observations with the character columns `organism` and `species`
# and the numeric columns `lactones`, `cyanides`, `alcohols`, `ethers`, `acids`.
results <- structure(list(organism = c(
"Mt1", "Mt1", "Mt1", "Mt2", "Mt2",
"Mt2", "Al1", "Al1", "Al1", "Al2", "Al2", "Al2", "Bg1", "Bg1",
"Bg1", "Bg2", "Bg2", "Bg2", "Bg3", "Bg3", "Bg3", "Cs1", "Cs1",
"Cs1", "Cs2", "Cs2", "Cs2", "Cs3", "Cs3", "Cs3", "Df1", "Df1",
"Df1", "Df2", "Df2", "Df2", "Df3", "Df3", "Df3"
), species = c(
"Mt",
"Mt", "Mt", "Mt", "Mt", "Mt", "Al", "Al", "Al", "Al", "Al", "Al",
"Bg", "Bg", "Bg", "Bg", "Bg", "Bg", "Bg", "Bg", "Bg", "Cs", "Cs",
"Cs", "Cs", "Cs", "Cs", "Cs", "Cs", "Cs", "Df", "Df", "Df", "Df",
"Df", "Df", "Df", "Df", "Df"
), lactones = c(
23435.167, 23303.111,
22340.556, 51519.222, 48824.5, 50427.278, 25260.222, 23177.556,
27903, 5993.722, 7307.389, 9419.167, 58216.944, 69860.222, 72809.333,
51584.611, 57738.056, 53356.056, 41983.389, 46930.722, 49487.611,
7155.056, 8153.611, 7445.722, 10771.556, 12239.556, 13788.611,
12082.111, 14331.556, 14123.056, 26906.778, 20689.111, 22872.722,
18838.222, 18218.667, 18545.389, 19924.111, 22504.056, 16907.278
), cyanides = c(
166.4, 168.9, 176.6, 175.9, 166.5, 165.4, 162,
161.5, 156.2, 180.4, 169.7, 147.5, 132.4, 147.4, 145.8, 142.9,
141.2, 142.7, 130.8, 148.3, 139.4, 161.6, 151, 168.6, 126.4,
142.6, 136.1, 152, 143.3, 158.2, 156.2, 163.5, 154.1, 159.2,
155.8, 156.2, 156.4, 158, 151.5
), alcohols = c(
137653.9, 153511,
150719.9, 173401.1, 171614.4, 168865.1, 211737.4, 199886.7, 240088.4,
289334.9, 275631.1, 277924.5, 92275.3, 105654.9, 111731.3, 105548.2,
117728.9, 110260.2, 103799.4, 113944.6, 121976.5, 221538.8, 179823,
176978, 144314.1, 142913.7, 131506.7, 171730, 141748.7, 150303,
310203.9, 214322.9, 197572.9, 125167.6, 127077.2, 154400.4, 199472.6,
196343, 185052.9
), ethers = c(
4040.1, 4529.1, 3255.8, 3890.1,
3694.1, 3693.2, 9563.9, 10403.3, 11897.1, 6673.3, 8333.4, 9622.2,
4099.5, 4080.6, 4014.6, 6450.1, 6332.4, 6506.2, 4781.8, 5151.6,
5318.3, 8356, 7961.2, 8196.5, 8634.6, 9471.9, 9390.4, 6259.6,
7532.8, 7755.8, 5505.5, 5315.6, 4627.7, 12372.9, 11182, 10543.6,
4452.3, 3994.1, 4084.6
), acids = c(
1131.52, 1148.52, 1200.88,
1196.12, 1132.2, 1124.72, 1101.6, 1098.2, 1062.16, 1226.72, 1153.96,
1003, 900.32, 1002.32, 991.44, 971.72, 960.16, 970.36, 889.44,
1008.44, 947.92, 1098.88, 1026.8, 1146.48, 859.52, 969.68, 925.48,
1033.6, 974.44, 1075.76, 1062.16, 1111.8, 1047.88, 1082.56, 1059.44,
1062.16, 1063.52, 1074.4, 1030.2
)), class = "data.frame", row.names = c(
"1",
"2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13",
"14", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24",
"25", "26", "27", "28", "29", "30", "31", "32", "33", "34", "35",
"36", "37", "38", "39"
))