r pca factoextra

Customise colour and shape of points in PCA using different qualitative properties


I'm quite new to R. I want to colour-code my points according to a qualitative variable called "Fraction", which I have achieved. However, I also want to simultaneously change the shape of the points based on a different qualitative variable called "Landuse".

Currently I have:

# Biplot of SXR_PCA grouped by the Fraction column (via `habillage`), with
# one fixed point shape (19) for every individual and a manual colour palette.
fviz_pca_biplot(
  SXR_PCA,
  label = "var",
  habillage = SXR_input$Fraction,
  pointshape = 19,
  col.var = "black",
  addEllipses = TRUE,
  ellipse.type = "norm",
  legend.title = "Fraction",
  mean.point = FALSE,
  ggtheme = theme_classic()
) +
  ggtitle("") +
  scale_color_manual(values = c("#782ED1", "#D1782E", "#2ED178"))

I've been able to change the shape based on the "Fraction" variable, and I've been able to change all points to the same shape, but I'm struggling to find anything that lets me change the shape based on a different variable.


Solution

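  • I think you need to add your own geom_point() layer in order to change the shapes: set pointshape = NA so that fviz_pca_biplot() does not draw its own points (this is presumably what produces the "Removed 150 rows" warning at the end, which can be ignored here), then map shape to the second variable in geom_point(), e.g.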

    library(factoextra)
    #> Loading required package: ggplot2
    #> Welcome! Want to learn more? See two factoextra-related books
    
    data(iris)
    # PCA on the four numeric columns (column 5, Species, is dropped).
    # The argument is spelled `scale.` in full: that is prcomp()'s actual
    # argument name, and `scale = TRUE` only works via partial matching.
    res.pca <- prcomp(iris[, -5], scale. = TRUE)
    
    # add a second grouping variable: five labels (var_1 ... var_5), each
    # repeated over 30 consecutive rows
    iris$Landuse <- rep(paste0("var_", 1:5), each = 30)
    head(iris)
    #>   Sepal.Length Sepal.Width Petal.Length Petal.Width Species Landuse
    #> 1          5.1         3.5          1.4         0.2  setosa   var_1
    #> 2          4.9         3.0          1.4         0.2  setosa   var_1
    #> 3          4.7         3.2          1.3         0.2  setosa   var_1
    #> 4          4.6         3.1          1.5         0.2  setosa   var_1
    #> 5          5.0         3.6          1.4         0.2  setosa   var_1
    #> 6          5.4         3.9          1.7         0.4  setosa   var_1
    tail(iris)
    #>     Sepal.Length Sepal.Width Petal.Length Petal.Width   Species Landuse
    #> 145          6.7         3.3          5.7         2.5 virginica   var_5
    #> 146          6.7         3.0          5.2         2.3 virginica   var_5
    #> 147          6.3         2.5          5.0         1.9 virginica   var_5
    #> 148          6.5         3.0          5.2         2.0 virginica   var_5
    #> 149          6.2         3.4          5.4         2.3 virginica   var_5
    #> 150          5.9         3.0          5.1         1.8 virginica   var_5
    
    # Colour still comes from `habillage` (Species here); shape is supplied by
    # the extra geom_point() layer, mapped to Landuse.
    # NOTE(review): pointshape = NA presumably stops fviz_pca_biplot() from
    # drawing its own points, which would explain the warning below - confirm.
    fviz_pca_biplot(
      res.pca,
      label = "var",
      habillage = iris$Species,
      pointshape = NA,
      col.var = "black",
      addEllipses = TRUE,
      ellipse.type = "norm",
      legend.title = "Fraction",
      mean.point = FALSE,
      ggtheme = theme_classic()
    ) +
      ggtitle("") +
      geom_point(aes(shape = iris$Landuse)) +
      scale_color_manual(values = c("#782ED1", "#D1782E", "#2ED178")) +
      scale_shape_manual(
        name = "Landuse",
        values = c(
          "var_1" = 2,
          "var_2" = 3,
          "var_3" = 4,
          "var_4" = 5,
          "var_5" = 6
        )
      )
    #> Warning: Removed 150 rows containing missing values (`geom_point()`).
    
    

    Created on 2024-02-21 with reprex v2.1.0