I have a shapefile and a dataframe that I downloaded from open source site. By joining them, the data does not seem to match any more with the polygon.
library(sf)
library(leaflet)
library(dplyr)
library(leaflet.extras)
#Create a dataframe from https://www.data.gouv.fr/fr/datasets/r/576fc1c9-ec6e-44bd-817f-f21bc9ebf3a3
raw_data <- read.csv("fr-en-dnb-par-etablissement.csv",sep=";") %>%
filter(ï..session == 2021)%>%
filter(code_departement == "075")%>%
select(patronyme,taux_de_reussite)
#Create a shapefile from https://parisdata.opendatasoft.com/api/explore/v2.1/catalog/datasets/secteurs-scolaires-colleges/exports/shp?lang=fr&timezone=Europe%2FBerlin
my_shapefile <- st_read("secteurs-scolaires-colleges")
#Create a SpatialPolygon object and a copy to be able to compare
spdf <- as_Spatial(my_shapefile)
spdf_copy <- as_Spatial(my_shapefile)
#Join data from the dataframe to one of the SpatialPolygondataframe and print the map
spdffiltered <- spdf[spdf$annee_scol == "2023-2024",]
spdffiltered@data <- spdffiltered@data %>%
left_join(raw_data, by = c("libelle" = "patronyme"))
nc_pal1 <- colorFactor("magma", domain = spdffiltered@data$etiquette)
spdffiltered %>%
leaflet() %>%
addProviderTiles("CartoDB") %>%
addPolygons(
weight = 1,
color = ~nc_pal1(etiquette),
label = ~paste0("Area : ", etiquette),
highlightOptions = highlightOptions(weight = 5, color = "white", bringToFront = TRUE)
)
#Print the map of the other SpatialPolygon object without joining
spdffiltered <- spdf_copy[ spdf_copy$annee_scol == "2023-2024",]
nc_pal1 <- colorFactor("magma", domain = spdffiltered@data$etiquette)
spdffiltered %>%
leaflet() %>%
addProviderTiles("CartoDB") %>%
addPolygons(
weight = 1,
color = ~nc_pal1(etiquette),
label = ~paste0("Area : ", etiquette),
highlightOptions = highlightOptions(weight = 5, color = "white", bringToFront = TRUE)
)
On the map without joining, I have for example "BUFFON" at the position showed on the image which is right.
On the map with joining, I have "JEAN-BAPTISTE SAY" at the same position showed on the image which is wrong.
And we can find the same mismatches on some other positions (but not all).
Could you please help me to understand and correct it?
It's because after left_join the data
slot in SpatialPolygons are shifted/mixed somehow.
#Create a SpatialPolygon object and a copy to be able to compare
spdf <- as_Spatial(my_shapefile)
spdf_copy <- as_Spatial(my_shapefile)
#Join data from the dataframe to one of the SpatialPolygondataframe and print the map
spdffiltered <- spdf[spdf$annee_scol == "2023-2024",]
which(spdffiltered@data$libelle == "BUFFON")
#> [1] 46
Till now BUFFON data is in row 46, and corresponding polygon is in row 46. However after join:
spdffiltered@data <- spdffiltered@data %>%
left_join(raw_data, by = c("libelle" = "patronyme"))
which(spdffiltered@data$libelle == "BUFFON")
#> [1] 47
But the polygon remained in row 46. You may need to sort them appropriately after the join.
There is no need to convert it to SpatialPolygons
just for joining/subseting, it can be done within {sf} directly:
library(sf)
library(leaflet)
library(dplyr)
library(leaflet.extras)
raw_data <- read.csv("fr-en-dnb-par-etablissement.csv",sep=";") %>%
filter(session == 2021)%>%
filter(code_departement == "075")%>%
select(patronyme,taux_de_reussite)
my_shapefile <- st_read("secteurs-scolaires-colleges.shp")
spdffiltered <- my_shapefile |>
subset(annee_scol == "2023-2024")
spdffiltered %>%
leaflet() %>%
addProviderTiles("CartoDB") %>%
addPolygons(
weight = 1,
color = ~nc_pal1(etiquette),
label = ~paste0("Area : ", etiquette),
highlightOptions = highlightOptions(weight = 5, color = "white", bringToFront = TRUE)
)
Now let's join the data and check geometry for BUFFON in both datasets/frames:
spdffiltered_with_join <- spdffiltered |>
dplyr::left_join(raw_data, by = c("libelle" = "patronyme"))
sf::st_geometry(spdffiltered_with_join[spdffiltered_with_join$libelle == "BUFFON", ,]) ==
sf::st_geometry(spdffiltered[spdffiltered$libelle == "BUFFON", ,])
#> [1] TRUE
spdffiltered_with_join %>%
leaflet() %>%
addProviderTiles("CartoDB") %>%
addPolygons(
weight = 1,
color = ~nc_pal1(etiquette),
label = ~paste0("Area : ", etiquette, " / percentage: ", taux_de_reussite),
highlightOptions = highlightOptions(weight = 5, color = "white", bringToFront = TRUE)
)
Created on 2024-10-10 with reprex v2.1.0