rgeospatialdistancer-sfnearest-neighbor

Mean of geospatial distance between two datasets of different lengths


I want to compute the mean distance between all points of the toy1 dataset with the closest point of the toy2 dataset.

So for each point of the toy1 dataset, I want to find the closest point among the points of the toy2 dataset, and then compute a mean with all the distances.

I want the distances to be in meters.

enter image description here

the structure of toy1 dataset :

structure(list(random = c(0.424676549155265, 7.62225863989443, 
2.49231160385534, 8.09510331135243, 9.83170835534111, 0.697977161034942
), geometry = structure(list(structure(c(1219790.00733145, 6050277.88033381
), class = c("XY", "POINT", "sfg")), structure(c(1217790.00733145, 
6050777.88033381), class = c("XY", "POINT", "sfg")), structure(c(1218290.00733145, 
6050777.88033381), class = c("XY", "POINT", "sfg")), structure(c(1219290.00733145, 
6050777.88033381), class = c("XY", "POINT", "sfg")), structure(c(1219790.00733145, 
6050777.88033381), class = c("XY", "POINT", "sfg")), structure(c(1220290.00733145, 
6050777.88033381), class = c("XY", "POINT", "sfg"))), class = c("sfc_POINT", 
"sfc"), precision = 0, bbox = structure(c(xmin = 1217790.00733145, 
ymin = 6050277.88033381, xmax = 1220290.00733145, ymax = 6050777.88033381
), class = "bbox"), crs = structure(list(input = NA_character_, 
    wkt = "PROJCS[\"RGF_1993_Lambert_93\",\n    GEOGCS[\"GCS_RGF_1993\",\n        DATUM[\"Reseau_Geodesique_Francais_1993\",\n            SPHEROID[\"GRS_1980\",6378137.0,298.257222101]],\n        PRIMEM[\"Greenwich\",0.0],\n        UNIT[\"Degree\",0.0174532925199433]],\n    PROJECTION[\"Lambert_Conformal_Conic_2SP\"],\n    PARAMETER[\"False_Easting\",700000.0],\n    PARAMETER[\"False_Northing\",6600000.0],\n    PARAMETER[\"Central_Meridian\",3.0],\n    PARAMETER[\"Standard_Parallel_1\",49.0],\n    PARAMETER[\"Standard_Parallel_2\",44.0],\n    PARAMETER[\"Latitude_Of_Origin\",46.5],\n    UNIT[\"Meter\",1.0]]"), class = "crs"), n_empty = 0L), 
    long = c(1219790.00733145, 1217790.00733145, 1218290.00733145, 
    1219290.00733145, 1219790.00733145, 1220290.00733145), lat = c(6050277.88033381, 
    6050777.88033381, 6050777.88033381, 6050777.88033381, 6050777.88033381, 
    6050777.88033381)), row.names = c(NA, -6L), sf_column = "geometry", agr = structure(c(random = NA_integer_, 
long = NA_integer_, lat = NA_integer_), .Label = c("constant", 
"aggregate", "identity"), class = "factor"), class = c("sf", 
"tbl_df", "tbl", "data.frame"))

The structure of toy2 dataset :

structure(list(random = c(8.91735465731472, 1.18594053201377, 
3.9330403204076, 9.10003933124244, 9.85382732702419, 7.3299086955376, 
7.75479080388322, 0.621909373439848, 9.07300168415532, 2.00036991853267
), geometry = structure(list(structure(c(1215290.00733145, 6053277.88033381
), class = c("XY", "POINT", "sfg")), structure(c(1212790.00733145, 
6053777.88033381), class = c("XY", "POINT", "sfg")), structure(c(1216790.00733145, 
6053777.88033381), class = c("XY", "POINT", "sfg")), structure(c(1217290.00733145, 
6053777.88033381), class = c("XY", "POINT", "sfg")), structure(c(1217790.00733145, 
6053777.88033381), class = c("XY", "POINT", "sfg")), structure(c(1218290.00733145, 
6053777.88033381), class = c("XY", "POINT", "sfg")), structure(c(1219290.00733145, 
6053777.88033381), class = c("XY", "POINT", "sfg")), structure(c(1215790.00733145, 
6054277.88033381), class = c("XY", "POINT", "sfg")), structure(c(1216790.00733145, 
6054277.88033381), class = c("XY", "POINT", "sfg")), structure(c(1221290.00733145, 
6054277.88033381), class = c("XY", "POINT", "sfg"))), class = c("sfc_POINT", 
"sfc"), precision = 0, bbox = structure(c(xmin = 1212790.00733145, 
ymin = 6053277.88033381, xmax = 1221290.00733145, ymax = 6054277.88033381
), class = "bbox"), crs = structure(list(input = NA_character_, 
    wkt = "PROJCS[\"RGF_1993_Lambert_93\",\n    GEOGCS[\"GCS_RGF_1993\",\n        DATUM[\"Reseau_Geodesique_Francais_1993\",\n            SPHEROID[\"GRS_1980\",6378137.0,298.257222101]],\n        PRIMEM[\"Greenwich\",0.0],\n        UNIT[\"Degree\",0.0174532925199433]],\n    PROJECTION[\"Lambert_Conformal_Conic_2SP\"],\n    PARAMETER[\"False_Easting\",700000.0],\n    PARAMETER[\"False_Northing\",6600000.0],\n    PARAMETER[\"Central_Meridian\",3.0],\n    PARAMETER[\"Standard_Parallel_1\",49.0],\n    PARAMETER[\"Standard_Parallel_2\",44.0],\n    PARAMETER[\"Latitude_Of_Origin\",46.5],\n    UNIT[\"Meter\",1.0]]"), class = "crs"), n_empty = 0L), 
    long = c(1215290.00733145, 1212790.00733145, 1216790.00733145, 
    1217290.00733145, 1217790.00733145, 1218290.00733145, 1219290.00733145, 
    1215790.00733145, 1216790.00733145, 1221290.00733145), lat = c(6053277.88033381, 
    6053777.88033381, 6053777.88033381, 6053777.88033381, 6053777.88033381, 
    6053777.88033381, 6053777.88033381, 6054277.88033381, 6054277.88033381, 
    6054277.88033381)), row.names = c(NA, -10L), sf_column = "geometry", agr = structure(c(random = NA_integer_, 
long = NA_integer_, lat = NA_integer_), .Label = c("constant", 
"aggregate", "identity"), class = "factor"), class = c("sf", 
"tbl_df", "tbl", "data.frame"))

I found some topics related, but as the two datasets do not have the same number of points I got errors in the functions. The function distHaversine from geospatial package seems to require the same number of points in the two arguments...


Solution

  • one approach:

    library(dplyr)
    library(sf)
    
    toy_1_with_distances <- 
      toy_1 |>
      rowwise() |>
      mutate(closest_point_row_toy_2 = st_nearest_feature(geometry, toy_2),
             closest_point = toy_2[closest_point_row_toy_2,],
             dist_to_closest = st_distance(geometry, closest_point)
             ) 
    
    > toy1_with_distances |> pull(dist_to_closest) |> mean()
    3123.199 [m]