I want to compute the mean distance between all points of the toy1 dataset with the closest point of the toy2 dataset.
So for each point of the toy1 dataset, I want to find the closest point among the points of the toy2 dataset, and then compute a mean with all the distances.
I want the distances to be in meters.
the structure of toy1 dataset :
structure(list(random = c(0.424676549155265, 7.62225863989443,
2.49231160385534, 8.09510331135243, 9.83170835534111, 0.697977161034942
), geometry = structure(list(structure(c(1219790.00733145, 6050277.88033381
), class = c("XY", "POINT", "sfg")), structure(c(1217790.00733145,
6050777.88033381), class = c("XY", "POINT", "sfg")), structure(c(1218290.00733145,
6050777.88033381), class = c("XY", "POINT", "sfg")), structure(c(1219290.00733145,
6050777.88033381), class = c("XY", "POINT", "sfg")), structure(c(1219790.00733145,
6050777.88033381), class = c("XY", "POINT", "sfg")), structure(c(1220290.00733145,
6050777.88033381), class = c("XY", "POINT", "sfg"))), class = c("sfc_POINT",
"sfc"), precision = 0, bbox = structure(c(xmin = 1217790.00733145,
ymin = 6050277.88033381, xmax = 1220290.00733145, ymax = 6050777.88033381
), class = "bbox"), crs = structure(list(input = NA_character_,
wkt = "PROJCS[\"RGF_1993_Lambert_93\",\n GEOGCS[\"GCS_RGF_1993\",\n DATUM[\"Reseau_Geodesique_Francais_1993\",\n SPHEROID[\"GRS_1980\",6378137.0,298.257222101]],\n PRIMEM[\"Greenwich\",0.0],\n UNIT[\"Degree\",0.0174532925199433]],\n PROJECTION[\"Lambert_Conformal_Conic_2SP\"],\n PARAMETER[\"False_Easting\",700000.0],\n PARAMETER[\"False_Northing\",6600000.0],\n PARAMETER[\"Central_Meridian\",3.0],\n PARAMETER[\"Standard_Parallel_1\",49.0],\n PARAMETER[\"Standard_Parallel_2\",44.0],\n PARAMETER[\"Latitude_Of_Origin\",46.5],\n UNIT[\"Meter\",1.0]]"), class = "crs"), n_empty = 0L),
long = c(1219790.00733145, 1217790.00733145, 1218290.00733145,
1219290.00733145, 1219790.00733145, 1220290.00733145), lat = c(6050277.88033381,
6050777.88033381, 6050777.88033381, 6050777.88033381, 6050777.88033381,
6050777.88033381)), row.names = c(NA, -6L), sf_column = "geometry", agr = structure(c(random = NA_integer_,
long = NA_integer_, lat = NA_integer_), .Label = c("constant",
"aggregate", "identity"), class = "factor"), class = c("sf",
"tbl_df", "tbl", "data.frame"))
The structure of toy2 dataset :
structure(list(random = c(8.91735465731472, 1.18594053201377,
3.9330403204076, 9.10003933124244, 9.85382732702419, 7.3299086955376,
7.75479080388322, 0.621909373439848, 9.07300168415532, 2.00036991853267
), geometry = structure(list(structure(c(1215290.00733145, 6053277.88033381
), class = c("XY", "POINT", "sfg")), structure(c(1212790.00733145,
6053777.88033381), class = c("XY", "POINT", "sfg")), structure(c(1216790.00733145,
6053777.88033381), class = c("XY", "POINT", "sfg")), structure(c(1217290.00733145,
6053777.88033381), class = c("XY", "POINT", "sfg")), structure(c(1217790.00733145,
6053777.88033381), class = c("XY", "POINT", "sfg")), structure(c(1218290.00733145,
6053777.88033381), class = c("XY", "POINT", "sfg")), structure(c(1219290.00733145,
6053777.88033381), class = c("XY", "POINT", "sfg")), structure(c(1215790.00733145,
6054277.88033381), class = c("XY", "POINT", "sfg")), structure(c(1216790.00733145,
6054277.88033381), class = c("XY", "POINT", "sfg")), structure(c(1221290.00733145,
6054277.88033381), class = c("XY", "POINT", "sfg"))), class = c("sfc_POINT",
"sfc"), precision = 0, bbox = structure(c(xmin = 1212790.00733145,
ymin = 6053277.88033381, xmax = 1221290.00733145, ymax = 6054277.88033381
), class = "bbox"), crs = structure(list(input = NA_character_,
wkt = "PROJCS[\"RGF_1993_Lambert_93\",\n GEOGCS[\"GCS_RGF_1993\",\n DATUM[\"Reseau_Geodesique_Francais_1993\",\n SPHEROID[\"GRS_1980\",6378137.0,298.257222101]],\n PRIMEM[\"Greenwich\",0.0],\n UNIT[\"Degree\",0.0174532925199433]],\n PROJECTION[\"Lambert_Conformal_Conic_2SP\"],\n PARAMETER[\"False_Easting\",700000.0],\n PARAMETER[\"False_Northing\",6600000.0],\n PARAMETER[\"Central_Meridian\",3.0],\n PARAMETER[\"Standard_Parallel_1\",49.0],\n PARAMETER[\"Standard_Parallel_2\",44.0],\n PARAMETER[\"Latitude_Of_Origin\",46.5],\n UNIT[\"Meter\",1.0]]"), class = "crs"), n_empty = 0L),
long = c(1215290.00733145, 1212790.00733145, 1216790.00733145,
1217290.00733145, 1217790.00733145, 1218290.00733145, 1219290.00733145,
1215790.00733145, 1216790.00733145, 1221290.00733145), lat = c(6053277.88033381,
6053777.88033381, 6053777.88033381, 6053777.88033381, 6053777.88033381,
6053777.88033381, 6053777.88033381, 6054277.88033381, 6054277.88033381,
6054277.88033381)), row.names = c(NA, -10L), sf_column = "geometry", agr = structure(c(random = NA_integer_,
long = NA_integer_, lat = NA_integer_), .Label = c("constant",
"aggregate", "identity"), class = "factor"), class = c("sf",
"tbl_df", "tbl", "data.frame"))
I found some topics related, but as the two datasets do not have the same number of points I got errors in the functions. The function distHaversine from geospatial package seems to require the same number of points in the two arguments...
one approach:
library(dplyr)
library(sf)
toy_1_with_distances <-
toy_1 |>
rowwise() |>
mutate(closest_point_row_toy_2 = st_nearest_feature(geometry, toy_2),
closest_point = toy_2[closest_point_row_toy_2,],
dist_to_closest = st_distance(geometry, closest_point)
)
> toy1_with_distances |> pull(dist_to_closest) |> mean()
3123.199 [m]