I need to measure the distance between points from the start to the end of a LINESTRING
or MULTILINESTRING
. Currently, I am splitting the LINESTRING
into individual segments and measuring the length of each one. It works, but it takes a lot of time. Is there any built-in method that exists?
Below is a reprex using the Seine river dataset from spData
and the sf
and geos
packages. A solution using terra
or rsgeo
is also of interest.
library(sf)
#> Linking to GEOS 3.10.2, GDAL 3.4.1, PROJ 8.2.1; sf_use_s2() is TRUE
library(geos)
library(spData)
#> To access larger datasets in this package, install the spDataLarge
#> package with: `install.packages('spDataLarge',
#> repos='https://nowosad.github.io/drat/', type='source')`
# Example
lines <- seine[2, ]
# My foo
cumulative_length <-
function(input) {
# Save CRS
crs <- sf::st_crs(input)
# Retrive coordinates
lines_coo <-
sf::st_coordinates(input)
# Count number of segments of linestring
n <- nrow(lines_coo) - 1
# Pre-allocate a list
lines_geos <-
vector(mode = "list", length = n)
# Construct linestrings
for (i in 1:n) {
lines_geos[[i]] <-
geos::geos_make_linestring(lines_coo[i:(i+1),1],
lines_coo[i:(i+1),2],
crs = crs)
}
# Measure cumulative segment length
lines_order <-
sapply(lines_geos, geos::geos_length) |>
append(0, 0) |>
cumsum()
return(lines_order)
}
bench::mark(cumulative_length(lines))
#> # A tibble: 1 × 6
#> expression min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl>
#> 1 cumulative_length(lines) 13ms 13.7ms 72.9 244KB 109.
Created on 2024-03-23 with reprex v2.0.2
The desired output is a monotonically increasing numeric vector of length 0
to st_length(lines)
:
cumulative_length(lines) |>
head()
#> [1] 0.000 1716.196 3290.379 4824.087 6745.759 7446.660
Profiling your function, most of time is used by geos_make_linestring
. So, assuming you can transform your data to a planar coordinates (UTM), you can skip that by calculating directly the distance between two points:
library(sf)
library(geos)
library(spData)
# Example
input <- seine[2, ] |>
st_transform(23032)
# My foo
cumulative_length <- function(input) {
# Save CRS
crs <- sf::st_crs(input)
# Retrive coordinates
lines_coo <-
sf::st_coordinates(input)
# Count number of segments of linestring
n <- nrow(lines_coo) - 1
# Pre-allocate a list
lines_geos <-
vector(mode = "list", length = n)
# Construct linestrings
for (i in 1:n) {
lines_geos[[i]] <-
geos::geos_make_linestring(lines_coo[i:(i+1),1],
lines_coo[i:(i+1),2],
crs = crs)
}
# Measure cumulative segment length
lines_order <-
sapply(lines_geos, geos::geos_length) |>
append(0, 0) |>
cumsum()
return(lines_order)
}
cartesian_dist <- function(x1, y1, x2, y2) {
# √[(x2 − x1)2 + (y2 − y1)2]
xmax <- max(x1, x2)
xmin <- min(x1, x2)
ymax <- max(y1, y2)
ymin <- min(y1, y2)
sqrt((xmax - xmin)^2 + (ymax - ymin)^2)
}
cumulative_length2 <- function(input) {
# Retrive coordinates
lines_coo <- sf::st_coordinates(input)
# Count number of segments of linestring
n <- nrow(lines_coo) - 1
# Pre-allocate a list
dists <- numeric(n)
# Construct linestrings
for (i in 1:n) {
dists[[i]] <- cartesian_dist(lines_coo[i, 1], lines_coo[i, 2], lines_coo[i+1, 1], lines_coo[i+1, 2])
}
# Measure cumulative segment length
c(0, cumsum(dists))
}
microbenchmark::microbenchmark(
cumulative_length(input),
cumulative_length2(input),
times = 100
)
#> Unit: milliseconds
#> expr min lq mean median uq max
#> cumulative_length(input) 22.6733 23.5284 27.012571 24.42025 28.93890 53.6292
#> cumulative_length2(input) 2.5730 2.7048 3.484514 2.82825 2.98925 21.8944
#> neval
#> 100
#> 100
Created on 2024-03-23 with reprex v2.1.0
In case you can't convert to planar coordinates, you can also use the WGS84 projection and use the Haversine equation to calculate the distance between points instead of cartesian distance:
haversine_dist <- function(lon1, lat1, lon2, lat2){
R <- 6371e3 # metres
phi1 <- lat1 * pi/180 # radians
phi2 = lat2 * pi/180 # radians
delta_phi = (lat2-lat1) * pi/180 # radians
delta_lambda = (lon2-lon1) * pi/180
a <- sin(delta_phi/2) * sin(delta_phi/2) + cos(phi1) * cos(phi2) * sin(delta_lambda/2) * sin(delta_lambda/2)
c <- 2 * atan2(sqrt(a), sqrt(1-a))
R * c
}