Is there a way to completely snap a linestring to polygon in R?

I have 2 SHP files, one with many linestrings (railroads) and one with many polygons (counties in the U.S.). There is no intersections between those two files at all. My goal is to snap the lines to the nearest polygon border in order to make intersections and calculte the length of the lines in each polygon.

I tried to do it using st_snap from sf package, but I don't get my expected results. This led me to examine this function on some simple data and I got these results:

library(dplyr)
library(tidyverse)
library(sf)
library(units)
library(ggplot2)

poly = st_polygon(list(cbind(c(0, 0, 1, 1, 0), c(0, 1, 1, 0, 0))))
lines = st_multilinestring(list(
 cbind(c(0, 1), c(1.01, 1.05)),
 cbind(c(0, 1), c(-0.01, -.05)),
 cbind(c(1.025, 1.05, 1.025), c(1.05, .5, -.05))
))
t <- st_intersection(poly, lines)
st_is_empty(t)
# [1] TRUE

ggplot()+
    geom_sf(data = lines, col = "green")+
    geom_sf(data = poly, col = "red", fill = NA)

My expected result after snapping would be:

But what I get is:

for tolerance=0.5

snapped1 = st_snap(lines, poly, tolerance=0.5)
print(snapped1)
# MULTILINESTRING ((0 1, 1 1), (0 0, 1 0), (1 1, 1.05 0.5, 1 0))

ggplot()+
    geom_sf(data = lines, col = "green")+
    geom_sf(data = poly, col = "red", fill = NA)+
    geom_sf(data = snapped1, col = "blue", alpha = 0.5)

for tolerance=1.005

snapped2 = st_snap(lines, poly, tolerance=1.005)
print(snapped2)
# MULTILINESTRING ((1 1, 0 1, 0 0, 1 0), (0 1, 1 1, 1 0, 0 0), (0 1, 1 1, 1.05 0.5, 1 0, 0 0))

Which contains the expected result: (0 1, 1 1, 1 0, 0 0), but also unwanted lines.

ggplot()+
    geom_sf(data = lines, col = "green")+
    geom_sf(data = poly, col = "red", fill = NA)+
    geom_sf(data = snapped2, col = "blue", alpha = 0.5)

for tolerance=1.1

snapped3 = st_snap(lines, poly, tolerance=1.1)
print(snapped3)
# MULTILINESTRING ((1 1, 0 1, 0 0, 1 0), (0 1, 1 1, 1 0, 0 0), (1 1, 1 0, 0 0, 0 1))

Which also contains the expected result: (0 1, 1 1, 1 0, 0 0), but unwanted lines as well.

ggplot()+
    geom_sf(data = lines, col = "green")+
    geom_sf(data = poly, col = "red", fill = NA)+
    geom_sf(data = snapped3, col = "blue", alpha = 0.5)

For any bigger tolerance the result is the same as the third one. There is obviously something I don't understand about the way that this function works. I tried to look into the documentation but I could not find anything useful. I would appreciate any insights about this and how to get my expected results whether with this function or some other function.

Thanks.

Solution

st_snap snaps the vertices and segments of a geometry to another geometry's vertices.
[ ?sf::st_snap ]

If your polygons have long(ish) straight sections, you most likely need to add more points (vertices) so the line feature vertices and segments would have something to snap to. And it can be somewhat tricky to figure out optimal balance between snapping tolerance and the required point density.

With this reprex, st_segmentize(..., dfMaxLength = .2)and st_snap(..., tolerance = .18) (i.e. slightly less than dfMaxLength to avoid wrong snaps) should do:

library(sf)
#> Linking to GEOS 3.11.2, GDAL 3.6.2, PROJ 9.2.0; sf_use_s2() is TRUE
library(ggplot2)
library(dplyr, warn.conflicts = FALSE)

poly <- st_polygon(list(cbind(c(0, 0, 1, 1, 0), c(0, 1, 1, 0, 0))))
st_as_text(poly)
#> [1] "POLYGON ((0 0, 0 1, 1 1, 1 0, 0 0))"

# add points to straight lines
poly_2 <- st_segmentize(poly, dfMaxLength = .2)
st_as_text(poly_2)
#> [1] "POLYGON ((0 0, 0 0.2, 0 0.4, 0 0.6, 0 0.8, 0 1, 0.2 1, 0.4 1, 0.6 1, 0.8 1, 1 1, 1 0.8, 1 0.6, 1 0.4, 1 0.2, 1 0, 0.8 0, 0.6 0, 0.4 0, 0.2 0, 0 0))"

lines <- st_multilinestring(list(
  cbind(c(0, 1), c(1.01, 1.05)),
  cbind(c(0, 1), c(-0.01, -.05)),
  cbind(c(1.025, 1.05, 1.025), c(1.05, .5, -.05))
))

snapped <- st_snap(lines, poly_2, tolerance = .18)
st_as_text(snapped)
#> [1] "MULTILINESTRING ((0 1, 0.2 1, 0.4 1, 0.6 1, 0.8 1, 1 1), (0 0, 0.2 0, 0.4 0, 0.6 0, 0.8 0, 1 0), (1 1, 1 0.8, 1 0.6, 1 0.4, 1 0.2, 1 0))"

ggplot() +
  geom_sf(data = st_cast(poly_2,"LINESTRING"), aes(color = "poly"), fill = NA, linewidth = 3) +
  geom_sf(data = st_cast(poly_2, "MULTIPOINT"), aes(color = "poly"), size = 6) +
  geom_sf(data = lines, aes(color = "lines"), linewidth = 2, alpha = .8) +
  geom_sf(data = st_cast(lines, "MULTIPOINT"), aes(color = "lines"), size = 4, alpha = .8) +
  geom_sf(data = snapped, aes(color = "snapped"), linewidth = 1, alpha = .8) +
  geom_sf(data = st_cast(snapped, "MULTIPOINT"), aes(color = "snapped"), size = 2, alpha = .8) +
  scale_color_viridis_d() +
  theme_minimal()

-e- Spatial join with nearest feature predicate

Depending on how features in railroad network dataset are organized, spatial join with nearest feature predicate ( st_join(... , join = st_nearest_feature) ) might be a better approach to tackle that original problem. However, keep an eye on somewhat ambiguous cases, like B & D in the following example.

poly_sf <- 
  st_make_grid(poly, n = 2) |>
  st_sf(geometry = _) |>
  mutate(name = LETTERS[row_number()] |> as.factor())
poly_sf
#> Simple feature collection with 4 features and 1 field
#> Geometry type: POLYGON
#> Dimension:     XY
#> Bounding box:  xmin: 0 ymin: 0 xmax: 1 ymax: 1
#> CRS:           NA
#>                         geometry name
#> 1 POLYGON ((0 0, 0.5 0, 0.5 0...    A
#> 2 POLYGON ((0.5 0, 1 0, 1 0.5...    B
#> 3 POLYGON ((0 0.5, 0.5 0.5, 0...    C
#> 4 POLYGON ((0.5 0.5, 1 0.5, 1...    D

lines_sf <- 
  st_sfc(lines) |> 
  st_sf(geometry = _) |>
  st_cast("LINESTRING")
lines_sf
#> Simple feature collection with 3 features and 0 fields
#> Geometry type: LINESTRING
#> Dimension:     XY
#> Bounding box:  xmin: 0 ymin: -0.05 xmax: 1.05 ymax: 1.05
#> CRS:           NA
#>                         geometry
#> 1    LINESTRING (0 1.01, 1 1.05)
#> 2  LINESTRING (0 -0.01, 1 -0.05)
#> 3 LINESTRING (1.025 1.05, 1.0...

lines_w_names <- 
  st_join(lines_sf, poly_sf, join = st_nearest_feature)
lines_w_names
#> Simple feature collection with 3 features and 1 field
#> Geometry type: LINESTRING
#> Dimension:     XY
#> Bounding box:  xmin: 0 ymin: -0.05 xmax: 1.05 ymax: 1.05
#> CRS:           NA
#>   name                       geometry
#> 1    C    LINESTRING (0 1.01, 1 1.05)
#> 2    A  LINESTRING (0 -0.01, 1 -0.05)
#> 3    D LINESTRING (1.025 1.05, 1.0...

ggplot() +
  geom_sf(data = poly_sf, aes(fill = name), show.legend = FALSE) +
  geom_sf_label(data = poly_sf, aes(label = name), alpha = .8) +
  geom_sf(data = lines_w_names, aes(color = name),  show.legend = FALSE, linewidth = 2) +
  geom_sf_label(data = lines_w_names, aes(label = name), alpha = .8) +
  scale_fill_viridis_d() +
  scale_color_viridis_d() +
  theme_minimal()

^{Created on 2024-04-21 with reprex v2.1.0}