rspatialr-sfsnap

Is there a way to completely snap a linestring to polygon in R?


I have 2 SHP files, one with many linestrings (railroads) and one with many polygons (counties in the U.S.). There is no intersections between those two files at all. My goal is to snap the lines to the nearest polygon border in order to make intersections and calculte the length of the lines in each polygon.

I tried to do it using st_snap from sf package, but I don't get my expected results. This led me to examine this function on some simple data and I got these results:

library(dplyr)
library(tidyverse)
library(sf)
library(units)
library(ggplot2)

poly = st_polygon(list(cbind(c(0, 0, 1, 1, 0), c(0, 1, 1, 0, 0))))
lines = st_multilinestring(list(
 cbind(c(0, 1), c(1.01, 1.05)),
 cbind(c(0, 1), c(-0.01, -.05)),
 cbind(c(1.025, 1.05, 1.025), c(1.05, .5, -.05))
))
t <- st_intersection(poly, lines)
st_is_empty(t)
# [1] TRUE

ggplot()+
    geom_sf(data = lines, col = "green")+
    geom_sf(data = poly, col = "red", fill = NA)

Lines (green) & polygon (red)

My expected result after snapping would be:

But what I get is:

for tolerance=0.5

snapped1 = st_snap(lines, poly, tolerance=0.5)
print(snapped1)
# MULTILINESTRING ((0 1, 1 1), (0 0, 1 0), (1 1, 1.05 0.5, 1 0))

ggplot()+
    geom_sf(data = lines, col = "green")+
    geom_sf(data = poly, col = "red", fill = NA)+
    geom_sf(data = snapped1, col = "blue", alpha = 0.5)

Snpped lines in blue

for tolerance=1.005

snapped2 = st_snap(lines, poly, tolerance=1.005)
print(snapped2)
# MULTILINESTRING ((1 1, 0 1, 0 0, 1 0), (0 1, 1 1, 1 0, 0 0), (0 1, 1 1, 1.05 0.5, 1 0, 0 0))

Which contains the expected result: (0 1, 1 1, 1 0, 0 0), but also unwanted lines.

ggplot()+
    geom_sf(data = lines, col = "green")+
    geom_sf(data = poly, col = "red", fill = NA)+
    geom_sf(data = snapped2, col = "blue", alpha = 0.5)

Snpped lines in blue

for tolerance=1.1

snapped3 = st_snap(lines, poly, tolerance=1.1)
print(snapped3)
# MULTILINESTRING ((1 1, 0 1, 0 0, 1 0), (0 1, 1 1, 1 0, 0 0), (1 1, 1 0, 0 0, 0 1))

Which also contains the expected result: (0 1, 1 1, 1 0, 0 0), but unwanted lines as well.

ggplot()+
    geom_sf(data = lines, col = "green")+
    geom_sf(data = poly, col = "red", fill = NA)+
    geom_sf(data = snapped3, col = "blue", alpha = 0.5)

Snpped lines in blue

For any bigger tolerance the result is the same as the third one. There is obviously something I don't understand about the way that this function works. I tried to look into the documentation but I could not find anything useful. I would appreciate any insights about this and how to get my expected results whether with this function or some other function.

Thanks.


Solution

  • st_snap snaps the vertices and segments of a geometry to another geometry's vertices.
    [ ?sf::st_snap ]

    If your polygons have long(ish) straight sections, you most likely need to add more points (vertices) so the line feature vertices and segments would have something to snap to. And it can be somewhat tricky to figure out optimal balance between snapping tolerance and the required point density.

    With this reprex, st_segmentize(..., dfMaxLength = .2)and st_snap(..., tolerance = .18) (i.e. slightly less than dfMaxLength to avoid wrong snaps) should do:

    library(sf)
    #> Linking to GEOS 3.11.2, GDAL 3.6.2, PROJ 9.2.0; sf_use_s2() is TRUE
    library(ggplot2)
    library(dplyr, warn.conflicts = FALSE)
    
    poly <- st_polygon(list(cbind(c(0, 0, 1, 1, 0), c(0, 1, 1, 0, 0))))
    st_as_text(poly)
    #> [1] "POLYGON ((0 0, 0 1, 1 1, 1 0, 0 0))"
    
    # add points to straight lines
    poly_2 <- st_segmentize(poly, dfMaxLength = .2)
    st_as_text(poly_2)
    #> [1] "POLYGON ((0 0, 0 0.2, 0 0.4, 0 0.6, 0 0.8, 0 1, 0.2 1, 0.4 1, 0.6 1, 0.8 1, 1 1, 1 0.8, 1 0.6, 1 0.4, 1 0.2, 1 0, 0.8 0, 0.6 0, 0.4 0, 0.2 0, 0 0))"
    
    lines <- st_multilinestring(list(
      cbind(c(0, 1), c(1.01, 1.05)),
      cbind(c(0, 1), c(-0.01, -.05)),
      cbind(c(1.025, 1.05, 1.025), c(1.05, .5, -.05))
    ))
    
    snapped <- st_snap(lines, poly_2, tolerance = .18)
    st_as_text(snapped)
    #> [1] "MULTILINESTRING ((0 1, 0.2 1, 0.4 1, 0.6 1, 0.8 1, 1 1), (0 0, 0.2 0, 0.4 0, 0.6 0, 0.8 0, 1 0), (1 1, 1 0.8, 1 0.6, 1 0.4, 1 0.2, 1 0))"
    
    ggplot() +
      geom_sf(data = st_cast(poly_2,"LINESTRING"), aes(color = "poly"), fill = NA, linewidth = 3) +
      geom_sf(data = st_cast(poly_2, "MULTIPOINT"), aes(color = "poly"), size = 6) +
      geom_sf(data = lines, aes(color = "lines"), linewidth = 2, alpha = .8) +
      geom_sf(data = st_cast(lines, "MULTIPOINT"), aes(color = "lines"), size = 4, alpha = .8) +
      geom_sf(data = snapped, aes(color = "snapped"), linewidth = 1, alpha = .8) +
      geom_sf(data = st_cast(snapped, "MULTIPOINT"), aes(color = "snapped"), size = 2, alpha = .8) +
      scale_color_viridis_d() +
      theme_minimal()
    


    -e- Spatial join with nearest feature predicate

    Depending on how features in railroad network dataset are organized, spatial join with nearest feature predicate ( st_join(... , join = st_nearest_feature) ) might be a better approach to tackle that original problem. However, keep an eye on somewhat ambiguous cases, like B & D in the following example.

    poly_sf <- 
      st_make_grid(poly, n = 2) |>
      st_sf(geometry = _) |>
      mutate(name = LETTERS[row_number()] |> as.factor())
    poly_sf
    #> Simple feature collection with 4 features and 1 field
    #> Geometry type: POLYGON
    #> Dimension:     XY
    #> Bounding box:  xmin: 0 ymin: 0 xmax: 1 ymax: 1
    #> CRS:           NA
    #>                         geometry name
    #> 1 POLYGON ((0 0, 0.5 0, 0.5 0...    A
    #> 2 POLYGON ((0.5 0, 1 0, 1 0.5...    B
    #> 3 POLYGON ((0 0.5, 0.5 0.5, 0...    C
    #> 4 POLYGON ((0.5 0.5, 1 0.5, 1...    D
    
    lines_sf <- 
      st_sfc(lines) |> 
      st_sf(geometry = _) |>
      st_cast("LINESTRING")
    lines_sf
    #> Simple feature collection with 3 features and 0 fields
    #> Geometry type: LINESTRING
    #> Dimension:     XY
    #> Bounding box:  xmin: 0 ymin: -0.05 xmax: 1.05 ymax: 1.05
    #> CRS:           NA
    #>                         geometry
    #> 1    LINESTRING (0 1.01, 1 1.05)
    #> 2  LINESTRING (0 -0.01, 1 -0.05)
    #> 3 LINESTRING (1.025 1.05, 1.0...
    
    lines_w_names <- 
      st_join(lines_sf, poly_sf, join = st_nearest_feature)
    lines_w_names
    #> Simple feature collection with 3 features and 1 field
    #> Geometry type: LINESTRING
    #> Dimension:     XY
    #> Bounding box:  xmin: 0 ymin: -0.05 xmax: 1.05 ymax: 1.05
    #> CRS:           NA
    #>   name                       geometry
    #> 1    C    LINESTRING (0 1.01, 1 1.05)
    #> 2    A  LINESTRING (0 -0.01, 1 -0.05)
    #> 3    D LINESTRING (1.025 1.05, 1.0...
    
    ggplot() +
      geom_sf(data = poly_sf, aes(fill = name), show.legend = FALSE) +
      geom_sf_label(data = poly_sf, aes(label = name), alpha = .8) +
      geom_sf(data = lines_w_names, aes(color = name),  show.legend = FALSE, linewidth = 2) +
      geom_sf_label(data = lines_w_names, aes(label = name), alpha = .8) +
      scale_fill_viridis_d() +
      scale_color_viridis_d() +
      theme_minimal()
    

    Created on 2024-04-21 with reprex v2.1.0