rgis

Replacing polygons with neighboring polygons that have the largest shared border


I have a shapefile of polygons representing land cover types (which includes a "road" category), and a shapefile of lines representing roads. I created a buffer around the road lines, and I want to merge the two shapefiles to integrate the roads into the shapefile of land cover types.

I also want the remaining "road" polygons from the land cover shapefile to be deleted and replaced by the polygons of land cover types that are closest and share the most boundaries with the road polygons.

Here are the shapefiles: https://www.dropbox.com/scl/fo/6245s22xgso3thyjn2qp6/AFL0cHneRZ5Fd_nBjrDnB74?rlkey=7co2m5ev6vw1eps2lwgcb8o9r&st=hmvn93o4&dl=0

Here’s my code for each step:

Step 1: Creation of buffers around the road lines

land_cover_types <- terra::vect("C:/R_scripts/land_cover_types.shp")
## terra::plot(land_cover_types)
roads <- terra::vect("C:/R_scripts/roads.shp")
## terra::plot(roads)

roads <- terra::buffer(roads, 20, quadsegs = 10, capstyle = "round", joinstyle = "round", mitrelimit = NA, singlesided = FALSE)
roads <- terra::aggregate(roads, by = "habitat_ty", dissolve = TRUE)
roads <- roads["habitat_ty"]
## terra::plot(roads)

Step 2: Merging the two shapefiles. There's an issue with my code. I expected that a single geometry would be added to the land cover shapefile, with the "new road" category added below the other land cover types.

land_cover_types <- terra::erase(land_cover_types, roads)
land_cover_types <- terra::union(land_cover_types, roads)
## terra::plot(land_cover_types)
## terra::plot(roads, add = T, col = "red")

Step 3: Remove the old road polygons. I don’t know how to do this part. From the image, for example, the road polygon (with blue lines and a black star) should be removed and replaced by the land cover polygon with the black star that shares the most boundaries. The land cover polygon should encompass the road polygon similar to the function Eliminate in ArcGIS. enter image description here


Solution

  • This turned out to be bit longer than expected.
    Though the idea is simple -- intersection of adjacent polygons is a set of lines (or points), by selecting a longest such feature for each old road polygon (yellow) we can pick a target polygon and merge shapes: enter image description here Edge intersection can be rather sensitive to any kind of topolgy issues and imperfections, so let’s use small buffers and intersection areas instead.

    A small toy dataset makes this bit easier to debug and follow, st_set_precision() and lwgeom::st_snap_to_grid() are used to simplify shapes a bit and to work around precision issues and related artefacts & topology problems.

    Toy data and first few helper functions:

    library(sf)
    #> Linking to GEOS 3.12.1, GDAL 3.8.4, PROJ 9.3.1; sf_use_s2() is TRUE
    library(dplyr, warn.conflicts = FALSE)
    library(ggplot2)
    
    example_precision <- 100
    
    old_road <- 
      st_as_sfc("LINESTRING(1 1, 2 2, 3 1, 4 2)") |> 
      st_buffer(.3) |>
      st_set_precision(example_precision) |> 
      st_sf(geometry = _, class = "old") 
    
    other_cover_ty <- 
      st_make_grid(st_buffer(old_road, .2), n = c(3,1)) |> 
      st_set_precision(example_precision) |> 
      st_sf(geometry = _, class = LETTERS[1:3]) 
    
    cover_ty <- 
      bind_rows(old_road, other_cover_ty) |>
      st_difference() |> 
      st_set_precision(example_precision)
    tibble(cover_ty)
    #> # A tibble: 4 × 2
    #>   class                                                                 geometry
    #>   <chr>                                                               <GEOMETRY>
    #> 1 old   POLYGON ((1.79 2.21, 1.8 2.22, 1.81 2.23, 1.82 2.24, 1.84 2.25, 1.85 2.…
    #> 2 A     POLYGON ((1.83 0.5, 0.5 0.5, 0.5 2.5, 1.83 2.5, 1.83 2.245, 1.82 2.24, …
    #> 3 B     MULTIPOLYGON (((3.17 1.59, 3 1.42, 2.21 2.21, 2.2 2.22, 2.19 2.23, 2.18…
    #> 4 C     POLYGON ((4.5 2.5, 4.5 0.5, 3.17 0.5, 3.17 0.755, 3.18 0.76, 3.19 0.77,…
    
    new_road <- 
      st_as_sfc("LINESTRING(1 1.5, 4 1.5)") |> 
      st_buffer(.3) |> 
      lwgeom::st_snap_to_grid(.01) |> 
      st_sf(geometry = _, class = "new")
    tibble(new_road)
    #> # A tibble: 1 × 2
    #>   class                                                                 geometry
    #>   <chr>                                                                <POLYGON>
    #> 1 new   ((4 1.8, 4.02 1.8, 4.03 1.8, 4.05 1.8, 4.06 1.79, 4.08 1.79, 4.09 1.79,…
    
    # we can combine 2 sf objects and remove overlaps with bind_rows + st_difference 
    # (order matters(!); and it's probably not the most performant option)
    
    combine_remove_overlaps <- function(x, y, add_id = TRUE){
      result <- 
        # we want y on top as first shapes remain in case of an overlap
        bind_rows(y, x) |> 
        st_difference() |> 
        st_cast("MULTIPOLYGON") |> 
        st_cast("POLYGON") 
      if (add_id) tibble::rowid_to_column(result) else result
    }
    
    cover_plot <- function(x){
      ggplot(x) +
        # add alpha to check for overlapping polygons
        geom_sf(aes(fill = class), alpha = .7) +
        geom_sf_label(data = ~st_point_on_surface(.x), aes(label = rowid), alpha = .7) +
        scale_fill_viridis_d(drop = FALSE) +
        theme_minimal()
    }
    
    cover_ty_all_road <- 
      combine_remove_overlaps(cover_ty, new_road) |> 
      # for consistent fill values when plotting
      mutate(class = as.factor(class))
    #> Warning in st_cast.sf(st_cast(st_difference(bind_rows(y, x)), "MULTIPOLYGON"),
    #> : repeating attributes for all sub-geometries for which they may not be
    #> constant
    

    At this point we have a small example where new road buffer (class new) is added to original cover type polygons (A, B, C, old), overlapping regions are removed:

    cover_ty_all_road
    #> Simple feature collection with 9 features and 2 fields
    #> Geometry type: POLYGON
    #> Dimension:     XY
    #> Bounding box:  xmin: 0.5 ymin: 0.5 xmax: 4.5 ymax: 2.5
    #> CRS:           NA
    #>   rowid class                       geometry
    #> 1     1   new POLYGON ((4 1.8, 4.02 1.8, ...
    #> 2     2   old POLYGON ((1.8 2.22, 1.81 2....
    #> 3     3   old POLYGON ((3.79 2.21, 3.8 2....
    #> 4     4   old POLYGON ((3.21 0.79, 3.2 0....
    #> 5     5   old POLYGON ((1.21 0.79, 1.2 0....
    #> 6     6     A POLYGON ((0.5 2.5, 1.83 2.5...
    #> 7     7     B POLYGON ((2.2 2.22, 2.19 2....
    #> 8     8     B POLYGON ((2.38 1.2, 2.79 0....
    #> 9     9     C POLYGON ((3.17 0.755, 3.18 ...
    cover_plot(cover_ty_all_road)
    #> Warning: st_point_on_surface assumes attributes are constant over geometries
    

    enter image description here

    Aim is to merge remaining pieces of old (2, 3, 4, 5) to one of adjacent A or B or C by using longest shared edge to pick the target.

    # Build a recoding table based on shared edge length, 
    # but instead of relying on intersecting edges and lengths, 
    # create a small buffer around "from" features and compare intersection areas.
    # class_col: classification column
    # from: source class (old road)
    # ignore: polygon class(es) to be ignored (new road)
    recode_by_edge_lenght <- function(x, class_col, from, ignore, id_col = "rowid", buffer_dist = 1, keep_geometry = FALSE){
      # browser()
      `%notin%` <- Negate(`%in%`)
      old_idx   <- which(x[[class_col]] == from)                 # [1] 2 3 4 5
      other_idx <- which(x[[class_col]] %notin% c(ignore, from)) # [1] 6 7 8 9
      
      # rowid_recoded class_recoded 
      #   "rowid.1"       "class" 
      resulting_names <- c(paste0(id_col, ".1"), class_col)
      names(resulting_names) <- c(paste0(id_col, "_recoded"), paste0(class_col, "_recoded"))
      
      # intersections of touching polygons are shared edges or points, 
      # but this can be quite sensitive to any topology issues
      # adding a small buffer first and measuring intersection areas seems like a more robust option;
      # we can pick largest isect area by "from" column to find polygons we should merge to
      result <- 
      st_intersection(
        st_buffer(x[old_idx, id_col], dist = buffer_dist),
        x[other_idx,]
      ) |> 
      # example with areas before slicing by rowid would look like this:
      #     rowid rowid.1 class                       geometry isect_buffer_area
      # 2       2       6     A POLYGON ((1.802929 2.237071...       0.006414675
      # 5       5       6     A POLYGON ((1.186682 0.752560...       0.016321400
      # 2.1     2       7     B POLYGON ((1.834076 2.258219...       0.010204237
      # 4       4       8     B POLYGON ((3.165924 0.741781...       0.010204237
      # 3       3       9     C POLYGON ((3.792929 2.227071...       0.016321400
      # 4.1     4       9     C POLYGON ((3.207071 0.772928...       0.006414675    
      slice_max(order_by = st_area(geometry), by = all_of(id_col)) |> 
      rename(all_of(resulting_names)) 
      # we only need a recoding table for non-spatial join, so we can drop geometry
      if (keep_geometry) result else st_drop_geometry(result)
    }
    
    old_road_recode <- recode_by_edge_lenght(cover_ty_all_road, class_col = "class", from = "old", ignore = "new", buffer_dist = .01)
    #> Warning: attribute variables are assumed to be spatially constant throughout
    #> all geometries
    
    # add recoded rowid and class values through join, update with coalesce, union
    union_by_recode <- function(x, recode_df, class_col){
      id_col <- names(recode_df)[1]
      # set one-to-one as a safeguard, throws error if there are multiple matches
      left_join(x, recode_df, by = id_col, relationship = "one-to-one") |>
        # coalesce across all (x_recoded, x) column pairs, result in x
        mutate(across(ends_with("_recoded"), \(x) coalesce(x, get(sub("_recoded", "", cur_column()))), .names = '{sub("_recoded", "", .col)}')) |> 
        # mutate(across(ends_with("_recoded"), \(x) coalesce(x, get(str_split_i(cur_column(), "_", 1))), .names = '{str_split_i(.col, "_", 1)}')) |> 
        #   rowid class rowid_recoded class_recoded                       geometry
        # 1     1   new            NA          <NA> POLYGON ((4 1.8, 4.02 1.8, ...
        # 2     7     B             7             B POLYGON ((1.8 2.22, 1.81 2....
        # 3     9     C             9             C POLYGON ((3.79 2.21, 3.8 2....
        # 4     8     B             8             B POLYGON ((3.21 0.79, 3.2 0....
        # 5     6     A             6             A POLYGON ((1.21 0.79, 1.2 0....
        # 6     6     A            NA          <NA> POLYGON ((0.5 2.5, 1.83 2.5...
        # 7     7     B            NA          <NA> POLYGON ((2.2 2.22, 2.19 2....
        # 8     8     B            NA          <NA> POLYGON ((2.38 1.2, 2.79 0....
        # 9     9     C            NA          <NA> POLYGON ((3.17 0.75, 3.18 0...    
        summarise(across(geometry, st_union), .by = all_of(c(id_col, class_col)))
    }
    cover_ty_upd <- union_by_recode(cover_ty_all_road, old_road_recode, class_col = "class")
    

    Resulting sf object:

    cover_ty_upd
    #> Simple feature collection with 5 features and 2 fields
    #> Geometry type: POLYGON
    #> Dimension:     XY
    #> Bounding box:  xmin: 0.5 ymin: 0.5 xmax: 4.5 ymax: 2.5
    #> CRS:           NA
    #>   rowid class                       geometry
    #> 1     1   new POLYGON ((4 1.8, 4.02 1.8, ...
    #> 2     7     B POLYGON ((1.81 2.23, 1.82 2...
    #> 3     9     C POLYGON ((3.18 0.76, 3.19 0...
    #> 4     8     B POLYGON ((3.17 0.5, 1.83 0....
    #> 5     6     A POLYGON ((0.84 1.25, 0.85 1...
    cover_plot(cover_ty_upd)
    #> Warning: st_point_on_surface assumes attributes are constant over geometries
    

    enter image description here


    Let’s test this on some real data.

    I’m trying not to alter input data too much, but I’m afraid it’s necessary to transform from Web-Mercator so measurements (buffer radius) would make sense. Feel free to transform it back to EPSG:3857 if needed and/or choose any other more appropriate projection instead of UTM. Also note that multipolygons are split into individual polygons and that there's at least 1 corner case where old Road polygon is not merged with anything, just reclassified.

    library(mapview)
    library(leafsync)
    
    shp_url <- "https://www.dropbox.com/scl/fo/6245s22xgso3thyjn2qp6/AFL0cHneRZ5Fd_nBjrDnB74?rlkey=7co2m5ev6vw1eps2lwgcb8o9r&e=1&st=hmvn93o4&dl=1"
    shp <- curl::curl_download(shp_url, "tmp.shp.zip")
    
    # load and transform from Web-Mercator to UTM zone 18N (for proper metric buffer);
    # create buffer, union by habitat_ty
    road_buff <- 
      read_sf(shp, layer = "roads") |> 
      st_transform(32618) |> 
      st_buffer(dist = 20, nQuadSegs = 10) |> 
      summarise(across(geometry, st_union), .by = habitat_ty)
    
    # load, transform, combine with road_buff
    land_cover <- 
      read_sf(shp, layer = "land_cover_types") |> 
      st_cast("POLYGON") |> 
      st_transform(32618) |> 
      combine_remove_overlaps(road_buff)
    #> Warning in st_cast.sf(read_sf(shp, layer = "land_cover_types"), "POLYGON"):
    #> repeating attributes for all sub-geometries for which they may not be constant
    #> Warning in st_cast.sf(st_cast(st_difference(bind_rows(y, x)), "MULTIPOLYGON"),
    #> : repeating attributes for all sub-geometries for which they may not be
    #> constant
    

    Before trying to merge Road polygons with other polygons:

    mapview(land_cover, zcol = "habitat_ty")
    

    enter image description here

    
    (old_road_recode <- recode_by_edge_lenght(land_cover, class_col = "habitat_ty", from = "Road", ignore = "New road"))
    #> Warning: attribute variables are assumed to be spatially constant throughout
    #> all geometries
    #> # A tibble: 26 × 3
    #>    rowid rowid_recoded habitat_ty_recoded        
    #>  * <int>         <int> <chr>                     
    #>  1   104            17 Deciduous and mixed forest
    #>  2    93            38 Deciduous and mixed forest
    #>  3    94            38 Deciduous and mixed forest
    #>  4    95            38 Deciduous and mixed forest
    #>  5   114            43 Deciduous and mixed forest
    #>  6   117           282 Deciduous and mixed forest
    #>  7    98            59 Deciduous and mixed forest
    #>  8    99            59 Deciduous and mixed forest
    #>  9   100            59 Deciduous and mixed forest
    #> 10   101            59 Deciduous and mixed forest
    #> # ℹ 16 more rows
    
    # distribution of target types
    old_road_recode |> 
      count(habitat_ty_recoded)
    #> # A tibble: 2 × 2
    #>   habitat_ty_recoded             n
    #>   <chr>                      <int>
    #> 1 Coniferous forest              1
    #> 2 Deciduous and mixed forest    25
    
    land_cover_upd <- 
      union_by_recode(land_cover, recode_df = old_road_recode, class_col = "habitat_ty")
    

    After merging:

    land_cover_upd
    #> Simple feature collection with 354 features and 2 fields
    #> Geometry type: GEOMETRY
    #> Dimension:     XY
    #> Bounding box:  xmin: 654117.3 ymin: 5017527 xmax: 657048.2 ymax: 5021843
    #> Projected CRS: WGS 84 / UTM zone 18N
    #> # A tibble: 354 × 3
    #>    rowid habitat_ty                                                     geometry
    #>    <int> <chr>                                                     <POLYGON [m]>
    #>  1     1 New road                   ((654990.8 5017573, 654992.1 5017570, 65499…
    #>  2     2 New road                   ((655131.5 5017814, 655129.9 5017812, 65512…
    #>  3     3 New road                   ((655198.2 5018359, 655196.8 5018356, 65519…
    #>  4     4 New road                   ((654239.7 5021776, 654242.4 5021777, 65424…
    #>  5     5 New road                   ((655018.6 5021793, 655020.9 5021791, 65502…
    #>  6     6 Deciduous and mixed forest ((656081.5 5018400, 656080.2 5018397, 65607…
    #>  7     7 Deciduous and mixed forest ((655852.4 5018191, 655849.6 5018190, 65584…
    #>  8     8 Deciduous and mixed forest ((656186.4 5018372, 656185.8 5018363, 65618…
    #>  9     9 Deciduous and mixed forest ((656012.1 5018227, 656017.6 5018231, 65602…
    #> 10    10 Deciduous and mixed forest ((656176.6 5018579, 656185.7 5018584, 65618…
    #> # ℹ 344 more rows
    mapview(land_cover_upd, zcol = "habitat_ty")
    

    enter image description here

    Area of interest, before & after:

    sync(
      mapview(land_cover, zcol = "habitat_ty"),
      mapview(land_cover_upd, zcol = "habitat_ty")
    )
    

    enter image description here Created on 2025-01-20 with reprex v2.1.1