rspatial

How do I measure distances between multiple coordinates along a stream/flowline?


I am trying to measure the distance between a reference stream gage/location/coordinates and multiple other gages on a stream/river that were discovered using nhdplusTools. I am following a method described here. However, the blogger manually removes stream segments at the end (I need to automate this process) and ultimately only measures the distance between the most upstream and downstream points.

Here is the the code I have used so far:

library(sf)
library(nhdplusTools)
library(mapview)

# custom function for snapping points to flowlines
st_snap_points <- function(x, y, namevar, max_dist = 1000) {
  
  # this evaluates the length of the data
  if (inherits(x, "sf")) n = nrow(x)
  if (inherits(x, "sfc")) n = length(x)
  
  # this part: 
  # 1. loops through every piece of data (every point)
  # 2. snaps a point to the nearest line geometries
  # 3. calculates the distance from point to line geometries
  # 4. retains only the shortest distances and generates a point at that intersection
  out = do.call(c,
                lapply(seq(n), function(i) {
                  nrst = st_nearest_points(st_geometry(x)[i], y)
                  nrst_len = st_length(nrst)
                  nrst_mn = which.min(nrst_len)
                  if (as.vector(nrst_len[nrst_mn]) > max_dist) return(st_geometry(x)[i])
                  return(st_cast(nrst[nrst_mn], "POINT")[2])
                })
  )
  # this part converts the data to a dataframe and adds a named column of your choice
  out_xy <- st_coordinates(out) %>% as.data.frame()
  out_xy <- out_xy %>% 
    mutate({{namevar}} := x[[namevar]]) %>% 
    st_as_sf(coords=c("X","Y"), crs=st_crs(x), remove=FALSE)
  
  return(out_xy)
}


# GET FLOWLINES ASSOCIATED WITH COMIDS
  # make a list defining the sourcetype and ID
  feat_list <- list(featureSource = "comid", featureID = "13167914")
  
  # get upstream flowlines
  us_flowlines <- navigate_nldi(nldi_feature = feat_list,
                                mode="UM",
                                data_source = "flowlines")
  
  # get downstream mainstem only
  ds_flowlines <- navigate_nldi(nldi_feature = feat_list,
                                mode="DM",
                                data_source = "flowlines")
  
  # make a list of all the comids we've identified:
  all_comids <- c(us_flowlines$UM_flowlines$nhdplus_comid, ds_flowlines$DM_flowlines$nhdplus_comid)
  
  # download all data and create a geopackage with the comid list
  gpkg <- subset_nhdplus(comids = as.integer(all_comids),
                         simplified = TRUE,
                         overwrite = TRUE,
                         output_file = "13167914_nhdplus.gpkg",
                         nhdplus_data = "download",
                         return_data = FALSE)
  
  # pull the flowlines back in
  streams <- read_sf("13167914_nhdplus.gpkg", "NHDFlowline_Network")

# SNAP GAGE POINTS TO FLOWLINE
  # create sf of all gage locations
  all_gages <- structure(list(location = c("sample_origin", "upstream", "downstream", 
                                           "downstream"), 
                              X = c(-83.609581641841, -83.6125472, -83.54465649, -83.52743409), 
                              Y = c(41.6614141167543, 41.65968056, 41.68865998, 41.70310398)), 
                         row.names = c(NA, 4L), class = "data.frame") %>% 
    sf::st_as_sf(coords = c("X", "Y"), crs = 4326)
  
  
  # project all_gages and streams
  all_gages_proj <- sf::st_transform(all_gages, crs = 26910)
  streams_proj <- sf::st_transform(streams, crs = 26910)
  
  # snap points to the flowlines using a 500 m buffer
  gages_snapped <- st_snap_points(all_gages_proj, streams_proj, namevar = "location", max_dist = 500)
  
  # view map
  mapview(gages_snapped, col.regions="orange", layer.name="Snapped Gages") +
    mapview(streams_proj, color="steelblue", layer.name="Flowlines")

The blogger then uses the following code to split the stream into segments. But again, they manually remove stream segments (I need to automate this process) and only measure the distance between upstream and downstream points (I want to measure the distance between a reference gage/location - called 'sample_origin' in the all_gages object - and the other stream gages).

library(lwgeom)
  
# MEASURE DISTANCE BETWEEN POINTS  
  # Split stream segments between points
  # Create a 1 m buffer around snapped point
  gages_snapped_buff <- st_buffer(gages_snapped, 1)
  
  # Use lwgeom::st_split to split stream segments
  segs <- st_collection_extract(lwgeom::st_split(streams_proj, gages_snapped_buff), "LINESTRING") %>% 
    tibble::rownames_to_column(var = "rowid") %>% 
    mutate(rowid=as.integer(rowid))
  
  # Calculate river distances
  segs_dist <- segs %>% 
    # drop the "loose ends" on either extent (upstream or downstream) of first/last gage
    # here they manually provided rowids from the example
    # filter(!rowid %in% c(232, 100, 66, 62, 63)) %>% 
    mutate(seg_len_m = units::drop_units(units::set_units(st_length(.), "m")),
           seg_len_km = seg_len_m/1000) %>% 
    arrange(desc(hydroseq)) %>% 
    mutate(total_len_km = cumsum(seg_len_km)) %>% 
    # filter to just cols of interest
    select(rowid, comid, gnis_id:reachcode, streamorde, hydroseq, seg_len_km, total_len_km, geom)

How do I alter this final code chunk to find the distances between the reference gage/location - called 'sample_origin' in the all_gages object - and the other stream gages?


Solution

  • This is the workflow I use in these cases:

    1. Combine contiguous linestrings using sf::st_line_merge() to create a single linestring
    2. 'Snap' points to linestring using a combination of lwgeom::st_endpoint() and sf::st_nearest_points()
    3. Offset snapped points either side of linestring using geosphere::bearing() and spatialEco::bearing.distance(), then use resulting points to create 'blades' to split linestring
    4. Use blades to split linestring and remove "loose ends", which will always be the first and last row
    5. Assign values to result using subsetted version of snapped points and calculate cumulative length
    library(nhdplusTools)
    library(dplyr)
    library(tidyr)
    library(sf)
    library(lwgeom)
    library(geosphere)
    library(spatialEco)
    library(ggplot2)
    
    # Load flowlines (previously created from your code example)
    streams <- read_sf("13167914_nhdplus.gpkg", "NHDFlowline_Network")
    
    # Create single linestring of streams object
    streams1 <- st_transform(streams, crs = 4326) %>%
      st_combine() %>%
      st_line_merge() %>%
      st_as_sf()
    
    # Create sf of gage locations
    all_gages <- structure(list(location = c("sample_origin", "upstream", "downstream", 
                                             "downstream"), 
                                X = c(-83.609581641841, -83.6125472, -83.54465649, -83.52743409), 
                                Y = c(41.6614141167543, 41.65968056, 41.68865998, 41.70310398)), 
                           row.names = c(NA, 4L), class = "data.frame") %>% 
      st_as_sf(coords = c("X", "Y"), crs = 4326)
    
    # Create sf of nearest points of all_gages on streams1 (replaces st_snap_points())
    nearest <- all_gages %>% 
      mutate(rowid1 = 1:n(),
             nearest = st_endpoint(st_nearest_points(., streams1)),
             azimuth1 = bearing(st_coordinates(geometry),
                                st_coordinates(nearest)),
             azimuth2 = (azimuth1 + 180) %% 360) %>%
      st_set_geometry("nearest")
    
    # Filter nearest to remove sample_origin (for joining attritubes to final result)
    nearest1 <- filter(nearest, location != "sample_origin") %>%
      mutate(rowid1 = 1:n()) %>%
      select(rowid1, location)
    
    # Offset modified all_gages points by ~100mm using azimuth
    offset_p1 <- data.frame(bearing.distance(st_coordinates(nearest)[,1], 
                                             st_coordinates(nearest)[,2], 
                                             0.1/111320, 
                                             nearest$azimuth1),
                            rowid1 = 1:nrow(nearest),
                            azimuth1 = nearest$azimuth1) %>%
      st_as_sf(coords = c("X", "Y"), crs = 4326) %>%
      rename(geometry1 = geometry)
    
    offset_p2 <- data.frame(bearing.distance(st_coordinates(nearest)[,1], 
                                             st_coordinates(nearest)[,2], 
                                             0.1/111320, 
                                             nearest$azimuth2),
                            rowid1 = 1:nrow(nearest),
                            azimuth2 = nearest$azimuth2) %>%
      st_as_sf(coords = c("X", "Y"), crs = 4326) %>%
      rename(geometry2 = geometry)
    
    # Create linestring blades to split stream1
    blades <- left_join(offset_p1, data.frame(offset_p2), by = "rowid1") %>%
      select(rowid1, geometry1, geometry2) %>%
      data.frame() %>%
      pivot_longer(starts_with("geo"),
                   values_to = "geometry") %>%
      data.frame() %>%
      st_as_sf() %>%
      summarise(geometry = st_combine(geometry), .by = c(rowid1)) %>%
      st_cast("LINESTRING") %>%
      left_join(select(st_drop_geometry(nearest), rowid1, location), by = "rowid1")
    
    # Split streams1 using blades, drop "loose ends",
    # assign location values using nearest1,
    # calculate individual and cumulative length, project
    segs <- st_collection_extract(st_split(streams1, blades), "LINESTRING") %>%
      slice(2:(n()-1)) %>%
      mutate(rowid = 1:n(),
             seg_len_km = as.numeric(st_length(.) / 1000),
             rowid1 = st_nearest_feature(., nearest1)) %>%
      left_join(data.frame(nearest1), by = "rowid1") %>%
      group_by(location) %>%
      arrange(if_else(location == "downstream", rowid, desc(rowid)), .by_group = TRUE) %>%
      mutate(cumu_len_km = cumsum(seg_len_km)) %>%
      ungroup() %>%
      st_transform(26910) %>%
      select(rowid, location, seg_len_km, cumu_len_km)
    
    segs
    # Simple feature collection with 3 features and 4 fields
    # Geometry type: LINESTRING
    # Dimension:     XY
    # Bounding box:  xmin: 3790633 ymin: 5432731 xmax: 3795232 ymax: 5441401
    # Projected CRS: NAD83 / UTM zone 10N
    # # A tibble: 3 × 5
    #   rowid location   seg_len_km cumu_len_km                                 x
    #   <int> <chr>           <dbl>       <dbl>                  <LINESTRING [m]>
    # 1     2 downstream      7.18        7.18  (3790785 5433050, 3790812 543309…
    # 2     3 downstream      2.37        9.55  (3794443 5439086, 3794501 543914…
    # 3     1 upstream        0.314       0.314 (3790633 5432731, 3790672 543277…
    
    
    # For illustrative purposes only
    nearest2 <- left_join(nearest1, data.frame(segs), by = join_by(rowid1 == rowid))
    
    # Plot
    ggplot() +
      geom_sf(data = st_transform(streams1, 26910), colour = "grey", linewidth = .2) +
      geom_sf(data = segs, 
              aes(colour = location),
              linewidth = 1) +
      geom_sf(data = st_transform(nearest, 26910), aes(shape = location), size = 1.3) +
      geom_sf_text(data = nearest2,
                   aes(label = paste0(round(cumu_len_km, 2), "km")),
                   size = 3,
                   nudge_x = -1500) +
      theme(panel.background = element_blank(),
            legend.position = "inside", legend.position.inside =  c(0.25, 0.8),
            legend.title = element_blank(),
            legend.spacing.y = unit(-3, "mm"))
    

    1