I am trying to measure the distance between a reference stream gage/location/coordinates and multiple other gages on a stream/river that were discovered using nhdplusTools
. I am following a method described here. However, the blogger manually removes stream segments at the end (I need to automate this process) and ultimately only measures the distance between the most upstream and downstream points.
Here is the the code I have used so far:
library(sf)
library(nhdplusTools)
library(mapview)
# custom function for snapping points to flowlines
st_snap_points <- function(x, y, namevar, max_dist = 1000) {
# this evaluates the length of the data
if (inherits(x, "sf")) n = nrow(x)
if (inherits(x, "sfc")) n = length(x)
# this part:
# 1. loops through every piece of data (every point)
# 2. snaps a point to the nearest line geometries
# 3. calculates the distance from point to line geometries
# 4. retains only the shortest distances and generates a point at that intersection
out = do.call(c,
lapply(seq(n), function(i) {
nrst = st_nearest_points(st_geometry(x)[i], y)
nrst_len = st_length(nrst)
nrst_mn = which.min(nrst_len)
if (as.vector(nrst_len[nrst_mn]) > max_dist) return(st_geometry(x)[i])
return(st_cast(nrst[nrst_mn], "POINT")[2])
})
)
# this part converts the data to a dataframe and adds a named column of your choice
out_xy <- st_coordinates(out) %>% as.data.frame()
out_xy <- out_xy %>%
mutate({{namevar}} := x[[namevar]]) %>%
st_as_sf(coords=c("X","Y"), crs=st_crs(x), remove=FALSE)
return(out_xy)
}
# GET FLOWLINES ASSOCIATED WITH COMIDS
# make a list defining the sourcetype and ID
feat_list <- list(featureSource = "comid", featureID = "13167914")
# get upstream flowlines
us_flowlines <- navigate_nldi(nldi_feature = feat_list,
mode="UM",
data_source = "flowlines")
# get downstream mainstem only
ds_flowlines <- navigate_nldi(nldi_feature = feat_list,
mode="DM",
data_source = "flowlines")
# make a list of all the comids we've identified:
all_comids <- c(us_flowlines$UM_flowlines$nhdplus_comid, ds_flowlines$DM_flowlines$nhdplus_comid)
# download all data and create a geopackage with the comid list
gpkg <- subset_nhdplus(comids = as.integer(all_comids),
simplified = TRUE,
overwrite = TRUE,
output_file = "13167914_nhdplus.gpkg",
nhdplus_data = "download",
return_data = FALSE)
# pull the flowlines back in
streams <- read_sf("13167914_nhdplus.gpkg", "NHDFlowline_Network")
# SNAP GAGE POINTS TO FLOWLINE
# create sf of all gage locations
all_gages <- structure(list(location = c("sample_origin", "upstream", "downstream",
"downstream"),
X = c(-83.609581641841, -83.6125472, -83.54465649, -83.52743409),
Y = c(41.6614141167543, 41.65968056, 41.68865998, 41.70310398)),
row.names = c(NA, 4L), class = "data.frame") %>%
sf::st_as_sf(coords = c("X", "Y"), crs = 4326)
# project all_gages and streams
all_gages_proj <- sf::st_transform(all_gages, crs = 26910)
streams_proj <- sf::st_transform(streams, crs = 26910)
# snap points to the flowlines using a 500 m buffer
gages_snapped <- st_snap_points(all_gages_proj, streams_proj, namevar = "location", max_dist = 500)
# view map
mapview(gages_snapped, col.regions="orange", layer.name="Snapped Gages") +
mapview(streams_proj, color="steelblue", layer.name="Flowlines")
The blogger then uses the following code to split the stream into segments. But again, they manually remove stream segments (I need to automate this process) and only measure the distance between upstream and downstream points (I want to measure the distance between a reference gage/location - called 'sample_origin' in the all_gages
object - and the other stream gages).
library(lwgeom)
# MEASURE DISTANCE BETWEEN POINTS
# Split stream segments between points
# Create a 1 m buffer around snapped point
gages_snapped_buff <- st_buffer(gages_snapped, 1)
# Use lwgeom::st_split to split stream segments
segs <- st_collection_extract(lwgeom::st_split(streams_proj, gages_snapped_buff), "LINESTRING") %>%
tibble::rownames_to_column(var = "rowid") %>%
mutate(rowid=as.integer(rowid))
# Calculate river distances
segs_dist <- segs %>%
# drop the "loose ends" on either extent (upstream or downstream) of first/last gage
# here they manually provided rowids from the example
# filter(!rowid %in% c(232, 100, 66, 62, 63)) %>%
mutate(seg_len_m = units::drop_units(units::set_units(st_length(.), "m")),
seg_len_km = seg_len_m/1000) %>%
arrange(desc(hydroseq)) %>%
mutate(total_len_km = cumsum(seg_len_km)) %>%
# filter to just cols of interest
select(rowid, comid, gnis_id:reachcode, streamorde, hydroseq, seg_len_km, total_len_km, geom)
How do I alter this final code chunk to find the distances between the reference gage/location - called 'sample_origin' in the all_gages
object - and the other stream gages?
This is the workflow I use in these cases:
sf::st_line_merge()
to create a single linestringlwgeom::st_endpoint()
and sf::st_nearest_points()
geosphere::bearing()
and spatialEco::bearing.distance()
, then use resulting points to create 'blades' to split linestringlibrary(nhdplusTools)
library(dplyr)
library(tidyr)
library(sf)
library(lwgeom)
library(geosphere)
library(spatialEco)
library(ggplot2)
# Load flowlines (previously created from your code example)
streams <- read_sf("13167914_nhdplus.gpkg", "NHDFlowline_Network")
# Create single linestring of streams object
streams1 <- st_transform(streams, crs = 4326) %>%
st_combine() %>%
st_line_merge() %>%
st_as_sf()
# Create sf of gage locations
all_gages <- structure(list(location = c("sample_origin", "upstream", "downstream",
"downstream"),
X = c(-83.609581641841, -83.6125472, -83.54465649, -83.52743409),
Y = c(41.6614141167543, 41.65968056, 41.68865998, 41.70310398)),
row.names = c(NA, 4L), class = "data.frame") %>%
st_as_sf(coords = c("X", "Y"), crs = 4326)
# Create sf of nearest points of all_gages on streams1 (replaces st_snap_points())
nearest <- all_gages %>%
mutate(rowid1 = 1:n(),
nearest = st_endpoint(st_nearest_points(., streams1)),
azimuth1 = bearing(st_coordinates(geometry),
st_coordinates(nearest)),
azimuth2 = (azimuth1 + 180) %% 360) %>%
st_set_geometry("nearest")
# Filter nearest to remove sample_origin (for joining attritubes to final result)
nearest1 <- filter(nearest, location != "sample_origin") %>%
mutate(rowid1 = 1:n()) %>%
select(rowid1, location)
# Offset modified all_gages points by ~100mm using azimuth
offset_p1 <- data.frame(bearing.distance(st_coordinates(nearest)[,1],
st_coordinates(nearest)[,2],
0.1/111320,
nearest$azimuth1),
rowid1 = 1:nrow(nearest),
azimuth1 = nearest$azimuth1) %>%
st_as_sf(coords = c("X", "Y"), crs = 4326) %>%
rename(geometry1 = geometry)
offset_p2 <- data.frame(bearing.distance(st_coordinates(nearest)[,1],
st_coordinates(nearest)[,2],
0.1/111320,
nearest$azimuth2),
rowid1 = 1:nrow(nearest),
azimuth2 = nearest$azimuth2) %>%
st_as_sf(coords = c("X", "Y"), crs = 4326) %>%
rename(geometry2 = geometry)
# Create linestring blades to split stream1
blades <- left_join(offset_p1, data.frame(offset_p2), by = "rowid1") %>%
select(rowid1, geometry1, geometry2) %>%
data.frame() %>%
pivot_longer(starts_with("geo"),
values_to = "geometry") %>%
data.frame() %>%
st_as_sf() %>%
summarise(geometry = st_combine(geometry), .by = c(rowid1)) %>%
st_cast("LINESTRING") %>%
left_join(select(st_drop_geometry(nearest), rowid1, location), by = "rowid1")
# Split streams1 using blades, drop "loose ends",
# assign location values using nearest1,
# calculate individual and cumulative length, project
segs <- st_collection_extract(st_split(streams1, blades), "LINESTRING") %>%
slice(2:(n()-1)) %>%
mutate(rowid = 1:n(),
seg_len_km = as.numeric(st_length(.) / 1000),
rowid1 = st_nearest_feature(., nearest1)) %>%
left_join(data.frame(nearest1), by = "rowid1") %>%
group_by(location) %>%
arrange(if_else(location == "downstream", rowid, desc(rowid)), .by_group = TRUE) %>%
mutate(cumu_len_km = cumsum(seg_len_km)) %>%
ungroup() %>%
st_transform(26910) %>%
select(rowid, location, seg_len_km, cumu_len_km)
segs
# Simple feature collection with 3 features and 4 fields
# Geometry type: LINESTRING
# Dimension: XY
# Bounding box: xmin: 3790633 ymin: 5432731 xmax: 3795232 ymax: 5441401
# Projected CRS: NAD83 / UTM zone 10N
# # A tibble: 3 × 5
# rowid location seg_len_km cumu_len_km x
# <int> <chr> <dbl> <dbl> <LINESTRING [m]>
# 1 2 downstream 7.18 7.18 (3790785 5433050, 3790812 543309…
# 2 3 downstream 2.37 9.55 (3794443 5439086, 3794501 543914…
# 3 1 upstream 0.314 0.314 (3790633 5432731, 3790672 543277…
# For illustrative purposes only
nearest2 <- left_join(nearest1, data.frame(segs), by = join_by(rowid1 == rowid))
# Plot
ggplot() +
geom_sf(data = st_transform(streams1, 26910), colour = "grey", linewidth = .2) +
geom_sf(data = segs,
aes(colour = location),
linewidth = 1) +
geom_sf(data = st_transform(nearest, 26910), aes(shape = location), size = 1.3) +
geom_sf_text(data = nearest2,
aes(label = paste0(round(cumu_len_km, 2), "km")),
size = 3,
nudge_x = -1500) +
theme(panel.background = element_blank(),
legend.position = "inside", legend.position.inside = c(0.25, 0.8),
legend.title = element_blank(),
legend.spacing.y = unit(-3, "mm"))