I have a dataset with thousands of GPS locations, part on land, part at sea. I'd like to identify these two groups using OpenStreetMap (OSM) data because of its high resolution.
However, OSM apparently has misoriented or inverted geometries, and often represents the earth as a topological hole. Result: all points are identified as at sea.
A minimal reproducible example:
library(data.table)
library(leaflet)
library(sf)
pts <- st_as_sf(data,
coords = c("longitude", "latitude"),
crs = 4326)
sf_use_s2(FALSE)
# download land-polygons-split-4326.zip from
## https://osmdata.openstreetmap.de/data/land-polygons.html
land <- st_read("land_polygons.shp")
land_poly <- land[st_geometry_type(land) %in% c("POLYGON", "MULTIPOLYGON"), ]
land_poly <- st_cast(land_poly, "MULTIPOLYGON")
landm <- st_make_valid(land_poly)
st_crs(landm)
bbox <- st_as_sfc(st_bbox(c(xmin = -2.5, xmax = 0,
ymin = 36, ymax = 38)), crs = 4326)
st_crs(bbox) <- st_crs(landm)
land_bbox <- st_crop(landm, bbox)
on_land <- st_intersects(pts, land_bbox, sparse = FALSE)[, 1]
pts_os <- pts[!on_land, ]
data <- structure(list(latitude = c(37.74492, 37.72914, 37.72934, 37.72892,
37.72896, 37.72916, 37.72876, 37.73037, 37.73095, 37.73687, 37.74537,
37.75453, 37.7285, 37.72851, 37.72719, 37.72886, 37.72716, 37.72805,
37.7273, 37.72729, 37.72831, 37.72916, 37.7691, 37.72895, 37.72849,
37.72842, 37.74227, 37.72788, 37.74085, 37.72928, 37.72702, 37.75486,
37.72932, 37.72941, 37.72931, 37.7349, 37.7293, 37.72928, 37.72933,
37.73162, 37.72934, 37.73019, 37.72742, 37.73006, 37.72956, 37.729,
37.7293, 37.72905, 37.7293, 37.72611, 37.72924, 37.72927, 37.72613,
37.72934, 37.72621, 37.7263, 37.7292, 37.72923, 37.72924, 37.72929,
37.72931, 37.72929, 37.78725, 37.73048, 37.85666, 37.72901, 37.73468,
37.77225, 37.73039, 37.72892, 37.72933, 37.72932, 37.72909, 37.72937,
37.72919, 37.72943, 37.72953, 37.76244, 37.74427, 37.72896, 37.72899,
37.72889, 37.72923, 37.7377, 37.72998, 37.72184, 37.72933, 37.74585,
37.75051, 37.71287, 37.72912, 37.72924, 37.72942, 37.72931, 37.72922,
37.72158, 37.72931, 37.76327, 37.77759, 37.72931), longitude = c(-0.71772,
-0.7059, -0.70633, -0.70623, -0.70872, -0.70676, -0.70895, -0.70347,
-0.70258, -0.70034, -0.69904, -0.70021, -0.70486, -0.70495, -0.70606,
-0.70636, -0.70615, -0.70418, -0.70588, -0.70322, -0.70501, -0.70643,
-0.71751, -0.70493, -0.7049, -0.70471, -0.71993, -0.70301, -0.70871,
-0.70643, -0.70493, -0.72041, -0.7063, -0.70635, -0.70619, -0.70494,
-0.70627, -0.70627, -0.70626, -0.70992, -0.70627, -0.70703, -0.7049,
-0.70925, -0.70639, -0.70672, -0.70638, -0.70725, -0.70666, -0.70652,
-0.70681, -0.70675, -0.70661, -0.70673, -0.70669, -0.70677, -0.70648,
-0.70656, -0.70648, -0.70655, -0.70674, -0.7066, -0.71919, -0.70615,
-0.73799, -0.70658, -0.71353, -0.71072, -0.70673, -0.70643, -0.70628,
-0.70622, -0.70639, -0.70627, -0.70635, -0.70643, -0.70646, -0.72753,
-0.71354, -0.7062, -0.7063, -0.70623, -0.70643, -0.71149, -0.70928,
-0.72652, -0.70652, -0.71616, -0.71856, -0.73371, -0.70643, -0.70613,
-0.70634, -0.70617, -0.70624, -0.71741, -0.70626, -0.73031, -0.73075,
-0.70641)), row.names = c(NA, -100L), class = c("data.table",
"data.frame"))
When you run:
on_land <- st_intersects(pts, land_bbox, sparse = FALSE)[, 1]
you are only retrieving the first column of the intersects()
matrix. In other words, you are only testing whether each point intersects a single feature from land_bbox (a feature/polygon that none of your points intersect with).
You need something like:
on_land <- lengths(st_intersects(pts, land_bbox)) > 0
on_land
# [1] FALSE FALSE FALSE FALSE TRUE TRUE TRUE FALSE FALSE FALSE FALSE FALSE FALSE
# [14] FALSE TRUE TRUE TRUE FALSE TRUE FALSE TRUE FALSE FALSE FALSE FALSE FALSE
# [27] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
# [40] FALSE FALSE TRUE FALSE TRUE TRUE TRUE FALSE TRUE TRUE TRUE TRUE TRUE
# [53] TRUE TRUE TRUE TRUE FALSE TRUE FALSE TRUE TRUE TRUE FALSE FALSE FALSE
# [66] FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE TRUE TRUE FALSE
# [79] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE
# [92] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
To illustrate:
library(leaflet)
library(sf)
# Either
on_land <- lengths(st_intersects(pts, land_bbox)) > 0
pts_os <- pts[!on_land, ]
leaflet() %>%
addTiles() %>%
addMarkers(data = pts_os)