rgeospatialopenstreetmapr-sf

How to find the intersection between GPS points and land polygons using OpenStreetMap?


I have a dataset with thousands of GPS locations, part on land, part at sea. I'd like to identify these two groups using OpenStreetMap (OSM) data because of its high resolution.

However, OSM apparently has misoriented or inverted geometries, and often represents the earth as a topological hole. Result: all points are identified as at sea.

A minimal reproducible example:

library(data.table)
library(leaflet)
library(sf)

pts  <- st_as_sf(data,
                 coords = c("longitude", "latitude"),
                 crs    = 4326)

sf_use_s2(FALSE)
# download land-polygons-split-4326.zip from 
## https://osmdata.openstreetmap.de/data/land-polygons.html
land <- st_read("land_polygons.shp") 

land_poly <- land[st_geometry_type(land) %in% c("POLYGON", "MULTIPOLYGON"), ]
land_poly <- st_cast(land_poly, "MULTIPOLYGON") 
landm <- st_make_valid(land_poly)
st_crs(landm) 
bbox <- st_as_sfc(st_bbox(c(xmin = -2.5, xmax = 0, 
                            ymin = 36, ymax = 38)), crs = 4326)
st_crs(bbox) <- st_crs(landm)
land_bbox <- st_crop(landm, bbox)

on_land <- st_intersects(pts, land_bbox, sparse = FALSE)[, 1]
pts_os <- pts[!on_land, ]

Data:

data <- structure(list(latitude = c(37.74492, 37.72914, 37.72934, 37.72892, 
37.72896, 37.72916, 37.72876, 37.73037, 37.73095, 37.73687, 37.74537, 
37.75453, 37.7285, 37.72851, 37.72719, 37.72886, 37.72716, 37.72805, 
37.7273, 37.72729, 37.72831, 37.72916, 37.7691, 37.72895, 37.72849, 
37.72842, 37.74227, 37.72788, 37.74085, 37.72928, 37.72702, 37.75486, 
37.72932, 37.72941, 37.72931, 37.7349, 37.7293, 37.72928, 37.72933, 
37.73162, 37.72934, 37.73019, 37.72742, 37.73006, 37.72956, 37.729, 
37.7293, 37.72905, 37.7293, 37.72611, 37.72924, 37.72927, 37.72613, 
37.72934, 37.72621, 37.7263, 37.7292, 37.72923, 37.72924, 37.72929, 
37.72931, 37.72929, 37.78725, 37.73048, 37.85666, 37.72901, 37.73468, 
37.77225, 37.73039, 37.72892, 37.72933, 37.72932, 37.72909, 37.72937, 
37.72919, 37.72943, 37.72953, 37.76244, 37.74427, 37.72896, 37.72899, 
37.72889, 37.72923, 37.7377, 37.72998, 37.72184, 37.72933, 37.74585, 
37.75051, 37.71287, 37.72912, 37.72924, 37.72942, 37.72931, 37.72922, 
37.72158, 37.72931, 37.76327, 37.77759, 37.72931), longitude = c(-0.71772, 
-0.7059, -0.70633, -0.70623, -0.70872, -0.70676, -0.70895, -0.70347, 
-0.70258, -0.70034, -0.69904, -0.70021, -0.70486, -0.70495, -0.70606, 
-0.70636, -0.70615, -0.70418, -0.70588, -0.70322, -0.70501, -0.70643, 
-0.71751, -0.70493, -0.7049, -0.70471, -0.71993, -0.70301, -0.70871, 
-0.70643, -0.70493, -0.72041, -0.7063, -0.70635, -0.70619, -0.70494, 
-0.70627, -0.70627, -0.70626, -0.70992, -0.70627, -0.70703, -0.7049, 
-0.70925, -0.70639, -0.70672, -0.70638, -0.70725, -0.70666, -0.70652, 
-0.70681, -0.70675, -0.70661, -0.70673, -0.70669, -0.70677, -0.70648, 
-0.70656, -0.70648, -0.70655, -0.70674, -0.7066, -0.71919, -0.70615, 
-0.73799, -0.70658, -0.71353, -0.71072, -0.70673, -0.70643, -0.70628, 
-0.70622, -0.70639, -0.70627, -0.70635, -0.70643, -0.70646, -0.72753, 
-0.71354, -0.7062, -0.7063, -0.70623, -0.70643, -0.71149, -0.70928, 
-0.72652, -0.70652, -0.71616, -0.71856, -0.73371, -0.70643, -0.70613, 
-0.70634, -0.70617, -0.70624, -0.71741, -0.70626, -0.73031, -0.73075, 
-0.70641)), row.names = c(NA, -100L), class = c("data.table", 
"data.frame"))

Solution

  • When you run:

    on_land <- st_intersects(pts, land_bbox, sparse = FALSE)[, 1]
    

    you are only retrieving the first column of the intersects() matrix. In other words, you are only testing whether each point intersects a single feature from land_bbox (a feature/polygon that none of your points intersect with).

    You need something like:

    on_land <- lengths(st_intersects(pts, land_bbox)) > 0
    
    on_land
     #  [1] FALSE FALSE FALSE FALSE  TRUE  TRUE  TRUE FALSE FALSE FALSE FALSE FALSE FALSE
     # [14] FALSE  TRUE  TRUE  TRUE FALSE  TRUE FALSE  TRUE FALSE FALSE FALSE FALSE FALSE
     # [27] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
     # [40] FALSE FALSE  TRUE FALSE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE
     # [53]  TRUE  TRUE  TRUE  TRUE FALSE  TRUE FALSE  TRUE  TRUE  TRUE FALSE FALSE FALSE
     # [66] FALSE FALSE FALSE  TRUE FALSE FALSE FALSE FALSE FALSE FALSE  TRUE  TRUE FALSE
     # [79] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE  TRUE FALSE FALSE FALSE FALSE
     # [92] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
    

    To illustrate:

    library(leaflet)
    library(sf)
    
    # Either
    on_land <- lengths(st_intersects(pts, land_bbox)) > 0
    pts_os <- pts[!on_land, ]
    
    leaflet() %>%
      addTiles() %>%
      addMarkers(data = pts_os)
    

    leaflet map showing all points now at sea