How to Find the Polygons Outside of a Specified Distance

I am trying to get summary statistics for districts that are outside a specified distance from other districts. I have been able to use dnearneig() to find summary statistics for districts within a specified distance, but I have been unable to figure out how to either convert this for the outside case, or use the results from dnearneigh() as an input to tell R "don't consider these districts". I have also tried using st_distance()/st_is_within_distance() but haven't had any luck. Any advice would be appreciated.

This is the code I have so far for the within a specified distance case.

packages <- c("readxl",  #Allows us to read in excel files
             "writexl", #Allows us to write to excel files 
             "haven",  #
             "stargazer",
             "tidyr",
             "dplyr",
             "labelled",
             "tidyverse",
             "expss", 
             "Hmisc",
             "foreign",
             "spdep",
             "sf",
             "qpcR",
             "plm",
             "ivreg",
             "sjlabelled",
             "Statamarkdown",
             "vctrs")
# library(tables)
# rm(list=setdiff(ls(), c("df_jslc_2000_2019","df_jslc_allyears")))

pacman::p_load(packages, character.only = TRUE)

 df_year <- df[df$YEAR==year,]
 # print(df_year)
 shp_centroid_3 <- st_centroid(df_year$geometry)
 # print(shp_centroid_3)
 print("we made it 2 lines")
 # within_km_10_pre<- st_is_within_distance(df_year$geometry,df_year$geometry,10000)
 ##Filter out self
 # within_km_10 <- within_km_10_pre[which(lengths(within_km_10_pre) >1)]
 within_km_10 <- dnearneigh(shp_centroid_3,5000,10000)

 print(within_km_10)
 shp_centroid_3$TOT_EXP <- df_year$TOTAL_E
 shp_centroid_3$Z_SCORE <- df_year$Z_b_100
 shp_centroid_3$Z_BEACH_100_ORIGINAL <- df_year$Z__100_
 shp_centroid_3$Z_WATER <- df_year$Z_WATER
 shp_centroid_3$Z_WAT_SC <- df_year$Z_WAT_S
 
 
 tourism_10km_mean1 <- unlist(lapply(within_km_10, \(v){mean(shp_centroid_3$TOT_EXP[v],na.rm=TRUE)}))
 # print(tourism_10km_mean1)
 tourism_10km_sum <- unlist(lapply(within_km_10, \(v){sum(shp_centroid_3$TOT_EXP[v],na.rm = TRUE)}))
 tourism_10km_score <- unlist(lapply(within_km_10, \(v){mean(shp_centroid_3$Z_SCORE[v],na.rm=TRUE)}))
 tourism_10km_score_beach_original <- unlist(lapply(within_km_10, \(v){mean(shp_centroid_3$Z_BEACH_100_ORIGINAL[v],na.rm=TRUE)}))
 tourism_10km_water_zscore <- unlist(lapply(within_km_10, \(v){mean(shp_centroid_3$Z_WATER[v],na.rm=TRUE)}))
 tourism_10km_water_zscore_scaled <- unlist(lapply(within_km_10, \(v){mean(shp_centroid_3$Z_WAT_SC[v],na.rm=TRUE)}))

Solution

Short answer: you can negate indices returned by st_is_within_distance() to subset everything that was not within distance.

Longer answer is a slightly modified variant of the one left for one of your previous questions, Finding Subset of Polygon centroids Which Are Within A Certain Distance of the Centroids of Other Polygons in R , this time a bit less verbose.

Steps:

nc dataset from sfpackage as an example
get polygon centroids
st_is_within_distance(..., dist = 50000) on centroids to build a list of "within" indices for every record, 50000m is just more suitable for this particular example dataset
build nested dataset where every record gets a tibble of records that are NOT within distance, subset with negated "within" indices; assume it's only for attribute statistics and exclude all geometries
collect some example statistics from nested tibbles

library(sf)
#> Linking to GEOS 3.9.3, GDAL 3.5.2, PROJ 8.2.1; sf_use_s2() is TRUE
library(dplyr, warn.conflicts = FALSE)
library(ggplot2)

nc <- read_sf(system.file("shape/nc.shp", package="sf")) %>% 
  select(NAME, AREA, starts_with("BIR")) %>% 
  mutate(cntr = st_centroid(geometry),
         within_dist = st_is_within_distance(cntr, dist = 50000))

# ignore geometries for now for more compact output
nc_df <- st_drop_geometry(nc) %>% select(-cntr)

# build a nested tibble, each county row gets a 
# tibble of all counties outside of distance, use rowwise grouping to 
# subset nc_df with "-within_dist" (negated within_dist) of current row
nc_nested <- nc_df %>% 
  rowwise() %>%
  mutate(out_dist = (nc_df[-within_dist, ]) %>% select(-within_dist) %>% list()) %>% 
  # add some summary stats from nested tibbles:
  mutate(out_bir74_mean = mean(out_dist$BIR74),
         out_bir79_mean = mean(out_dist$BIR79)) %>% 
  ungroup() %>% 
  select(-within_dist)
nc_nested
#> # A tibble: 100 × 7
#>    NAME         AREA BIR74 BIR79 out_dist          out_bir74_mean out_bir79_mean
#>    <chr>       <dbl> <dbl> <dbl> <list>                     <dbl>          <dbl>
#>  1 Ashe        0.114  1091  1364 <tibble [96 × 4]>          3374.          4323.
#>  2 Alleghany   0.061   487   542 <tibble [96 × 4]>          3355.          4304.
#>  3 Surry       0.143  3188  3616 <tibble [95 × 4]>          3371.          4325.
#>  4 Currituck   0.07    508   830 <tibble [96 × 4]>          3407.          4357.
#>  5 Northampton 0.153  1421  1606 <tibble [97 × 4]>          3335.          4273.
#>  6 Hertford    0.097  1452  1838 <tibble [95 × 4]>          3417.          4377.
#>  7 Camden      0.062   286   350 <tibble [94 × 4]>          3467.          4434.
#>  8 Gates       0.091   420   594 <tibble [93 × 4]>          3480.          4453.
#>  9 Warren      0.118   968  1190 <tibble [95 × 4]>          3345.          4284.
#> 10 Stokes      0.124  1612  2038 <tibble [95 × 4]>          3238.          4148.
#> # ℹ 90 more rows

Visualise "outside" geometries:

# collect details for plot
lee = tibble::lst(
  polygon = nc[nc$NAME == "Lee","geometry"],
  within_range = st_centroid(polygon) %>% st_buffer(50000),
  outside_idx = -unlist(nc$within_dist[nc$NAME == "Lee"])
)

ggplot() +
  geom_sf(data = nc, fill = NA) +
  geom_sf(data = nc[lee$outside_idx, ], aes(fill = "outside of dist."), alpha = .5) +
  geom_sf(data = lee$within_range, fill = "green", alpha = .2) +
  geom_sf(data = lee$polygon, aes(fill = "Lee")) +
  geom_sf(data = nc, aes(geometry = cntr)) +
  scale_fill_manual(values = c("red", "gold"), name = NULL) +
  theme(legend.position = "bottom")

^{Created on 2023-08-31 with reprex v2.0.2}