rspatialshapefilenearest-neighbor

How to Find the Polygons Outside of a Specified Distance


I am trying to get summary statistics for districts that are outside a specified distance from other districts. I have been able to use dnearneig() to find summary statistics for districts within a specified distance, but I have been unable to figure out how to either convert this for the outside case, or use the results from dnearneigh() as an input to tell R "don't consider these districts". I have also tried using st_distance()/st_is_within_distance() but haven't had any luck. Any advice would be appreciated.

This is the code I have so far for the within a specified distance case.

packages <- c("readxl",  #Allows us to read in excel files
             "writexl", #Allows us to write to excel files 
             "haven",  #
             "stargazer",
             "tidyr",
             "dplyr",
             "labelled",
             "tidyverse",
             "expss", 
             "Hmisc",
             "foreign",
             "spdep",
             "sf",
             "qpcR",
             "plm",
             "ivreg",
             "sjlabelled",
             "Statamarkdown",
             "vctrs")
# library(tables)
# rm(list=setdiff(ls(), c("df_jslc_2000_2019","df_jslc_allyears")))

pacman::p_load(packages, character.only = TRUE)

 df_year <- df[df$YEAR==year,]
 # print(df_year)
 shp_centroid_3 <- st_centroid(df_year$geometry)
 # print(shp_centroid_3)
 print("we made it 2 lines")
 # within_km_10_pre<- st_is_within_distance(df_year$geometry,df_year$geometry,10000)
 ##Filter out self
 # within_km_10 <- within_km_10_pre[which(lengths(within_km_10_pre) >1)]
 within_km_10 <- dnearneigh(shp_centroid_3,5000,10000)

 print(within_km_10)
 shp_centroid_3$TOT_EXP <- df_year$TOTAL_E
 shp_centroid_3$Z_SCORE <- df_year$Z_b_100
 shp_centroid_3$Z_BEACH_100_ORIGINAL <- df_year$Z__100_
 shp_centroid_3$Z_WATER <- df_year$Z_WATER
 shp_centroid_3$Z_WAT_SC <- df_year$Z_WAT_S
 
 
 tourism_10km_mean1 <- unlist(lapply(within_km_10, \(v){mean(shp_centroid_3$TOT_EXP[v],na.rm=TRUE)}))
 # print(tourism_10km_mean1)
 tourism_10km_sum <- unlist(lapply(within_km_10, \(v){sum(shp_centroid_3$TOT_EXP[v],na.rm = TRUE)}))
 tourism_10km_score <- unlist(lapply(within_km_10, \(v){mean(shp_centroid_3$Z_SCORE[v],na.rm=TRUE)}))
 tourism_10km_score_beach_original <- unlist(lapply(within_km_10, \(v){mean(shp_centroid_3$Z_BEACH_100_ORIGINAL[v],na.rm=TRUE)}))
 tourism_10km_water_zscore <- unlist(lapply(within_km_10, \(v){mean(shp_centroid_3$Z_WATER[v],na.rm=TRUE)}))
 tourism_10km_water_zscore_scaled <- unlist(lapply(within_km_10, \(v){mean(shp_centroid_3$Z_WAT_SC[v],na.rm=TRUE)}))

Solution

  • Short answer: you can negate indices returned by st_is_within_distance() to subset everything that was not within distance.

    Longer answer is a slightly modified variant of the one left for one of your previous questions, Finding Subset of Polygon centroids Which Are Within A Certain Distance of the Centroids of Other Polygons in R , this time a bit less verbose.

    Steps:
    library(sf)
    #> Linking to GEOS 3.9.3, GDAL 3.5.2, PROJ 8.2.1; sf_use_s2() is TRUE
    library(dplyr, warn.conflicts = FALSE)
    library(ggplot2)
    
    nc <- read_sf(system.file("shape/nc.shp", package="sf")) %>% 
      select(NAME, AREA, starts_with("BIR")) %>% 
      mutate(cntr = st_centroid(geometry),
             within_dist = st_is_within_distance(cntr, dist = 50000))
    
    # ignore geometries for now for more compact output
    nc_df <- st_drop_geometry(nc) %>% select(-cntr)
    
    # build a nested tibble, each county row gets a 
    # tibble of all counties outside of distance, use rowwise grouping to 
    # subset nc_df with "-within_dist" (negated within_dist) of current row
    nc_nested <- nc_df %>% 
      rowwise() %>%
      mutate(out_dist = (nc_df[-within_dist, ]) %>% select(-within_dist) %>% list()) %>% 
      # add some summary stats from nested tibbles:
      mutate(out_bir74_mean = mean(out_dist$BIR74),
             out_bir79_mean = mean(out_dist$BIR79)) %>% 
      ungroup() %>% 
      select(-within_dist)
    nc_nested
    #> # A tibble: 100 × 7
    #>    NAME         AREA BIR74 BIR79 out_dist          out_bir74_mean out_bir79_mean
    #>    <chr>       <dbl> <dbl> <dbl> <list>                     <dbl>          <dbl>
    #>  1 Ashe        0.114  1091  1364 <tibble [96 × 4]>          3374.          4323.
    #>  2 Alleghany   0.061   487   542 <tibble [96 × 4]>          3355.          4304.
    #>  3 Surry       0.143  3188  3616 <tibble [95 × 4]>          3371.          4325.
    #>  4 Currituck   0.07    508   830 <tibble [96 × 4]>          3407.          4357.
    #>  5 Northampton 0.153  1421  1606 <tibble [97 × 4]>          3335.          4273.
    #>  6 Hertford    0.097  1452  1838 <tibble [95 × 4]>          3417.          4377.
    #>  7 Camden      0.062   286   350 <tibble [94 × 4]>          3467.          4434.
    #>  8 Gates       0.091   420   594 <tibble [93 × 4]>          3480.          4453.
    #>  9 Warren      0.118   968  1190 <tibble [95 × 4]>          3345.          4284.
    #> 10 Stokes      0.124  1612  2038 <tibble [95 × 4]>          3238.          4148.
    #> # ℹ 90 more rows
    

    Visualise "outside" geometries:

    # collect details for plot
    lee = tibble::lst(
      polygon = nc[nc$NAME == "Lee","geometry"],
      within_range = st_centroid(polygon) %>% st_buffer(50000),
      outside_idx = -unlist(nc$within_dist[nc$NAME == "Lee"])
    )
    
    ggplot() +
      geom_sf(data = nc, fill = NA) +
      geom_sf(data = nc[lee$outside_idx, ], aes(fill = "outside of dist."), alpha = .5) +
      geom_sf(data = lee$within_range, fill = "green", alpha = .2) +
      geom_sf(data = lee$polygon, aes(fill = "Lee")) +
      geom_sf(data = nc, aes(geometry = cntr)) +
      scale_fill_manual(values = c("red", "gold"), name = NULL) +
      theme(legend.position = "bottom")
    

    Created on 2023-08-31 with reprex v2.0.2