I am trying to get summary statistics for districts that are outside a specified distance from other districts. I have been able to use dnearneig() to find summary statistics for districts within a specified distance, but I have been unable to figure out how to either convert this for the outside case, or use the results from dnearneigh() as an input to tell R "don't consider these districts". I have also tried using st_distance()/st_is_within_distance() but haven't had any luck. Any advice would be appreciated.
This is the code I have so far for the within a specified distance case.
packages <- c("readxl", #Allows us to read in excel files
"writexl", #Allows us to write to excel files
"haven", #
"stargazer",
"tidyr",
"dplyr",
"labelled",
"tidyverse",
"expss",
"Hmisc",
"foreign",
"spdep",
"sf",
"qpcR",
"plm",
"ivreg",
"sjlabelled",
"Statamarkdown",
"vctrs")
# library(tables)
# rm(list=setdiff(ls(), c("df_jslc_2000_2019","df_jslc_allyears")))
pacman::p_load(packages, character.only = TRUE)
df_year <- df[df$YEAR==year,]
# print(df_year)
shp_centroid_3 <- st_centroid(df_year$geometry)
# print(shp_centroid_3)
print("we made it 2 lines")
# within_km_10_pre<- st_is_within_distance(df_year$geometry,df_year$geometry,10000)
##Filter out self
# within_km_10 <- within_km_10_pre[which(lengths(within_km_10_pre) >1)]
within_km_10 <- dnearneigh(shp_centroid_3,5000,10000)
print(within_km_10)
shp_centroid_3$TOT_EXP <- df_year$TOTAL_E
shp_centroid_3$Z_SCORE <- df_year$Z_b_100
shp_centroid_3$Z_BEACH_100_ORIGINAL <- df_year$Z__100_
shp_centroid_3$Z_WATER <- df_year$Z_WATER
shp_centroid_3$Z_WAT_SC <- df_year$Z_WAT_S
tourism_10km_mean1 <- unlist(lapply(within_km_10, \(v){mean(shp_centroid_3$TOT_EXP[v],na.rm=TRUE)}))
# print(tourism_10km_mean1)
tourism_10km_sum <- unlist(lapply(within_km_10, \(v){sum(shp_centroid_3$TOT_EXP[v],na.rm = TRUE)}))
tourism_10km_score <- unlist(lapply(within_km_10, \(v){mean(shp_centroid_3$Z_SCORE[v],na.rm=TRUE)}))
tourism_10km_score_beach_original <- unlist(lapply(within_km_10, \(v){mean(shp_centroid_3$Z_BEACH_100_ORIGINAL[v],na.rm=TRUE)}))
tourism_10km_water_zscore <- unlist(lapply(within_km_10, \(v){mean(shp_centroid_3$Z_WATER[v],na.rm=TRUE)}))
tourism_10km_water_zscore_scaled <- unlist(lapply(within_km_10, \(v){mean(shp_centroid_3$Z_WAT_SC[v],na.rm=TRUE)}))
Short answer: you can negate indices returned by st_is_within_distance()
to subset everything that was not within distance.
Longer answer is a slightly modified variant of the one left for one of your previous questions, Finding Subset of Polygon centroids Which Are Within A Certain Distance of the Centroids of Other Polygons in R , this time a bit less verbose.
nc
dataset from sf
package as an examplest_is_within_distance(..., dist = 50000)
on centroids to build a list of "within" indices for every record, 50000m is just more suitable for this particular example datasetlibrary(sf)
#> Linking to GEOS 3.9.3, GDAL 3.5.2, PROJ 8.2.1; sf_use_s2() is TRUE
library(dplyr, warn.conflicts = FALSE)
library(ggplot2)
nc <- read_sf(system.file("shape/nc.shp", package="sf")) %>%
select(NAME, AREA, starts_with("BIR")) %>%
mutate(cntr = st_centroid(geometry),
within_dist = st_is_within_distance(cntr, dist = 50000))
# ignore geometries for now for more compact output
nc_df <- st_drop_geometry(nc) %>% select(-cntr)
# build a nested tibble, each county row gets a
# tibble of all counties outside of distance, use rowwise grouping to
# subset nc_df with "-within_dist" (negated within_dist) of current row
nc_nested <- nc_df %>%
rowwise() %>%
mutate(out_dist = (nc_df[-within_dist, ]) %>% select(-within_dist) %>% list()) %>%
# add some summary stats from nested tibbles:
mutate(out_bir74_mean = mean(out_dist$BIR74),
out_bir79_mean = mean(out_dist$BIR79)) %>%
ungroup() %>%
select(-within_dist)
nc_nested
#> # A tibble: 100 × 7
#> NAME AREA BIR74 BIR79 out_dist out_bir74_mean out_bir79_mean
#> <chr> <dbl> <dbl> <dbl> <list> <dbl> <dbl>
#> 1 Ashe 0.114 1091 1364 <tibble [96 × 4]> 3374. 4323.
#> 2 Alleghany 0.061 487 542 <tibble [96 × 4]> 3355. 4304.
#> 3 Surry 0.143 3188 3616 <tibble [95 × 4]> 3371. 4325.
#> 4 Currituck 0.07 508 830 <tibble [96 × 4]> 3407. 4357.
#> 5 Northampton 0.153 1421 1606 <tibble [97 × 4]> 3335. 4273.
#> 6 Hertford 0.097 1452 1838 <tibble [95 × 4]> 3417. 4377.
#> 7 Camden 0.062 286 350 <tibble [94 × 4]> 3467. 4434.
#> 8 Gates 0.091 420 594 <tibble [93 × 4]> 3480. 4453.
#> 9 Warren 0.118 968 1190 <tibble [95 × 4]> 3345. 4284.
#> 10 Stokes 0.124 1612 2038 <tibble [95 × 4]> 3238. 4148.
#> # ℹ 90 more rows
Visualise "outside" geometries:
# collect details for plot
lee = tibble::lst(
polygon = nc[nc$NAME == "Lee","geometry"],
within_range = st_centroid(polygon) %>% st_buffer(50000),
outside_idx = -unlist(nc$within_dist[nc$NAME == "Lee"])
)
ggplot() +
geom_sf(data = nc, fill = NA) +
geom_sf(data = nc[lee$outside_idx, ], aes(fill = "outside of dist."), alpha = .5) +
geom_sf(data = lee$within_range, fill = "green", alpha = .2) +
geom_sf(data = lee$polygon, aes(fill = "Lee")) +
geom_sf(data = nc, aes(geometry = cntr)) +
scale_fill_manual(values = c("red", "gold"), name = NULL) +
theme(legend.position = "bottom")
Created on 2023-08-31 with reprex v2.0.2