rtidyversegeocodingnearest-neighborzipcode

Number of locations within radius based on longitude and latitude in dataframe format in R


I have a dataframe from almost all zipcodes of Germany.

# German Zip 
Germany <- read.csv("https://gist.githubusercontent.com/MarcoKuehne/a012325ef8a9fa33aaa943dbc3db4ba9/raw/8616404bece8d405553d36380c7242fab37043d9/zipcodes.germany.csv", sep = ";")

head(Germany) 
id loc_id zipcode                                         name      lat       lon
1  1  14308   19348                          Berge bei Perleberg 53.23746 11.870770
2  2  22537   85309                                     Pörnbach 48.61670 11.466700
3  3 106968   24790 Osterrönfeld Heidkrug, Gemeinde Osterrönfeld 54.27536  9.737535
4  4  18324   98646                               Hildburghausen 50.43950 10.723922
5  5  16590   27336                           Frankenfeld, Aller 52.76951  9.430780
6  6  19092   19294                                       Karenz 53.23012 11.343840

and a dataframe of particular places/locations in Germany, e.g. blood donation center, both with their respective longitude and latitude information:

# German Blood Donation 
Blooddonation <- read.csv("https://gist.githubusercontent.com/MarcoKuehne/95cc459b81f2bc6bec2f2b46d1f6273a/raw/2b1c77fe5cf1203ca105b7f61019bb390335db8e/LocationsUpdate.csv", sep=",")
head(Blooddonation)

                                             title   zip      lat      lon
1 Haema Blutspendezentrum Dresden-World Trade Center 01067 51.04807  13.7238
2                    Octapharma Plasmaspende Dresden 01067 51.04932 13.73557
3                             Haema Dresden Elbepark 01139 51.08232   13.696
4                       DRK-Blutspendedienst Dresden 01307 51.05294 13.78027
5      Haema Blutspendezentrum Dresden-Fetscherplatz 01307 51.04654 13.77047
6                    Haema Blutspendezentrum Görlitz 02826 51.15275 14.98878

How can I find the number of neighbour locations (blood donation centers) within a radius of e.g. 10km, 20km from each zipcode in Germany and store the result as a variable in my Germany dataframe.

Is there a tidyverse (tidy) solution such that the results are stored as variable in a dataframe?


Solution

  • With sf and distance matrix:

    library(dplyr)
    library(sf)
    
    ger_sf <- st_as_sf(Germany, coords = c("lon", "lat"), crs = "WGS84")
    bd_sf <- st_as_sf(Blooddonation, coords = c("lon", "lat"), crs = "WGS84")
    
    # distance matrix in km with units dropped
    # rows: locations from Germany
    # cols: locations from Blooddonation
    distm_km <- st_distance(ger_sf, bd_sf) %>% 
      units::set_units("km") %>% 
      units::drop_units()
    
    distm_km[1:8, 1:8]
    #>          [,1]     [,2]     [,3]     [,4]     [,5]     [,6]     [,7]     [,8]
    #> [1,] 274.3081 274.5550 270.0329 275.6236 275.9407 314.4278 313.1224 234.2907
    #> [2,] 315.0837 315.6358 317.3197 317.6389 316.6712 378.3292 378.1703 404.2936
    #> [3,] 448.2405 448.6026 444.0099 450.0901 450.2696 495.4185 494.1030 415.7224
    #> [4,] 221.6435 222.4717 220.9114 225.5819 224.7243 310.0170 309.1489 291.4043
    #> [5,] 351.1441 351.7419 347.3852 354.0903 353.9269 420.9728 419.6516 352.0349
    #> [6,] 291.9424 292.2731 287.6897 293.6504 293.8668 339.1699 337.8463 260.3578
    #> [7,] 272.3777 272.5296 268.1349 273.2452 273.6690 303.6681 302.3973 222.6707
    #> [8,] 158.5451 159.3540 156.3434 162.4375 161.8107 246.2483 245.1566 210.5743
    dim(distm_km)
    #> [1] 17367   248
    
    # rowSums() to count values matching condition across each row in the matrix
    Germany <- Germany %>% 
      mutate(within10km = rowSums(distm_km <= 10),
             within20km = rowSums(distm_km <= 20))
    

    Results :

    as_tibble(Germany)
    #> # A tibble: 17,367 × 8
    #>       id loc_id zipcode name                           lat   lon withi…¹ withi…²
    #>    <int>  <int>   <int> <chr>                        <dbl> <dbl>   <dbl>   <dbl>
    #>  1     1  14308   19348 Berge bei Perleberg           53.2 11.9        0       0
    #>  2     2  22537   85309 Pörnbach                      48.6 11.5        0       1
    #>  3     3 106968   24790 Osterrönfeld Heidkrug, Geme…  54.3  9.74       0       0
    #>  4     4  18324   98646 Hildburghausen                50.4 10.7        0       1
    #>  5     5  16590   27336 Frankenfeld, Aller            52.8  9.43       0       0
    #>  6     6  19092   19294 Karenz                        53.2 11.3        0       1
    #>  7     7 144118   19395 Wendisch Priborn Tönchow      53.3 12.3        0       0
    #>  8     8  16355   99628 Eßleben-Teutleben             51.1 11.5        0       0
    #>  9     9  25953   38486 Wenze                         52.6 11.1        0       0
    #> 10    10  21836   72622 Nürtingen                     48.6  9.35       0       0
    #> # … with 17,357 more rows, and abbreviated variable names ¹​within10km,
    #> #   ²​within20km
    

    Inuput:

    library(httr)
    library(stringr)
    
    Germany <- read.csv("https://gist.githubusercontent.com/MarcoKuehne/a012325ef8a9fa33aaa943dbc3db4ba9/raw/8616404bece8d405553d36380c7242fab37043d9/zipcodes.germany.csv", sep = ";")
    Blooddonation <- GET('https://www.blutspenden.de/blutspendedienste/#') %>% 
      content(as = "text") %>% 
      str_match("var instituionsmap_data = '(.*)'") %>% 
      .[, 2] %>% 
      jsonlite::parse_json(simplifyVector = T) %>% 
      select(title, street, number, zip, city, lat, lon)
    

    Created on 2023-01-15 with reprex v2.0.2