rgisgeospatial

R for GIS - calculating proximal densities


I recently started using R for GIS applications and need assistance with calculating proximal densities, a concept I am mostly unfamiliar with. To illustrate, suppose I am provided with schools and playgrounds location data for which I would like to calculate proximal densities at 0.25-mile, 0.50-mile, 0.75-mile, and 1-mile radii. How can this be achieved using R program? This is what I have tried so far.

# load required packages
library(sp)
library(sf)

# create standard rectangular non-spatial data frames
schools <- data.frame(Location = c("Albertville, AL", "Albertville, AL", "Albertville, AL", "Albertville, AL", "Albertville, AL", "Albertville, AL"), Place = c("Albertville Middle School", "Albertville High School", "Albertville Intermediate School", "Albertville Elementary School", "Albertville Kindergarten and PreK", "Albertville Primary School"), Lat = c(34.25996, 34.260509, 34.27279, 34.25182, 34.29161, 34.25321), Long = c(-86.2074, -86.205116, -86.220818, -86.22271, -86.19384, -86.22192))
playgrounds <- data.frame(Location = c("Albertville, AL", "Albertville, AL", "Albertville, AL"), Place = c("Graham Park", "Sand Mountain Park", "Pecan Grove Playground"), Lat = c(34.26358, 34.275471, 34.27294), Long = c(-86.20289, -86.229111, -86.2264))

# define the coordinates
schools_coords <- cbind(schools$Long, schools$Lat)
playgrounds_coords <- cbind(playgrounds$Long, playgrounds$Lat)

# create the SpatialPointsDataFrame
schools_sp <- SpatialPointsDataFrame(schools_coords, data = schools, proj4string = CRS("+proj= longlat"))
playgrounds_sp <- SpatialPointsDataFrame(playgrounds_coords, data = playgrounds, proj4string = CRS("+proj= longlat"))

# convert to sf
schools_sf <- st_as_sf(schools_sp)
playgrounds_sf <- st_as_sf(playgrounds_sp)

Solution

  • The workflow:

    It is necessary to project your data to a projected coordinate system (PCS) to ensure the buffer area calculations remain relatively constant for your buffers across the extent of your research area. If your data are at national level, or extend across multiple UTM zones, you may need to run separate calculations for each UTM zone. You could also pre-calculate area values and assign them to the buff* files, but let's keep things simple-ish for now.

    library(sf)
    library(dplyr)
    library(tidyr)
    
    # Your sample data
    schools <- data.frame(
      Location = c("Albertville, AL", "Albertville, AL", "Albertville, AL", "Albertville, AL", "Albertville, AL", "Albertville, AL"),
      Place = c("Albertville Middle School", "Albertville High School", "Albertville Intermediate School", "Albertville Elementary School", "Albertville Kindergarten and PreK", "Albertville Primary School"),
      Lat = c(34.25996, 34.260509, 34.27279, 34.25182, 34.29161, 34.25321),
      Long = c(-86.2074, -86.205116, -86.220818, -86.22271, -86.19384, -86.22192)
      )
    
    playgrounds <- data.frame(
      Location = c("Albertville, AL", "Albertville, AL", "Albertville, AL"),
      Place = c("Graham Park", "Sand Mountain Park", "Pecan Grove Playground"),
      Lat = c(34.26358, 34.275471, 34.27294),
      Long = c(-86.20289, -86.229111, -86.2264)
      )
    
    # Get UTM zone 
    floor((schools$Long + 180) / 6) + 1
    # [1] 16 16 16 16 16 16
    floor((playgrounds$Long + 180) / 6) + 1
    # [1] 16 16 16
    
    # Create sf objects and project to correct UTM zone (or whatever CRS is appropriate)
    # EPSG codes can be found @ https://epsg.io/
    sf_schools <- schools %>%
      st_as_sf(coords = c("Long", "Lat"), crs = 4326) %>%
      st_transform(32616)
    
    sf_playgrounds <- playgrounds %>%
      st_as_sf(coords = c("Long", "Lat"), crs = 4326) %>%
      st_transform(32616)
    
    # School buffers
    buff25 <- st_buffer(sf_schools, 400) %>%
      st_join(., sf_playgrounds, join = st_contains) %>%
      group_by(Place.x) %>%
      summarise(pg_count = sum(!is.na(Place.y)), .groups = 'drop') %>%
      mutate(buff = "buff025")
    
    buff50 <- st_buffer(sf_schools, 800) %>%
      st_join(., sf_playgrounds, join = st_contains) %>%
      group_by(Place.x) %>%
      summarise(pg_count = sum(!is.na(Place.y)), .groups = 'drop') %>%
      mutate(buff = "buff050")
    
    buff75 <- st_buffer(sf_schools, 1200) %>%
      st_join(., sf_playgrounds, join = st_contains) %>%
      group_by(Place.x) %>%
      summarise(pg_count = sum(!is.na(Place.y)), .groups = 'drop') %>%
      mutate(buff = "buff075")
    
    buff10 <- st_buffer(sf_schools, 1600) %>%
      st_join(., sf_playgrounds, join = st_contains) %>%
      group_by(Place.x) %>%
      summarise(pg_count = sum(!is.na(Place.y)), .groups = 'drop') %>%
      mutate(buff = "buff100")
    
    # Combine buffers, calculate area as square miles, calculate proximal density,
    # convert to dataframe, pivot to wide and reorder buffers from smallest to largest
    buff_all <- do.call(rbind, mget(ls(pattern = "^buff\\d{2}$"))) %>%
      mutate(area_sq_mile = as.numeric(st_area(.)) * 3.861e-7,
             pd = pg_count / area_sq_mile) %>%
      rename(Place = Place.x) %>%
      data.frame() %>%
      pivot_wider(id_cols = Place,
                  names_from = buff,
                  values_from = pd) %>%
      relocate(buff100, .after = last_col())
    
    buff_all
    # # A tibble: 6 × 5
    #   Place                             buff025 buff050 buff075 buff100
    #   <chr>                               <dbl>   <dbl>   <dbl>   <dbl>
    # 1 Albertville Elementary School        0       0      0       0    
    # 2 Albertville High School              5.16    1.29   0.573   0.322
    # 3 Albertville Intermediate School      0       1.29   1.15    0.644
    # 4 Albertville Kindergarten and PreK    0       0      0       0    
    # 5 Albertville Middle School            0       1.29   0.573   0.322
    # 6 Albertville Primary School           0       0      0       0
    

    The values in the buff* columns represent the proximal density of playgrounds around each school per square mile.