rdataframedplyrgeosphere

Calculate multiple point distance matrices with dplyr


I know that there is a similar question about that here Calculating geographic distances to data points with dplyr::mutate, but none of the answers really helped me to calculate it correctly.

I have something like the following data.frame:

dt <- data.frame(
  stringsAsFactors = FALSE,
  date = c(
    "2022-05-02",
    "2022-05-02",
    "2022-05-02",
    "2022-05-02",
    "2022-05-02",
    "2022-05-02",
    "2022-05-02",
    "2022-05-02",
    "2022-05-02",
    "2022-05-02",
    "2022-05-02",
    "2022-05-02",
    "2022-05-02",
    "2022-05-02",
    "2022-05-02",
    "2022-05-02",
    "2022-05-02",
    "2022-05-02",
    "2022-05-02",
    "2022-05-02",
    "2022-05-02",
    "2022-05-02",
    "2022-05-02",
    "2022-05-02",
    "2022-05-02",
    "2022-05-02",
    "2022-05-02",
    "2022-05-02",
    "2022-05-02",
    "2022-05-02",
    "2022-05-02",
    "2022-05-02",
    "2022-05-02",
    "2022-05-02",
    "2022-05-02",
    "2022-05-02",
    "2022-05-02",
    "2022-05-02",
    "2022-05-02",
    "2022-05-02",
    "2022-05-02",
    "2022-05-02",
    "2022-05-02",
    "2022-05-02",
    "2022-05-02",
    "2022-05-02",
    "2022-05-02",
    "2022-05-02",
    "2022-05-02",
    "2022-05-02"
  ),
  bird_ID = c(
    "350E",
    "350E",
    "350E",
    "350E",
    "350E",
    "350E",
    "350E",
    "350E",
    "350E",
    "350E",
    "350E",
    "350E",
    "350E",
    "350E",
    "350E",
    "350E",
    "350E",
    "350E",
    "350E",
    "350E",
    "350E",
    "350E",
    "350E",
    "350E",
    "350E",
    "350E",
    "350E",
    "350E",
    "350E",
    "350E",
    "350E",
    "350E",
    "350E",
    "350E",
    "350E",
    "350E",
    "350E",
    "350E",
    "350E",
    "350E",
    "350E",
    "350E",
    "350E",
    "350E",
    "350E",
    "350E",
    "350E",
    "350E",
    "350E",
    "350E"
  ),
  Position_Burst_ID = c(
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    2,
    2,
    2,
    2,
    2,
    2,
    2,
    2,
    2
  ),
  device_ID = c(
    202927L,
    202927L,
    202927L,
    202927L,
    202927L,
    202927L,
    202927L,
    202927L,
    202927L,
    202927L,
    202927L,
    202927L,
    202927L,
    202927L,
    202927L,
    202927L,
    202927L,
    202927L,
    202927L,
    202927L,
    202927L,
    202927L,
    202927L,
    202927L,
    202927L,
    202927L,
    202927L,
    202927L,
    202927L,
    202927L,
    202927L,
    202927L,
    202927L,
    202927L,
    202927L,
    202927L,
    202927L,
    202927L,
    202927L,
    202927L,
    202927L,
    202927L,
    202927L,
    202927L,
    202927L,
    202927L,
    202927L,
    202927L,
    202927L,
    202927L
  ),
  devicetype = c(
    "ornitela",
    "ornitela",
    "ornitela",
    "ornitela",
    "ornitela",
    "ornitela",
    "ornitela",
    "ornitela",
    "ornitela",
    "ornitela",
    "ornitela",
    "ornitela",
    "ornitela",
    "ornitela",
    "ornitela",
    "ornitela",
    "ornitela",
    "ornitela",
    "ornitela",
    "ornitela",
    "ornitela",
    "ornitela",
    "ornitela",
    "ornitela",
    "ornitela",
    "ornitela",
    "ornitela",
    "ornitela",
    "ornitela",
    "ornitela",
    "ornitela",
    "ornitela",
    "ornitela",
    "ornitela",
    "ornitela",
    "ornitela",
    "ornitela",
    "ornitela",
    "ornitela",
    "ornitela",
    "ornitela",
    "ornitela",
    "ornitela",
    "ornitela",
    "ornitela",
    "ornitela",
    "ornitela",
    "ornitela",
    "ornitela",
    "ornitela"
  ),
  timestamp = c(
    "2022-05-02 00:03:58",
    "2022-05-02 00:03:59",
    "2022-05-02 00:03:59",
    "2022-05-02 00:03:59",
    "2022-05-02 00:03:59",
    "2022-05-02 00:03:59",
    "2022-05-02 00:03:59",
    "2022-05-02 00:03:59",
    "2022-05-02 00:03:59",
    "2022-05-02 00:03:59",
    "2022-05-02 00:03:59",
    "2022-05-02 00:03:59",
    "2022-05-02 00:03:59",
    "2022-05-02 00:03:59",
    "2022-05-02 00:03:59",
    "2022-05-02 00:03:59",
    "2022-05-02 00:03:59",
    "2022-05-02 00:03:59",
    "2022-05-02 00:03:59",
    "2022-05-02 00:03:59",
    "2022-05-02 00:03:59",
    "2022-05-02 00:03:59",
    "2022-05-02 00:03:59",
    "2022-05-02 00:03:59",
    "2022-05-02 00:03:59",
    "2022-05-02 00:03:59",
    "2022-05-02 00:03:59",
    "2022-05-02 00:03:59",
    "2022-05-02 00:03:59",
    "2022-05-02 00:03:59",
    "2022-05-02 00:03:59",
    "2022-05-02 00:03:59",
    "2022-05-02 00:03:59",
    "2022-05-02 00:03:59",
    "2022-05-02 00:03:59",
    "2022-05-02 00:03:59",
    "2022-05-02 00:03:59",
    "2022-05-02 00:03:59",
    "2022-05-02 00:03:59",
    "2022-05-02 00:03:59",
    "2022-05-02 00:03:59",
    "2022-05-02 00:09:00",
    "2022-05-02 00:09:01",
    "2022-05-02 00:09:01",
    "2022-05-02 00:09:01",
    "2022-05-02 00:09:01",
    "2022-05-02 00:09:01",
    "2022-05-02 00:09:01",
    "2022-05-02 00:09:01",
    "2022-05-02 00:09:01"
  ),
  sensortype = c(
    "GPS",
    "ACC",
    "ACC",
    "ACC",
    "ACC",
    "ACC",
    "ACC",
    "ACC",
    "ACC",
    "ACC",
    "ACC",
    "ACC",
    "ACC",
    "ACC",
    "ACC",
    "ACC",
    "ACC",
    "ACC",
    "ACC",
    "ACC",
    "ACC",
    "ACC",
    "ACC",
    "ACC",
    "ACC",
    "ACC",
    "ACC",
    "ACC",
    "ACC",
    "ACC",
    "ACC",
    "ACC",
    "ACC",
    "ACC",
    "ACC",
    "ACC",
    "ACC",
    "ACC",
    "ACC",
    "ACC",
    "ACC",
    "GPS",
    "ACC",
    "ACC",
    "ACC",
    "ACC",
    "ACC",
    "ACC",
    "ACC",
    "ACC"
  ),
  GPS_ID = c(
    132933L,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    132934L,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA
  ),
  altitude_msl = c(
    37L,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    37L,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA
  ),
  latitude = c(
    64.6706237792969,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    64.6706771850586,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA
  ),
  longitude = c(
    40.1550903320312,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    40.1552696228027,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA
  ),
  ETRS89_y = c(
    4926797.58514714,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    4926806.57855044,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA
  ),
  ETRS89_x = c(
    5717023.89306346,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    5717028.86622297,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA
  ),
  ACC_ID = c(
    NA,
    2591809L,
    2591809L,
    2591809L,
    2591809L,
    2591809L,
    2591809L,
    2591809L,
    2591809L,
    2591809L,
    2591809L,
    2591809L,
    2591809L,
    2591809L,
    2591809L,
    2591809L,
    2591809L,
    2591809L,
    2591809L,
    2591809L,
    2591809L,
    2591809L,
    2591809L,
    2591809L,
    2591809L,
    2591809L,
    2591809L,
    2591809L,
    2591809L,
    2591809L,
    2591809L,
    2591809L,
    2591809L,
    2591809L,
    2591809L,
    2591809L,
    2591809L,
    2591809L,
    2591809L,
    2591809L,
    2591809L,
    NA,
    2591849L,
    2591849L,
    2591849L,
    2591849L,
    2591849L,
    2591849L,
    2591849L,
    2591849L
  ),
  acc_x = c(
    NA,
    -532,
    -873,
    -562,
    -667,-728,
    -552,
    -999,
    -801,
    -555,-899,
    -788,
    -808,
    -652,-806,
    -1202,
    -707,
    -854,
    -857,-954,
    -798,
    -1044,
    -558,-1047,
    -748,
    -1114,
    -789,
    -896,-692,
    -1153,
    -637,
    -714,-920,
    -957,
    -610,
    -525,
    -1191,-593,
    -417,
    -627,
    -905,
    NA,-922,
    -691,
    -601,
    -154,-1022,
    66,
    -195,
    -587
  ),
  acc_y = c(
    NA,
    651,
    815,
    454,
    410,
    625,
    431,
    344,
    335,
    490,
    319,
    441,
    223,
    446,
    111,
    250,
    207,
    242,
    111,
    136,
    303,
    141,
    145,
    248,
    223,
    242,
    97,
    339,
    335,
    161,
    146,
    537,
    175,
    539,-29,
    799,
    -107,
    167,
    1087,
    618,
    383,
    NA,
    484,
    527,
    720,
    1199,
    537,
    911,
    1130,
    748
  ),
  acc_z = c(
    NA,
    -993,
    -732,
    -97,
    121,-529,
    -230,
    -401,
    -266,
    -881,-170,
    -321,
    -58,
    -1041,
    -117,-753,
    -821,
    -289,
    -85,
    -401,-1022,
    -97,
    78,
    -465,
    -745,-353,
    29,
    -177,
    -924,
    -241,
    112,
    -417,
    -399,
    -545,
    588,-689,
    91,
    127,
    -1356,
    -353,-187,
    NA,
    -337,
    -438,
    -65,-1163,
    -145,
    -474,
    -417,
    132
  ),
  Position.Burst.ID.length = c(
    40L,
    40L,
    40L,
    40L,
    40L,
    40L,
    40L,
    40L,
    40L,
    40L,
    40L,
    40L,
    40L,
    40L,
    40L,
    40L,
    40L,
    40L,
    40L,
    40L,
    40L,
    40L,
    40L,
    40L,
    40L,
    40L,
    40L,
    40L,
    40L,
    40L,
    40L,
    40L,
    40L,
    40L,
    40L,
    40L,
    40L,
    40L,
    40L,
    40L,
    40L,
    40L,
    40L,
    40L,
    40L,
    40L,
    40L,
    40L,
    40L,
    40L
  ),
  ODBA = c(
    25.79384765625,
    25.79384765625,
    25.79384765625,
    25.79384765625,
    25.79384765625,
    25.79384765625,
    25.79384765625,
    25.79384765625,
    25.79384765625,
    25.79384765625,
    25.79384765625,
    25.79384765625,
    25.79384765625,
    25.79384765625,
    25.79384765625,
    25.79384765625,
    25.79384765625,
    25.79384765625,
    25.79384765625,
    25.79384765625,
    25.79384765625,
    25.79384765625,
    25.79384765625,
    25.79384765625,
    25.79384765625,
    25.79384765625,
    25.79384765625,
    25.79384765625,
    25.79384765625,
    25.79384765625,
    25.79384765625,
    25.79384765625,
    25.79384765625,
    25.79384765625,
    25.79384765625,
    25.79384765625,
    25.79384765625,
    25.79384765625,
    25.79384765625,
    25.79384765625,
    25.79384765625,
    31.129052734375,
    31.129052734375,
    31.129052734375,
    31.129052734375,
    31.129052734375,
    31.129052734375,
    31.129052734375,
    31.129052734375,
    31.129052734375
  ),
  latitude_m = c(
    7199133.83911133,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    7199139.78424072,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA
  ),
  longitude_m = c(
    -1183571.81342411,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    -1183807.30305938,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA,
    NA
  ),
  ODBA.median = c(
    20.672607421875,
    20.672607421875,
    20.672607421875,
    20.672607421875,
    20.672607421875,
    20.672607421875,
    20.672607421875,
    20.672607421875,
    20.672607421875,
    20.672607421875,
    20.672607421875,
    20.672607421875,
    20.672607421875,
    20.672607421875,
    20.672607421875,
    20.672607421875,
    20.672607421875,
    20.672607421875,
    20.672607421875,
    20.672607421875,
    20.672607421875,
    20.672607421875,
    20.672607421875,
    20.672607421875,
    20.672607421875,
    20.672607421875,
    20.672607421875,
    20.672607421875,
    20.672607421875,
    20.672607421875,
    20.672607421875,
    20.672607421875,
    20.672607421875,
    20.672607421875,
    20.672607421875,
    20.672607421875,
    20.672607421875,
    20.672607421875,
    20.672607421875,
    20.672607421875,
    20.672607421875,
    20.672607421875,
    20.672607421875,
    20.672607421875,
    20.672607421875,
    20.672607421875,
    20.672607421875,
    20.672607421875,
    20.672607421875,
    20.672607421875
  ),
  latitude.sd = c(
    515.425830060538,
    515.425830060538,
    515.425830060538,
    515.425830060538,
    515.425830060538,
    515.425830060538,
    515.425830060538,
    515.425830060538,
    515.425830060538,
    515.425830060538,
    515.425830060538,
    515.425830060538,
    515.425830060538,
    515.425830060538,
    515.425830060538,
    515.425830060538,
    515.425830060538,
    515.425830060538,
    515.425830060538,
    515.425830060538,
    515.425830060538,
    515.425830060538,
    515.425830060538,
    515.425830060538,
    515.425830060538,
    515.425830060538,
    515.425830060538,
    515.425830060538,
    515.425830060538,
    515.425830060538,
    515.425830060538,
    515.425830060538,
    515.425830060538,
    515.425830060538,
    515.425830060538,
    515.425830060538,
    515.425830060538,
    515.425830060538,
    515.425830060538,
    515.425830060538,
    515.425830060538,
    515.425830060538,
    515.425830060538,
    515.425830060538,
    515.425830060538,
    515.425830060538,
    515.425830060538,
    515.425830060538,
    515.425830060538,
    515.425830060538
  )
)
head(dt)
#>         date bird_ID Position_Burst_ID device_ID devicetype           timestamp
#> 1 2022-05-02    350E                 1    202927   ornitela 2022-05-02 00:03:58
#> 2 2022-05-02    350E                 1    202927   ornitela 2022-05-02 00:03:59
#> 3 2022-05-02    350E                 1    202927   ornitela 2022-05-02 00:03:59
#> 4 2022-05-02    350E                 1    202927   ornitela 2022-05-02 00:03:59
#> 5 2022-05-02    350E                 1    202927   ornitela 2022-05-02 00:03:59
#> 6 2022-05-02    350E                 1    202927   ornitela 2022-05-02 00:03:59
#>   sensortype GPS_ID altitude_msl latitude longitude ETRS89_y ETRS89_x  ACC_ID
#> 1        GPS 132933           37 64.67062  40.15509  4926798  5717024      NA
#> 2        ACC     NA           NA       NA        NA       NA       NA 2591809
#> 3        ACC     NA           NA       NA        NA       NA       NA 2591809
#> 4        ACC     NA           NA       NA        NA       NA       NA 2591809
#> 5        ACC     NA           NA       NA        NA       NA       NA 2591809
#> 6        ACC     NA           NA       NA        NA       NA       NA 2591809
#>   acc_x acc_y acc_z Position.Burst.ID.length     ODBA latitude_m longitude_m
#> 1    NA    NA    NA                       40 25.79385    7199134    -1183572
#> 2  -532   651  -993                       40 25.79385         NA          NA
#> 3  -873   815  -732                       40 25.79385         NA          NA
#> 4  -562   454   -97                       40 25.79385         NA          NA
#> 5  -667   410   121                       40 25.79385         NA          NA
#> 6  -728   625  -529                       40 25.79385         NA          NA
#>   ODBA.median latitude.sd
#> 1    20.67261    515.4258
#> 2    20.67261    515.4258
#> 3    20.67261    515.4258
#> 4    20.67261    515.4258
#> 5    20.67261    515.4258
#> 6    20.67261    515.4258

Created on 2022-11-10 with reprex v2.0.2

And the following tibble:

locations <- tibble::tribble(
  ~bird_ID,   ~latitude.mean,         ~latitude.sd,  ~longitude.mean,        ~longitude.sd,
    "350E", 68.4523207306103, 0.000197644989661478,  52.122216437273, 0.000714616838312651,
    "351E", 70.6687025782724, 0.000217284948426624, 56.5641610797428, 0.000630191007951335,
    "373E", 69.1366802085825, 0.000136364833873454, 49.9669871427575, 0.000195497865494354,
    "375E", 67.8441058291665,   0.0236158366819919, 53.1982619611523,   0.0532271169756873,
    "379E", 70.8926167721249, 9.62097052940481e-05, 55.4907074886489, 0.000214362572617005,
    "383E",  68.454480489095, 3.83707104847611e-05, 51.5960375467936, 0.000212207883139862
  )
head(locations)
#> # A tibble: 6 × 5
#>   bird_ID latitude.mean latitude.sd longitude.mean longitude.sd
#>   <chr>           <dbl>       <dbl>          <dbl>        <dbl>
#> 1 350E             68.5   0.000198            52.1     0.000715
#> 2 351E             70.7   0.000217            56.6     0.000630
#> 3 373E             69.1   0.000136            50.0     0.000195
#> 4 375E             67.8   0.0236              53.2     0.0532  
#> 5 379E             70.9   0.0000962           55.5     0.000214
#> 6 383E             68.5   0.0000384           51.6     0.000212

Created on 2022-11-10 with reprex v2.0.2

The idea is that I want to create a geographical distance matrix around the possible locations from the locations tibble. I want to be able to calculate the distance between the points in the latitude and longitude columns from the dt. I was using the following piece of code that I thought worked fine, until I run each bird_ID separately and the results changed.

distances <- dt %>%
  group_by(bird_ID) %>%
  select(longitude, latitude, date) %>%
  mutate(Dist = distHaversine(cbind(locations$longitude.mean, locations$latitude.mean), cbind(longitude, latitude))) %>%
  drop_na()

The results for one bird_ID The results for all the the objects of bird_ID

Any ideas of how should I approach this?


Solution

  • If you want to calc distance by bird_ID, then you should likely left_join the locations onto the original data before calculating the distance.

    distances <- dt %>% 
      left_join(locations, by = "bird_ID") %>%   # NEW
      group_by(bird_ID) %>%
      select(bird_ID,                            # suppress an auto-add message
             longitude, latitude, date,
             longitude.mean, latitude.mean) %>%  # added so we have the data per-bird_ID
      mutate(
        Dist = geosphere::distHaversine(
          cbind(longitude.mean, latitude.mean),  # removed `location$`
          cbind(longitude, latitude))
      ) %>%
      tidyr::drop_na()
    # # A tibble: 2 x 7
    # # Groups:   bird_ID [1]
    #   bird_ID longitude latitude date       longitude.mean latitude.mean    Dist
    #   <chr>       <dbl>    <dbl> <chr>               <dbl>         <dbl>   <dbl>
    # 1 350E         40.2     64.7 2022-05-02           52.1          68.5 674850.
    # 2 350E         40.2     64.7 2022-05-02           52.1          68.5 674840.