I know that there is a similar question about that here Calculating geographic distances to data points with dplyr::mutate, but none of the answers really helped me to calculate it correctly.
I have something like the following data.frame:
dt <- data.frame(
stringsAsFactors = FALSE,
date = c(
"2022-05-02",
"2022-05-02",
"2022-05-02",
"2022-05-02",
"2022-05-02",
"2022-05-02",
"2022-05-02",
"2022-05-02",
"2022-05-02",
"2022-05-02",
"2022-05-02",
"2022-05-02",
"2022-05-02",
"2022-05-02",
"2022-05-02",
"2022-05-02",
"2022-05-02",
"2022-05-02",
"2022-05-02",
"2022-05-02",
"2022-05-02",
"2022-05-02",
"2022-05-02",
"2022-05-02",
"2022-05-02",
"2022-05-02",
"2022-05-02",
"2022-05-02",
"2022-05-02",
"2022-05-02",
"2022-05-02",
"2022-05-02",
"2022-05-02",
"2022-05-02",
"2022-05-02",
"2022-05-02",
"2022-05-02",
"2022-05-02",
"2022-05-02",
"2022-05-02",
"2022-05-02",
"2022-05-02",
"2022-05-02",
"2022-05-02",
"2022-05-02",
"2022-05-02",
"2022-05-02",
"2022-05-02",
"2022-05-02",
"2022-05-02"
),
bird_ID = c(
"350E",
"350E",
"350E",
"350E",
"350E",
"350E",
"350E",
"350E",
"350E",
"350E",
"350E",
"350E",
"350E",
"350E",
"350E",
"350E",
"350E",
"350E",
"350E",
"350E",
"350E",
"350E",
"350E",
"350E",
"350E",
"350E",
"350E",
"350E",
"350E",
"350E",
"350E",
"350E",
"350E",
"350E",
"350E",
"350E",
"350E",
"350E",
"350E",
"350E",
"350E",
"350E",
"350E",
"350E",
"350E",
"350E",
"350E",
"350E",
"350E",
"350E"
),
Position_Burst_ID = c(
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
2,
2,
2,
2,
2,
2,
2,
2,
2
),
device_ID = c(
202927L,
202927L,
202927L,
202927L,
202927L,
202927L,
202927L,
202927L,
202927L,
202927L,
202927L,
202927L,
202927L,
202927L,
202927L,
202927L,
202927L,
202927L,
202927L,
202927L,
202927L,
202927L,
202927L,
202927L,
202927L,
202927L,
202927L,
202927L,
202927L,
202927L,
202927L,
202927L,
202927L,
202927L,
202927L,
202927L,
202927L,
202927L,
202927L,
202927L,
202927L,
202927L,
202927L,
202927L,
202927L,
202927L,
202927L,
202927L,
202927L,
202927L
),
devicetype = c(
"ornitela",
"ornitela",
"ornitela",
"ornitela",
"ornitela",
"ornitela",
"ornitela",
"ornitela",
"ornitela",
"ornitela",
"ornitela",
"ornitela",
"ornitela",
"ornitela",
"ornitela",
"ornitela",
"ornitela",
"ornitela",
"ornitela",
"ornitela",
"ornitela",
"ornitela",
"ornitela",
"ornitela",
"ornitela",
"ornitela",
"ornitela",
"ornitela",
"ornitela",
"ornitela",
"ornitela",
"ornitela",
"ornitela",
"ornitela",
"ornitela",
"ornitela",
"ornitela",
"ornitela",
"ornitela",
"ornitela",
"ornitela",
"ornitela",
"ornitela",
"ornitela",
"ornitela",
"ornitela",
"ornitela",
"ornitela",
"ornitela",
"ornitela"
),
timestamp = c(
"2022-05-02 00:03:58",
"2022-05-02 00:03:59",
"2022-05-02 00:03:59",
"2022-05-02 00:03:59",
"2022-05-02 00:03:59",
"2022-05-02 00:03:59",
"2022-05-02 00:03:59",
"2022-05-02 00:03:59",
"2022-05-02 00:03:59",
"2022-05-02 00:03:59",
"2022-05-02 00:03:59",
"2022-05-02 00:03:59",
"2022-05-02 00:03:59",
"2022-05-02 00:03:59",
"2022-05-02 00:03:59",
"2022-05-02 00:03:59",
"2022-05-02 00:03:59",
"2022-05-02 00:03:59",
"2022-05-02 00:03:59",
"2022-05-02 00:03:59",
"2022-05-02 00:03:59",
"2022-05-02 00:03:59",
"2022-05-02 00:03:59",
"2022-05-02 00:03:59",
"2022-05-02 00:03:59",
"2022-05-02 00:03:59",
"2022-05-02 00:03:59",
"2022-05-02 00:03:59",
"2022-05-02 00:03:59",
"2022-05-02 00:03:59",
"2022-05-02 00:03:59",
"2022-05-02 00:03:59",
"2022-05-02 00:03:59",
"2022-05-02 00:03:59",
"2022-05-02 00:03:59",
"2022-05-02 00:03:59",
"2022-05-02 00:03:59",
"2022-05-02 00:03:59",
"2022-05-02 00:03:59",
"2022-05-02 00:03:59",
"2022-05-02 00:03:59",
"2022-05-02 00:09:00",
"2022-05-02 00:09:01",
"2022-05-02 00:09:01",
"2022-05-02 00:09:01",
"2022-05-02 00:09:01",
"2022-05-02 00:09:01",
"2022-05-02 00:09:01",
"2022-05-02 00:09:01",
"2022-05-02 00:09:01"
),
sensortype = c(
"GPS",
"ACC",
"ACC",
"ACC",
"ACC",
"ACC",
"ACC",
"ACC",
"ACC",
"ACC",
"ACC",
"ACC",
"ACC",
"ACC",
"ACC",
"ACC",
"ACC",
"ACC",
"ACC",
"ACC",
"ACC",
"ACC",
"ACC",
"ACC",
"ACC",
"ACC",
"ACC",
"ACC",
"ACC",
"ACC",
"ACC",
"ACC",
"ACC",
"ACC",
"ACC",
"ACC",
"ACC",
"ACC",
"ACC",
"ACC",
"ACC",
"GPS",
"ACC",
"ACC",
"ACC",
"ACC",
"ACC",
"ACC",
"ACC",
"ACC"
),
GPS_ID = c(
132933L,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
132934L,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA
),
altitude_msl = c(
37L,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
37L,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA
),
latitude = c(
64.6706237792969,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
64.6706771850586,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA
),
longitude = c(
40.1550903320312,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
40.1552696228027,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA
),
ETRS89_y = c(
4926797.58514714,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
4926806.57855044,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA
),
ETRS89_x = c(
5717023.89306346,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
5717028.86622297,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA
),
ACC_ID = c(
NA,
2591809L,
2591809L,
2591809L,
2591809L,
2591809L,
2591809L,
2591809L,
2591809L,
2591809L,
2591809L,
2591809L,
2591809L,
2591809L,
2591809L,
2591809L,
2591809L,
2591809L,
2591809L,
2591809L,
2591809L,
2591809L,
2591809L,
2591809L,
2591809L,
2591809L,
2591809L,
2591809L,
2591809L,
2591809L,
2591809L,
2591809L,
2591809L,
2591809L,
2591809L,
2591809L,
2591809L,
2591809L,
2591809L,
2591809L,
2591809L,
NA,
2591849L,
2591849L,
2591849L,
2591849L,
2591849L,
2591849L,
2591849L,
2591849L
),
acc_x = c(
NA,
-532,
-873,
-562,
-667,-728,
-552,
-999,
-801,
-555,-899,
-788,
-808,
-652,-806,
-1202,
-707,
-854,
-857,-954,
-798,
-1044,
-558,-1047,
-748,
-1114,
-789,
-896,-692,
-1153,
-637,
-714,-920,
-957,
-610,
-525,
-1191,-593,
-417,
-627,
-905,
NA,-922,
-691,
-601,
-154,-1022,
66,
-195,
-587
),
acc_y = c(
NA,
651,
815,
454,
410,
625,
431,
344,
335,
490,
319,
441,
223,
446,
111,
250,
207,
242,
111,
136,
303,
141,
145,
248,
223,
242,
97,
339,
335,
161,
146,
537,
175,
539,-29,
799,
-107,
167,
1087,
618,
383,
NA,
484,
527,
720,
1199,
537,
911,
1130,
748
),
acc_z = c(
NA,
-993,
-732,
-97,
121,-529,
-230,
-401,
-266,
-881,-170,
-321,
-58,
-1041,
-117,-753,
-821,
-289,
-85,
-401,-1022,
-97,
78,
-465,
-745,-353,
29,
-177,
-924,
-241,
112,
-417,
-399,
-545,
588,-689,
91,
127,
-1356,
-353,-187,
NA,
-337,
-438,
-65,-1163,
-145,
-474,
-417,
132
),
Position.Burst.ID.length = c(
40L,
40L,
40L,
40L,
40L,
40L,
40L,
40L,
40L,
40L,
40L,
40L,
40L,
40L,
40L,
40L,
40L,
40L,
40L,
40L,
40L,
40L,
40L,
40L,
40L,
40L,
40L,
40L,
40L,
40L,
40L,
40L,
40L,
40L,
40L,
40L,
40L,
40L,
40L,
40L,
40L,
40L,
40L,
40L,
40L,
40L,
40L,
40L,
40L,
40L
),
ODBA = c(
25.79384765625,
25.79384765625,
25.79384765625,
25.79384765625,
25.79384765625,
25.79384765625,
25.79384765625,
25.79384765625,
25.79384765625,
25.79384765625,
25.79384765625,
25.79384765625,
25.79384765625,
25.79384765625,
25.79384765625,
25.79384765625,
25.79384765625,
25.79384765625,
25.79384765625,
25.79384765625,
25.79384765625,
25.79384765625,
25.79384765625,
25.79384765625,
25.79384765625,
25.79384765625,
25.79384765625,
25.79384765625,
25.79384765625,
25.79384765625,
25.79384765625,
25.79384765625,
25.79384765625,
25.79384765625,
25.79384765625,
25.79384765625,
25.79384765625,
25.79384765625,
25.79384765625,
25.79384765625,
25.79384765625,
31.129052734375,
31.129052734375,
31.129052734375,
31.129052734375,
31.129052734375,
31.129052734375,
31.129052734375,
31.129052734375,
31.129052734375
),
latitude_m = c(
7199133.83911133,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
7199139.78424072,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA
),
longitude_m = c(
-1183571.81342411,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
-1183807.30305938,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA
),
ODBA.median = c(
20.672607421875,
20.672607421875,
20.672607421875,
20.672607421875,
20.672607421875,
20.672607421875,
20.672607421875,
20.672607421875,
20.672607421875,
20.672607421875,
20.672607421875,
20.672607421875,
20.672607421875,
20.672607421875,
20.672607421875,
20.672607421875,
20.672607421875,
20.672607421875,
20.672607421875,
20.672607421875,
20.672607421875,
20.672607421875,
20.672607421875,
20.672607421875,
20.672607421875,
20.672607421875,
20.672607421875,
20.672607421875,
20.672607421875,
20.672607421875,
20.672607421875,
20.672607421875,
20.672607421875,
20.672607421875,
20.672607421875,
20.672607421875,
20.672607421875,
20.672607421875,
20.672607421875,
20.672607421875,
20.672607421875,
20.672607421875,
20.672607421875,
20.672607421875,
20.672607421875,
20.672607421875,
20.672607421875,
20.672607421875,
20.672607421875,
20.672607421875
),
latitude.sd = c(
515.425830060538,
515.425830060538,
515.425830060538,
515.425830060538,
515.425830060538,
515.425830060538,
515.425830060538,
515.425830060538,
515.425830060538,
515.425830060538,
515.425830060538,
515.425830060538,
515.425830060538,
515.425830060538,
515.425830060538,
515.425830060538,
515.425830060538,
515.425830060538,
515.425830060538,
515.425830060538,
515.425830060538,
515.425830060538,
515.425830060538,
515.425830060538,
515.425830060538,
515.425830060538,
515.425830060538,
515.425830060538,
515.425830060538,
515.425830060538,
515.425830060538,
515.425830060538,
515.425830060538,
515.425830060538,
515.425830060538,
515.425830060538,
515.425830060538,
515.425830060538,
515.425830060538,
515.425830060538,
515.425830060538,
515.425830060538,
515.425830060538,
515.425830060538,
515.425830060538,
515.425830060538,
515.425830060538,
515.425830060538,
515.425830060538,
515.425830060538
)
)
head(dt)
#> date bird_ID Position_Burst_ID device_ID devicetype timestamp
#> 1 2022-05-02 350E 1 202927 ornitela 2022-05-02 00:03:58
#> 2 2022-05-02 350E 1 202927 ornitela 2022-05-02 00:03:59
#> 3 2022-05-02 350E 1 202927 ornitela 2022-05-02 00:03:59
#> 4 2022-05-02 350E 1 202927 ornitela 2022-05-02 00:03:59
#> 5 2022-05-02 350E 1 202927 ornitela 2022-05-02 00:03:59
#> 6 2022-05-02 350E 1 202927 ornitela 2022-05-02 00:03:59
#> sensortype GPS_ID altitude_msl latitude longitude ETRS89_y ETRS89_x ACC_ID
#> 1 GPS 132933 37 64.67062 40.15509 4926798 5717024 NA
#> 2 ACC NA NA NA NA NA NA 2591809
#> 3 ACC NA NA NA NA NA NA 2591809
#> 4 ACC NA NA NA NA NA NA 2591809
#> 5 ACC NA NA NA NA NA NA 2591809
#> 6 ACC NA NA NA NA NA NA 2591809
#> acc_x acc_y acc_z Position.Burst.ID.length ODBA latitude_m longitude_m
#> 1 NA NA NA 40 25.79385 7199134 -1183572
#> 2 -532 651 -993 40 25.79385 NA NA
#> 3 -873 815 -732 40 25.79385 NA NA
#> 4 -562 454 -97 40 25.79385 NA NA
#> 5 -667 410 121 40 25.79385 NA NA
#> 6 -728 625 -529 40 25.79385 NA NA
#> ODBA.median latitude.sd
#> 1 20.67261 515.4258
#> 2 20.67261 515.4258
#> 3 20.67261 515.4258
#> 4 20.67261 515.4258
#> 5 20.67261 515.4258
#> 6 20.67261 515.4258
Created on 2022-11-10 with reprex v2.0.2
And the following tibble:
locations <- tibble::tribble(
~bird_ID, ~latitude.mean, ~latitude.sd, ~longitude.mean, ~longitude.sd,
"350E", 68.4523207306103, 0.000197644989661478, 52.122216437273, 0.000714616838312651,
"351E", 70.6687025782724, 0.000217284948426624, 56.5641610797428, 0.000630191007951335,
"373E", 69.1366802085825, 0.000136364833873454, 49.9669871427575, 0.000195497865494354,
"375E", 67.8441058291665, 0.0236158366819919, 53.1982619611523, 0.0532271169756873,
"379E", 70.8926167721249, 9.62097052940481e-05, 55.4907074886489, 0.000214362572617005,
"383E", 68.454480489095, 3.83707104847611e-05, 51.5960375467936, 0.000212207883139862
)
head(locations)
#> # A tibble: 6 × 5
#> bird_ID latitude.mean latitude.sd longitude.mean longitude.sd
#> <chr> <dbl> <dbl> <dbl> <dbl>
#> 1 350E 68.5 0.000198 52.1 0.000715
#> 2 351E 70.7 0.000217 56.6 0.000630
#> 3 373E 69.1 0.000136 50.0 0.000195
#> 4 375E 67.8 0.0236 53.2 0.0532
#> 5 379E 70.9 0.0000962 55.5 0.000214
#> 6 383E 68.5 0.0000384 51.6 0.000212
Created on 2022-11-10 with reprex v2.0.2
The idea is that I want to create a geographical distance matrix around the possible locations from the locations
tibble. I want to be able to calculate the distance between the points in the latitude
and longitude
columns from the dt
. I was using the following piece of code that I thought worked fine, until I run each bird_ID
separately and the results changed.
distances <- dt %>%
group_by(bird_ID) %>%
select(longitude, latitude, date) %>%
mutate(Dist = distHaversine(cbind(locations$longitude.mean, locations$latitude.mean), cbind(longitude, latitude))) %>%
drop_na()
Any ideas of how should I approach this?
If you want to calc distance by bird_ID
, then you should likely left_join
the locations
onto the original data before calculating the distance.
distances <- dt %>%
left_join(locations, by = "bird_ID") %>% # NEW
group_by(bird_ID) %>%
select(bird_ID, # suppress an auto-add message
longitude, latitude, date,
longitude.mean, latitude.mean) %>% # added so we have the data per-bird_ID
mutate(
Dist = geosphere::distHaversine(
cbind(longitude.mean, latitude.mean), # removed `location$`
cbind(longitude, latitude))
) %>%
tidyr::drop_na()
# # A tibble: 2 x 7
# # Groups: bird_ID [1]
# bird_ID longitude latitude date longitude.mean latitude.mean Dist
# <chr> <dbl> <dbl> <chr> <dbl> <dbl> <dbl>
# 1 350E 40.2 64.7 2022-05-02 52.1 68.5 674850.
# 2 350E 40.2 64.7 2022-05-02 52.1 68.5 674840.