I am trying to find the length between start and finish locations. So, which packages and function I should apply.
This is what the sample looks like:
# A tibble: 10 x 5
trip_id start_location end_location start_time end_time
<int> <chr> <chr> <chr> <chr>
1 1 13.6753,100.63453 13.65828,100.71631 00:05:24 00:41:14
2 2 13.66348,100.71868 13.65258,100.71571 03:49:54 03:57:52
3 3 13.63345,100.71102 13.63349,100.71096 04:14:52 04:53:52
4 4 13.59653,100.70172 13.63433,100.71101 05:01:52 05:36:52
5 5 13.57542,100.79453 13.59612,100.74922 05:57:11 06:15:52
6 6 13.60123,100.71091 13.63241,100.71297 06:21:52 06:33:52
7 7 13.60388,100.70617 13.60567,100.71292 06:43:32 06:58:52
8 43456 13.94582,100.735 13.95905,100.62037 19:28:51 20:28:30
9 43457 14.01229,100.66908 13.98712,100.61631 20:58:30 21:23:30
10 43458 13.79245,100.70217 13.90366,100.66788 22:09:30 22:40:30
my.df <- structure(list(trip_id = c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 43456L,43457L, 43458L), start_location = c("13.6753,100.63453", "13.66348,100.71868","13.63345,100.71102", "13.59653,100.70172", "13.57542,100.79453","13.60123,100.71091", "13.60388,100.70617", "13.94582,100.735","14.01229,100.66908", "13.79245,100.70217"), end_location = c("13.65828,100.71631","13.65258,100.71571", "13.63349,100.71096", "13.63433,100.71101","13.59612,100.74922", "13.63241,100.71297", "13.60567,100.71292","13.95905,100.62037", "13.98712,100.61631", "13.90366,100.66788"), start_time = c("00:05:24", "03:49:54", "04:14:52", "05:01:52","05:57:11", "06:21:52", "06:43:32", "19:28:51", "20:58:30", "22:09:30"), end_time = c("00:41:14", "03:57:52", "04:53:52", "05:36:52","06:15:52", "06:33:52", "06:58:52", "20:28:30", "21:23:30", "22:40:30")), row.names = c(NA, -10L), class = c("tbl_df", "tbl", "data.frame"))
As my question is how can I calculate the length between start_location and end_location columns which are lat-long coordinates.
And my expected output is;
trip_id start_location end_location start_time end_time length (in meters)
1 1 13.6753,100.63453 13.65828,100.71631 00:05:24 00:41:14 120
2 2 13.66348,100.71868 13.65258,100.71571 03:49:54 03:57:52 500
3 3 13.63345,100.71102 13.63349,100.71096 04:14:52 04:53:52 480
4 4 13.59653,100.70172 13.63433,100.71101 05:01:52 05:36:52 7000
5 5 13.57542,100.79453 13.59612,100.74922 05:57:11 06:15:52 1563
6 6 13.60123,100.71091 13.63241,100.71297 06:21:52 06:33:52 7892
7 7 13.60388,100.70617 13.60567,100.71292 06:43:32 06:58:52 200
8 43456 13.94582,100.735 13.95905,100.62037 19:28:51 20:28:30 5863
9 43457 14.01229,100.66908 13.98712,100.61631 20:58:30 21:23:30 1478
10 43458 13.79245,100.70217 13.90366,100.66788 22:09:30 22:40:30 2348
So, any packages can be used?
Thank you in advance.
Do this. Actually your coordinate vectors should be reversed before calculating geographical distance, because geosphere::disGeo()
takes vector as longitude and latitude order. I have modified the two coordinates columns as list cols for sake of simplicity.
library(tidyverse)
library(geosphere)
my.df %>% mutate(across(ends_with("location"),
~map(str_split(.x, ","),
~ c(as.numeric(.x[2]), as.numeric(.x[1]))))) %>%
mutate(length = map2_dbl(start_location, end_location, ~distGeo(.x, .y)))
# A tibble: 10 x 6
trip_id start_location end_location start_time end_time len
<int> <list> <list> <chr> <chr> <dbl>
1 1 <dbl [2]> <dbl [2]> 00:05:24 00:41:14 9046.
2 2 <dbl [2]> <dbl [2]> 03:49:54 03:57:52 1248.
3 3 <dbl [2]> <dbl [2]> 04:14:52 04:53:52 7.86
4 4 <dbl [2]> <dbl [2]> 05:01:52 05:36:52 4301.
5 5 <dbl [2]> <dbl [2]> 05:57:11 06:15:52 5412.
6 6 <dbl [2]> <dbl [2]> 06:21:52 06:33:52 3457.
7 7 <dbl [2]> <dbl [2]> 06:43:32 06:58:52 757.
8 43456 <dbl [2]> <dbl [2]> 19:28:51 20:28:30 12473.
9 43457 <dbl [2]> <dbl [2]> 20:58:30 21:23:30 6345.
10 43458 <dbl [2]> <dbl [2]> 22:09:30 22:40:30 12850.
As per comments below, the following syntax will not coerce original lat/long columns to change
my.df %>% mutate(across(ends_with("location"),
~map(str_split(.x, ","),
~ c(as.numeric(.x[2]), as.numeric(.x[1]))),
.names = "{.col}_1")) %>%
mutate(length = map2_dbl(start_location_1, end_location_1, ~distGeo(.x, .y))) %>%
select(!ends_with("_1"))
Or alternatively
my.df %>% mutate(across(ends_with("location"),
~map(str_split(.x, ","),
~ c(Long = as.numeric(.x[2]), Lat = as.numeric(.x[1]))))) %>%
mutate(length = map2_dbl(start_location, end_location, ~distGeo(.x, .y))) %>%
unnest_wider(start_location, names_sep = "_") %>%
unnest_wider(end_location, names_sep = "_")
# A tibble: 10 x 8
trip_id start_location_Long start_location_Lat end_location_Long end_location_Lat start_time end_time length
<int> <dbl> <dbl> <dbl> <dbl> <chr> <chr> <dbl>
1 1 101. 13.7 101. 13.7 00:05:24 00:41:14 9046.
2 2 101. 13.7 101. 13.7 03:49:54 03:57:52 1248.
3 3 101. 13.6 101. 13.6 04:14:52 04:53:52 7.86
4 4 101. 13.6 101. 13.6 05:01:52 05:36:52 4301.
5 5 101. 13.6 101. 13.6 05:57:11 06:15:52 5412.
6 6 101. 13.6 101. 13.6 06:21:52 06:33:52 3457.
7 7 101. 13.6 101. 13.6 06:43:32 06:58:52 757.
8 43456 101. 13.9 101. 14.0 19:28:51 20:28:30 12473.
9 43457 101. 14.0 101. 14.0 20:58:30 21:23:30 6345.
10 43458 101. 13.8 101. 13.9 22:09:30 22:40:30 12850.
You may also convert the list col back to character type as desired
my.df %>% mutate(across(ends_with("location"),
~map(str_split(.x, ","),
~ c(Long = as.numeric(.x[2]), Lat = as.numeric(.x[1]))))) %>%
mutate(length = map2_dbl(start_location, end_location, ~distGeo(.x, .y))) %>%
mutate(across(ends_with("location"),
~map_chr(.x, ~ paste(.x[1], .x[2], sep = ", "))))
# A tibble: 10 x 6
trip_id start_location end_location start_time end_time length
<int> <chr> <chr> <chr> <chr> <dbl>
1 1 100.63453, 13.6753 100.71631, 13.65828 00:05:24 00:41:14 9046.
2 2 100.71868, 13.66348 100.71571, 13.65258 03:49:54 03:57:52 1248.
3 3 100.71102, 13.63345 100.71096, 13.63349 04:14:52 04:53:52 7.86
4 4 100.70172, 13.59653 100.71101, 13.63433 05:01:52 05:36:52 4301.
5 5 100.79453, 13.57542 100.74922, 13.59612 05:57:11 06:15:52 5412.
6 6 100.71091, 13.60123 100.71297, 13.63241 06:21:52 06:33:52 3457.
7 7 100.70617, 13.60388 100.71292, 13.60567 06:43:32 06:58:52 757.
8 43456 100.735, 13.94582 100.62037, 13.95905 19:28:51 20:28:30 12473.
9 43457 100.66908, 14.01229 100.61631, 13.98712 20:58:30 21:23:30 6345.
10 43458 100.70217, 13.79245 100.66788, 13.90366 22:09:30 22:40:30 12850.