rgeospherestplanr

How to calculate trajectory length in R


I am trying to find the length between start and finish locations. So, which packages and function I should apply.

This is what the sample looks like: 

# A tibble: 10 x 5
   trip_id start_location     end_location       start_time end_time
     <int> <chr>              <chr>              <chr>      <chr>   
 1       1 13.6753,100.63453  13.65828,100.71631 00:05:24   00:41:14
 2       2 13.66348,100.71868 13.65258,100.71571 03:49:54   03:57:52
 3       3 13.63345,100.71102 13.63349,100.71096 04:14:52   04:53:52
 4       4 13.59653,100.70172 13.63433,100.71101 05:01:52   05:36:52
 5       5 13.57542,100.79453 13.59612,100.74922 05:57:11   06:15:52
 6       6 13.60123,100.71091 13.63241,100.71297 06:21:52   06:33:52
 7       7 13.60388,100.70617 13.60567,100.71292 06:43:32   06:58:52
 8   43456 13.94582,100.735   13.95905,100.62037 19:28:51   20:28:30
 9   43457 14.01229,100.66908 13.98712,100.61631 20:58:30   21:23:30
10   43458 13.79245,100.70217 13.90366,100.66788 22:09:30   22:40:30
my.df <- structure(list(trip_id = c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 43456L,43457L, 43458L), start_location = c("13.6753,100.63453", "13.66348,100.71868","13.63345,100.71102", "13.59653,100.70172", "13.57542,100.79453","13.60123,100.71091", "13.60388,100.70617", "13.94582,100.735","14.01229,100.66908", "13.79245,100.70217"), end_location = c("13.65828,100.71631","13.65258,100.71571", "13.63349,100.71096", "13.63433,100.71101","13.59612,100.74922", "13.63241,100.71297", "13.60567,100.71292","13.95905,100.62037", "13.98712,100.61631", "13.90366,100.66788"), start_time = c("00:05:24", "03:49:54", "04:14:52", "05:01:52","05:57:11", "06:21:52", "06:43:32", "19:28:51", "20:58:30", "22:09:30"), end_time = c("00:41:14", "03:57:52", "04:53:52", "05:36:52","06:15:52", "06:33:52", "06:58:52", "20:28:30", "21:23:30", "22:40:30")), row.names = c(NA, -10L), class = c("tbl_df", "tbl", "data.frame")) 

As my question is how can I calculate the length between start_location and end_location columns which are lat-long coordinates.

And my expected output is;

   trip_id start_location     end_location       start_time end_time   length (in meters) 
 1       1 13.6753,100.63453  13.65828,100.71631 00:05:24   00:41:14    120 
 2       2 13.66348,100.71868 13.65258,100.71571 03:49:54   03:57:52    500
 3       3 13.63345,100.71102 13.63349,100.71096 04:14:52   04:53:52    480
 4       4 13.59653,100.70172 13.63433,100.71101 05:01:52   05:36:52    7000
 5       5 13.57542,100.79453 13.59612,100.74922 05:57:11   06:15:52    1563
 6       6 13.60123,100.71091 13.63241,100.71297 06:21:52   06:33:52    7892
 7       7 13.60388,100.70617 13.60567,100.71292 06:43:32   06:58:52    200
 8   43456 13.94582,100.735   13.95905,100.62037 19:28:51   20:28:30    5863
 9   43457 14.01229,100.66908 13.98712,100.61631 20:58:30   21:23:30    1478
10   43458 13.79245,100.70217 13.90366,100.66788 22:09:30   22:40:30    2348

So, any packages can be used?

Thank you in advance.


Solution

  • Do this. Actually your coordinate vectors should be reversed before calculating geographical distance, because geosphere::disGeo() takes vector as longitude and latitude order. I have modified the two coordinates columns as list cols for sake of simplicity.

    library(tidyverse)
    library(geosphere)
    my.df %>% mutate(across(ends_with("location"), 
                            ~map(str_split(.x, ","), 
                                 ~ c(as.numeric(.x[2]), as.numeric(.x[1]))))) %>%
      mutate(length = map2_dbl(start_location, end_location, ~distGeo(.x, .y)))
    
    # A tibble: 10 x 6
       trip_id start_location end_location start_time end_time      len
         <int> <list>         <list>       <chr>      <chr>       <dbl>
     1       1 <dbl [2]>      <dbl [2]>    00:05:24   00:41:14  9046.  
     2       2 <dbl [2]>      <dbl [2]>    03:49:54   03:57:52  1248.  
     3       3 <dbl [2]>      <dbl [2]>    04:14:52   04:53:52     7.86
     4       4 <dbl [2]>      <dbl [2]>    05:01:52   05:36:52  4301.  
     5       5 <dbl [2]>      <dbl [2]>    05:57:11   06:15:52  5412.  
     6       6 <dbl [2]>      <dbl [2]>    06:21:52   06:33:52  3457.  
     7       7 <dbl [2]>      <dbl [2]>    06:43:32   06:58:52   757.  
     8   43456 <dbl [2]>      <dbl [2]>    19:28:51   20:28:30 12473.  
     9   43457 <dbl [2]>      <dbl [2]>    20:58:30   21:23:30  6345.  
    10   43458 <dbl [2]>      <dbl [2]>    22:09:30   22:40:30 12850.
    

    As per comments below, the following syntax will not coerce original lat/long columns to change

    my.df %>% mutate(across(ends_with("location"), 
                            ~map(str_split(.x, ","), 
                                 ~ c(as.numeric(.x[2]), as.numeric(.x[1]))),
                            .names = "{.col}_1")) %>%
      mutate(length = map2_dbl(start_location_1, end_location_1, ~distGeo(.x, .y))) %>%
      select(!ends_with("_1"))
    

    Or alternatively

    my.df %>% mutate(across(ends_with("location"), 
                            ~map(str_split(.x, ","), 
                                 ~ c(Long = as.numeric(.x[2]), Lat = as.numeric(.x[1]))))) %>%
      mutate(length = map2_dbl(start_location, end_location, ~distGeo(.x, .y))) %>%
      unnest_wider(start_location, names_sep = "_") %>%
      unnest_wider(end_location, names_sep = "_")
    
    # A tibble: 10 x 8
       trip_id start_location_Long start_location_Lat end_location_Long end_location_Lat start_time end_time   length
         <int>               <dbl>              <dbl>             <dbl>            <dbl> <chr>      <chr>       <dbl>
     1       1                101.               13.7              101.             13.7 00:05:24   00:41:14  9046.  
     2       2                101.               13.7              101.             13.7 03:49:54   03:57:52  1248.  
     3       3                101.               13.6              101.             13.6 04:14:52   04:53:52     7.86
     4       4                101.               13.6              101.             13.6 05:01:52   05:36:52  4301.  
     5       5                101.               13.6              101.             13.6 05:57:11   06:15:52  5412.  
     6       6                101.               13.6              101.             13.6 06:21:52   06:33:52  3457.  
     7       7                101.               13.6              101.             13.6 06:43:32   06:58:52   757.  
     8   43456                101.               13.9              101.             14.0 19:28:51   20:28:30 12473.  
     9   43457                101.               14.0              101.             14.0 20:58:30   21:23:30  6345.  
    10   43458                101.               13.8              101.             13.9 22:09:30   22:40:30 12850. 
    

    You may also convert the list col back to character type as desired

    my.df %>% mutate(across(ends_with("location"), 
                            ~map(str_split(.x, ","), 
                                 ~ c(Long = as.numeric(.x[2]), Lat = as.numeric(.x[1]))))) %>%
      mutate(length = map2_dbl(start_location, end_location, ~distGeo(.x, .y))) %>%
      mutate(across(ends_with("location"), 
                    ~map_chr(.x, ~ paste(.x[1], .x[2], sep = ", "))))
    
    # A tibble: 10 x 6
       trip_id start_location      end_location        start_time end_time   length
         <int> <chr>               <chr>               <chr>      <chr>       <dbl>
     1       1 100.63453, 13.6753  100.71631, 13.65828 00:05:24   00:41:14  9046.  
     2       2 100.71868, 13.66348 100.71571, 13.65258 03:49:54   03:57:52  1248.  
     3       3 100.71102, 13.63345 100.71096, 13.63349 04:14:52   04:53:52     7.86
     4       4 100.70172, 13.59653 100.71101, 13.63433 05:01:52   05:36:52  4301.  
     5       5 100.79453, 13.57542 100.74922, 13.59612 05:57:11   06:15:52  5412.  
     6       6 100.71091, 13.60123 100.71297, 13.63241 06:21:52   06:33:52  3457.  
     7       7 100.70617, 13.60388 100.71292, 13.60567 06:43:32   06:58:52   757.  
     8   43456 100.735, 13.94582   100.62037, 13.95905 19:28:51   20:28:30 12473.  
     9   43457 100.66908, 14.01229 100.61631, 13.98712 20:58:30   21:23:30  6345.  
    10   43458 100.70217, 13.79245 100.66788, 13.90366 22:09:30   22:40:30 12850.