r difftime

difftime returns 0 even though there is a clear time difference


difftime always returns 0, even though the start and end times are different.

# Note: started_at and ended_at are character (chr) columns at this point.
Combined_data$ride_length <- difftime(Combined_data$ended_at, Combined_data$started_at)

/////////////////////////////////////////////////////////////////////////////////////////

str(Combined_data)
'data.frame':   4073561 obs. of  19 variables:
 $ ride_id           : chr  "8CD5DE2C2B6C4CFC" "9A191EB2C751D85D" "F37D14B0B5659BCF" "C41237B506E85FA1" ...
 $ rideable_type     : chr  "docked_bike" "docked_bike" "docked_bike" "docked_bike" ...
 $ started_at        : chr  "2020-06-13 23:24:48" "2020-06-26 07:26:10" "2020-06-23 17:12:41" "2020-06-20 01:09:35" ...
 $ ended_at          : chr  "2020-06-13 23:36:55" "2020-06-26 07:31:58" "2020-06-23 17:21:14" "2020-06-20 01:28:24" ...
 $ start_station_name: chr  "Wilton Ave & Belmont Ave" "Federal St & Polk St" "Daley Center Plaza" "Broadway & Cornelia Ave" ...
 $ start_station_id  : chr  "117" "41" "81" "303" ...
 $ end_station_name  : chr  "Damen Ave & Clybourn Ave" "Daley Center Plaza" "State St & Harrison St" "Broadway & Berwyn Ave" ...
 $ end_station_id    : chr  "163" "81" "5" "294" ...
 $ start_lat         : num  41.9 41.9 41.9 41.9 41.9 ...
 $ start_lng         : num  -87.7 -87.6 -87.6 -87.6 -87.7 ...
 $ end_lat           : num  41.9 41.9 41.9 42 41.9 ...
 $ end_lng           : num  -87.7 -87.6 -87.6 -87.7 -87.7 ...
 $ member_casual     : chr  "casual" "member" "member" "casual" ...
 $ date              : Date, format: "2020-06-13" "2020-06-26" "2020-06-23" "2020-06-20" ...
 $ month             : chr  "Jun" "Jun" "Jun" "Jun" ...
 $ year              : chr  "2020" "2020" "2020" "2020" ...
 $ day               : chr  "13" "26" "23" "20" ...
 $ day_of_week       : chr  "Saturday" "Friday" "Tuesday" "Saturday" ...
 $ ride_length       : 'difftime' num  0 0 0 0 ...

Solution

  • The timestamp columns are stored as character (chr in the str() output), so convert them to POSIXct with as.POSIXct first and then use difftime.

    # Parse the character timestamps into POSIXct before subtracting them.
    # An explicit format avoids relying on as.POSIXct()'s format guessing;
    # strings that do not match the format become NA.
    Combined_data$ended_at <- as.POSIXct(
      Combined_data$ended_at, tz = "UTC", format = "%Y-%m-%d %H:%M:%OS"
    )
    Combined_data$started_at <- as.POSIXct(
      Combined_data$started_at, tz = "UTC", format = "%Y-%m-%d %H:%M:%OS"
    )
    # difftime() defaults to units = "auto"; pass e.g. units = "secs" if a
    # fixed unit is needed.
    Combined_data$ride_length <- difftime(
      Combined_data$ended_at, Combined_data$started_at
    )
    

    If you prefer a tidyverse alternative:

    library(dplyr)
    library(lubridate)
    
    # Parse both timestamp columns with lubridate, then take their difference.
    Combined_data <- Combined_data %>%
      mutate(
        started_at = ymd_hms(started_at),
        ended_at = ymd_hms(ended_at),
        ride_length = difftime(ended_at, started_at)
      )