difftime() always returns 0, even though the start and end times are different.
# Ride duration computed as ended_at minus started_at.
# NOTE(review): per the str() output in this file, started_at and ended_at are
# character columns at this point, so difftime() has to convert them
# implicitly. The reported all-zero result presumably comes from that implicit
# conversion losing the time of day - confirm.
Combined_data$ride_length <- difftime(Combined_data$ended_at, Combined_data$started_at)
/////////////////////////////////////////////////////////////////////////////////////////
str(Combined_data)
'data.frame': 4073561 obs. of 19 variables:
$ ride_id : chr "8CD5DE2C2B6C4CFC" "9A191EB2C751D85D" "F37D14B0B5659BCF" "C41237B506E85FA1" ...
$ rideable_type : chr "docked_bike" "docked_bike" "docked_bike" "docked_bike" ...
$ started_at : chr "2020-06-13 23:24:48" "2020-06-26 07:26:10" "2020-06-23 17:12:41" "2020-06-20 01:09:35" ...
$ ended_at : chr "2020-06-13 23:36:55" "2020-06-26 07:31:58" "2020-06-23 17:21:14" "2020-06-20 01:28:24" ...
$ start_station_name: chr "Wilton Ave & Belmont Ave" "Federal St & Polk St" "Daley Center Plaza" "Broadway & Cornelia Ave" ...
$ start_station_id : chr "117" "41" "81" "303" ...
$ end_station_name : chr "Damen Ave & Clybourn Ave" "Daley Center Plaza" "State St & Harrison St" "Broadway & Berwyn Ave" ...
$ end_station_id : chr "163" "81" "5" "294" ...
$ start_lat : num 41.9 41.9 41.9 41.9 41.9 ...
$ start_lng : num -87.7 -87.6 -87.6 -87.6 -87.7 ...
$ end_lat : num 41.9 41.9 41.9 42 41.9 ...
$ end_lng : num -87.7 -87.6 -87.6 -87.7 -87.7 ...
$ member_casual : chr "casual" "member" "member" "casual" ...
$ date : Date, format: "2020-06-13" "2020-06-26" "2020-06-23" "2020-06-20" ...
$ month : chr "Jun" "Jun" "Jun" "Jun" ...
$ year : chr "2020" "2020" "2020" "2020" ...
$ day : chr "13" "26" "23" "20" ...
$ day_of_week : chr "Saturday" "Friday" "Tuesday" "Saturday" ...
$ ride_length : 'difftime' num 0 0 0 0 ...
You can use as.POSIXct to convert the character columns to POSIXct type and then use difftime.
# Convert the character timestamps to POSIXct before subtracting them.
# The format is spelled out so that no format guessing takes place: a value
# that does not match becomes NA instead of affecting how the rest of the
# column is parsed. tz = "UTC" keeps the parse independent of the session's
# local time zone.
Combined_data$ended_at <- as.POSIXct(
  Combined_data$ended_at,
  format = "%Y-%m-%d %H:%M:%S",
  tz = "UTC"
)
Combined_data$started_at <- as.POSIXct(
  Combined_data$started_at,
  format = "%Y-%m-%d %H:%M:%S",
  tz = "UTC"
)

# Ride duration as a difftime. The units are fixed to seconds because the
# default units = "auto" is chosen from the data, so the same code could
# otherwise report seconds, minutes, hours or days depending on the rows.
Combined_data$ride_length <- difftime(
  Combined_data$ended_at,
  Combined_data$started_at,
  units = "secs"
)
If you need a tidyverse alternative:
library(dplyr)
library(lubridate)
# Parse both timestamp columns with lubridate::ymd_hms() and derive the ride
# duration in the same mutate() call; ride_length is computed from the
# already-parsed columns because mutate() evaluates its arguments in order.
Combined_data <- Combined_data %>%
  mutate(
    across(c(started_at, ended_at), ymd_hms),
    # Fixed units: the default units = "auto" is picked from the data, so the
    # unit of ride_length would otherwise vary with the rows present.
    ride_length = difftime(ended_at, started_at, units = "secs")
  )