r datetime dplyr lubridate

Converting datetime format from chr to datetime in r


I have downloaded a data set from an agency website, and the datetime column (labelled 'date' in the df) in two of the files will not convert from chr to datetime. In the other files the datetime is fully numeric (for example "01/09/2023 00:01:00") rather than '01-oct-2023 00:01:00', and those convert using dmy_hm. It may be an issue with the month abbreviation being oct instead of Oct, but I'm really not sure. I've tried various methods but none are working for me, and I'd be grateful for any help.


library(datapasta)

dpasta(subset_Rain_10.23)


tibble::tribble(
                  ~stno, ~year, ~month, ~day, ~hour, ~minute,                  ~date, ~ispeed, ~speed, ~idir, ~dir, ~imaxgust, ~maxgust, ~idirmgust, ~dirmgust, ~itimemgust, ~timemgust, ~iminspeed, ~minspeed, ~ispeedstdev, ~speedstdev, ~idrybulb, ~drybulb, ~igrasstemp, ~grasstemp, ~it5cm, ~t5cm, ~it10cm, ~t10cm, ~it20cm, ~t20cm, ~it30cm, ~t30cm, ~it50cm, ~t50cm, ~it100cm, ~t100cm, ~irelhum, ~relhum, ~icbl,     ~cbl, ~isoltot, ~soltot, ~irain, ~rain,
                   1275,  2023,     10,    1,     0,       0, "01-oct-2023 00:00:00",       4,     NA,     4,   NA,         4,       NA,          4,        NA,           4,         NA,          4,        NA,            4,          NA,         0,    15.19,           0,      14.25,      0, 13.74,       0,  13.52,       0,  13.24,       0,  13.19,       0,  13.29,        0,   14.17,        0,    91.5,     0,  1006.34,        0,       0,      0,     0,
                   1275,  2023,     10,    1,     0,       1, "01-oct-2023 00:01:00",       4,     NA,     4,   NA,         4,       NA,          4,        NA,           4,         NA,          4,        NA,            4,          NA,         0,    15.19,           0,      14.25,      0, 13.75,       0,  13.51,       0,  13.24,       0,   13.2,       0,  13.29,        0,   14.17,        0,      92,     0, 1006.335,        0,       0,      0,     0,
                   1275,  2023,     10,    1,     0,       2, "01-oct-2023 00:02:00",       4,     NA,     4,   NA,         4,       NA,          4,        NA,           4,         NA,          4,        NA,            4,          NA,         0,    15.19,           0,      14.24,      0, 13.75,       0,  13.51,       0,  13.24,       0,  13.19,       0,  13.29,        0,   14.17,        0,    91.7,     0,  1006.32,        0,       0,      0,     0,
                   1275,  2023,     10,    1,     0,       3, "01-oct-2023 00:03:00",       4,     NA,     4,   NA,         4,       NA,          4,        NA,           4,         NA,          4,        NA,            4,          NA,         0,    15.19,           0,      14.23,      0, 13.75,       0,  13.52,       0,  13.24,       0,   13.2,       0,  13.29,        0,   14.17,        0,    91.6,     0,  1006.33,        0,       0,      0,     0,
                   1275,  2023,     10,    1,     0,       4, "01-oct-2023 00:04:00",       4,     NA,     4,   NA,         4,       NA,          4,        NA,           4,         NA,          4,        NA,            4,          NA,         0,    15.19,           0,      14.23,      0, 13.75,       0,  13.51,       0,  13.25,       0,   13.2,       0,  13.29,        0,   14.17,        0,    91.7,     0, 1006.328,        0,       0,      0,     0,
                   1275,  2023,     10,    1,     0,       5, "01-oct-2023 00:05:00",       4,     NA,     4,   NA,         4,       NA,          4,        NA,           4,         NA,          4,        NA,            4,          NA,         0,     15.2,           0,      14.21,      0, 13.75,       0,  13.52,       0,  13.25,       0,   13.2,       0,  13.29,        0,   14.16,        0,    91.6,     0, 1006.315,        0,       0,      0,     0
                  )

str(subset_Rain_10.23)  # date is chr

# Convert from chr to datetime
library(lubridate)
library(dplyr)
library(parsedate)


# Attempt 1: the format string has no dashes between day, month and year, no
# space before the time, and an extra ":" after %Y, so it does not match
# strings such as "01-oct-2023 00:00:00" and the result is NA.
# NOTE: the chained assignment also overwrites subset_Rain_10.23$date with that
# NA result, so the attempts below no longer see the original strings.
df1<-subset_Rain_10.23$date<-as.POSIXct(subset_Rain_10.23$date, format="%d%b%Y:%H:%M:%S")  # returns format NA
str(df1)


# Attempt 2: dmy_hms() is handed the whole 45-column data frame instead of the
# date column, and the result is assigned to a new column named
# subset_Rain_10.23 rather than to date.
df2<-subset_Rain_10.23%>%mutate(subset_Rain_10.23=lubridate::dmy_hms(subset_Rain_10.23)) # returns error in mutate, must be size 6 or 1, not 45

# Attempt 3: the format is not wrapped in quotes, so the parser reads %d% as an
# infix operator token instead of the start of a string.
df3<-as.POSIXct(strptime(subset_Rain_10.23$date, format=%d%b%Y %H:%M:%S, tz= "UTC")) # error unexpected special

# Attempt 4: the first argument is the whole data frame rather than the date
# column, and a literal date-time is passed in the position where
# parse_date_time() expects its orders (the formats to try).
df4<- parse_date_time(subset_Rain_10.23, "01-oct-2023 00:00:00")  # unknown formats supplied: oct

# Attempt 5: same unquoted-format problem as attempt 3, on top of the argument
# mix-up from attempt 4.
df5<-parse_date_time(subset_Rain_10.23, "01-oct-2023 00:00:00", %d%b%Y %H:%M:%S, tz= "UTC")  # error unexpected special


                                                   ^

Solution

  • It is important to ensure that your format string matches precisely the format of the column. You had omitted the dashes. You need:

    as.POSIXct(subset_Rain_10.23$date, format="%d-%b-%Y %H:%M:%S")
    # [1] "2023-10-01 00:00:00 BST" "2023-10-01 00:01:00 BST"
    # [3] "2023-10-01 00:02:00 BST" "2023-10-01 00:03:00 BST"
    # [5] "2023-10-01 00:04:00 BST" "2023-10-01 00:05:00 BST"
    

    The data you posted contains only one of the two date formats described in your question. Let's build an example that has both:

    x  <- c(
        "01-oct-2023 00:00:00",
        "01/10/2023 00:01:00"
    )
    

    As you're using lubridate you can explicitly specify the format with:

    lubridate::parse_date_time(x, orders = c(
        "%d-%b-%Y %H:%M:%S",
        "%d/%m/%Y %H:%M:%S"
    ))
    # [1] "2023-10-01 00:00:00 UTC" "2023-10-01 00:01:00 UTC"
    

    Alternatively, in base R:

    # Parse every element with each candidate format; elements that do not
    # match a given format come back as NA.
    fmt1 <- as.POSIXct(x, format = "%d-%b-%Y %H:%M:%S")
    fmt2 <- as.POSIXct(x, format = "%d/%m/%Y %H:%M:%S")
    # Fill the gaps by index rather than with ifelse(): ifelse() drops the
    # POSIXct class, and converting the resulting bare numbers back with
    # as.POSIXct() needs an explicit origin before R 4.3.0.
    unparsed <- is.na(fmt1)
    fmt1[unparsed] <- fmt2[unparsed]
    fmt1
    # [1] "2023-10-01 00:00:00 BST" "2023-10-01 00:01:00 BST"
    

    Note that because we haven't specified a time zone and your data does not carry one, as.POSIXct() assumes the local time zone, which in my case is BST. The lubridate approach assumes UTC. If the time zone of your locale is not the same as the one where your data was collected, you should specify it explicitly with the tz argument (for example tz = "UTC").