I apologize if this is redundant, but I have tried to look for solutions, and have not found anything that appears to be the answer to my question. So, I have time series data for a bunch of variables. Some variables (continuous) were recorded at 1000 Hz, and the other variables (categorical) were recorded at 500 Hz. I want to align them in R, such that the categorical variables have the same number of rows as the continuous variables. In other words, I would like to merge the two data frames and fill in the gaps automatically for the categorical variables, so that my data, which looks like this:
t_emg | CF_01 | CF_02 |
---|---|---|
0 | -0.01796875 | 0.01234375 |
0.001 | 0.001875 | 0.05125 |
0.002 | 0.00828125 | -0.04140625 |
0.003 | -0.0125 | -0.005625 |
0.004 | -0.00765625 | 0.00078125 |
0.005 | -0.01 | 0.02234375 |
0.006 | 0.01515625 | -0.01296875 |
0.007 | -0.01375 | -0.070625 |
0.008 | 0.0096875 | 0.1534375 |
0.009 | -0.05203125 | 0.02984375 |
0.01 | 0.1234375 | 0.00765625 |
0.011 | -0.0596875 | 0.005625 |
0.012 | -0.09296875 | -0.001875 |
0.013 | -0.0834375 | -0.1128125 |
0.014 | -0.009375 | 0.0090625 |
0.015 | -0.00609375 | 0.02 |
0.016 | 0.00890625 | 0.0209375 |
0.017 | 0.02671875 | 0.00375 |
0.018 | -0.001875 | -0.0003125 |
0.019 | -0.00171875 | 0.00171875 |
and this:
t_kinematics | Phase | stride_stop |
---|---|---|
0 | swing | A |
0.002 | swing | A |
0.004 | swing | A |
0.006 | swing | A |
0.008 | swing | A |
0.01 | swing | A |
0.012 | swing | A |
0.014 | stance | A |
0.016 | stance | A |
0.018 | stance | A |
will end up looking like this:
t_emg | CF_01 | CF_02 | t_kinematics | Phase | stride_stop |
---|---|---|---|---|---|
0 | -0.01796875 | 0.01234375 | 0 | swing | A |
0.001 | 0.001875 | 0.05125 | 0.001 | swing | A |
0.002 | 0.00828125 | -0.04140625 | 0.002 | swing | A |
0.003 | -0.0125 | -0.005625 | 0.003 | swing | A |
0.004 | -0.00765625 | 0.00078125 | 0.004 | swing | A |
0.005 | -0.01 | 0.02234375 | 0.005 | swing | A |
0.006 | 0.01515625 | -0.01296875 | 0.006 | swing | A |
0.007 | -0.01375 | -0.070625 | 0.007 | swing | A |
0.008 | 0.0096875 | 0.1534375 | 0.008 | swing | A |
0.009 | -0.05203125 | 0.02984375 | 0.009 | swing | A |
0.01 | 0.1234375 | 0.00765625 | 0.01 | swing | A |
0.011 | -0.0596875 | 0.005625 | 0.011 | swing | A |
0.012 | -0.09296875 | -0.001875 | 0.012 | swing | A |
0.013 | -0.0834375 | -0.1128125 | 0.013 | swing | A |
0.014 | -0.009375 | 0.0090625 | 0.014 | stance | A |
0.015 | -0.00609375 | 0.02 | 0.015 | stance | A |
0.016 | 0.00890625 | 0.0209375 | 0.016 | stance | A |
0.017 | 0.02671875 | 0.00375 | 0.017 | stance | A |
0.018 | -0.001875 | -0.0003125 | 0.018 | stance | A |
0.019 | -0.00171875 | 0.00171875 | 0.019 | stance | A |
Additionally, there are many NAs in the categorical data, and I would like those to be retained. All the information that I can find is more geared towards imbalanced data or imputation of missing values. It is also not really prediction, as I am really just interested in filling in the temporal gaps for the categorical variables.
From your single-frame, we really have two 3-column frames.
# Continuous (EMG) sample data: 20 rows, timestamps in steps of 0.001.
quux1 <- data.frame(
  t_emg = c(
    0, 0.001, 0.002, 0.003, 0.004, 0.005, 0.006, 0.007, 0.008, 0.009,
    0.01, 0.011, 0.012, 0.013, 0.014, 0.015, 0.016, 0.017, 0.018, 0.019
  ),
  CF_01 = c(
    -0.01796875, 0.001875, 0.00828125, -0.0125, -0.00765625,
    -0.01, 0.01515625, -0.01375, 0.0096875, -0.05203125,
    0.1234375, -0.0596875, -0.09296875, -0.0834375, -0.009375,
    -0.00609375, 0.00890625, 0.02671875, -0.001875, -0.00171875
  ),
  CF_02 = c(
    0.01234375, 0.05125, -0.04140625, -0.005625, 0.00078125,
    0.02234375, -0.01296875, -0.070625, 0.1534375, 0.02984375,
    0.00765625, 0.005625, -0.001875, -0.1128125, 0.0090625,
    0.02, 0.0209375, 0.00375, -0.0003125, 0.00171875
  )
)

# Categorical (kinematics) sample data: 10 rows, timestamps in steps of 0.002.
# Kept in structure() form so the row-name encoding and the `na.action`
# attribute from the original dump are carried over unchanged.
quux2 <- structure(
  list(
    t_kinematics = c(
      0, 0.002, 0.004, 0.006, 0.008, 0.01, 0.012, 0.014, 0.016, 0.018
    ),
    Phase = rep(c("swing", "stance"), times = c(7L, 3L)),
    stride_stop = rep("A", 10L)
  ),
  row.names = c(NA, 10L),
  class = "data.frame",
  na.action = structure(11:20, names = as.character(11:20), class = "omit")
)
Using this, we'll use `findInterval` to map the other columns to the time interval.
# Map every EMG timestamp to the index of the latest kinematics sample taken
# at or before it. findInterval() requires quux2$t_kinematics to be sorted in
# non-decreasing order.
intvl <- findInterval(quux1$t_emg, quux2$t_kinematics)
# An EMG time earlier than the first kinematics time yields 0, and indexing
# with 0 silently drops the element (shortening/misaligning the columns).
# Use NA instead so those rows stay in place with missing categorical values.
intvl[intvl == 0L] <- NA_integer_
# quux2[-1] (list-style subsetting) stays a data frame even when only one
# non-time column exists; quux2[, -1] would collapse to a bare vector.
# Plain indexing copies values as-is, so NAs already in quux2 are retained.
out <- cbind(quux1, lapply(quux2[-1], `[`, intvl))
out
# t_emg CF_01 CF_02 Phase stride_stop
# 1 0.000 -0.01796875 0.01234375 swing A
# 2 0.001 0.00187500 0.05125000 swing A
# 3 0.002 0.00828125 -0.04140625 swing A
# 4 0.003 -0.01250000 -0.00562500 swing A
# 5 0.004 -0.00765625 0.00078125 swing A
# 6 0.005 -0.01000000 0.02234375 swing A
# 7 0.006 0.01515625 -0.01296875 swing A
# 8 0.007 -0.01375000 -0.07062500 swing A
# 9 0.008 0.00968750 0.15343750 swing A
# 10 0.009 -0.05203125 0.02984375 swing A
# 11 0.010 0.12343750 0.00765625 swing A
# 12 0.011 -0.05968750 0.00562500 swing A
# 13 0.012 -0.09296875 -0.00187500 swing A
# 14 0.013 -0.08343750 -0.11281250 swing A
# 15 0.014 -0.00937500 0.00906250 stance A
# 16 0.015 -0.00609375 0.02000000 stance A
# 17 0.016 0.00890625 0.02093750 stance A
# 18 0.017 0.02671875 0.00375000 stance A
# 19 0.018 -0.00187500 -0.00031250 stance A
# 20 0.019 -0.00171875 0.00171875 stance A
If you really need `t_kinematics`, then just copy it over from `t_emg`:
# Reuse the EMG timestamps as the kinematics time column.
out[["t_kinematics"]] <- out[["t_emg"]]
We could also do a range-join:
library(dplyr)
# library(tidyr) # fill
quux1 |>
  # Upper bound of each EMG row's time window; subtracting 1e-9 keeps the
  # (inclusive) between() comparison effectively right-open.
  mutate(t_emg_next = lead(t_emg, default = Inf) - 1e-9) |>
  # Range-join only the kinematics timestamps, so the fill below never
  # touches the categorical columns.
  left_join(
    select(quux2, t_kinematics),
    join_by(between(y$t_kinematics, x$t_emg, x$t_emg_next))
  ) |>
  # Carry the last matched kinematics timestamp forward over the gaps.
  tidyr::fill(t_kinematics) |>
  # Look the categorical values up by timestamp. Genuine NAs in Phase or
  # stride_stop are copied through as-is rather than overwritten, which is
  # what filling those columns directly would do.
  left_join(quux2, join_by(t_kinematics), na_matches = "never") |>
  select(-t_emg_next)
# t_emg CF_01 CF_02 t_kinematics Phase stride_stop
# 1 0.000 -0.01796875 0.01234375 0.000 swing A
# 2 0.001 0.00187500 0.05125000 0.000 swing A
# 3 0.002 0.00828125 -0.04140625 0.002 swing A
# 4 0.003 -0.01250000 -0.00562500 0.002 swing A
# 5 0.004 -0.00765625 0.00078125 0.004 swing A
# 6 0.005 -0.01000000 0.02234375 0.004 swing A
# 7 0.006 0.01515625 -0.01296875 0.006 swing A
# 8 0.007 -0.01375000 -0.07062500 0.006 swing A
# 9 0.008 0.00968750 0.15343750 0.008 swing A
# 10 0.009 -0.05203125 0.02984375 0.008 swing A
# 11 0.010 0.12343750 0.00765625 0.010 swing A
# 12 0.011 -0.05968750 0.00562500 0.010 swing A
# 13 0.012 -0.09296875 -0.00187500 0.012 swing A
# 14 0.013 -0.08343750 -0.11281250 0.012 swing A
# 15 0.014 -0.00937500 0.00906250 0.014 stance A
# 16 0.015 -0.00609375 0.02000000 0.014 stance A
# 17 0.016 0.00890625 0.02093750 0.016 stance A
# 18 0.017 0.02671875 0.00375000 0.016 stance A
# 19 0.018 -0.00187500 -0.00031250 0.018 stance A
# 20 0.019 -0.00171875 0.00171875 0.018 stance A
The `1e-9` is to force the `between` comparison to be right-open; it may not be required with your data, but I wanted to be certain.