I have a rather large data.table and I would like to create a new column that searches for different rows of the same data.table based on a filter defined in the current row.
See example below:
library(data.table)
dt<-data.table(a=seq.Date(as.Date("2023-11-01"), as.Date("2023-11-30"), by="day"),
b=c(1:27, 46, 34, 101))
dt$a_m1<-c(seq.Date(as.Date("2023-11-16"), as.Date("2023-11-30"), by="day"), seq.Date(as.Date("2023-11-01"), as.Date("2023-11-15"), by="day"))
a b a_m1
1: 2023-11-01 1 2023-11-16
2: 2023-11-02 2 2023-11-17
3: 2023-11-03 3 2023-11-18
4: 2023-11-04 4 2023-11-19
5: 2023-11-05 5 2023-11-20
6: 2023-11-06 6 2023-11-21
7: 2023-11-07 7 2023-11-22
8: 2023-11-08 8 2023-11-23
9: 2023-11-09 9 2023-11-24
10: 2023-11-10 10 2023-11-25
11: 2023-11-11 11 2023-11-26
12: 2023-11-12 12 2023-11-27
13: 2023-11-13 13 2023-11-28
14: 2023-11-14 14 2023-11-29
15: 2023-11-15 15 2023-11-30
16: 2023-11-16 16 2023-11-01
17: 2023-11-17 17 2023-11-02
18: 2023-11-18 18 2023-11-03
19: 2023-11-19 19 2023-11-04
20: 2023-11-20 20 2023-11-05
21: 2023-11-21 21 2023-11-06
22: 2023-11-22 22 2023-11-07
23: 2023-11-23 23 2023-11-08
24: 2023-11-24 24 2023-11-09
25: 2023-11-25 25 2023-11-10
26: 2023-11-26 26 2023-11-11
27: 2023-11-27 27 2023-11-12
28: 2023-11-28 46 2023-11-13
29: 2023-11-29 34 2023-11-14
30: 2023-11-30 101 2023-11-15
The desired output would be:
a b a_m1 b_m1
1: 2023-11-01 1 2023-11-16 16
2: 2023-11-02 2 2023-11-17 17
3: 2023-11-03 3 2023-11-18 18
4: 2023-11-04 4 2023-11-19 19
5: 2023-11-05 5 2023-11-20 20
6: 2023-11-06 6 2023-11-21 21
7: 2023-11-07 7 2023-11-22 22
8: 2023-11-08 8 2023-11-23 23
9: 2023-11-09 9 2023-11-24 24
10: 2023-11-10 10 2023-11-25 25
11: 2023-11-11 11 2023-11-26 26
12: 2023-11-12 12 2023-11-27 27
13: 2023-11-13 13 2023-11-28 46
14: 2023-11-14 14 2023-11-29 34
15: 2023-11-15 15 2023-11-30 101
16: 2023-11-16 16 2023-11-01 1
17: 2023-11-17 17 2023-11-02 2
18: 2023-11-18 18 2023-11-03 3
19: 2023-11-19 19 2023-11-04 4
20: 2023-11-20 20 2023-11-05 5
21: 2023-11-21 21 2023-11-06 6
22: 2023-11-22 22 2023-11-07 7
23: 2023-11-23 23 2023-11-08 8
24: 2023-11-24 24 2023-11-09 9
25: 2023-11-25 25 2023-11-10 10
26: 2023-11-26 26 2023-11-11 11
27: 2023-11-27 27 2023-11-12 12
28: 2023-11-28 46 2023-11-13 13
29: 2023-11-29 34 2023-11-14 14
30: 2023-11-30 101 2023-11-15 15
Help with this would be much appreciated. I have tried code like the following:
dt[,b_m1:=dt[a_m1==dt$a]$b,]
But to no avail.
I think you want a self-join assignment.
dt[dt, b_m1 := i.b, on = .(a == a_m1)]
# a b a_m1 b_m1
# <Date> <num> <Date> <num>
# 1: 2023-11-01 1 2023-11-16 16
# 2: 2023-11-02 2 2023-11-17 17
# 3: 2023-11-03 3 2023-11-18 18
# 4: 2023-11-04 4 2023-11-19 19
# 5: 2023-11-05 5 2023-11-20 20
# 6: 2023-11-06 6 2023-11-21 21
# 7: 2023-11-07 7 2023-11-22 22
# 8: 2023-11-08 8 2023-11-23 23
# 9: 2023-11-09 9 2023-11-24 24
# 10: 2023-11-10 10 2023-11-25 25
# ---
# 21: 2023-11-21 21 2023-11-06 6
# 22: 2023-11-22 22 2023-11-07 7
# 23: 2023-11-23 23 2023-11-08 8
# 24: 2023-11-24 24 2023-11-09 9
# 25: 2023-11-25 25 2023-11-10 10
# 26: 2023-11-26 26 2023-11-11 11
# 27: 2023-11-27 27 2023-11-12 12
# 28: 2023-11-28 46 2023-11-13 13
# 29: 2023-11-29 34 2023-11-14 14
# 30: 2023-11-30 101 2023-11-15 15
all.equal(dt, desired)
# [1] TRUE
In general this is a merge/join, and a good data.table
-specific Q/A here on StackOverflow is Left join using data.table. For the concept in more general terms, see
Data:
# calculated from the question
dt <- data.table::as.data.table(structure(list(a = structure(c(19662, 19663, 19664, 19665, 19666, 19667, 19668, 19669, 19670, 19671, 19672, 19673, 19674, 19675, 19676, 19677, 19678, 19679, 19680, 19681, 19682, 19683, 19684, 19685, 19686, 19687, 19688, 19689, 19690, 19691), class = "Date"), b = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 46, 34, 101), a_m1 = structure(c(19677, 19678, 19679, 19680, 19681, 19682, 19683, 19684, 19685, 19686, 19687, 19688, 19689, 19690, 19691, 19662, 19663, 19664, 19665, 19666, 19667, 19668, 19669, 19670, 19671, 19672, 19673, 19674, 19675, 19676), class = "Date"), b_m1 = c(16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 46, 34, 101, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)), row.names = c(NA, -30L), class = c("data.table", "data.frame")))
# copied and Date-ified from the question text
desired <- data.table::as.data.table(structure(list(a = structure(c(19662, 19663, 19664, 19665, 19666, 19667, 19668, 19669, 19670, 19671, 19672, 19673, 19674, 19675, 19676, 19677, 19678, 19679, 19680, 19681, 19682, 19683, 19684, 19685, 19686, 19687, 19688, 19689, 19690, 19691), class = "Date"), b = c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 46L, 34L, 101L), a_m1 = structure(c(19677, 19678, 19679, 19680, 19681, 19682, 19683, 19684, 19685, 19686, 19687, 19688, 19689, 19690, 19691, 19662, 19663, 19664, 19665, 19666, 19667, 19668, 19669, 19670, 19671, 19672, 19673, 19674, 19675, 19676), class = "Date"), b_m1 = c(16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 46L, 34L, 101L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L)), row.names = c(NA, -30L), class = c("data.table", "data.frame")))