I have a strage case of grouping query.
I need to write a sql server query that groups rows based on the difference between datetime column value from a previous row.
That's the query:
select
[id],
[timestamp],
LAG([timestamp]) OVER (ORDER BY [id]) as [prev_timestamp],
datediff(second, [timestamp], LAG([timestamp]) OVER (ORDER BY [id])) as [timestamp_diff]
from orders
order by Id
Which returns results something like this:
id | timestamp | prev_timestamp | diff | count |
---|---|---|---|---|
10058256 | 2025-01-25 22:02:45.2033333 | NULL | NULL | |
10058257 | 2025-01-25 22:02:57.3633333 | 2025-01-25 22:02:45.2033333 | -12 | |
10058258 | 2025-01-25 22:03:03.0166667 | 2025-01-25 22:02:57.3633333 | -6 | 1 |
10058259 | 2025-01-25 22:03:26.2400000 | 2025-01-25 22:03:03.0166667 | -23 | |
10058260 | 2025-01-25 22:33:44.4133333 | 2025-01-25 22:03:26.2400000 | -16 | |
10058285 | 2025-01-25 22:33:53.4666667 | 2025-01-25 22:33:44.4133333 | -9 | 1 |
10058289 | 2025-01-25 22:48:03.6533333 | 2025-01-25 22:33:53.4666667 | -850 | |
10058951 | 2025-01-26 09:30:57.0966667 | 2025-01-25 22:48:03.6533333 | -38574 | |
10058952 | 2025-01-27 15:13:00.4000000 | 2025-01-26 09:30:57.0966667 | -106923 | |
10058953 | 2025-02-03 15:45:11.5200000 | 2025-01-27 15:13:00.4000000 | -606731 | |
10059619 | 2025-01-26 14:14:46.4900000 | 2025-02-03 15:45:11.5200000 | 696625 | |
10059620 | 2025-02-01 13:51:31.8666667 | 2025-01-26 14:14:46.4900000 | -517005 | |
10059727 | 2025-01-26 15:22:34.3700000 | 2025-02-01 13:51:31.8666667 | 512937 | |
10059728 | 2025-01-26 15:22:40.3666667 | 2025-01-26 15:22:34.3700000 | -6 | 2 |
10059729 | 2025-01-26 15:22:45.8300000 | 2025-01-26 15:22:40.3666667 | -5 | |
10060627 | 2025-02-02 09:31:24.8366667 | 2025-01-26 15:22:45.8300000 | -583719 | |
10062205 | 2025-01-27 09:31:03.3066667 | 2025-02-02 09:31:24.8366667 | 518421 |
Couple of things to notice (also regarding the real situation):
I've attempted various approaches using LEAD(), LAG(), and cursors, but due to the dynamic nature of the groups, I haven't been able to get it to work correctly.
Also i saw some questions already here on this, some on datediff, some on grouping dates, but none of them are what i'm looikng for.
This feels a little heavy handed, but I believe it captures your criteria:
with cte AS
(
select
[id],
[timestamp],
LAG([timestamp]) OVER (ORDER BY [id]) as [prev_timestamp],
datediff(second, [timestamp], LAG([timestamp]) OVER (ORDER BY [id])) as [timestamp_diff]
from orders
)
,group_start AS
(
SELECT cte.*,
--establish the start of a group
CASE WHEN timestamp_diff between -10 and -1
and (LAG(timestamp_diff) OVER (ORDER BY id) < -10
or LAG(timestamp_diff) OVER (ORDER BY id) > -1)
THEN 1
END as group_start_ind
FROM cte
)
,group_ind AS
(
SELECT group_start.*,
--give each group an id for final partitioning
sum(group_start_ind) OVER (ORDER BY ID ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) as group_id
FROM group_start
)
SELECT group_ind.*,
--for the first member of each group/partition, write the count of records meeting the condition
CASE WHEN group_start_ind IS NOT NULL
THEN count(CASE WHEN timestamp_diff BETWEEN -10 AND -1 THEN 1 END) OVER (PARTITION BY group_id)
END as outpt
FROM group_ind
ORDER BY id
id | timestamp | prev_timestamp | timestamp_diff | group_start_ind | group_id | outpt |
---|---|---|---|---|---|---|
10058256 | 2025-01-25 22:02:45.2033333 | null | null | null | null | null |
10058257 | 2025-01-25 22:02:57.3633333 | 2025-01-25 22:02:45.2033333 | -12 | null | null | null |
10058258 | 2025-01-25 22:03:03.0166667 | 2025-01-25 22:02:57.3633333 | -6 | 1 | 1 | 1 |
10058259 | 2025-01-25 22:03:26.2400000 | 2025-01-25 22:03:03.0166667 | -23 | null | 1 | null |
10058260 | 2025-01-25 22:03:42.4133333 | 2025-01-25 22:03:26.2400000 | -16 | null | 1 | null |
10058285 | 2025-01-25 22:03:51.4666667 | 2025-01-25 22:03:42.4133333 | -9 | 1 | 2 | 1 |
10058289 | 2025-01-25 22:48:03.6533333 | 2025-01-25 22:03:51.4666667 | -2652 | null | 2 | null |
10058951 | 2025-01-26 09:30:57.0966667 | 2025-01-25 22:48:03.6533333 | -38574 | null | 2 | null |
10058952 | 2025-01-27 15:13:00.4000000 | 2025-01-26 09:30:57.0966667 | -106923 | null | 2 | null |
10058953 | 2025-02-03 15:45:11.5200000 | 2025-01-27 15:13:00.4000000 | -606731 | null | 2 | null |
10059619 | 2025-01-26 14:14:46.4900000 | 2025-02-03 15:45:11.5200000 | 696625 | null | 2 | null |
10059620 | 2025-02-01 13:51:31.8666667 | 2025-01-26 14:14:46.4900000 | -517005 | null | 2 | null |
10059727 | 2025-01-26 15:22:34.3700000 | 2025-02-01 13:51:31.8666667 | 512937 | null | 2 | null |
10059728 | 2025-01-26 15:22:40.3666667 | 2025-01-26 15:22:34.3700000 | -6 | 1 | 3 | 2 |
10059729 | 2025-01-26 15:22:45.8300000 | 2025-01-26 15:22:40.3666667 | -5 | null | 3 | null |
10060627 | 2025-02-02 09:31:24.8366667 | 2025-01-26 15:22:45.8300000 | -583719 | null | 3 | null |
10062205 | 2025-01-27 09:31:03.3066667 | 2025-02-02 09:31:24.8366667 | 518421 | null | 3 | null |