Using kql, I'm trying to get records that overlap getting the min start date and the max start date for each group of records that overlap. The "g_id" field signifies a container of "s_id"s each having a datetime range (start and end). I want to get the start and end times for each "g_id" where any overlapping time ranges within a g_id will be a single record (min(start_time) and max(end_time)) of all the overlapping record and if there is a gap where a record exists but no other record overlaps it, then it should be a second record in the result (see g_id == 200 below).
let T = datatable(g_id:int, s_id:int, s_start_time:datetime, s_end_time:datetime)
[
100, 1, datetime('2025-05-01 5:23:00'), datetime('2025-05-01 5:44:00'),
100, 2, datetime('2025-05-01 5:00:00'), datetime('2025-05-01 5:10:00'),
100, 3, datetime('2025-05-01 5:05:00'), datetime('2025-05-01 5:20:00'),
100, 4, datetime('2025-05-01 5:10:00'), datetime('2025-05-01 5:30:00'),
200, 1, datetime('2025-05-01 5:05:00'), datetime('2025-05-01 5:15:00'),
100, 1, datetime('2025-05-01 5:30:00'), datetime('2025-05-01 5:35:00'),
100, 2, datetime('2025-05-01 5:40:00'), datetime('2025-05-01 5:50:00'),
100, 3, datetime('2025-05-01 5:45:00'), datetime('2025-05-01 5:47:00'),
200, 2, datetime('2025-05-01 5:35:00'), datetime('2025-05-01 5:40:00'),
200, 3, datetime('2025-05-01 5:37:00'), datetime('2025-05-01 5:38:00'),
230, 4, datetime('2025-05-01 5:25:00'), datetime('2025-05-01 5:45:00'),
];
T
| order by g_id asc, s_id asc, s_start_time asc
Result should be below:
g_id | s_start_time | s_end_time |
---|---|---|
100 | 2025-05-01 5:00:00 | 2025-05-01 5:50:00 |
200 | 2025-05-01 5:05:00 | 2025-05-01 5:15:00 |
200 | 2025-05-01 5:35:00 | 2025-05-01 5:40:00 |
230 | 2025-05-01 5:25:00 | 2025-05-1 5:45:00 |
I figured it out. What I ended up doing is creating a single record per min based on the min and max dates per g_id. Then I do the same except only between the date ranges for each g_id. Let I can join the two together on g_id and timestamp. Then do a dense ranking function to group the overlap ranges and then filter out the non-overlaps.
The solution works but there probably is a more optimized way to do this.
let T = datatable(g_id:int, s_id:int, s_start_time:datetime, s_end_time:datetime)
[
100, 1, datetime('2025-05-01 5:23:00'), datetime('2025-05-01 5:44:00'),
100, 2, datetime('2025-05-01 5:00:00'), datetime('2025-05-01 5:10:00'),
100, 3, datetime('2025-05-01 5:05:00'), datetime('2025-05-01 5:20:00'),
100, 4, datetime('2025-05-01 5:10:00'), datetime('2025-05-01 5:30:00'),
200, 1, datetime('2025-05-01 5:05:00'), datetime('2025-05-01 5:15:00'),
100, 1, datetime('2025-05-01 5:30:00'), datetime('2025-05-01 5:35:00'),
100, 2, datetime('2025-05-01 5:40:00'), datetime('2025-05-01 5:50:00'),
100, 3, datetime('2025-05-01 5:45:00'), datetime('2025-05-01 5:47:00'),
200, 2, datetime('2025-05-01 5:35:00'), datetime('2025-05-01 5:40:00'),
200, 3, datetime('2025-05-01 5:37:00'), datetime('2025-05-01 5:38:00'),
230, 4, datetime('2025-05-01 5:25:00'), datetime('2025-05-01 5:45:00'),
];
let filler_range =
T
| summarize min_start = min(s_start_time), max_end = max(s_end_time) by g_id
| extend timestamp_range = range(min_start, max_end, 1m)
| mv-expand ts = timestamp_range
| project g_id, timestamp = todatetime(ts)
;
let expanded_ranges = T
| extend timestamp_range = range(s_start_time, s_end_time, 1m)
| mv-expand ts = timestamp_range
| project g_id, timestamp = todatetime(ts)
| distinct g_id, timestamp
;
filler_range
| join kind=leftouter (expanded_ranges) on timestamp and g_id
| extend overlaps = iff(isnotempty(timestamp1), true, false)
| order by g_id asc, timestamp asc
| extend grp=row_rank_dense(overlaps, isempty(g_id1) and overlaps)
| summarize start=min(timestamp), end=max(timestamp) by g_id, grp, overlaps
| where overlaps == true // only get me the overlaps
| project g_id, start, end
| order by g_id asc, start asc