azure-data-explorer

Using Kusto scan operator to capture sequential pairs that meets criteria


I am trying to get the kusto scan operator to work for this use case. I want to create pairs where columnA matches.

let threecolumns = datatable( Ts: timespan, ColumnA: string, ColumnB: string) 
[
    0m, "aaa","023",
    1m, "aaa","134",
    2m, "aaa", "256",
    3m, "aaa", "356",
    4m, "aaa", "467",
    5m, "bbb", "578",
    6m, "bbb", "687",
    8m, "ccc", "788",
    11m, "ddd", "899",
    12m, "ddd", "988"];

For the above table, I'm looking to get a list this

Ts ColumnA ColumnB isPair A_1 B_1
0m aaa 023 true aaa 134
2m aaa 256 true aaa 356
5m bbb 578 true bbb 687
11m ddd 899 true ddd 988

This is what I've tried, and as you can see, I am getting an extra pair

let threecolumns = datatable( Ts: timespan,ColumnA:string, ColumnB: string) [
    0m, "aaa","023",
    1m, "aaa","134",
    2m, "aaa", "256",
    3m, "aaa", "356",
    4m, "aaa", "467",
    5m, "bbb", "578",
    6m, "bbb", "687",
    8m, "ccc", "788",
    11m, "ddd", "899",
    12m, "ddd", "988"
];
threecolumns
| partition hint.strategy=native by ColumnA
(
    order by Ts desc
    | extend A_1 = next(ColumnA, 1), B_1 = next(ColumnB, 1)
    | extend A_2 = next(ColumnA, 2), B_2 = next(ColumnB, 2)
    | scan with_match_id=m_id declare(paired: bool, step: string) with
    (
        step s1 output = last: (ColumnA == A_1 and paired != true) => paired = true, step = 's1';
        step s2 :(ColumnA == A_2 and s1.paired == true) =>  paired = false, step = 's2';
        step s3: (ColumnA != A_1) => paired = false, step = 's3';
    )
)
| where paired == true
| project Ts, ColumnA, ColumnB, paired, A_1, B_1

Ts ColumnA ColumnB isPair A_1 B_1
00:04:00 aaa 467 true aaa 356
00:03:00 aaa 356 true aaa 256
00:01:00 aaa 134 true aaa 023
00:06:00 bbb 687 true bbb 578
00:12:00 ddd 988 true ddd 899

Solution

  • You can try below code to achieve your requirement.

    let threecolumns = datatable( Ts: timespan,ColumnA:string, ColumnB: string) [
        0m, "aaa","023",
        1m, "aaa","134",
        2m, "aaa", "256",
        3m, "aaa", "356",
        4m, "aaa", "467",
        5m, "bbb", "578",
        6m, "bbb", "687",
        8m, "ccc", "788",
        11m, "ddd", "899",
        12m, "ddd", "988"
    ];
    threecolumns
    | partition hint.strategy=native by ColumnA
    (
        order by Ts asc
        | extend A_1 = next(ColumnA, 1), B_1 = next(ColumnB, 1)
        | extend x=row_number()
        | where A_1 !=''
        | where x%2 == 1
    )
    | order by Ts asc
    | project Ts, ColumnA, ColumnB, paired=true, A_1, B_1;
    

    Result:

    enter image description here