vertica

how to use COUNT(DISTINCT error_code) OVER () in vertica


I am using Vertica, I have a table with 3 columns: id, error_code and timestamp.

I want to check if in the last hour there were 3 different error_code in 5 minutes window interval for the same id.

this is my query:

select * from
(SELECT id, err_code, timestamp,
COUNT(DISTINCT err_code) OVER (PARTITION BY id ORDER BY timestamp
       RANGE BETWEEN INTERVAL '10 minutes'
   PRECEDING AND CURRENT ROW) as count
FROM my_table
WHERE timestamp > CURRENT_TIMESTAMP - INTERVAL '1 hour 5 minutes'
group by id, err_code, timestamp
order by id, timestamp) a
order by id, timestamp

However, Vertica doesn't support COUNT(DISTINCT) OVER():

ERROR: Only MIN/MAX and BOOL_AND/BOOL_OR are allowed to use DISTINCT

how to get the wanted result?

without DISTINCT this is the result: enter image description here

the wanted result should be:

enter image description here


Solution

  • With 4 distinct values altogether for err_code, add a sum of 4 OLAP expressions returning

    WITH
    -- your in data (would have been nice if you had added it so that I could have copy-pasted it)
    indata(id,err_code,ts) AS (
                SELECT 87654321,1,TIMESTAMP '2023-08-10 06:29:40'
      UNION ALL SELECT 87654321,2,TIMESTAMP '2023-08-10 06:29:43'
      UNION ALL SELECT 87654321,3,TIMESTAMP '2023-08-10 06:29:49'
      UNION ALL SELECT 87654321,4,TIMESTAMP '2023-08-10 06:29:52'
      UNION ALL SELECT 87654321,1,TIMESTAMP '2023-08-10 06:34:52'
      UNION ALL SELECT 87654321,2,TIMESTAMP '2023-08-10 06:34:55'
      UNION ALL SELECT 87654321,3,TIMESTAMP '2023-08-10 06:35:01'
      UNION ALL SELECT 87654321,4,TIMESTAMP '2023-08-10 06:35:04'
      UNION ALL SELECT 87654321,1,TIMESTAMP '2023-08-10 06:40:04'
      UNION ALL SELECT 87654321,2,TIMESTAMP '2023-08-10 06:40:07'
      UNION ALL SELECT 87654321,3,TIMESTAMP '2023-08-10 06:40:13'
      UNION ALL SELECT 87654321,4,TIMESTAMP '2023-08-10 06:40:16'
      UNION ALL SELECT 87654321,1,TIMESTAMP '2023-08-10 06:45:16'
      UNION ALL SELECT 87654321,2,TIMESTAMP '2023-08-10 06:45:19'
      UNION ALL SELECT 87654321,3,TIMESTAMP '2023-08-10 06:45:25'
      UNION ALL SELECT 87654321,4,TIMESTAMP '2023-08-10 06:45:28'
      UNION ALL SELECT 87654321,1,TIMESTAMP '2023-08-10 06:50:28'
      UNION ALL SELECT 87654321,2,TIMESTAMP '2023-08-10 06:50:32'
    )
    -- end of input, real query starts here ....
    SELECT
      *
    ,   MAX(CASE err_code WHEN 1 THEN 1 ELSE 0 END) 
         OVER (w RANGE BETWEEN '10 min' PRECEDING AND CURRENT ROW) 
      + MAX(CASE err_code WHEN 2 THEN 1 ELSE 0 END) 
         OVER (w RANGE BETWEEN '10 min' PRECEDING AND CURRENT ROW) 
      + MAX(CASE err_code WHEN 3 THEN 1 ELSE 0 END) 
         OVER (w RANGE BETWEEN '10 min' PRECEDING AND CURRENT ROW) 
      + MAX(CASE err_code WHEN 4 THEN 1 ELSE 0 END) 
         OVER (w RANGE BETWEEN '10 min' PRECEDING AND CURRENT ROW) 
      AS running_count_distinct
    FROM indata
    WINDOW w AS (PARTITION BY id ORDER BY ts) -- named window
    
    id err_code ts running_count_distinct
    87,654,321 1 2023-08-10 06:29:40 1
    87,654,321 2 2023-08-10 06:29:43 2
    87,654,321 3 2023-08-10 06:29:49 3
    87,654,321 4 2023-08-10 06:29:52 4
    87,654,321 1 2023-08-10 06:34:52 4
    87,654,321 2 2023-08-10 06:34:55 4
    87,654,321 3 2023-08-10 06:35:01 4
    87,654,321 4 2023-08-10 06:35:04 4
    87,654,321 1 2023-08-10 06:40:04 4
    87,654,321 2 2023-08-10 06:40:07 4
    87,654,321 3 2023-08-10 06:40:13 4
    87,654,321 4 2023-08-10 06:40:16 4
    87,654,321 1 2023-08-10 06:45:16 4
    87,654,321 2 2023-08-10 06:45:19 4
    87,654,321 3 2023-08-10 06:45:25 4
    87,654,321 4 2023-08-10 06:45:28 4
    87,654,321 1 2023-08-10 06:50:28 4
    87,654,321 2 2023-08-10 06:50:32 4