I am using Vertica, I have a table with 3 columns: id, error_code and timestamp.
I want to check if in the last hour there were 3 different error_code in 5 minutes window interval for the same id.
this is my query:
select * from
(SELECT id, err_code, timestamp,
COUNT(DISTINCT err_code) OVER (PARTITION BY id ORDER BY timestamp
RANGE BETWEEN INTERVAL '10 minutes'
PRECEDING AND CURRENT ROW) as count
FROM my_table
WHERE timestamp > CURRENT_TIMESTAMP - INTERVAL '1 hour 5 minutes'
group by id, err_code, timestamp
order by id, timestamp) a
order by id, timestamp
However, Vertica doesn't support COUNT(DISTINCT) OVER():
ERROR: Only MIN/MAX and BOOL_AND/BOOL_OR are allowed to use DISTINCT
how to get the wanted result?
without DISTINCT
this is the result:
the wanted result should be:
With 4 distinct values altogether for err_code
, add a sum of 4 OLAP expressions returning
WITH
-- your in data (would have been nice if you had added it so that I could have copy-pasted it)
indata(id,err_code,ts) AS (
SELECT 87654321,1,TIMESTAMP '2023-08-10 06:29:40'
UNION ALL SELECT 87654321,2,TIMESTAMP '2023-08-10 06:29:43'
UNION ALL SELECT 87654321,3,TIMESTAMP '2023-08-10 06:29:49'
UNION ALL SELECT 87654321,4,TIMESTAMP '2023-08-10 06:29:52'
UNION ALL SELECT 87654321,1,TIMESTAMP '2023-08-10 06:34:52'
UNION ALL SELECT 87654321,2,TIMESTAMP '2023-08-10 06:34:55'
UNION ALL SELECT 87654321,3,TIMESTAMP '2023-08-10 06:35:01'
UNION ALL SELECT 87654321,4,TIMESTAMP '2023-08-10 06:35:04'
UNION ALL SELECT 87654321,1,TIMESTAMP '2023-08-10 06:40:04'
UNION ALL SELECT 87654321,2,TIMESTAMP '2023-08-10 06:40:07'
UNION ALL SELECT 87654321,3,TIMESTAMP '2023-08-10 06:40:13'
UNION ALL SELECT 87654321,4,TIMESTAMP '2023-08-10 06:40:16'
UNION ALL SELECT 87654321,1,TIMESTAMP '2023-08-10 06:45:16'
UNION ALL SELECT 87654321,2,TIMESTAMP '2023-08-10 06:45:19'
UNION ALL SELECT 87654321,3,TIMESTAMP '2023-08-10 06:45:25'
UNION ALL SELECT 87654321,4,TIMESTAMP '2023-08-10 06:45:28'
UNION ALL SELECT 87654321,1,TIMESTAMP '2023-08-10 06:50:28'
UNION ALL SELECT 87654321,2,TIMESTAMP '2023-08-10 06:50:32'
)
-- end of input, real query starts here ....
SELECT
*
, MAX(CASE err_code WHEN 1 THEN 1 ELSE 0 END)
OVER (w RANGE BETWEEN '10 min' PRECEDING AND CURRENT ROW)
+ MAX(CASE err_code WHEN 2 THEN 1 ELSE 0 END)
OVER (w RANGE BETWEEN '10 min' PRECEDING AND CURRENT ROW)
+ MAX(CASE err_code WHEN 3 THEN 1 ELSE 0 END)
OVER (w RANGE BETWEEN '10 min' PRECEDING AND CURRENT ROW)
+ MAX(CASE err_code WHEN 4 THEN 1 ELSE 0 END)
OVER (w RANGE BETWEEN '10 min' PRECEDING AND CURRENT ROW)
AS running_count_distinct
FROM indata
WINDOW w AS (PARTITION BY id ORDER BY ts) -- named window
id | err_code | ts | running_count_distinct |
---|---|---|---|
87,654,321 | 1 | 2023-08-10 06:29:40 | 1 |
87,654,321 | 2 | 2023-08-10 06:29:43 | 2 |
87,654,321 | 3 | 2023-08-10 06:29:49 | 3 |
87,654,321 | 4 | 2023-08-10 06:29:52 | 4 |
87,654,321 | 1 | 2023-08-10 06:34:52 | 4 |
87,654,321 | 2 | 2023-08-10 06:34:55 | 4 |
87,654,321 | 3 | 2023-08-10 06:35:01 | 4 |
87,654,321 | 4 | 2023-08-10 06:35:04 | 4 |
87,654,321 | 1 | 2023-08-10 06:40:04 | 4 |
87,654,321 | 2 | 2023-08-10 06:40:07 | 4 |
87,654,321 | 3 | 2023-08-10 06:40:13 | 4 |
87,654,321 | 4 | 2023-08-10 06:40:16 | 4 |
87,654,321 | 1 | 2023-08-10 06:45:16 | 4 |
87,654,321 | 2 | 2023-08-10 06:45:19 | 4 |
87,654,321 | 3 | 2023-08-10 06:45:25 | 4 |
87,654,321 | 4 | 2023-08-10 06:45:28 | 4 |
87,654,321 | 1 | 2023-08-10 06:50:28 | 4 |
87,654,321 | 2 | 2023-08-10 06:50:32 | 4 |