The following SQL works well; it gives me the result I want for the counts and percentage: (please use EXTRACT (YEAR|MONTH|DAY ...) where required):
SELECT YEAR(evt_date) AS yr, MONTH(evt_date) AS mth, DAY(evt_date) AS dy, COUNT(*) AS cnt,
ROUND(COUNT(*)*100 / (SUM(COUNT(*)) OVER (PARTITION BY YEAR(evt_date), MONTH(evt_date))), 2) AS prct
FROM my_data
WHERE evt_date >= '2023-11-01'
GROUP BY yr, mth, dy
ORDER BY 1,2,3
;
But if I introduce the ROLLUP in, I get the cnt totals fine but not the totals for percentages where the window expression is used. This must be a bug but I have no way to report it to be fixed:
SELECT YEAR(evt_date) AS yr, MONTH(evt_date) AS mth, DAY(evt_date) AS dy, COUNT(*) AS cnt,
ROUND(COUNT(*)*100 / (SUM(COUNT(*)) OVER (PARTITION BY YEAR(evt_date), MONTH(evt_date))), 2) AS prct
FROM my_data
WHERE evt_date >= '2023-11-01'
GROUP BY yr, ROLLUP (mth, dy) -- ROLLUP does not give the the result I expect to see for the percentage column
ORDER BY 1,2,3
;
Does anybody knows why? I am using Postgresql v12.
I have this alternative solution with tricks, but it is weird the one above doesn't work:
WITH my_data(evt_date, foo) AS
(SELECT '2023-11-01'::DATE, 'X' UNION
SELECT '2023-11-01', 'Y' UNION
SELECT '2023-11-02', 'X' UNION
SELECT '2023-11-02', 'Y' UNION
SELECT '2024-01-01', 'X' UNION
SELECT '2024-01-01', 'Y' UNION
SELECT '2024-01-01', 'Z' UNION
SELECT '2024-01-02', 'X' UNION
SELECT '2024-01-02', 'Y' UNION
SELECT '2024-01-03', 'X' UNION
SELECT '2024-01-03', 'Y' UNION
SELECT '2024-01-03', 'Z' UNION
SELECT '2024-01-03', 'W'
),
totals AS
(SELECT YEAR(evt_date) AS yr, MONTH(evt_date) AS mth, DAY(evt_date) AS dy, COUNT(*) AS cnt,
ROUND(COUNT(*)*100 / (SUM(COUNT(*)) OVER (PARTITION BY YEAR(evt_date), MONTH(evt_date))), 2) AS prct
FROM my_data
WHERE evt_date >= '2023-11-01'
GROUP BY yr, mth, dy
ORDER BY 1,2,3
)
SELECT yr, mth, dy, SUM(cnt) AS cnt, SUM(prct) AS prct --<-- Fake SUM() to be able to use ROLLUP
FROM totals
GROUP BY yr, ROLLUP (mth, dy) --- Only way? for ROLLUP to handle the percentage column well
ORDER BY 1,2,3
See example.
First, prct
calculation can be before ROLLUP
.
And, You calculate sum()over(partition by yr,mth
), therefore ROLLUP can be (dy
).
select yr,mth,dy,sum(prct) pct
from (
SELECT extract(year from evt_date) AS yr
, extract(MONTH from evt_date) AS mth
, extract(DAY from evt_date) AS dy
, COUNT(*) AS cnt
, ROUND(COUNT(*)*100 /
(SUM(COUNT(*))
OVER (PARTITION BY extract(year from evt_date), extract(MONTH from evt_date)))
, 2) AS prct
FROM my_data
WHERE evt_date >= '2023-11-01'
GROUP BY evt_date
)x
group by yr,mth,rollup(dy)
ORDER BY 1,2,3
;
Output
yr | mth | dy | pct |
---|---|---|---|
2023 | 11 | 1 | 50.00 |
2023 | 11 | 2 | 50.00 |
2023 | 11 | null | 100.00 |
2024 | 1 | 1 | 33.33 |
2024 | 1 | 2 | 22.22 |
2024 | 1 | 3 | 44.44 |
2024 | 1 | null | 99.99 |
With partition by yr
and rollup(mth,dy
)
select yr,mth,dy,sum(prct) pct
from (
SELECT extract(year from evt_date) AS yr
, extract(MONTH from evt_date) AS mth
, extract(DAY from evt_date) AS dy
, COUNT(*) AS cnt
, ROUND(COUNT(*)*100 /
(SUM(COUNT(*))
OVER (PARTITION BY extract(year from evt_date)))
, 2) AS prct
FROM my_data
WHERE evt_date >= '2023-11-01'
GROUP BY evt_date
)x
group by yr,rollup(mth,dy)
ORDER BY 1,2,3
;
output
yr | mth | dy | pct |
---|---|---|---|
2023 | 11 | 1 | 50.00 |
2023 | 11 | 2 | 50.00 |
2023 | 11 | null | 100.00 |
2023 | null | null | 100.00 |
2024 | 1 | 1 | 33.33 |
2024 | 1 | 2 | 22.22 |
2024 | 1 | 3 | 44.44 |
2024 | 1 | null | 99.99 |
2024 | null | null | 99.99 |
I do not know what the point is here.