I have two source tables:
Table A | Table B | |
---|---|---|
Dim 1 | X | X |
Dim 2 | X | X |
Dim 3 | X | |
Dim 4 | X | |
Metric 1 | X | X |
I want to:
SELECT
IF(GROUPING(table_b.dim_3 = 0, "Single product", "All products") AS aggregation_level,
dim_1,
dim_2,
COALESCE(table_b.dim_3, "All") AS dim_3,
COALESCE(table_b.dim_4, "All") AS dim_4,
SUM(metric_1) AS b_metric_1
FROM table_b
GROUP BY GROUPING SETS (
(dim_1, dim_2),
(dim_1, dim_2, table_b.dim_3, table_b.dim_4)
)
SELECT
dim_1,
dim_2,
SUM(metric_1) AS a_metric_1
FROM table_a
GROUP BY dim_1, dim_2
a_metric_1
"totals" calculated in the previous step to every row for the step 1 output. However, remember that step 1 will have missing combinations on dim 1 and dim 2 (but step 2 output won't). So we need to:b_metric_1
with a 0
All products
grouping set which we calculated on step 1I've been trying to use FULL OUTER JOINs to create the data grid I need, but I cannot hit the right solution
I got the expected output by changing the order where I applied the grouping sets:
grouped_by_product
CTEtotals
CTEThe SQL logic would be something like:
WITH grouped_by_product AS (
SELECT
dim_1,
dim_2,
dim_3,
dim_4,
SUM(metric_1) AS b_metric_1
FROM table_b
GROUP BY ALL
), totals AS (
SELECT
dim_1,
dim_2,
SUM(metric_1) AS a_metric_1
FROM table_a
GROUP BY ALL
), totals_grid AS (
-- Creates a grid with all possible combinations dim_3 & dim_4 can take
SELECT
*
FROM totals
CROSS JOIN dim_table -- dim_3 & dim_4 combinations
), joined AS (
SELECT
dim_1,
dim_2,
dim_3,
dim_4,
COALESCE(b_metric_1, 0) AS b_metric_1,
COALESCE(a_metric_1, 0) AS a_metric_1
FROM totals_grid
FULL OUTER JOIN grouped_by_product
USING (dim_1, dim_2, dim_3, dim_4)
)
SELECT
IF(GROUPING(table_b.dim_3 = 0, "Single product", "All products") AS aggregation_level,
dim_1,
dim_2,
COALESCE(table_b.dim_3, "All") AS dim_3,
COALESCE(table_b.dim_4, "All") AS dim_4,
SUM(b_metric_1) AS b_metric_1,
MAX(a_metric_1) AS a_metric_1 -- we take the max because it's the same for all rows
FROM joined
GROUP BY GROUPING SETS (
(dim_1, dim_2),
(dim_1, dim_2, joined.dim_3, joined.dim_4)
)