sqloracle-databasejoinamazon-redshift

fill gaps in outer join in redshift


I have a table that contains products and sales data. Unfortunately, not every month a given product was sold, and I would like unsold products in a given month to also be shown. In Oracle, the query is built this way: https://www.oracle.com/ocom/groups/public/@otn/documents/webcontent/270646.htm.

I would like to do it similarly in Redshift, but I can't use the partitioning described here. Can I easily replace it somehow?

data sample:

WITH dates AS (
    SELECT to_date('2023-01-01', 'YYYY-MM-DD') AS d FROM dual
    UNION ALL
    SELECT to_date('2023-02-01', 'YYYY-MM-DD') AS d FROM dual
    UNION ALL
    SELECT to_date('2023-03-01', 'YYYY-MM-DD') AS d FROM dual
    UNION ALL
    SELECT to_date('2023-04-01', 'YYYY-MM-DD') AS d FROM dual
    UNION ALL
    SELECT to_date('2023-05-01', 'YYYY-MM-DD') AS d FROM dual
)
, product_sales as(
    --Yanuary
    SELECT 1 AS prod_id, 'test1' AS prod_name, to_date('2023-01-01', 'YYYY-MM-DD') AS sale_date, 3 AS sale_amount FROM dual
    UNION ALL
    SELECT 2 AS prod_id, 'test2' AS prod_name, to_date('2023-01-01', 'YYYY-MM-DD') AS sale_date, 8 AS sale_amount FROM dual
    UNION ALL
    SELECT 3 AS prod_id, 'test3' AS prod_name, to_date('2023-01-01', 'YYYY-MM-DD') AS sale_date, 9 AS sale_amount FROM dual
    
    UNION ALL
    
    --February
    SELECT 1 AS prod_id, 'test1' AS prod_name, to_date('2023-02-01', 'YYYY-MM-DD') AS sale_date, 5 AS sale_amount FROM dual
    UNION ALL
    SELECT 2 AS prod_id, 'test2' AS prod_name, to_date('2023-02-01', 'YYYY-MM-DD') AS sale_date, 2 AS sale_amount FROM dual
    UNION ALL
    SELECT 3 AS prod_id, 'test3' AS prod_name, to_date('2023-02-01', 'YYYY-MM-DD') AS sale_date, 1 AS sale_amount FROM dual
    
    UNION ALL
    
    --March
--  SELECT 1 AS prod_id, 'test1' AS prod_name, to_date('2023-02-01', 'YYYY-MM-DD') AS sale_date, 5 AS sale_amount FROM dual
--  UNION ALL
    SELECT 2 AS prod_id, 'test2' AS prod_name, to_date('2023-03-01', 'YYYY-MM-DD') AS sale_date, 7 AS sale_amount FROM dual
--  UNION ALL
--  SELECT 3 AS prod_id, 'test3' AS prod_name, to_date('2023-02-01', 'YYYY-MM-DD') AS sale_date, 1 AS sale_amount FROM dual
    
    UNION ALL
    
    --April
    SELECT 1 AS prod_id, 'test1' AS prod_name, to_date('2023-04-01', 'YYYY-MM-DD') AS sale_date, 1 AS sale_amount FROM dual
    UNION ALL
    SELECT 2 AS prod_id, 'test2' AS prod_name, to_date('2023-04-01', 'YYYY-MM-DD') AS sale_date, 2 AS sale_amount FROM dual
--  UNION ALL
--  SELECT 3 AS prod_id, 'test3' AS prod_name, to_date('2023-04-01', 'YYYY-MM-DD') AS sale_date, 1 AS sale_amount FROM dual
    
    UNION ALL
    
    --May
    SELECT 1 AS prod_id, 'test1' AS prod_name, to_date('2023-05-01', 'YYYY-MM-DD') AS sale_date, 6 AS sale_amount FROM dual
    UNION ALL
    SELECT 2 AS prod_id, 'test2' AS prod_name, to_date('2023-05-01', 'YYYY-MM-DD') AS sale_date, 4 AS sale_amount FROM dual
    UNION ALL
    SELECT 3 AS prod_id, 'test3' AS prod_name, to_date('2023-05-01', 'YYYY-MM-DD') AS sale_date, 2 AS sale_amount FROM dual
)


SELECT ps.prod_id, ps.prod_name, nvl(ps.sale_amount, 0) AS sale_amount, da.d AS sale_date
FROM product_sales ps
PARTITION BY(ps.prod_id, ps.prod_name)
RIGHT OUTER JOIN dates da ON ps.sale_date = da.d
ORDER BY da.d, ps.prod_id

Solution

  • See example

    select p.prod_id,p.prod_name,dates.d, coalesce(month_sale_amount,0) month_sale_amount
    from dates
    cross join (select distinct prod_id,prod_name from product_sales) p 
    left join(
      select prod_id,date_trunc('month',sale_date) sale_date
        ,sum(sale_amount) as month_sale_amount
      from product_sales
      group by prod_id,date_trunc('month',sale_date)
    ) total_sales on sale_date=dates.d and total_sales.prod_id=p.prod_id
    order by dates.d,p.prod_id
    
    prod_id prod_name d month_sale_amount
    1 test1 2023-01-01 3
    2 test2 2023-01-01 8
    3 test3 2023-01-01 9
    1 test1 2023-02-01 5
    2 test2 2023-02-01 2
    3 test3 2023-02-01 1
    1 test1 2023-03-01 0
    2 test2 2023-03-01 7
    3 test3 2023-03-01 0
    1 test1 2023-04-01 1
    2 test2 2023-04-01 2
    3 test3 2023-04-01 0
    1 test1 2023-05-01 6
    2 test2 2023-05-01 4
    3 test3 2023-05-01 2

    fiddle