sqlduckdb

Is there a way to use column list in group by clause in DuckDB?


Suppose that I have the following data

CREATE TABLE sample_table (
    YEAR INTEGER,
    BRAND VARCHAR, 
    PRODUCT VARCHAR,  
    SALES INTEGER     
);

INSERT INTO sample_table (YEAR, BRAND, PRODUCT, SALES) VALUES
(2023, 'AX', 'A', 10),
(2024, 'AX', 'A', 20),
(2024, 'AX', 'B', 70),
(2022, 'AY', 'C', 20),
(2023, 'AY', 'C', 90),
;

Is there a way to create a macro to achieve the same result below where I can just use Brand and Product as list arguments

SELECT YEAR BRAND, PRODUCT, SUM(SALES) FROM SAMPLE_TABLE 
  GROUP BY YEAR, GROUPING SETS(CUBE(BRAND, PRODUCT));

───────┬─────────┬────────────┐
│ BRAND │ PRODUCT │ sum(SALES) │
│ int32 │ varchar │   int128   │
├───────┼─────────┼────────────┤
│  2024 │         │         90 │
│  2022 │         │         20 │
│  2022 │ C       │         20 │
│  2022 │         │         20 │
│  2023 │         │         90 │
│  2023 │         │        100 │
│  2023 │ A       │         10 │
│  2024 │ B       │         70 │
│  2023 │ C       │         90 │
│  2023 │         │         10 │
│  2024 │         │         90 │
│  2024 │ B       │         70 │
│  2024 │ A       │         20 │
│  2023 │ C       │         90 │
│  2023 │ A       │         10 │
│  2024 │ A       │         20 │
│  2022 │ C       │         20 │
├───────┴─────────┴────────────┤
│ 17 rows            3 columns

What I had in mind is

CREATE OR REPLACE MACRO MSUM(
    GRPCOLS
    ) AS TABLE (
    FROM TBL
    SELECT 
        COLUMNS(C -> (LIST_CONTAINS(GRPCOLS, C))),
        SUM(SALES)
    GROUP BY YEAR, GROUPING SETS(CUBE(COLUMNS(C -> LIST(CONTAINS(GRPCOLS, C)))))
    );

WITH TBL AS (SELECT * FROM SAMPLE_TABLE)
    FROM MSUM([BRAND, PRODUCT]);

but it can't be done because if I understood it right COLUMNS is an star expression and can't be used in GROUP BY

Binder Error: STAR expression is not supported here

Any ideas?


Solution

  • Not sure if it's quite what you're asking, but:

    It is possible to build a query string and execute it with query()

    duckdb.sql("""
    create or replace macro msum(tbl, grpcols) as table (
       from query(format(
          '
          from {0}
          select {1}, sum(sales)
          group by year, grouping sets(cube({1}))
          ', 
          tbl, 
          array_to_string(grpcols, ',')
       ))
    )
    """)
    
    duckdb.sql("""
    from msum(sample_table, [brand, product])
    """)
    
    ┌─────────┬─────────┬────────────┐
    │  BRAND  │ PRODUCT │ sum(sales) │
    │ varchar │ varchar │   int128   │
    ├─────────┼─────────┼────────────┤
    │ AY      │ NULL    │         20 │
    │ AY      │ C       │         20 │
    │ AY      │ NULL    │         90 │
    │ NULL    │ B       │         70 │
    │ NULL    │ NULL    │         20 │
    │ AX      │ NULL    │         10 │
    │ AX      │ NULL    │         90 │
    │ NULL    │ A       │         10 │
    │ NULL    │ A       │         20 │
    │ NULL    │ C       │         20 │
    │ NULL    │ NULL    │         90 │
    │ AX      │ A       │         20 │
    │ NULL    │ C       │         90 │
    │ NULL    │ NULL    │        100 │
    │ AX      │ A       │         10 │
    │ AX      │ B       │         70 │
    │ AY      │ C       │         90 │
    ├─────────┴─────────┴────────────┤
    │ 17 rows              3 columns │
    └────────────────────────────────┘