sql-servert-sqlunpivottable-functions

Table function in SQL Server with multiple parameters as an argument


I have a table in SQL Server 2016 named df:

-- Create a new table with department and gender columns
CREATE TABLE df 
(
    country VARCHAR(50),
    year INT,
    val1 INT,
    val2 INT,
    val3 INT,
    department VARCHAR(50),
    gender VARCHAR(10)
);

-- Insert data into the new table, including department and gender
INSERT INTO df (country, year, val1, val2, val3, department, gender) 
VALUES ('USA', 2020, 4, 4, 5, 'Sales', 'Male'),
('USA', 2020, 4, 4, 5, 'Sales', 'Male'),
('USA', 2020, 5, 5, 5, 'Sales', 'Female'),
('USA', 2020, 5, 5, 5, 'Sales', 'Female'),
('USA', 2020, 1, 1, 5, 'Sales', 'Male'),
('USA', 2020, 3, 3, 5, 'Sales', 'Female'),
('USA', 2020, 4, 2, 5, 'Sales', 'Male'),
('USA', 2020, 1, 1, 5, 'Sales', 'Female'),
('USA', 2020, 2, 2, 5, 'Sales', 'Male'),
('Canada', 2020, 2, 2, 3, 'HR', 'Female'),
('Canada', 2020, 2, 2, 3, 'HR', 'Female'),
('Canada', 2020, 2, 2, 3, 'HR', 'Male'),
('Canada', 2020, 2, 2, 3, 'HR', 'Male'),
('Canada', 2020, 5, 5, 3, 'HR', 'Female'),
('Canada', 2020, 5, 5, 3, 'HR', 'Male'),
('Canada', 2020, 1, 1, 3, 'HR', 'Female'),
('Canada', 2020, 1, 1, 3, 'HR', 'Male'),
('Canada', 2020, 3, 4, 3, 'HR', 'Female'),
('Canada', 2020, 3, 4, 3, 'HR', 'Male'),
('Canada', 2020, 5, 4, 3, 'HR', 'Female'),
('Canada', 2020, 5, 4, 5, 'HR', 'Male'),
('Canada', 2020, 5, 4, 5, 'HR', 'Female'),
('Germany', 2022, 5, 5, 4, 'IT', 'Male'),
('France', 2020, 1, 1, 2, 'Finance', 'Female'),
('France', 2020, 1, 1, 2, 'Finance', 'Female'),
('France', 2020, 3, 2, 2, 'Finance', 'Male'),
('France', 2020, 3, 4, 2, 'Finance', 'Female'),
('France', 2020, 3, 5, 5, 'Finance', 'Male'),
('France', 2020, 3, 4, 4, 'Finance', 'Female'),
('France', 2020, 3, 4, 4, 'Finance', 'Male'),
('France', 2020, 3, 4, 3, 'Finance', 'Female'),
('UK', 2021, 4, 2, 3, 'Marketing', 'Male'),
('Australia', 2022, 3, 3, 4, 'Support', 'Female'),
('Italy', 2020, 5, 5, 5, 'Operations', 'Male'),
('Italy', 2020, 5, 5, 5, 'Operations', 'Female'),
('Italy', 2020, 5, 1, 1, 'Operations', 'Male'),
('Italy', 2020, 4, 4, 1, 'Operations', 'Female'),
('Italy', 2020, 2, 1, 2, 'Operations', 'Male'),
('Italy', 2020, 3, 5, 3, 'Operations', 'Female'),
('Spain', 2021, 1, 2, 3, 'Customer Service', 'Male'),
('Mexico', 2022, 4, 4, 4, 'Logistics', 'Female'),
('Brazil', 2020, 4, 1, 1, 'R&D', 'Male'),
('Brazil', 2020, 4, 1, 1, 'R&D', 'Female'),
('Brazil', 2020, 4, 3, 4, 'R&D', 'Male'),
('Brazil', 2020, 5, 3, 5, 'R&D', 'Female'),
('Brazil', 2020, 5, 3, 5, 'R&D', 'Male'),
('Brazil', 2020, 3, 3, 1, 'R&D', 'Female'),
('Brazil', 2020, 2, 3, 1, 'R&D', 'Male');

-- Select all rows from the new table to check the data
SELECT * FROM df;

With this table, I create some percentages and a count column based on some filtering.

-- Parameters
DECLARE @Year INT = 2020;
DECLARE @Metric VARCHAR(50) = 'count'; 
DECLARE @Gender VARCHAR(20) = NULL; -- Set to specific gender (e.g., 'Male', 'Female') or NULL to include all
DECLARE @Department VARCHAR(50) = NULL; -- Set to specific department (e.g., 'HR', 'Engineering') or NULL to include all
-- Set @Metric to 'dissatisfaction', 'satisfaction', or 'count'

WITH UnpivotedData AS 
(
    SELECT country, gender, department, year, Vals
    FROM 
        (SELECT country, gender, department, year, val1, val2, val3
         FROM df) AS SourceTable
    UNPIVOT 
        (Vals FOR ValueColumn IN (val1, val2, val3)) AS Unpivoted
    WHERE year = @Year
),
Proportions AS 
(
    SELECT 
        country,
        gender,
        department,
        CASE 
            WHEN Vals = 1 THEN 'Very Dissatisfied'
            WHEN Vals = 2 THEN 'Dissatisfied'
            WHEN Vals = 3 THEN 'Neutral'
            WHEN Vals = 4 THEN 'Satisfied'
            WHEN Vals = 5 THEN 'Very Satisfied'
        END AS SatisfactionLevel,
        COUNT(*) * 1.0 / SUM(COUNT(*)) OVER (PARTITION BY country, gender, department) AS Proportion
    FROM 
        UnpivotedData
    GROUP BY 
        country, gender, department, Vals
),
Pivoted AS 
(
    SELECT country, gender, department, 
           [Very Dissatisfied], 
           [Dissatisfied], 
           [Neutral], 
           [Satisfied], 
           [Very Satisfied]
    FROM Proportions
    PIVOT 
        (MAX(Proportion)
         FOR SatisfactionLevel IN ([Very Dissatisfied], [Dissatisfied], [Neutral], [Satisfied], [Very Satisfied])) AS p
),
CountryCounts AS 
(
    SELECT 
        CASE WHEN country IS NULL THEN 'Unknown' ELSE country END AS country,
        gender, 
        department,
        COUNT(*) AS Total
    FROM df
    WHERE year = @Year
    -- Apply filters for gender and department if provided
    AND (@Gender IS NULL OR gender = @Gender)
    AND (@Department IS NULL OR department = @Department)
    GROUP BY CASE WHEN country IS NULL THEN 'Unknown' ELSE country END, gender, department
),
OrderedData AS 
(
    SELECT 
        p.country,
        p.gender,
        p.department,
        [Very Dissatisfied],
        [Dissatisfied],
        [Neutral],
        [Satisfied],
        [Very Satisfied],
        c.Total,
        CASE 
            WHEN @Metric = 'satisfaction' THEN ISNULL([Satisfied], 0) + ISNULL([Very Satisfied], 0)
            WHEN @Metric = 'dissatisfaction' THEN ISNULL([Very Dissatisfied], 0) + ISNULL([Dissatisfied], 0)
            WHEN @Metric = 'count' THEN c.Total
        END AS SortValue
    FROM Pivoted AS p
    INNER JOIN CountryCounts AS c ON p.country = c.country AND p.gender = c.gender AND p.department = c.department
)
SELECT 
    country,
    gender,
    department,
    [Very Dissatisfied],
    [Dissatisfied],
    [Neutral],
    [Satisfied],
    [Very Satisfied],
    Total
FROM 
    OrderedData
ORDER BY 
    SortValue DESC;

I want to create a table function that will have 3 arguments:

  1. Metric
  2. Year
  3. Factor

Factor can be the Gender or the Department or both of them. If for example the Factor is the Gender the table to be grouped by the Gender and if is the Department to be grouped by Department.

If both to be grouped by both. If the Factor is null or default to not be grouped at all.

Regarding the Year: if the Year is passed in to be grouped by year. If the Year is null, show all the years without grouping.

Is there a way to do that in SQL Server?

I have a fiddle here


Solution

  • As I told you on your previous SQL question, you are hugely over-complicating this.

    You can do the filtering, unpivot and pivot in a single level of CTE, and you only need one level to add the Total, which itself wouldn't be necessary if there was an ID column, because then you could do COUNT(DISTINCT ID).

    To create a function, just add the normal CREATE FUNCTION syntax. You can't add ORDER BY to a table function, it's basically just a view. You need to add that on to the outer query.

    CREATE OR ALTER FUNCTION dbo.MyAggregation (
        @Year INT,
        @Gender VARCHAR(20), -- Set to specific gender (e.g., 'Male', 'Female') or NULL to include all
        @Department VARCHAR(50), -- Set to specific department (e.g., 'HR', 'Engineering') or NULL to include all
        @Metric VARCHAR(50) -- Set @Metric to 'dissatisfaction', 'satisfaction', or 'count'
    )
    RETURNS TABLE
    AS RETURN
    
    WITH AllRows AS (
          SELECT *,
              COUNT(*) OVER (PARTITION BY country, gender, department, year) AS Total
          FROM df
          WHERE (year = @Year OR @Year IS NULL)
            AND (Department = @Department OR @DepARTMENT IS NULL)
            AND (Gender = @Gender OR @Gender IS NULL)
    )
    SELECT
          country,
          gender,
          department,
          year,
          COUNT(CASE WHEN Vals = 1 THEN 1 END) * 1.0
             / SUM(COUNT(*)) OVER (PARTITION BY country, gender, department, year) AS [Very Dissatisfied],
          COUNT(CASE WHEN Vals = 2 THEN 1 END) * 1.0
             / SUM(COUNT(*)) OVER (PARTITION BY country, gender, department, year) AS [Dissatisfied],
          COUNT(CASE WHEN Vals = 3 THEN 1 END) * 1.0
             / SUM(COUNT(*)) OVER (PARTITION BY country, gender, department, year) AS [Neutral],
          COUNT(CASE WHEN Vals = 4 THEN 1 END) * 1.0
             / SUM(COUNT(*)) OVER (PARTITION BY country, gender, department, year) AS [Satisfied],
          COUNT(CASE WHEN Vals = 5 THEN 1 END) * 1.0
             / SUM(COUNT(*)) OVER (PARTITION BY country, gender, department, year) AS [Very Satisfied],
          MIN(Total) AS Total,
          CASE @Metric
            WHEN 'satisfaction' THEN
              COUNT(CASE WHEN Vals IN (4, 5) THEN 1 END) * 1.0
                / SUM(COUNT(*)) OVER (PARTITION BY country, gender, department, year)
            WHEN 'dissatisfaction' THEN
              COUNT(CASE WHEN Vals IN (1, 2) THEN 1 END) * 1.0
               / SUM(COUNT(*)) OVER (PARTITION BY country, gender, department, year)
            WHEN 'count' THEN MIN(Total)
          END AS SortValue
    FROM AllRows
    CROSS APPLY (VALUES
            ('val1', val1),
            ('val2', val2),
            ('val3', val3)
    ) v(ValueColumn, Vals)
    GROUP BY
        country,
        gender,
        department,
        year;
    

    Then you just do

    SELECT *
    FROM dbo.MyAggregation(2020, NULL, NULL, 'count')
    ORDER BY
        SortValue;    
    

    db<>fiddle

    Note that the sort-value parameter should not be passed in from a variable or lateral join, as that will slow down your query a lot. If it's a constant string then the optimizer can factor it out.

    Adding in dynamic grouping substantially complicates this, because now you need to null out the values before you group them up (as shown in the other answer). It will also be really slow on large tables, as you can't use indexes. I would strongly recommend you create separate functions with different grouping/partitioning constructs, alternatively do this in dynamic SQL.