I have a table in SQL Server 2016 named df
:
-- Create a new table with department and gender columns
CREATE TABLE df
(
country VARCHAR(50),
year INT,
val1 INT,
val2 INT,
val3 INT,
department VARCHAR(50),
gender VARCHAR(10)
);
-- Insert data into the new table, including department and gender
INSERT INTO df (country, year, val1, val2, val3, department, gender)
VALUES ('USA', 2020, 4, 4, 5, 'Sales', 'Male'),
('USA', 2020, 4, 4, 5, 'Sales', 'Male'),
('USA', 2020, 5, 5, 5, 'Sales', 'Female'),
('USA', 2020, 5, 5, 5, 'Sales', 'Female'),
('USA', 2020, 1, 1, 5, 'Sales', 'Male'),
('USA', 2020, 3, 3, 5, 'Sales', 'Female'),
('USA', 2020, 4, 2, 5, 'Sales', 'Male'),
('USA', 2020, 1, 1, 5, 'Sales', 'Female'),
('USA', 2020, 2, 2, 5, 'Sales', 'Male'),
('Canada', 2020, 2, 2, 3, 'HR', 'Female'),
('Canada', 2020, 2, 2, 3, 'HR', 'Female'),
('Canada', 2020, 2, 2, 3, 'HR', 'Male'),
('Canada', 2020, 2, 2, 3, 'HR', 'Male'),
('Canada', 2020, 5, 5, 3, 'HR', 'Female'),
('Canada', 2020, 5, 5, 3, 'HR', 'Male'),
('Canada', 2020, 1, 1, 3, 'HR', 'Female'),
('Canada', 2020, 1, 1, 3, 'HR', 'Male'),
('Canada', 2020, 3, 4, 3, 'HR', 'Female'),
('Canada', 2020, 3, 4, 3, 'HR', 'Male'),
('Canada', 2020, 5, 4, 3, 'HR', 'Female'),
('Canada', 2020, 5, 4, 5, 'HR', 'Male'),
('Canada', 2020, 5, 4, 5, 'HR', 'Female'),
('Germany', 2022, 5, 5, 4, 'IT', 'Male'),
('France', 2020, 1, 1, 2, 'Finance', 'Female'),
('France', 2020, 1, 1, 2, 'Finance', 'Female'),
('France', 2020, 3, 2, 2, 'Finance', 'Male'),
('France', 2020, 3, 4, 2, 'Finance', 'Female'),
('France', 2020, 3, 5, 5, 'Finance', 'Male'),
('France', 2020, 3, 4, 4, 'Finance', 'Female'),
('France', 2020, 3, 4, 4, 'Finance', 'Male'),
('France', 2020, 3, 4, 3, 'Finance', 'Female'),
('UK', 2021, 4, 2, 3, 'Marketing', 'Male'),
('Australia', 2022, 3, 3, 4, 'Support', 'Female'),
('Italy', 2020, 5, 5, 5, 'Operations', 'Male'),
('Italy', 2020, 5, 5, 5, 'Operations', 'Female'),
('Italy', 2020, 5, 1, 1, 'Operations', 'Male'),
('Italy', 2020, 4, 4, 1, 'Operations', 'Female'),
('Italy', 2020, 2, 1, 2, 'Operations', 'Male'),
('Italy', 2020, 3, 5, 3, 'Operations', 'Female'),
('Spain', 2021, 1, 2, 3, 'Customer Service', 'Male'),
('Mexico', 2022, 4, 4, 4, 'Logistics', 'Female'),
('Brazil', 2020, 4, 1, 1, 'R&D', 'Male'),
('Brazil', 2020, 4, 1, 1, 'R&D', 'Female'),
('Brazil', 2020, 4, 3, 4, 'R&D', 'Male'),
('Brazil', 2020, 5, 3, 5, 'R&D', 'Female'),
('Brazil', 2020, 5, 3, 5, 'R&D', 'Male'),
('Brazil', 2020, 3, 3, 1, 'R&D', 'Female'),
('Brazil', 2020, 2, 3, 1, 'R&D', 'Male');
-- Select all rows from the new table to check the data
SELECT * FROM df;
With this table, I create some percentages and a count column based on some filtering.
-- Parameters
DECLARE @Year INT = 2020;
DECLARE @Metric VARCHAR(50) = 'count';
DECLARE @Gender VARCHAR(20) = NULL; -- Set to specific gender (e.g., 'Male', 'Female') or NULL to include all
DECLARE @Department VARCHAR(50) = NULL; -- Set to specific department (e.g., 'HR', 'Engineering') or NULL to include all
-- Set @Metric to 'dissatisfaction', 'satisfaction', or 'count'
WITH UnpivotedData AS
(
SELECT country, gender, department, year, Vals
FROM
(SELECT country, gender, department, year, val1, val2, val3
FROM df) AS SourceTable
UNPIVOT
(Vals FOR ValueColumn IN (val1, val2, val3)) AS Unpivoted
WHERE year = @Year
),
Proportions AS
(
SELECT
country,
gender,
department,
CASE
WHEN Vals = 1 THEN 'Very Dissatisfied'
WHEN Vals = 2 THEN 'Dissatisfied'
WHEN Vals = 3 THEN 'Neutral'
WHEN Vals = 4 THEN 'Satisfied'
WHEN Vals = 5 THEN 'Very Satisfied'
END AS SatisfactionLevel,
COUNT(*) * 1.0 / SUM(COUNT(*)) OVER (PARTITION BY country, gender, department) AS Proportion
FROM
UnpivotedData
GROUP BY
country, gender, department, Vals
),
Pivoted AS
(
SELECT country, gender, department,
[Very Dissatisfied],
[Dissatisfied],
[Neutral],
[Satisfied],
[Very Satisfied]
FROM Proportions
PIVOT
(MAX(Proportion)
FOR SatisfactionLevel IN ([Very Dissatisfied], [Dissatisfied], [Neutral], [Satisfied], [Very Satisfied])) AS p
),
CountryCounts AS
(
SELECT
CASE WHEN country IS NULL THEN 'Unknown' ELSE country END AS country,
gender,
department,
COUNT(*) AS Total
FROM df
WHERE year = @Year
-- Apply filters for gender and department if provided
AND (@Gender IS NULL OR gender = @Gender)
AND (@Department IS NULL OR department = @Department)
GROUP BY CASE WHEN country IS NULL THEN 'Unknown' ELSE country END, gender, department
),
OrderedData AS
(
SELECT
p.country,
p.gender,
p.department,
[Very Dissatisfied],
[Dissatisfied],
[Neutral],
[Satisfied],
[Very Satisfied],
c.Total,
CASE
WHEN @Metric = 'satisfaction' THEN ISNULL([Satisfied], 0) + ISNULL([Very Satisfied], 0)
WHEN @Metric = 'dissatisfaction' THEN ISNULL([Very Dissatisfied], 0) + ISNULL([Dissatisfied], 0)
WHEN @Metric = 'count' THEN c.Total
END AS SortValue
FROM Pivoted AS p
INNER JOIN CountryCounts AS c ON p.country = c.country AND p.gender = c.gender AND p.department = c.department
)
SELECT
country,
gender,
department,
[Very Dissatisfied],
[Dissatisfied],
[Neutral],
[Satisfied],
[Very Satisfied],
Total
FROM
OrderedData
ORDER BY
SortValue DESC;
I want to create a table function that will have 3 arguments:
Factor
can be the Gender or the Department or both of them. If for example the Factor
is the Gender the table to be grouped by the Gender and if is the Department to be grouped by Department.
If both to be grouped by both. If the Factor
is null or default to not be grouped at all.
Regarding the Year
: if the Year
is passed in to be grouped by year. If the Year
is null, show all the years without grouping.
Is there a way to do that in SQL Server?
I have a fiddle here
As I told you on your previous SQL question, you are hugely over-complicating this.
You can do the filtering, unpivot and pivot in a single level of CTE, and you only need one level to add the Total
, which itself wouldn't be necessary if there was an ID
column, because then you could do COUNT(DISTINCT ID)
.
To create a function, just add the normal CREATE FUNCTION
syntax. You can't add ORDER BY
to a table function, it's basically just a view. You need to add that on to the outer query.
CREATE OR ALTER FUNCTION dbo.MyAggregation (
@Year INT,
@Gender VARCHAR(20), -- Set to specific gender (e.g., 'Male', 'Female') or NULL to include all
@Department VARCHAR(50), -- Set to specific department (e.g., 'HR', 'Engineering') or NULL to include all
@Metric VARCHAR(50) -- Set @Metric to 'dissatisfaction', 'satisfaction', or 'count'
)
RETURNS TABLE
AS RETURN
WITH AllRows AS (
SELECT *,
COUNT(*) OVER (PARTITION BY country, gender, department, year) AS Total
FROM df
WHERE (year = @Year OR @Year IS NULL)
AND (Department = @Department OR @DepARTMENT IS NULL)
AND (Gender = @Gender OR @Gender IS NULL)
)
SELECT
country,
gender,
department,
year,
COUNT(CASE WHEN Vals = 1 THEN 1 END) * 1.0
/ SUM(COUNT(*)) OVER (PARTITION BY country, gender, department, year) AS [Very Dissatisfied],
COUNT(CASE WHEN Vals = 2 THEN 1 END) * 1.0
/ SUM(COUNT(*)) OVER (PARTITION BY country, gender, department, year) AS [Dissatisfied],
COUNT(CASE WHEN Vals = 3 THEN 1 END) * 1.0
/ SUM(COUNT(*)) OVER (PARTITION BY country, gender, department, year) AS [Neutral],
COUNT(CASE WHEN Vals = 4 THEN 1 END) * 1.0
/ SUM(COUNT(*)) OVER (PARTITION BY country, gender, department, year) AS [Satisfied],
COUNT(CASE WHEN Vals = 5 THEN 1 END) * 1.0
/ SUM(COUNT(*)) OVER (PARTITION BY country, gender, department, year) AS [Very Satisfied],
MIN(Total) AS Total,
CASE @Metric
WHEN 'satisfaction' THEN
COUNT(CASE WHEN Vals IN (4, 5) THEN 1 END) * 1.0
/ SUM(COUNT(*)) OVER (PARTITION BY country, gender, department, year)
WHEN 'dissatisfaction' THEN
COUNT(CASE WHEN Vals IN (1, 2) THEN 1 END) * 1.0
/ SUM(COUNT(*)) OVER (PARTITION BY country, gender, department, year)
WHEN 'count' THEN MIN(Total)
END AS SortValue
FROM AllRows
CROSS APPLY (VALUES
('val1', val1),
('val2', val2),
('val3', val3)
) v(ValueColumn, Vals)
GROUP BY
country,
gender,
department,
year;
Then you just do
SELECT *
FROM dbo.MyAggregation(2020, NULL, NULL, 'count')
ORDER BY
SortValue;
Note that the sort-value parameter should not be passed in from a variable or lateral join, as that will slow down your query a lot. If it's a constant string then the optimizer can factor it out.
Adding in dynamic grouping substantially complicates this, because now you need to null out the values before you group them up (as shown in the other answer). It will also be really slow on large tables, as you can't use indexes. I would strongly recommend you create separate functions with different grouping/partitioning constructs, alternatively do this in dynamic SQL.