sqlpostgresqlcountaggregate-filter

SQL (standard) join count by several columns


I have these tables:

CREATE TABLE poster (
    poster_id SERIAL NOT NULL  PRIMARY KEY,
    country VARCHAR ( 50 ) NOT NULL
);

CREATE TABLE batch (
    batch_id SERIAL NOT NULL PRIMARY KEY,
    price VARCHAR ( 50 ) NOT NULL
);

ALTER TABLE poster 
    ADD COLUMN batch_id SERIAL REFERENCES batch (batch_id);

INSERT INTO batch (price) VALUES (10)
INSERT INTO batch (price) VALUES (11)

INSERT INTO poster (country, batch_id) VALUES ('DEU', 1);
INSERT INTO poster (country, batch_id) VALUES ('DEU', 1);
INSERT INTO poster (country, batch_id) VALUES ('FRA', 1);

I want get how many posters I have for a particular country, for the rest of the world and the total, grouped by batch.

This code solves the problem, but I want to know if there's a more performant query.

SELECT 
    batch.batch_id, 
    germany.count AS germany, 
    other.count AS outside, 
    total.count AS total 
FROM
    batch 
LEFT JOIN
    (SELECT 
         poster.batch_id AS batch_id, 
         COUNT(*) AS count 
     FROM
         poster 
     WHERE
         poster.country = 'DEU' 
     GROUP BY
         poster.batch_id) AS germany ON germany.batch_id = batch.batch_id
LEFT JOIN
    (SELECT
         poster.batch_id AS batch_id, 
         COUNT(*) AS count 
     FROM
         poster 
     WHERE
         poster.country <> 'DEU' 
     GROUP BY
         poster.batch_id) AS other ON other.batch_id = batch.batch_id
LEFT JOIN
    (SELECT
         poster.batch_id AS batch_id, 
         COUNT(*) AS count 
     FROM
         poster 
     GROUP BY
         poster.batch_id) AS total ON total.batch_id = batch.batch_id

Also when the batch doesn't have any poster I want to return 0 instead of null.


Solution

  • In modern (>=9.4) PostgreSQL (that you tagged the question with), you can use a filter clause on an aggregate function and use that to apply conditions on the rows effected by the function. With this technique, you can reduce the query to a single left join:

    SELECT    b.batch_id, germany, outside, total
    FROM      batch b
    LEFT JOIN (SELECT   batch_id,
                        COUNT(*) FILTER (WHERE country = 'DEU') AS germany,
                        COUNT(*) FILTER (WHERE country <> 'DEU') AS outside,
                        COUNT(*) AS total
               FROM     poster
               GROUP BY batch_id) p ON b.batch_id = p.batch_id
    

    SQLFiddle demo

    If you don't want to use PostgreSQL-specific syntax, you could use case expressions instead:

    SELECT    b.batch_id, germany, outside, total
    FROM      batch b
    LEFT JOIN (SELECT   batch_id,
                        COUNT(CASE country WHEN 'DEU' THEN 1 END) AS germany,
                        COUNT(CASE country WHEN 'DEU' THEN NULL ELSE 1 END) AS outside,
                        COUNT(*) AS total
               FROM     poster
               GROUP BY batch_id) p ON b.batch_id = p.batch_id
    

    SQLFiddle demo