sqlpostgresqlcountcommon-table-expression

Select counts from one table based in the results from another table


I have two tables, lets call them t1 and t2. Fiddle

select setseed(.42);

create table t1(a,b,c,d)as
select (random()*9)::int
     , (random()*9)::int
     , (random()*9)::int
     , (random()*9)::int
from generate_series(1,100);

create table t2(a,b,c,d)as
select (random()*9)::int
     , (random()*9)::int
     , (random()*9)::int
     , (random()*9)::int
from generate_series(1,200);

I'm trying to filter t2 and count based on my results from a t1 selected query. This would be an inefficient working example (takes half an hour to finish):

SELECT *,
  SELECT 
   (SELECT COUNT (*) FROM t2 WHERE t2.A = T1.A) AS cnt1,
  SELECT 
   (SELECT COUNT (*) FROM t2 WHERE t2.A = T1.A AND t2.B = T1.B) AS cnt2,
  SELECT 
   (SELECT COUNT (*) FROM t2 WHERE t2.A = T1.A AND t2.B = T1.B AND t2.C = T1.C) AS cnt3,
  SELECT 
   (SELECT COUNT (*) FROM t2 WHERE t2.A = T1.A AND t2.B = T1.B AND t2.C = T1.C AND t2.D = T1.D) AS cnt4,
   ... and so on ...

FROM t1 WHERE A>B AND 1.5>C AND D-E>A

And this is what I'm trying to do using CTE:

SELECT *,
  (WITH CTE1 AS (SELECT COUNT (*) FROM t2 WHERE t2.A = T1.A),
   CTE2 AS (SELECT COUNT (*) FROM CTE1 WHERE t2.B = T1.B),
   CTE3 AS (SELECT COUNT (*) FROM CTE2 WHERE t2.C = T1.C),
   CTE4 AS (SELECT COUNT (*) FROM CTE3 WHERE t2.D = T1.D)
     SELECT 
       (SELECT COUNT (*) FROM CTE) AS cnt1,
     SELECT 
       (SELECT COUNT (*) FROM CTE2) AS cnt2,
     SELECT 
       (SELECT COUNT (*) FROM CTE3) AS cnt3,
     SELECT 
       (SELECT COUNT (*) FROM CTE3) AS cnt4
   )

FROM t1 WHERE A>B AND 1.5>C AND D-E>A

This only allows to return a single result, ex. cnt1. My guess is that maybe I can pass the result as an array like the example above, but I'm not sure how to achieve this.

SELECT *,
  (WITH CTE1 AS (SELECT COUNT (*) FROM t2 WHERE t2.A = T1.A),
   CTE2 AS (SELECT COUNT (*) FROM CTE1 WHERE t2.B = T1.B),
   CTE3 AS (SELECT COUNT (*) FROM CTE2 WHERE t2.C = T1.C),
   CTE4 AS (SELECT COUNT (*) FROM CTE3 WHERE t2.D = T1.D)
     ARRAY (SELECT 
       (SELECT COUNT (*) FROM CTE) ,
     SELECT 
       (SELECT COUNT (*) FROM CTE2),
     SELECT 
       (SELECT COUNT (*) FROM CTE3),
     SELECT 
       (SELECT COUNT (*) FROM CTE3))
   ) AS CNT[]

FROM t1 WHERE A>B AND 1.5>C AND D-E>A

EDIT: This would be a simplified example of my tables and expected result.

fiddle

create table t1(id,a,b,c) as 
 values(1,2,1,1),
       (2,3,2,2),
       (3,1,3,3),
       (4,2,4,4),
       (5,3,5,5),
       (6,1,2,6),
       (7,2,2,7),
       (8,3,2,8),
       (9,4,2,9),
       (10,5,4,1);

create table t2(id,a,b,c) as 
 values(1,2,1,1),
       (2,3,2,2),
       (3,1,3,1),
       (4,2,4,4),
       (5,3,5,5),
       (6,1,2,6),
       (7,2,2,7),
       (8,3,2,8),
       (9,4,3,9),
       (10,5,4,1),
       (11,3,2,2),
       (12,4,4,4),
       (13,5,6,5),
       (14,3,2,3),
       (15,2,2,7),
       (16,1,2,8),
       (17,4,1,9),
       (18,5,2,5),
       (19,6,3,6),
       (20,7,4,7);

so updating my faulty query for this example:

SELECT *,   
 (WITH CTE1 AS (SELECT COUNT (*) FROM t2 WHERE t2.A = T1.A),    
  CTE2 AS (SELECT COUNT (*) FROM CTE1 WHERE t2.B = T1.B),   
  CTE3 AS (SELECT COUNT (*) FROM CTE2 WHERE t2.C = T1.C)
     SELECT 
       (SELECT COUNT (*) FROM CTE) AS cnt1,
     SELECT 
       (SELECT COUNT (*) FROM CTE2) AS cnt2,
     SELECT 
       (SELECT COUNT (*) FROM CTE3) AS cnt3    
   ) 
 FROM t1 WHERE A>B AND C>1.5 

Solution

  • Refering to your last sample data with expected result (the result seems to be wrong) maybe you could use something like below:

    --      S a m p l e    D a t a : 
    create table t1(id,a,b,c) as 
      values(1,2,1,1), (2,3,2,2), (3,1,3,3), (4,2,4,4), (5,3,5,5),
            (6,1,2,6), (7,2,2,7), (8,3,2,8), (9,4,3,9), -- it is (9,4,2,9)  in your fiddle but (9,4,3,9) in your result
            (10,5,4,1);
    
    create table t2(id,a,b,c) as 
      values(1,2,1,1), (2,3,2,2), (3,1,3,1), (4,2,4,4), (5,3,5,5),
            (6,1,2,6), (7,2,2,7), (8,3,2,8), (9,4,3,9), (10,5,4,1),
            (11,3,2,2), (12,4,4,4), (13,5,6,5), (14,3,2,3), (15,2,2,7),
            (16,1,2,8), (17,4,1,9), (18,5,2,5), (19,6,3,6), (20,7,4,7);
    

    1st - filter t1 to just rows of interest (id 2, 8 and 9)

    WITH
      t1_filtered AS
        ( SELECT   t1.*
          FROM     t1 
          WHERE    a > b And c > 1.5
        )
    

    ... then cross join t2 with filtered data and use conditional aggregation to fetch the counts ...

    --      S Q L : 
    Select     t.id, t.a, t.b, t.c,
               Count(Distinct Case When t2.a = t.a Then t2.id End) as cnt1,
               Count(Distinct Case When t2.a = t.a And t2.b = t.b Then t2.id End) as cnt2, 
               Count(Distinct Case When t2.a = t.a And t2.b = t.b And t2.c = t.c Then t2.id End) as cnt3
    From       t1_filtered t
    Cross Join t2 
    Group By   t.id, t.a, t.b, t.c
    Order By   t.id
    

    R e s u l t :

    id a b c cnt1 cnt2 cnt3
    2 3 2 2 5 4 2
    8 3 2 8 5 4 1
    9 4 3 9 3 1 1

    fiddle

    ... with randomly generated data it would be like in the fiddle