sqljoincommon-table-expression

SQL CTE Joins are producing wrong table output for Count statement


I've been trying to create a feature analysis for our app by counting specific attributes of users from given database tables and store it in an analytical view via SQL CTE's

As soon as I add more than two CTEs to my data model the final output changes from the original correct output.

For example when I run in the beginning:

WITH USER_INFO AS 
(
    SELECT
        ID AS ACCOUNT_ID,
        STATUS,
        CREATED_AT AS REGISTRATION_DATE
    FROM
        LOOPCARE_SOURCE_DATABASE.LOM_DB_US_PUBLIC.ACCOUNT
    WHERE
        STATUS IS NULL
),
WEIGHT_LOG AS 
(
    SELECT
        wl.ID AS WEIGHT_LOG_ID,
        wl.ACCOUNT_ID,
        wl.WEIGHT,
        wl.CREATED_AT
    FROM
        LOOPCARE_SOURCE_DATABASE.LOM_DB_US_PUBLIC.WEIGHT_LOG wl
    LEFT JOIN
        USER_INFO ui ON wl.ACCOUNT_ID = ui.ACCOUNT_ID
    WHERE
        wl.CREATED_AT > ui.REGISTRATION_DATE
)
SELECT 
    ui.ACCOUNT_ID,
    COUNT(wl.WEIGHT_LOG_ID) AS WEIGHT_LOG_COUNT,
FROM
    USER_INFO ui
LEFT JOIN
    WEIGHT_LOG wl ON ui.ACCOUNT_ID = wl.ACCOUNT_ID
GROUP BY
    ui.ACCOUNT_ID
ORDER BY
    ui.ACCOUNT_ID;

I get the correct count of rows for each user in the weight_log table in our data base minus the entry that is being generated during the onboarding process (registration date).

Pre_Join Table View

Account_ID 879 = 4 
Account_ID 881 = 1 

Now when I try to add another column to my final select statement Reflection_Count which represents the total count of distinct reflections a user has completed my data for my weight log users change and is wrong.

WITH USER_INFO AS 
(
    SELECT
        ID AS ACCOUNT_ID,
        STATUS,
        CREATED_AT AS REGISTRATION_DATE
    FROM
        LOOPCARE_SOURCE_DATABASE.LOM_DB_US_PUBLIC.ACCOUNT
    WHERE
        STATUS IS NULL
),
WEIGHT_LOG AS 
(
    SELECT
        wl.ID AS WEIGHT_LOG_ID,
        wl.ACCOUNT_ID,
        wl.WEIGHT,
        wl.CREATED_AT
    FROM
        LOOPCARE_SOURCE_DATABASE.LOM_DB_US_PUBLIC.WEIGHT_LOG wl
    LEFT JOIN
        USER_INFO ui ON wl.ACCOUNT_ID = ui.ACCOUNT_ID
    WHERE
        wl.CREATED_AT > ui.REGISTRATION_DATE
),
REFLECTIONS AS 
(
    WITH REFLECTION_PROGRESS_LOG AS 
    (
        SELECT
            ACCOUNT_ID,
            ID AS REFLECTION_PROGRESS_ID,
            EXTERNAL_REFLECTION_ID,
            CREATED_AT
        FROM
            LOOPCARE_SOURCE_DATABASE.LOM_DB_US_PUBLIC.REFLECTION_PROGRESS
        ORDER BY
            ACCOUNT_ID
    ),
    REFLECTION_DETAILS AS 
    (
        SELECT 
            ID AS REFLECTION_ID,
            EXTERNAL_ID,
            TITLE AS REFLECTION_TITLE,
            LANGUAGE
        FROM 
            LOOPCARE_SOURCE_DATABASE.LOM_DB_US_PUBLIC.REFLECTION
        WHERE
            LANGUAGE = 'en'
    )
    SELECT
        rpl.ACCOUNT_ID,
        rpl.REFLECTION_PROGRESS_ID,
        rpl.EXTERNAL_REFLECTION_ID,
        r.REFLECTION_TITLE,
        rpl.CREATED_AT
    FROM
        REFLECTION_PROGRESS_LOG rpl
    LEFT JOIN
        REFLECTION_DETAILS r ON rpl.EXTERNAL_REFLECTION_ID = r.EXTERNAL_ID
    ORDER BY 
        ACCOUNT_ID,
        REFLECTION_PROGRESS_ID
)
SELECT 
    ui.ACCOUNT_ID,
    COUNT(wl.WEIGHT_LOG_ID) AS WEIGHT_LOG_COUNT,
    COUNT(DISTINCT r.EXTERNAL_REFLECTION_ID) AS REFLECTION_COUNT
FROM
    USER_INFO ui
LEFT JOIN
    WEIGHT_LOG wl ON ui.ACCOUNT_ID = wl.ACCOUNT_ID
LEFT JOIN
    REFLECTIONS r ON ui.ACCOUNT_ID = r.ACCOUNT_ID
GROUP BY
    ui.ACCOUNT_ID
ORDER BY
    ui.ACCOUNT_ID;

Post_Join Table View

Account_ID 879 = 12 
Account_ID 881 = 1 

I can't seem to locate the issue and would appreciate any insights.

Thanks


Solution

  • The problem is that you are grouping after the join, which means you get a cartesian product between each of the one-to-many tables. You can only group after a join if you are joining a single one-to-many table.

    Instead group before joining.

    Note also:

    WITH REFLECTIONS AS 
    (
        SELECT
            rpl.ACCOUNT_ID,
            COUNT(*) AS REFLECTION_COUNT
        FROM
            LOOPCARE_SOURCE_DATABASE.LOM_DB_US_PUBLIC.REFLECTION_PROGRESS rpl
        GROUP BY
            rpl.ACCOUNT_ID
    )
    SELECT 
        ui.ACCOUNT_ID,
        wl.WEIGHT_LOG_COUNT,
        rpl.REFLECTION_COUNT
    FROM
        LOOPCARE_SOURCE_DATABASE.LOM_DB_US_PUBLIC.ACCOUNT ui
    LEFT JOIN LATERAL  
    (
        SELECT
            wl.ACCOUNT_ID,
            COUNT(*) AS WEIGHT_LOG_COUNT
        FROM
            LOOPCARE_SOURCE_DATABASE.LOM_DB_US_PUBLIC.WEIGHT_LOG wl
        WHERE
            ui.ACCOUNT_ID = wl.ACCOUNT_ID
            AND wl.CREATED_AT > ui.REGISTRATION_DATE
        GROUP BY
            wl.ACCOUNT_ID
    ) wl ON 1=1
    LEFT JOIN
        REFLECTIONS r ON ui.ACCOUNT_ID = r.ACCOUNT_ID
    WHERE
        ui.STATUS IS NULL
    ORDER BY
        ui.ACCOUNT_ID;