sqlgoogle-bigquerywindow-functions

SQL date window reset after


I am looking at a series of session dates of a user, and I am trying to “collapse” the event counts in a way that any event occurring in a 7 day time window is only counted once, starting from the first event and then again after the initial period of 7 days is over, and a new event (and a 7 day window) starts again.

I am trying to figure out a logic, where the count “breaks” and resets the window. Thank you!

WITH data1 AS (
    SELECT 'bob' AS user, DATE('2024-04-08') AS session_date
    UNION ALL
    SELECT 'bob', DATE('2024-04-11')
    UNION ALL
    SELECT 'bob', DATE('2024-04-12')
    UNION ALL
    SELECT 'bob', DATE('2024-04-17')
    UNION ALL
    SELECT 'bob', DATE('2024-04-18')
    UNION ALL
    SELECT 'bob', DATE('2024-04-22')
    UNION ALL
    SELECT 'bob', DATE('2024-04-23')
    UNION ALL
    SELECT 'bob', DATE('2024-04-25')
    
)

SELECT
    *,
    DATE_ADD(session_date, INTERVAL 7 day) AS session_date_end,
    COUNT(session_date) OVER (PARTITION BY user
                ORDER BY UNIX_DATE(session_date)
                RANGE BETWEEN CURRENT ROW AND 6 FOLLOWING) 
            AS next_7_days
FROM data1

input

enter image description here

desired output

enter image description here

Additional edit: For example, if a user logs in on the 1st, 10th and 16th the windows should be 1st to 7th and 10th to 16th to accommodate varying user activity.


Solution

  • WITH data1 AS (
        SELECT 'bob' AS user, DATE('2024-04-08') AS session_date
        UNION ALL
        SELECT 'bob', DATE('2024-04-11')
        UNION ALL
        SELECT 'bob', DATE('2024-04-12')
        UNION ALL
        SELECT 'bob', DATE('2024-04-17')
        UNION ALL
        SELECT 'bob', DATE('2024-04-18')
        UNION ALL
        SELECT 'bob', DATE('2024-04-22')
        UNION ALL
        SELECT 'bob', DATE('2024-04-23')
        UNION ALL
        SELECT 'bob', DATE('2024-04-25')
    ),
    

    Lookup the preceding session date...

      look_behind AS
    (
      SELECT
        data.*,
        LAG(session_date) OVER (
          PARTITION BY user
              ORDER BY session_date
        )
          AS previous_session_date
      FROM
        data1
    ),
    

    Get the most recent session date which is more than 7 days after its preceding session's date.

    (CASE expression ensures the first date is considered to be 7 days after, even though there is no preceding session date.)

      partitioned AS
    (
      SELECT
        look_behind.*,
        LAST_VALUE(
          CASE WHEN session_date <= DATE_ADD(previous_session_date, INTERVAL 7 DAY) THEN NULL ELSE session_date END
          IGNORE NULLS
        ) OVER (
          PARTITION BY user
              ORDER BY session_date
        )
          AS base_session_date
      FROM
        look_behind
    )
    

    Use dense rank to calculate the session id.

    SELECT
      partitioned.*,
      DENSE_RANK() OVER (
        PARTITION BY user
            ORDER BY base_session_date,
                     DIV(DATE_DIFF(session_date, base_session_date, DAY), 7)
      )
        AS user_session_id
    FROM
      partitioned
    ORDER BY
      user, session_date