sqlsql-serverwindow-functionssql-server-2016cumulative-sum

Count Distinct Window Function Workaround


Let me start with explaining the data and the problem. We have a data table that is transactional detail and a table called TimeSortOrder30 which list each of the 30 min intervals in a day with columns for Hour, Min, CustomDateSort. The CutsomDateSort is so we can have the first value be when the store opens at 6am and move forward from there wrapping around to the next day. That is the point behind the Dateadd - 6 hours bit.

It is very common for our customers to have multiple transactions in a day and we are trying to show the unique number of customer so far that day at each 30 min interval. The number of transactions in a day could easily be in the hundreds of millions so the dataset is quite large. Which is why I tried temp tables first. I have tried this with both CTE and with Temp tables and the code just runs for hours and never finishes.

I am having trouble figuring out the right way to get this code to work. If I could use distinct in a window function it would be real easy. But since we can not I have attempted this.

DROP TABLE IF EXISTS #TripPrep;

SELECT
    DATEADD(MINUTE, 30 * (DATEDIFF(MINUTE, 0, StartTime) / 30), 0)               AS IntervalStartTime
   ,CAST(Dateadd(HH,-6,StartTime) AS DATE)                                                      AS Date
   ,DATEPART(HH, DATEADD(MINUTE, 30 * (DATEDIFF(MINUTE, 0, StartTime) / 30), 0)) AS I_Hour
   ,DATEPART(MI, DATEADD(MINUTE, 30 * (DATEDIFF(MINUTE, 0, StartTime) / 30), 0)) AS I_Min
   ,Cust_ID
INTO
    #TripPrep
FROM DataTable
WHERE
    StoreID       = 1
    AND StartTime >= '2024-07-09 06:00:00 -05:00';
DROP TABLE IF EXISTS #TripPrepWithSortOrder;

SELECT
    TP.IntervalStartTime
   ,TP.Date
   ,TP.I_Hour
   ,TP.I_Min
   ,TP.Cust_ID
   ,TSO.CustomDateSort
INTO
    #TripPrepWithSortOrder
FROM #TripPrep               AS TP
    JOIN dbo.TimeSortOrder30 AS TSO
        ON TP.I_Hour    = TSO.Hour
           AND TP.I_Min = TSO.Min;
DROP TABLE IF EXISTS #DistinctTrips;

SELECT
    TP1.IntervalStartTime
   ,TP1.Date
   ,TP1.I_Hour
   ,TP1.I_Min
   ,TP1.CustomDateSort
   ,COUNT(DISTINCT TP2.Cust_ID) AS Trips
INTO
    #DistinctTrips
FROM #TripPrepWithSortOrder     AS TP1
    JOIN #TripPrepWithSortOrder AS TP2
        ON TP1.Date               = TP2.Date
           AND TP1.CustomDateSort >= TP2.CustomDateSort
GROUP BY
    TP1.IntervalStartTime
   ,TP1.Date
   ,TP1.I_Hour
   ,TP1.I_Min
   ,TP1.CustomDateSort;

SELECT
    DistinctTrips.IntervalStartTime
   ,DistinctTrips.Date
   ,DistinctTrips.I_Hour
   ,DistinctTrips.I_Min
   ,DistinctTrips.CustomDateSort
   ,DistinctTrips.Trips
FROM #DistinctTrips
ORDER BY
    DistinctTrips.IntervalStartTime;


WITH TripPrep
AS
 (
 SELECT
     DATEADD(MINUTE, 30 * (DATEDIFF(MINUTE, 0, StartTime) / 30), 0)               AS IntervalStartTime
    ,CAST(Dateadd(HH,-6,StartTime) AS DATE)                                                      AS "Date"
    ,DATEPART(HH, DATEADD(MINUTE, 30 * (DATEDIFF(MINUTE, 0, StartTime) / 30), 0)) AS I_Hour
    ,DATEPART(MI, DATEADD(MINUTE, 30 * (DATEDIFF(MINUTE, 0, StartTime) / 30), 0)) AS I_Min
    ,Cust_ID
 FROM DataTable
 WHERE
     StoreID     = 1
     AND StartTime >= '2024-07-09 06:00:00 -05:00'
 )
    ,TripPrepWithSortOrder
AS
 (
 SELECT
     TP.IntervalStartTime
    ,TP.[Date]
    ,TP.I_Hour
    ,TP.I_Min
    ,TP.Cust_ID
    ,TSO.CustomDateSort

 FROM TripPrep                AS TP
     JOIN dbo.TimeSortOrder30 AS TSO
         ON TP.I_Hour    = TSO.Hour
            AND TP.I_Min = TSO.Min
 )
    ,DistinctTrips
AS
 (
 SELECT
     TP1.IntervalStartTime
    ,TP1.[Date]
    ,TP1.I_Hour
    ,TP1.I_Min
    ,TP1.CustomDateSort
    ,COUNT(DISTINCT TP2.Cust_ID) AS Trips
 FROM TripPrepWithSortOrder     AS TP1
     JOIN TripPrepWithSortOrder AS TP2
         ON TP1.[Date]         = TP2.[Date]
            AND TP1.CustomDateSort >= TP2.CustomDateSort
 GROUP BY
     TP1.IntervalStartTime
    ,TP1.[Date]
    ,TP1.I_Hour
    ,TP1.I_Min
    ,TP1.CustomDateSort
 )
SELECT
    DistinctTrips.IntervalStartTime
   ,DistinctTrips.[Date]
   ,DistinctTrips.I_Hour
   ,DistinctTrips.I_Min
   ,DistinctTrips.CustomDateSort
   ,DistinctTrips.Trips
FROM DistinctTrips
ORDER BY
    DistinctTrips.IntervalStartTime;

Solution

  • I found a way to make it works and it runs quite fast for me. Under 30 seconds.

    DECLARE @StartDate DATETIME = DATEADD(HH, 7, CAST(CAST(GETDATE() AS DATE) AS DATETIME));
    DECLARE @EndDate DATETIME = DATEADD(DD, 1, @StartDate);
    
    
    Drop Table If Exists #Testing
    
    SELECT
        DATEADD(MINUTE, 30 * (DATEDIFF(MINUTE, 0, StartTime) / 30), 0)               AS IntervalStartTime
       ,CAST(DATEADD(HH,-6,StartTime) AS DATE)                                       AS Date
       ,DATEPART(HH, DATEADD(MINUTE, 30 * (DATEDIFF(MINUTE, 0, StartTime) / 30), 0)) AS I_Hour
       ,DATEPART(MI, DATEADD(MINUTE, 30 * (DATEDIFF(MINUTE, 0, StartTime) / 30), 0)) AS I_MIn
       ,Cust_ID
      
    INTO #Testing
    
    FROM ODS.dbo.VW_CDS_StatDetail AS CSd
    
    WHERE
        Store_ID     = 1
        AND cast(StartTime as datetime) >= @StartDate
        AND cast(StartTime as datetime) < @EndDate;
    
    Drop Table If Exists #TripPrep2
    
    SELECT
        TP.IntervalStartTime
       ,TP.Date
       ,TP.I_Hour
       ,TP.I_MIn
       ,TP.Cust_ID
       ,TSO.CustomDateSort
    
    INTO #TripPrep2
    
    FROM #Testing               AS TP
        JOIN dbo.TimeSortOrder30 AS TSO
            ON TP.I_Hour    = TSO.Hour
               AND TP.I_MIn = TSO.Min;
    
    
    
    SELECT
         CASE
            WHEN TSO.CustomDateSort >36 THEN DATEADD(DD,1,DATEADD(HH,TSO.Hour,DATEADD(MI,TSO.Min,CAST(TP.Date AS DATETIME))))
            ELSE DATEADD(HH,TSO.Hour,DATEADD(MI,TSO.Min,CAST(TP.Date AS DATETIME)))
         END AS IntervalStartTime
        ,TSO.CustomDateSort
        ,COUNT(DISTINCT TP.Cust_ID)
    
    FROM #TripPrep2 AS TP
        CROSS APPLY dbo.TimeSortOrder30 AS TSO
    
    WHERE Tp.CustomDateSort <= TSO.CustomDateSort
    
    GROUP BY CASE
            WHEN TSO.CustomDateSort > 36 THEN DATEADD(DD, 1, DATEADD(HH, TSO.Hour, DATEADD(MI, TSO.Min, CAST(TP.Date AS DATETIME))))
            ELSE DATEADD(HH, TSO.Hour, DATEADD(MI, TSO.Min, CAST(TP.Date AS DATETIME)))
            END
            ,TSO.CustomDateSort
    
    ORDER BY TSO.CustomDateSort