sqlsql-servert-sqlfor-xml-pathstuff

SQL STUFF FOR XML with specific grouping


I'm working in SQL Server 2014, and I have the following simple data, which tracks calling history of users:

PhoneNumber Activity    ActivityDate
------------------------------------
9075551234  Incoming    2022-04-01
9075551234  Outgoing    2022-04-06
9075551234  Outgoing    2022-04-10
9075551234  Outgoing    2022-08-02
9075551234  Incoming    2022-08-05
9075551234  Lateral     2022-08-10

5551239876  Incoming    2022-07-01
5551239876  Outgoing    2022-07-06
5551239876  Outgoing    2022-08-01
5551239876  Outgoing    2022-08-02
5551239876  Incoming    2022-08-15

I need to group the ACTIVITY values into one field, which effortlessly be done using the STUFF FOR XML function:

phonenumber FirstContact    LatestContact   Result
------------------------------------------------------------------
5551239876  2022-07-01      2022-08-15      Incoming,Outgoing,Outgoing,Outgoing,Incoming
9075551234  2022-04-01      2022-08-10      Incoming,Outgoing,Outgoing,Outgoing,Incoming,Lateral

However I need to conditionally group these, on the condition that there was more than a 90 day between the activity date. Desired result:

phonenumber FirstContact    LatestContact   Result
-------------------------------------------------------------------
9075551234  2022-04-01      2022-04-10      Incoming,Outgoing,Outgoing
9075551234  2022-08-02      2022-08-10      Outgoing,Incoming,Lateral
5551239876  2022-07-01      2022-08-15      Incoming,Outgoing,Outgoing,Outgoing,Incoming

Here's the code/sample values I'm using, thanks!

DECLARE @separator CHAR(1) = ',';

WITH testTable   (PhoneNumber,Activity,ActivityDate) as 
(
  SELECT 9075551234 , 'Incoming' , '2022-04-01'    UNION ALL
  SELECT 9075551234 , 'Outgoing' , '2022-04-06'    UNION ALL
  SELECT 9075551234 , 'Outgoing' , '2022-04-10'    UNION ALL
  SELECT 9075551234 , 'Outgoing' , '2022-08-02'    UNION ALL
  SELECT 9075551234 , 'Incoming' , '2022-08-05'    UNION ALL
  SELECT 9075551234 , 'Lateral' ,  '2022-08-10'     UNION ALL
  SELECT 5551239876 , 'Incoming' , '2022-07-01'    UNION ALL
  SELECT 5551239876 , 'Outgoing' , '2022-07-06'    UNION ALL
  SELECT 5551239876 , 'Outgoing' , '2022-08-01'    UNION ALL
  SELECT 5551239876 , 'Outgoing' , '2022-08-02'    UNION ALL
  SELECT 5551239876 , 'Incoming' , '2022-08-15'   
)    
 
SELECT p.phonenumber
,   min(activitydate) FirstContact
,max(activitydate) LatestContact 
, STUFF((SELECT @separator + Activity 
          FROM testTable AS c
          WHERE c.phonenumber = p.phonenumber
 
          FOR XML PATH('')), 1, LEN(@separator), '') AS Result
FROM testTable AS p
GROUP BY p.phonenumber  
ORDER BY p.phonenumber;

Solution

  • Below query will able to generate the desired result. Please not I have addeed one more row in your input data to validate results

    DECLARE @separator CHAR(1) = ',';
    DECLARE @allowedgap smallint = 90;
    
    
    WITH testTable   (PhoneNumber,Activity,ActivityDate) as 
    (
      SELECT 9075551234 , 'Incoming' , '2022-04-01'    UNION ALL
      SELECT 9075551234 , 'Outgoing' , '2022-04-06'    UNION ALL
      SELECT 9075551234 , 'Outgoing' , '2022-04-10'    UNION ALL
      SELECT 9075551234 , 'Outgoing' , '2022-08-02'    UNION ALL
      SELECT 9075551234 , 'Incoming' , '2022-08-05'    UNION ALL
      SELECT 9075551234 , 'Lateral' ,  '2022-08-10'     UNION ALL
      SELECT 9075551234 , 'Lateral' ,  '2022-12-10'     UNION ALL
      SELECT 5551239876 , 'Incoming' , '2022-07-01'    UNION ALL
      SELECT 5551239876 , 'Outgoing' , '2022-07-06'    UNION ALL
      SELECT 5551239876 , 'Outgoing' , '2022-08-01'    UNION ALL
      SELECT 5551239876 , 'Outgoing' , '2022-08-02'    UNION ALL
      SELECT 5551239876 , 'Incoming' , '2022-08-15'   
    )    
     
    , testTable2
    as
    (
    select t.PhoneNumber
        , t.Activity
        , t.ActivityDate 
        , LAST_VALUE(t.ActivityDate) OVER (PARTITION BY t.PhoneNumber ORDER BY t.ActivityDate rows BETWEEN current row and 1 Following ) AS NextActivityDate
        , First_VALUE(t.ActivityDate) OVER (PARTITION BY t.PhoneNumber ORDER BY t.ActivityDate rows BETWEEN 1 preceding and current row ) AS PreviousActivityDate
    from testTable t
    )
    , testTable3
    as
    (
    select t.PhoneNumber
        , t.Activity
        , t.ActivityDate 
        --, t.NextActivityDate
        --, t.PreviousActivityDate
        , case when DATEDIFF(day,t.PreviousActivityDate, t.ActivityDate) > @allowedgap  then 1 else 0 end newPeroidStart
    from testTable2 t
    )
    , testTable4
    as
    (select t.PhoneNumber
        , t.Activity
        , t.ActivityDate
    , sum(newPeroidStart) over(PARTITION BY t.PhoneNumber ORDER BY t.ActivityDate  ROWS UNBOUNDED PRECEDING) grp
    from testTable3 t
    )
    
    SELECT p.phonenumber
    ,   min(activitydate) FirstContact
    ,max(activitydate) LatestContact 
    , STUFF((SELECT @separator + Activity 
              FROM testTable4 AS c
              WHERE c.phonenumber = p.phonenumber and c.grp = p.grp
              order by c.ActivityDate   
              FOR XML PATH('')), 1, LEN(@separator), '') AS Result
    FROM testTable4 AS p
    GROUP BY p.phonenumber, p.grp
    ORDER BY p.phonenumber;
    

    Please refer excellent article from Itzik Ben-Gan is on very similar problem for more details https://sqlperformance.com/2018/09/t-sql-queries/special-islands

    Best!!