I am attempting to create a sequence number that is unique to the account and resets after an hour of no activity. Below I have code that generates sample data and desired output. I have tried doing lag, lead, row_number, combing date and time columns into datetime field to use in those functions but I have not gotten it to work.
I am working in SQL Server 2016
DROP TABLE IF EXISTS #TempFuelPurchase;
CREATE TABLE #TempFuelPurchase (
Account_Number INT,
FuelPurchase_Date DATE,
Fuel_TOD TIME
);
INSERT INTO #TempFuelPurchase (Account_Number, FuelPurchase_Date, Fuel_TOD)
VALUES
(19, '2024-04-03', '07:02:02 AM'),
(19, '2024-04-03', '07:02:41 AM'),
(19, '2024-04-03', '02:58:49 PM'),
(19, '2024-04-03', '07:58:49 PM'),
(19, '2024-04-05', '02:58:49 PM'),
(19, '2024-04-05', '02:59:31 PM'),
(19, '2024-04-17', '11:56:13 PM'),
(20, '2024-04-17', '11:59:13 PM'),
(19, '2024-04-18', '12:15:13 AM'),
(19, '2024-04-18', '02:56:13 PM'),
(20, '2024-04-18', '07:41:55 AM'),
(20, '2024-04-18', '07:41:55 PM'),
(20, '2024-04-18', '07:56:55 PM'),
(19, '2024-04-19', '07:41:55 AM'),
(19, '2024-04-19', '07:42:20 AM');
DROP TABLE IF EXISTS #DesiredOutput;
CREATE TABLE #DesiredOutput (
Account_Number INT,
FuelPurchase_Date DATE,
Fuel_TOD TIME,
Seq Int
);
INSERT INTO #DesiredOutput (Account_Number, FuelPurchase_Date, Fuel_TOD, Seq)
VALUES
(19, '2024-04-03', '07:02:02 AM',1),
(19, '2024-04-03', '07:02:41 AM',2),
(19, '2024-04-03', '02:58:49 PM',1),
(19, '2024-04-03', '07:58:49 PM',1),
(19, '2024-04-05', '02:58:49 PM',1),
(19, '2024-04-05', '02:59:31 PM',2),
(19, '2024-04-17', '11:56:13 PM',1),
(20, '2024-04-17', '11:59:13 PM',1),
(19, '2024-04-18', '12:15:13 AM',2),
(19, '2024-04-18', '02:56:13 PM',1),
(20, '2024-04-18', '07:41:55 AM',1),
(20, '2024-04-18', '07:41:55 PM',1),
(20, '2024-04-18', '07:56:55 PM',2),
(19, '2024-04-19', '07:41:55 AM',1),
(19, '2024-04-19', '07:42:20 AM',2);
I think this first method might generate a slightly cheaper plan because it uses one fewer sort operation. The second solution is probably more common though:
with dataX as (
select *,
-- convert to a combined datetime that can be compared easily
cast(FuelPurchase_Date as datetime) + cast(Fuel_TOD as datetime) as Fuel_DT
from #TempFuelPurchase
), dataY as (
select *,
row_number() over (partition by Account_Number order by Fuel_DT) as rn,
-- make sure first row gets a dummy value that will create a break
lag(Fuel_DT, 1, dateadd(day, -1, Fuel_DT)) over (partition by Account_Number order by Fuel_DT) as last_DT
from dataX
), dataZ as (
select *,
-- resolution is whole seconds, tag breaks with the row number
case when datediff(second, last_DT, Fuel_DT) > 3600 then rn end as brk
from dataY
)
select Account_Number, FuelPurchase_Date, Fuel_TOD,
-- adjust count by deducting row number of the previous break
rn + 1 - max(brk) over (partition by Account_Number order by Fuel_DT) as Seq
from dataZ;
or
with dataX as (
select *,
cast(FuelPurchase_Date as datetime) + cast(Fuel_TOD as datetime) as Fuel_DT
from #TempFuelPurchase
), dataY as (
select *,
lag(Fuel_DT, 1, dateadd(day, -1, Fuel_DT)) over (partition by Account_Number order by Fuel_DT) as last_DT
from dataX
), dataZ as (
select *,
-- rather than keeping row numbers, tag each row as part of a group
sum(
case when datediff(second, last_DT, Fuel_DT) > 3600 then 1 end
) over (partition by Account_Number order by Fuel_DT) as grp
from dataY
)
select Account_Number, FuelPurchase_Date, Fuel_TOD,
row_number() over (partition by Account_Number, grp order by Fuel_DT) as Seq
from dataZ;