sqloracle-databasegaps-and-islandsgrouporacle18c

Assign GROUP_ID to rows based on start/end tags


I have an Oracle 18c table called LOGS:

create table logs (id number, log_tags varchar2(4000));

insert into logs (id, log_tags) values ( 1,'<Event time="Sat Apr 15 1:13:17.750" type="Debug" thread="2fec: Main CIM worker thread" elapsed="0" ');
insert into logs (id, log_tags) values ( 2,'Database: C:\2023 Files\GDBs_4\New Mobile Geodatabase.geodatabase');
insert into logs (id, log_tags) values ( 3,'      SQL: SELECT main.ACTIVE_TRANSPORTATION.OBJECTID,main.active_transportation_nt_flag_vw.flag FRO');
insert into logs (id, log_tags) values ( 4,'</Event>');

insert into logs (id, log_tags) values ( 5,'<Event time="Sat Apr 15 1:13:17.749" type="Debug" thread="2fec: Main CIM worker thread" elapsed="1" ');
insert into logs (id, log_tags) values ( 6,null);
insert into logs (id, log_tags) values ( 7,'</Event>');

insert into logs (id, log_tags) values ( 8,'<Event time="Sat Apr 15 1:13:17.749" type="Debug" thread="2fec: Main CIM worker thread" elapsed="2" ');
insert into logs (id, log_tags) values ( 9,'Database: C:\2023 Files\GDBs_4\New Mobile Geodatabase.geodatabase');
insert into logs (id, log_tags) values (10,'      SQL: SELECT main.ACTIVE_TRANSPORTATION.OBJECTID,main.active_transportation_nt_flag_vw.flag FRO');
insert into logs (id, log_tags) values (11,'      Number of features returned: 100');
insert into logs (id, log_tags) values (12,'</Event>');

insert into logs (id, log_tags) values (13,'<Event time="Sat Apr 15 1:13:17.749" type="Debug" thread="2fec: Main CIM worker thread" elapsed="0" ');
insert into logs (id, log_tags) values (14,null);
insert into logs (id, log_tags) values (15,'</Event>');

insert into logs (id, log_tags) values (16,'<Event time="Sat Apr 15 1:13:17.747" type="Debug" thread="2fec: Main CIM worker thread" elapsed="0" ');
insert into logs (id, log_tags) values (17,'Database: C:\2023 Files\GDBs_4\New Mobile Geodatabase.geodatabase');
insert into logs (id, log_tags) values (18,'      SQL: SELECT main.ACTIVE_TRANSPORTATION.OBJECTID,main.active_transportation_nt_flag_vw.flag FRO');
insert into logs (id, log_tags) values (19,'</Event>');

insert into logs (id, log_tags) values (20,'<Event time="Sat Apr 15 1:13:17.747" type="Debug" thread="2fec: Main CIM worker thread" elapsed="2" ');
insert into logs (id, log_tags) values (21,'Database: C:\2023 Files\GDBs_4\New Mobile Geodatabase.geodatabase');
insert into logs (id, log_tags) values (22,'      SQL: SELECT main.ACTIVE_TRANSPORTATION.OBJECTID,main.active_transportation_nt_flag_vw.flag FRO');
insert into logs (id, log_tags) values (23,'      Number of features returned: 100');
insert into logs (id, log_tags) values (24,'</Event>');

insert into logs (id, log_tags) values (25,'<Event time="Sat Apr 15 1:13:17.746" type="Debug" thread="2fec: Main CIM worker thread" elapsed="1" ');
insert into logs (id, log_tags) values (26,null);
insert into logs (id, log_tags) values (27,'</Event>');

insert into logs (id, log_tags) values (28,'<Event time="Sat Apr 15 1:13:17.746" type="Debug" thread="2fec: Main CIM worker thread" elapsed="0" ');
insert into logs (id, log_tags) values (29,null);
insert into logs (id, log_tags) values (30,'</Event>');

insert into logs (id, log_tags) values (31,'<Event time="Sat Apr 15 1:13:17.744" type="Debug" thread="2fec: Main CIM worker thread" elapsed="0" ');
insert into logs (id, log_tags) values (32,'Database: C:\2023 Files\GDBs_4\New Mobile Geodatabase.geodatabase');
insert into logs (id, log_tags) values (33,'      SQL: SELECT main.ACTIVE_TRANSPORTATION.OBJECTID,main.active_transportation_nt_flag_vw.flag FRO');
insert into logs (id, log_tags) values (34,'</Event>');

insert into logs (id, log_tags) values (35,'<Event time="Sat Apr 15 1:13:17.743" type="Debug" thread="2fec: Main CIM worker thread" elapsed="1" ');
insert into logs (id, log_tags) values (36,null);
insert into logs (id, log_tags) values (37,'</Event>');

db<>fiddle

  1. The log groups always start with a <Event ... row and end with a </Event> row.
  2. There are a minimum of 3 rows per group and a maximum of 5 rows per group.
  3. The rows are in sequential order via the ID column.

For each group, I want to assign a GROUP_ID number:

enter image description here


Question:

In a SQL query, how can I assign GROUP_IDs to the rows based on the start/end tags?

Related:

Fill in rows per group for pivoting


Solution

  • You have a gaps and islands problem, one option is to use the window function sum() combined with case when clause to define the required groups :

    select 1 + sum(case when log_tags = '</Event>' then 1 else 0 end) over (order by id) 
      - case when log_tags = '</Event>' then 1 else 0 end as GROUP_ID,
      log_tags
    from logs
    

    Demo here