postgresql, recursion, amazon-rds, common-table-expression, aws-batch

How do I use a PostgreSQL recursive CTE when the iteration number would be part of the "where ? between ?" clause


I have put together a query, using a CTE, to track the status of a set of pg_cron batch jobs running on our RDS server. I would like to make it more dynamic by changing it to be recursive.

Any help would be greatly appreciated.

    -- bji: one row per pg_cron job (jobid > 32) showing its most recent run,
    -- with the beg_id/end_id arguments parsed out of the procedure-call text
    -- stored in the job command.
    with bji as (
        SELECT ROW_NUMBER() over (order by id) rnum, id, beg_id, end_id, status, pt, start_time, end_time, cmd
        from (
            SELECT coalesce(cjd.jobid, cjm.jobid) id,
                   -- jobname like 'Load_pt4_from_XML' -> 'pt4'
                   replace(substring(lower(cjm.jobname), 6), '_from_xml', '') pt,
                   cjd.status,
                   cjd.start_time,
                   cjd.end_time,
                   -- procedure-name suffix, e.g. 'partial' or 'part2'
                   substring(split_part(split_part(coalesce(cjd.command, cjm.command), '.', 2), '(',1), 30) cmd,
                   -- i_bf_beg_num / i_bf_end_num argument values from the call text
                   split_part(split_part(split_part(coalesce(cjd.command, cjm.command), '(', 2), '> ',2), ',', 1)::integer beg_id,
                   split_part(split_part(split_part(coalesce(cjd.command, cjm.command), '(', 2), '> ',3), ')', 1)::integer end_id,
                 -- jnum = 1 marks the newest run of each job
                 row_number() over (partition by coalesce(cjd.jobid, cjm.jobid) order by cjd.runid desc) jnum
            FROM cron.job_run_details cjd
            full outer join cron.job cjm on cjd.jobid = cjm.jobid
            where cjd.jobid > 32 or cjm.jobid > 32 
        ) rji
        where rji.jnum=1
        order by id desc
        ),
    -- cct: done / not-done counts within each job's id range.  The branches
    -- below differ only in the literal rnum (1, 2, 3, ... 22); this hand-written
    -- repetition is exactly what the question wants to replace with something
    -- dynamic (the iteration number feeds the BETWEEN bounds).
    cct as (
        select 1 rnum, **<-- This 1**
               count(case when already_done = true then 1 end) done,
               count(case when already_done = false then 1 end) not_done
        from info._backfill_json_case_create_tracker
        where backfill_json_case_create_tracker_id between (SELECT BEG_ID FROM BJI where rnum = 1) and (SELECT END_ID FROM BJI where rnum = 1) **<-- These 1**
        union
        select 2 rnum, **<-- This 2**
               count(case when already_done = true then 1 end) done,
               count(case when already_done = false then 1 end) not_done
        from info._backfill_json_case_create_tracker
        where backfill_json_case_create_tracker_id between (SELECT BEG_ID FROM BJI where rnum = 2) and (SELECT END_ID FROM BJI where rnum = 2) **<-- These 2**
        union
        select 3 rnum, **<-- This 3**
               count(case when already_done = true then 1 end) done,
               count(case when already_done = false then 1 end) not_done
        from info._backfill_json_case_create_tracker
        where backfill_json_case_create_tracker_id between (SELECT BEG_ID FROM BJI where rnum = 3) and (SELECT END_ID FROM BJI where rnum = 3) **<-- These 3**
        union
        ...
        union
        select 22 rnum, **<-- This 22**
               count(case when already_done = true then 1 end) done,
               count(case when already_done = false then 1 end) not_done
        from info._backfill_json_case_create_tracker
        where backfill_json_case_create_tracker_id between (SELECT BEG_ID FROM BJI where rnum = 22) and (SELECT END_ID FROM BJI where rnum = 22) **<-- These 22**
        )
    -- Final report: one line per job with totals and a completion percentage.
    select bji.id, bji.pt, bji.cmd, bji.beg_id, bji.start_time, bji.end_time, cct.done + cct.not_done total, to_char((cct.done::real/(cct.done + cct.not_done))*100,'990D99%') pct, cct.done, cct.not_done
    from cct
    inner join bji on cct.rnum = bji.rnum
    order by bji.end_time desc, bji.id
    ;

Data:

    -- Sample rows for info._backfill_json_case_create_tracker
    -- (the real table has ~1,978,000 rows; this is a sparse sample).
    -- The PK is GENERATED ALWAYS AS IDENTITY, so inserting explicit ids
    -- requires OVERRIDING SYSTEM VALUE.  The stray quotes around the first
    -- and last column names in the original paste are removed.
    insert into info._backfill_json_case_create_tracker
    (backfill_json_case_create_tracker_id, case_json_id, receipt_number, xml_trans_ts, xml_name_ts, already_done)
    overriding system value
    values
    (100000, 3316249, 'TST3316249', '2020-08-25 00:00:00+00', '2020-10-02 16:36:12+00', false),
    (150000, 3411875, 'TST3411875', '2020-10-19 00:00:00+00', '2020-11-01 21:57:01+00', true),
    (200000, 3510717, 'TST3510717', '2020-11-05 00:00:00+00', '2020-11-20 16:23:20+00', false),
    (300000, 3678759, 'TST3678759', '2020-12-07 00:00:00+00', '2021-01-06 07:59:12+00', false),
    (400000, 4574881, 'TST4574881', '2020-11-02 00:00:00+00', '2021-02-10 04:45:55+00', true),
    (450000, 3845374, 'TST3845374', '2020-11-05 00:00:00+00', '2021-03-04 01:31:11+00', false),
    (500000, 3929583, 'TST3929583', '2021-03-08 00:00:00+00', '2021-04-02 03:29:29+00', true),
    (600000, 4123662, 'TST4123662', '2021-05-06 00:00:00+00', '2021-05-25 14:26:32+00', true),
    (700000, 4759912, 'TST4759912', '2021-08-09 00:00:00+00', '2021-08-13 21:48:47+00', true),
    (750000, 4364893, 'TST4364893', '2021-10-11 00:00:00+00', '2021-10-26 19:52:01+00', true),
    (800000, 4782096, 'TST4782096', '2021-12-21 00:00:00+00', '2022-01-10 21:45:28+00', true),
    (900000, 1190583, 'TST1190583', '2022-06-07 00:00:00+00', '2022-06-07 22:35:20+00', true),
    (1000000, 1268009, 'TST1268009', '2022-10-17 00:00:00+00', '2022-10-19 11:09:36+00', true),
    (1050000, 1308993, 'TST1308993', '2022-12-05 00:00:00+00', '2022-12-21 13:39:45+00', true),
    (1100000, 2089480, 'TST2089480', '2023-02-15 00:00:00+00', '2023-02-17 00:15:54+00', true),
    (1200000, 1465150, 'TST1465150', '2023-05-23 00:00:00+00', '2023-05-24 20:11:08+00', true),
    (1300000, 1560670, 'TST1560670', '2023-08-15 00:00:00+00', '2023-08-17 07:59:43+00', true),
    (1350000, 1609272, 'TST1609272', '2023-09-29 00:00:00+00', '2023-10-02 10:22:51+00', true),
    (1400000, 1656601, 'TST1656601', '2023-10-30 00:00:00+00', '2023-11-02 02:57:55+00', true),
    (1500000, 1755817, 'TST1755817', '2024-01-03 00:00:00+00', '2024-01-04 22:23:23+00', true),
    (1600000, 1847374, 'TST1847374', '2024-03-05 00:00:00+00', '2024-03-07 20:08:44+00', true),
    (1650000, 1891154, 'TST1891154', '2024-03-22 00:00:00+00', '2024-04-15 21:54:46+00', true),
    (1700000, 1931001, 'TST1931001', '2024-04-19 00:00:00+00', '2024-06-04 00:32:38+00', false),
    (1800000, 2030726, 'TST2030726', '2024-08-27 00:00:00+00', '2024-08-30 09:09:32+00', false),
    (1900000, 4818546, 'TST4818546', '2024-10-29 00:00:00+00', '2024-11-12 15:36:14+00', true),
    (1950000, 4866727, 'TST4866727', '2024-12-10 00:00:00+00', '2024-12-12 08:36:09+00', false)
    ;

    -- pg_cron job definitions.  Each command embeds the id range the
    -- procedure works on (i_bf_beg_num => ..., i_bf_end_num => ...), which
    -- the bji CTE later parses back out of the command text.
    insert into cron.job
    (jobid, schedule, command, nodename, nodeport, database, username, active, jobname)
    values
    (35, '* * * * *', 'call info._backfill_call_put_i485_json_partial(i_bf_beg_num => 1318615, i_bf_end_num => 1483249)', 'localhost', 5432, 'tstcte', 'tstcte_dba', true, 'Load_pt4_from_XML'),
    (47, '* * * * *', 'call info._backfill_call_put_i485_json_part2(i_bf_beg_num => 329596, i_bf_end_num => 465729)', 'localhost', 5432, 'tstcte', 'tstcte_dba', true, 'Load_pt16_from_XML'),
    (48, '* * * * *', 'call info._backfill_call_put_i485_json_part2(i_bf_beg_num => 249249, i_bf_end_num => 329595)', 'localhost', 5432, 'tstcte', 'tstcte_dba', true, 'Load_pt17_from_XML'),
    (49, '* * * * *', 'call info._backfill_call_put_i485_json_part2(i_bf_beg_num => 136293, i_bf_end_num => 249248)', 'localhost', 5432, 'tstcte', 'tstcte_dba', true, 'Load_pt18_from_XML'),
    (50, '* * * * *', 'call info._backfill_call_put_i485_json_part2(i_bf_beg_num => 1, i_bf_end_num => 136292)', 'localhost', 5432, 'tstcte', 'tstcte_dba', true, 'Load_pt19_from_XML'),
    (51, '* * * * *', 'call info._backfill_call_put_i485_json_part2(i_bf_beg_num => 1898635, i_bf_end_num => 1973161)', 'localhost', 5432, 'tstcte', 'tstcte_dba', true, 'Load_pt20_from_XML'),
    (52, '* * * * *', 'call info._backfill_call_put_i485_json_part2(i_bf_beg_num => 1760313, i_bf_end_num => 1898634)', 'localhost', 5432, 'tstcte', 'tstcte_dba', true, 'Load_pt21_from_XML'),
    (53, '* * * * *', 'call info._backfill_call_put_i485_json_part2(i_bf_beg_num => 1653555, i_bf_end_num => 1760312)', 'localhost', 5432, 'tstcte', 'tstcte_dba', true, 'Load_pt22_from_XML')
    ;

    -- Run history for the jobs; bji keeps only the newest run per jobid.
    -- Fixes vs. the original paste: the column name database is not a quoted
    -- string literal, the missing job_pid value (runid 17982) is NULL, and the
    -- empty-string end_time values — invalid for timestamptz — are NULL
    -- (still-running jobs have no end time yet).
    insert into cron.job_run_details
    (jobid, runid, job_pid, database, username, command, status, return_message, start_time, end_time)
    values
    (51, 19414, 19374, 'tstcte', 'tstcte_dba', 'call info._backfill_call_put_i485_json_part2(i_bf_beg_num => 1898635, i_bf_end_num => 1973161)', 'running', 'INFO: end:2025-02-10 19:28:44.827407-beg:2025-02-10 19:28:44.51443', '2025-02-10 14:46:00.110677+00', null),
    (35, 19413, 19202, 'tstcte', 'tstcte_dba', 'call info._backfill_call_put_i485_json_partial(i_bf_beg_num => 1318615, i_bf_end_num => 1483249)', 'succeeded', 'CALL', '2025-02-10 14:45:00.775289+00', '2025-02-10 14:45:01.025047+00'),
    (35, 19412, 19016, 'tstcte', 'tstcte_dba', 'call info._backfill_call_put_i485_json_partial(i_bf_beg_num => 1318615, i_bf_end_num => 1483249)', 'succeeded', 'CALL', '2025-02-10 14:44:00.09484+00', '2025-02-10 14:44:00.271751+00'),
    (35, 19411, 18833, 'tstcte', 'tstcte_dba', 'call info._backfill_call_put_i485_json_partial(i_bf_beg_num => 1318615, i_bf_end_num => 1483249)', 'succeeded', 'CALL', '2025-02-10 14:43:00.410764+00', '2025-02-10 14:43:00.613852+00'),
    (50, 17984, 23626, 'tstcte', 'tstcte_dba', 'call info._backfill_call_put_i485_json_part2(i_bf_beg_num => 1, i_bf_end_num => 136292)', 'running', 'INFO: end:2025-02-10 19:28:42.784481-beg:2025-02-10 19:28:42.491703', '2025-02-09 14:57:01.247114+00', null),
    (50, 17982, null, 'tstcte', 'tstcte_dba', 'call info._backfill_call_put_i485_json_part2(i_bf_beg_num => 1, i_bf_end_num => 136292)', 'failed', 'could not start background process; more details may be available in the server log', '2025-02-09 14:56:04.076997+00', '2025-02-09 14:56:14.937666+00'),
    (48, 17979, 23231, 'tstcte', 'tstcte_dba', 'call info._backfill_call_put_i485_json_part2(i_bf_beg_num => 249249, i_bf_end_num => 329595)', 'running', 'INFO: end:2025-02-10 19:28:43.580903-beg:2025-02-10 19:28:43.267186', '2025-02-09 14:54:00.480279+00', null),
    (49, 17978, 23230, 'tstcte', 'tstcte_dba', 'call info._backfill_call_put_i485_json_part2(i_bf_beg_num => 136293, i_bf_end_num => 249248)', 'running', 'INFO: end:2025-02-10 19:28:44.093299-beg:2025-02-10 19:28:43.822929', '2025-02-09 14:54:00.475505+00', null),
    (47, 13645, 3577, 'tstcte', 'tstcte_dba', 'call info._backfill_call_put_i485_json_part2(i_bf_beg_num => 329596, i_bf_end_num => 465729)', 'running', 'INFO: end:2025-02-10 19:28:43.740104-beg:2025-02-10 19:28:43.439418', '2025-02-08 03:08:16.457146+00', null)
    ;

Output:

id pt cmd beg_id start_time end_time total pct done not_done
47 pt16 part2 329596 2025-02-08 03:08:16.457146+00 136134 72.87% 99203 36931
48 pt17 part2 249249 2025-02-09 14:54:00.480279+00 80347 49.30% 39615 40732
49 pt18 part2 136293 2025-02-09 14:54:00.475505+00 112956 32.98% 37258 75698
50 pt19 part2 1 2025-02-09 14:57:01.247114+00 136292 31.62% 43092 93200
51 pt20 part2 1898635 2025-02-10 14:46:00.110677+00 74527 2.20% 1640 72887
52 pt21 part2 1760313 138322 0.00% 0 138322
53 pt22 part2 1653555 106758 0.00% 1 106757
35 pt4 partial 1318615 2025-02-10 14:45:00.775289+00 2025-02-10 14:45:01.025047+00 164635 100.00% 164635 0

table definitions:

-- Tracker table: one row per case to backfill; already_done records progress.
-- Note the PK is GENERATED ALWAYS AS IDENTITY, so explicit-id inserts need
-- OVERRIDING SYSTEM VALUE.  A terminating semicolon is added (the original
-- paste ran straight into the next CREATE TABLE).
CREATE TABLE IF NOT EXISTS info._backfill_json_case_create_tracker
(
    backfill_json_case_create_tracker_id bigint NOT NULL GENERATED ALWAYS AS IDENTITY ( INCREMENT 1 START 1 MINVALUE 1 MAXVALUE 9223372036854775807 CACHE 1 ),
    case_json_id bigint,
    receipt_number character varying COLLATE pg_catalog."default",
    xml_trans_ts timestamp with time zone,
    xml_name_ts timestamp with time zone,
    already_done boolean DEFAULT false,
    CONSTRAINT backfill_json_case_create_tracker_pkey PRIMARY KEY (backfill_json_case_create_tracker_id)
);
    -- pg_cron job-definition table (standard pg_cron schema);
    -- terminating semicolon added.
    CREATE TABLE IF NOT EXISTS cron.job
    (
        jobid bigint NOT NULL DEFAULT nextval('cron.jobid_seq'::regclass),
        schedule text COLLATE pg_catalog."default" NOT NULL,
        command text COLLATE pg_catalog."default" NOT NULL,
        nodename text COLLATE pg_catalog."default" NOT NULL DEFAULT 'localhost'::text,
        nodeport integer NOT NULL DEFAULT inet_server_port(),
        database text COLLATE pg_catalog."default" NOT NULL DEFAULT current_database(),
        username text COLLATE pg_catalog."default" NOT NULL DEFAULT CURRENT_USER,
        active boolean NOT NULL DEFAULT true,
        jobname text COLLATE pg_catalog."default",
        CONSTRAINT job_pkey PRIMARY KEY (jobid),
        CONSTRAINT jobname_username_uniq UNIQUE (jobname, username)
    );

    -- pg_cron run-history table (standard pg_cron schema); end_time is NULL
    -- while a job is still running.  Terminating semicolon added.
    CREATE TABLE IF NOT EXISTS cron.job_run_details
    (
        jobid bigint,
        runid bigint NOT NULL DEFAULT nextval('cron.runid_seq'::regclass),
        job_pid integer,
        database text COLLATE pg_catalog."default",
        username text COLLATE pg_catalog."default",
        command text COLLATE pg_catalog."default",
        status text COLLATE pg_catalog."default",
        return_message text COLLATE pg_catalog."default",
        start_time timestamp with time zone,
        end_time timestamp with time zone,
        CONSTRAINT job_run_details_pkey PRIMARY KEY (runid)
    );

Solution

  • You can get what you are looking for by using the generate_series() function and joining it with the existing query in the cct CTE. You do not need a recursive CTE. Further, rather than using the BETWEEN operation, I convert the separate beg_id and end_id columns into a single range and use the containment operator on it. Additionally, I changed the done vs. not-done counting from CASE expressions inside the aggregates to count(*) FILTER (WHERE ...), which is (imho) simpler.

    -- Dynamic version of the tracker report: one cct row per job instead of
    -- 22 hand-written UNION branches.  beg_id/end_id are folded into an
    -- inclusive int8range so containment (<@) replaces the BETWEEN clauses.
    -- Tables are schema-qualified (cron./info.) to match the question's setup.
    with 
         bji as ( select row_number() over (order by id) rnum
                       , id
                       , int8range(beg_id, end_id, '[]') id_rng   -- inclusive range of tracker ids
                       , status
                       , pt
                       , start_time
                       , end_time
                       , cmd
                  from ( select coalesce(cjd.jobid, cjm.jobid) id
                              , replace(substring(lower(cjm.jobname), 6), '_from_xml', '') pt
                              , cjd.status
                              , cjd.start_time
                              , cjd.end_time
                              , substring(split_part(split_part(coalesce(cjd.command, cjm.command), '.', 2), '(',1), 30) cmd
                              , split_part(split_part(split_part(coalesce(cjd.command, cjm.command), '(', 2), '> ',2), ',', 1)::bigint beg_id
                              , split_part(split_part(split_part(coalesce(cjd.command, cjm.command), '(', 2), '> ',3), ')', 1)::bigint end_id
                              , row_number() over (partition by coalesce(cjd.jobid, cjm.jobid) order by cjd.runid desc) jnum
                           from cron.job_run_details cjd
                           full outer join cron.job cjm 
                                        on cjd.jobid = cjm.jobid
                          where cjd.jobid > 32 or cjm.jobid > 32 
                        ) rji
                 where rji.jnum = 1      -- newest run of each job only
            )   -- select * from bji;         
       , cct as (
            -- done / not-done counts per job.  The series upper bound is
            -- derived from bji, so new jobs are picked up without editing
            -- the query (the original hard-coded generate_series(1,22)).
            select gn.rnum,  
                   count(*) filter (where already_done) done,
                   count(*) filter (where not already_done) not_done
            from info._backfill_json_case_create_tracker
            join generate_series(1, (select max(rnum) from bji)) gn(rnum) 
              on true
            where backfill_json_case_create_tracker_id <@ (select id_rng from bji where rnum = gn.rnum) 
            group by gn.rnum
            ) --select * from cct;
        select bji.id
             , bji.pt
             , bji.cmd
             , lower(bji.id_rng) beg_id    -- lower bound of the inclusive range
             , bji.start_time
             , bji.end_time
             , cct.done + cct.not_done total
             , to_char((cct.done::real / (cct.done + cct.not_done)) * 100, '990D99%') pct
             , cct.done
             , cct.not_done
        from cct
        inner join bji on cct.rnum = bji.rnum
        order by bji.end_time desc, bji.id;
    

    I could not reproduce the exact desired results, simply due to the data volume: the sample data does not contain enough rows to match your counts. I did, however, reproduce the first four columns.