oracle-databasejoinmatch-recognize

Match_recognize and JOIN ORA-00918


This is a follow up to a question I had a few days ago so I apologize in advance for starting a new question if its improper etiquette. I am trying to use march_recognize to fund customers with purchases for 10+ consecutive days. The query below is working and gives me the result I wanted.

I tried using a JOIN to modify the query to show the customers first and last name in the output and I was unsuccessful and got the error below, which I can't seem to resolve. I was hoping someone could show me how to fix the problem. Thanks to all who respond. ORA-00918: column ambiguously defined


ALTER SESSION SET NLS_TIMESTAMP_FORMAT = 'DD-MON-YYYY  HH24:MI:SS.FF';
CREATE TABLE customers 
(CUSTOMER_ID, FIRST_NAME, LAST_NAME) AS
SELECT 1, 'John', 'Doe' FROM DUAL UNION ALL
SELECT 2, 'Ann', 'Smith' FROM DUAL UNION ALL
SELECT 3, 'Mike', 'Jones' FROM DUAL;


create table purchases(
  ORDER_ID NUMBER GENERATED BY DEFAULT AS IDENTITY (START WITH 1) NOT NULL,
  customer_id   number, 
  PRODUCT_ID NUMBER, 
  QUANTITY NUMBER, 
  purchase_date timestamp
);

insert  into purchases (customer_id, product_id, quantity, purchase_date)
    select  1 customer_id, 101 product_id, 1 quantity,
             DATE '2024-04-08' + INTERVAL '13' HOUR + ((LEVEL-1) * INTERVAL '1 00:00:01' DAY TO SECOND)  * -1
           as purchase_date
    from    dual
    connect by level <= 15 UNION all
    select  2, 102, 1,
             DATE '2024-03-08' + INTERVAL '14' HOUR + ((LEVEL-1) * INTERVAL '1 00:00:00' DAY TO SECOND) * -1
   from    dual
   connect by level <= 5 UNION ALL 
select  3, 103, 1,
             DATE '2024-02-08' + INTERVAL '15' HOUR + ((LEVEL-1) * INTERVAL '0 23:59:59' DAY TO SECOND) * -1
   from    dual
   connect by level <= 5;

/* current query works fine thanks to Alex Poole */

select customer_id,
            first_date,
            last_date,
          trunc(last_date) - trunc(first_date) + 1 as consecutive_days
from purchases 
match_recognize(
    partition by customer_id
    order by purchase_date
    measures
        first(purchase_date) as first_date,
        last(purchase_date) as last_date
    one row per match
    pattern(start_date P{9,})
    define P as purchase_date >= prev(trunc(purchase_date)) + interval '1' day
      and purchase_date < prev(trunc(purchase_date)) + interval '2' day
);

/* Desired modified output */

CUSTOMER_ID FIRST_NAME LAST_NAME START_DATE END_DATE CONTINUOUS_DAYS
1 John Doe 25-MAR-2024  12:59:46.000000
08-APR-2024  13:00:00.000000 15


/* one of many failed attempts */
select pur customer_id,
           c.first_name,
            c.last_name,
            first_date,
            last_date,
          trunc(last_date) - trunc(first_date) + 1 as consecutive_days
FROM purchases pur LEFT OUTER JOIN customers c ON c.custimer_id = pur.customer_id  
match_recognize(
    partition by pur.customer_id
    order by pur.purchase_date
    measures
        first(purchase_date) as first_date,
        last(purchase_date) as last_date
    one row per match
    pattern(start_date P{9,})
    define P as purchase_date >= prev(trunc(purchase_date)) + interval '1' day
      and purchase_date < prev(trunc(purchase_date)) + interval '2' day
);


Solution

  • Join to the MATCH_RECOGNIZE rather than to the purchases table:

    select m.customer_id,
           c.first_name,
           c.last_name,
           m.first_date,
           m.last_date,
           trunc(m.last_date) - trunc(m.first_date) + 1 as consecutive_days
    FROM   purchases pur  
           match_recognize(
             partition by customer_id
             order by purchase_date
             measures
               first(purchase_date) as first_date,
               last(purchase_date) as last_date
             one row per match
             pattern(start_date P{9,})
             define P as
                   purchase_date >= prev(trunc(purchase_date)) + interval '1' day
               and purchase_date < prev(trunc(purchase_date)) + interval '2' day
           ) m
           LEFT OUTER JOIN customers c ON c.customer_id = m.customer_id;
    

    Which, for the sample data, outputs:

    CUSTOMER_ID FIRST_NAME LAST_NAME FIRST_DATE LAST_DATE CONSECUTIVE_DAYS
    1 John Doe 2024-03-25 12:59:46.000000 2024-04-08 13:00:00.000000 15

    You can do it before but its more awkward as you need to join the two tables in a sub-query (and disambiguate the columns - which is where the error was coming from as both purchases and customers have a customer_id column) and then use MATCH_RECOGNIZE and make sure the additional columns are also carried through:

    select m.customer_id,
           m.first_name,
           m.last_name,
           m.first_date,
           m.last_date,
           trunc(m.last_date) - trunc(m.first_date) + 1 as consecutive_days
    FROM   (
             SELECT p.*, c.first_name, c.last_name
             FROM   purchases p
                    LEFT OUTER JOIN customers c
                    ON c.customer_id = p.customer_id
           ) cp
           MATCH_RECOGNIZE(
             PARTITION BY customer_id
             ORDER BY     purchase_date
             MEASURES
               MIN(first_name)      AS first_name,
               MIN(last_name)       AS last_name,
               FIRST(purchase_date) AS first_date,
               LAST(purchase_date)  AS last_date
             ONE ROW PER MATCH
             PATTERN ( consecutive{9,} final_date )
             DEFINE consecutive as
                   purchase_date >= NEXT(TRUNC(purchase_date)) - interval '1' day
               AND purchase_date <  NEXT(TRUNC(purchase_date))
           ) m;
    

    fiddle