sqloracle-databaseplsql

How to Split a String by Delimiters and Handle Empty Values as NULL in Oracle PL/SQL?


I have a string in Oracle PL/SQL that looks like this:

280,1,2,3,3 | 120,,0,2,3 | 280,1,2,3,3

Each section is separated by a | character, and within each section, values are separated by commas. The challenge I’m facing is correctly extracting the values, and for missing or empty values

For example, the data I expect is:

Here’s what I have tried:

DECLARE
    v_entry VARCHAR2(500); 
    v_countries VARCHAR2(500) := '280,1,2,3,3 | 120,,0,2,3 | 280,1,2,3,3';
    v_country_id VARCHAR2(50);
    v_ein VARCHAR2(50);
    v_ssn VARCHAR2(50);
    v_itin VARCHAR2(50);
    v_atin VARCHAR2(50);
BEGIN
    FOR i IN 1 .. REGEXP_COUNT(v_countries, '\|') + 1 LOOP
        v_entry := REGEXP_SUBSTR(v_countries, '[^|]+', 1, i);
    
        v_country_id := REGEXP_SUBSTR(v_entry, '([^,]*)', 1, 1);
        v_ein := REGEXP_SUBSTR(v_entry, '([^,]*)', 1, 2);
        v_ssn := REGEXP_SUBSTR(v_entry, '([^,]*)', 1, 3);
        v_itin := REGEXP_SUBSTR(v_entry, '([^,]*)', 1, 4);
        v_atin := REGEXP_SUBSTR(v_entry, '([^,]*)', 1, 5);

        v_country_id := CASE WHEN v_country_id IS NULL OR v_country_id = '' THEN 'NULL' ELSE v_country_id END;
        v_ein := CASE WHEN v_ein IS NULL OR v_ein = '' THEN 'NULL' ELSE v_ein END;
        v_ssn := CASE WHEN v_ssn IS NULL OR v_ssn = '' THEN 'NULL' ELSE v_ssn END;
        v_itin := CASE WHEN v_itin IS NULL OR v_itin = '' THEN 'NULL' ELSE v_itin END;
        v_atin := CASE WHEN v_atin IS NULL OR v_atin = '' THEN 'NULL' ELSE v_atin END;

        DBMS_OUTPUT.PUT_LINE('Country ID: ' || v_country_id);
        DBMS_OUTPUT.PUT_LINE('EIN: ' || v_ein);
        DBMS_OUTPUT.PUT_LINE('SSN: ' || v_ssn);
        DBMS_OUTPUT.PUT_LINE('ITIN: ' || v_itin);
        DBMS_OUTPUT.PUT_LINE('ATIN: ' || v_atin);
    END LOOP;
END;

The issue: I'm expecting to handle empty fields between commas as NULL, but the output doesn’t reflect this correctly.

For example, for the second section 120,,0,2,3, I expect:

But instead, I get unexpected behavior where either values aren’t correctly assigned or the NULL values don’t show up properly.


Solution

  • You can use simple string expressions:

    DECLARE
      v_entry      VARCHAR2(500); 
      v_countries  VARCHAR2(500) := '280,1,2,3,3 | 120,,0,2,3 | 280,1,2,3,3';
      v_country_id VARCHAR2(50);
      v_ein        VARCHAR2(50);
      v_ssn        VARCHAR2(50);
      v_itin       VARCHAR2(50);
      v_atin       VARCHAR2(50);
      v_spos       PLS_INTEGER := 1;
      v_epos       PLS_INTEGER;
      v_entry_spos PLS_INTEGER;
      v_entry_epos PLS_INTEGER;
    BEGIN
      LOOP
        v_epos  := INSTR(v_countries, '|', v_spos);
        v_entry := CASE v_epos
                   WHEN 0
                   THEN SUBSTR(v_countries, v_spos)
                   ELSE SUBSTR(v_countries, v_spos, v_epos - v_spos)
                   END;
        v_entry_spos := 1;
        v_entry_epos := INSTR(v_entry, ',', v_entry_spos);
        v_country_id := SUBSTR(v_entry, v_entry_spos, v_entry_epos - v_entry_spos);
        v_entry_spos := v_entry_epos + 1;
        v_entry_epos := INSTR(v_entry, ',', v_entry_spos);
        v_ein        := SUBSTR(v_entry, v_entry_spos, v_entry_epos - v_entry_spos);
        v_entry_spos := v_entry_epos + 1;
        v_entry_epos := INSTR(v_entry, ',', v_entry_spos);
        v_ssn        := SUBSTR(v_entry, v_entry_spos, v_entry_epos - v_entry_spos);
        v_entry_spos := v_entry_epos + 1;
        v_entry_epos := INSTR(v_entry, ',', v_entry_spos);
        v_itin       := SUBSTR(v_entry, v_entry_spos, v_entry_epos - v_entry_spos);
        v_entry_spos := v_entry_epos + 1;
        v_atin       := SUBSTR(v_entry, v_entry_spos);
        
        DBMS_OUTPUT.PUT_LINE('Country ID: ' || COALESCE(v_country_id, 'NULL'));
        DBMS_OUTPUT.PUT_LINE('EIN: ' || COALESCE(v_ein, 'NULL'));
        DBMS_OUTPUT.PUT_LINE('SSN: ' || COALESCE(v_ssn, 'NULL'));
        DBMS_OUTPUT.PUT_LINE('ITIN: ' || COALESCE(v_itin, 'NULL'));
        DBMS_OUTPUT.PUT_LINE('ATIN: ' || COALESCE(v_atin, 'NULL'));
        EXIT WHEN v_epos = 0;
        v_spos := v_epos + 1;
      END LOOP;
    END;
    /
    

    Which outputs:

    Country ID: 280
    EIN: 1
    SSN: 2
    ITIN: 3
    ATIN: 3 
    Country ID:  120
    EIN: NULL
    SSN: 0
    ITIN: 2
    ATIN: 3 
    Country ID:  280
    EIN: 1
    SSN: 2
    ITIN: 3
    ATIN: 3
    

    If you want to use regular expressions then:

    DECLARE
      v_countries  VARCHAR2(500) := '280,1,2,3,3 | 120,,0,2,3 | 280,1,2,3,3';
      v_country_id VARCHAR2(50);
      v_ein        VARCHAR2(50);
      v_ssn        VARCHAR2(50);
      v_itin       VARCHAR2(50);
      v_atin       VARCHAR2(50);
    BEGIN
      FOR i IN 1 .. REGEXP_COUNT(v_countries, '(.*?),(.*?),(.*?),(.*?),(.*?)(\||$)') LOOP
        v_country_id := REGEXP_SUBSTR( v_countries, '(.*?),(.*?),(.*?),(.*?),(.*?)(\||$)', 1, i, NULL, 1);
        v_ein        := REGEXP_SUBSTR( v_countries, '(.*?),(.*?),(.*?),(.*?),(.*?)(\||$)', 1, i, NULL, 2);
        v_ssn        := REGEXP_SUBSTR( v_countries, '(.*?),(.*?),(.*?),(.*?),(.*?)(\||$)', 1, i, NULL, 3);
        v_itin       := REGEXP_SUBSTR( v_countries, '(.*?),(.*?),(.*?),(.*?),(.*?)(\||$)', 1, i, NULL, 4);
        v_atin       := REGEXP_SUBSTR( v_countries, '(.*?),(.*?),(.*?),(.*?),(.*?)(\||$)', 1, i, NULL, 5);
        
        DBMS_OUTPUT.PUT_LINE('Country ID: ' || COALESCE(v_country_id, 'NULL'));
        DBMS_OUTPUT.PUT_LINE('EIN: ' || COALESCE(v_ein, 'NULL'));
        DBMS_OUTPUT.PUT_LINE('SSN: ' || COALESCE(v_ssn, 'NULL'));
        DBMS_OUTPUT.PUT_LINE('ITIN: ' || COALESCE(v_itin, 'NULL'));
        DBMS_OUTPUT.PUT_LINE('ATIN: ' || COALESCE(v_atin, 'NULL'));
      END LOOP;
    END;
    /
    

    Which outputs the same.


    If you want it as a query then:

    WITH data (value) AS (
      SELECT '280,1,2,3,3 | 120,,0,2,3 | 280,1,2,3,3' FROM DUAL
    ),
    countries (value, i, max_i, country_id, ein, ssn, itin, atin) AS (
      SELECT value,
             1,
             REGEXP_COUNT(value, '(.*?),(.*?),(.*?),(.*?),(.*?)(\||$)'),
             REGEXP_SUBSTR(value, '(.*?),(.*?),(.*?),(.*?),(.*?)(\||$)', 1, 1, NULL, 1),
             REGEXP_SUBSTR(value, '(.*?),(.*?),(.*?),(.*?),(.*?)(\||$)', 1, 1, NULL, 2),
             REGEXP_SUBSTR(value, '(.*?),(.*?),(.*?),(.*?),(.*?)(\||$)', 1, 1, NULL, 3),
             REGEXP_SUBSTR(value, '(.*?),(.*?),(.*?),(.*?),(.*?)(\||$)', 1, 1, NULL, 4),
             REGEXP_SUBSTR(value, '(.*?),(.*?),(.*?),(.*?),(.*?)(\||$)', 1, 1, NULL, 5)
      FROM   data
    UNION ALL
      SELECT value,
             i + 1,
             max_i,
             REGEXP_SUBSTR(value, '(.*?),(.*?),(.*?),(.*?),(.*?)(\||$)', 1, i + 1, NULL, 1),
             REGEXP_SUBSTR(value, '(.*?),(.*?),(.*?),(.*?),(.*?)(\||$)', 1, i + 1, NULL, 2),
             REGEXP_SUBSTR(value, '(.*?),(.*?),(.*?),(.*?),(.*?)(\||$)', 1, i + 1, NULL, 3),
             REGEXP_SUBSTR(value, '(.*?),(.*?),(.*?),(.*?),(.*?)(\||$)', 1, i + 1, NULL, 4),
             REGEXP_SUBSTR(value, '(.*?),(.*?),(.*?),(.*?),(.*?)(\||$)', 1, i + 1, NULL, 5)
      FROM   countries
      WHERE  i < max_i
    )
    SELECT i, country_id, ein, ssn, itin, atin
    FROM   countries;
    

    Which outputs:

    I COUNTRY_ID EIN SSN ITIN ATIN
    1 280 1 2 3 3
    2 120 null 0 2 3
    3 280 1 2 3 3

    fiddle