I have a string in Oracle PL/SQL that looks like this:
280,1,2,3,3 | 120,,0,2,3 | 280,1,2,3,3
Each section is separated by a | character, and within each section, values are separated by commas. The challenge I’m facing is correctly extracting the values, and for missing or empty values
For example, the data I expect is:
Here’s what I have tried:
DECLARE
v_entry VARCHAR2(500);
v_countries VARCHAR2(500) := '280,1,2,3,3 | 120,,0,2,3 | 280,1,2,3,3';
v_country_id VARCHAR2(50);
v_ein VARCHAR2(50);
v_ssn VARCHAR2(50);
v_itin VARCHAR2(50);
v_atin VARCHAR2(50);
BEGIN
FOR i IN 1 .. REGEXP_COUNT(v_countries, '\|') + 1 LOOP
v_entry := REGEXP_SUBSTR(v_countries, '[^|]+', 1, i);
v_country_id := REGEXP_SUBSTR(v_entry, '([^,]*)', 1, 1);
v_ein := REGEXP_SUBSTR(v_entry, '([^,]*)', 1, 2);
v_ssn := REGEXP_SUBSTR(v_entry, '([^,]*)', 1, 3);
v_itin := REGEXP_SUBSTR(v_entry, '([^,]*)', 1, 4);
v_atin := REGEXP_SUBSTR(v_entry, '([^,]*)', 1, 5);
v_country_id := CASE WHEN v_country_id IS NULL OR v_country_id = '' THEN 'NULL' ELSE v_country_id END;
v_ein := CASE WHEN v_ein IS NULL OR v_ein = '' THEN 'NULL' ELSE v_ein END;
v_ssn := CASE WHEN v_ssn IS NULL OR v_ssn = '' THEN 'NULL' ELSE v_ssn END;
v_itin := CASE WHEN v_itin IS NULL OR v_itin = '' THEN 'NULL' ELSE v_itin END;
v_atin := CASE WHEN v_atin IS NULL OR v_atin = '' THEN 'NULL' ELSE v_atin END;
DBMS_OUTPUT.PUT_LINE('Country ID: ' || v_country_id);
DBMS_OUTPUT.PUT_LINE('EIN: ' || v_ein);
DBMS_OUTPUT.PUT_LINE('SSN: ' || v_ssn);
DBMS_OUTPUT.PUT_LINE('ITIN: ' || v_itin);
DBMS_OUTPUT.PUT_LINE('ATIN: ' || v_atin);
END LOOP;
END;
The issue: I'm expecting to handle empty fields between commas as NULL, but the output doesn’t reflect this correctly.
For example, for the second section 120,,0,2,3, I expect:
But instead, I get unexpected behavior where either values aren’t correctly assigned or the NULL values don’t show up properly.
You can use simple string expressions:
DECLARE
v_entry VARCHAR2(500);
v_countries VARCHAR2(500) := '280,1,2,3,3 | 120,,0,2,3 | 280,1,2,3,3';
v_country_id VARCHAR2(50);
v_ein VARCHAR2(50);
v_ssn VARCHAR2(50);
v_itin VARCHAR2(50);
v_atin VARCHAR2(50);
v_spos PLS_INTEGER := 1;
v_epos PLS_INTEGER;
v_entry_spos PLS_INTEGER;
v_entry_epos PLS_INTEGER;
BEGIN
LOOP
v_epos := INSTR(v_countries, '|', v_spos);
v_entry := CASE v_epos
WHEN 0
THEN SUBSTR(v_countries, v_spos)
ELSE SUBSTR(v_countries, v_spos, v_epos - v_spos)
END;
v_entry_spos := 1;
v_entry_epos := INSTR(v_entry, ',', v_entry_spos);
v_country_id := SUBSTR(v_entry, v_entry_spos, v_entry_epos - v_entry_spos);
v_entry_spos := v_entry_epos + 1;
v_entry_epos := INSTR(v_entry, ',', v_entry_spos);
v_ein := SUBSTR(v_entry, v_entry_spos, v_entry_epos - v_entry_spos);
v_entry_spos := v_entry_epos + 1;
v_entry_epos := INSTR(v_entry, ',', v_entry_spos);
v_ssn := SUBSTR(v_entry, v_entry_spos, v_entry_epos - v_entry_spos);
v_entry_spos := v_entry_epos + 1;
v_entry_epos := INSTR(v_entry, ',', v_entry_spos);
v_itin := SUBSTR(v_entry, v_entry_spos, v_entry_epos - v_entry_spos);
v_entry_spos := v_entry_epos + 1;
v_atin := SUBSTR(v_entry, v_entry_spos);
DBMS_OUTPUT.PUT_LINE('Country ID: ' || COALESCE(v_country_id, 'NULL'));
DBMS_OUTPUT.PUT_LINE('EIN: ' || COALESCE(v_ein, 'NULL'));
DBMS_OUTPUT.PUT_LINE('SSN: ' || COALESCE(v_ssn, 'NULL'));
DBMS_OUTPUT.PUT_LINE('ITIN: ' || COALESCE(v_itin, 'NULL'));
DBMS_OUTPUT.PUT_LINE('ATIN: ' || COALESCE(v_atin, 'NULL'));
EXIT WHEN v_epos = 0;
v_spos := v_epos + 1;
END LOOP;
END;
/
Which outputs:
Country ID: 280
EIN: 1
SSN: 2
ITIN: 3
ATIN: 3
Country ID: 120
EIN: NULL
SSN: 0
ITIN: 2
ATIN: 3
Country ID: 280
EIN: 1
SSN: 2
ITIN: 3
ATIN: 3
If you want to use regular expressions then:
DECLARE
v_countries VARCHAR2(500) := '280,1,2,3,3 | 120,,0,2,3 | 280,1,2,3,3';
v_country_id VARCHAR2(50);
v_ein VARCHAR2(50);
v_ssn VARCHAR2(50);
v_itin VARCHAR2(50);
v_atin VARCHAR2(50);
BEGIN
FOR i IN 1 .. REGEXP_COUNT(v_countries, '(.*?),(.*?),(.*?),(.*?),(.*?)(\||$)') LOOP
v_country_id := REGEXP_SUBSTR( v_countries, '(.*?),(.*?),(.*?),(.*?),(.*?)(\||$)', 1, i, NULL, 1);
v_ein := REGEXP_SUBSTR( v_countries, '(.*?),(.*?),(.*?),(.*?),(.*?)(\||$)', 1, i, NULL, 2);
v_ssn := REGEXP_SUBSTR( v_countries, '(.*?),(.*?),(.*?),(.*?),(.*?)(\||$)', 1, i, NULL, 3);
v_itin := REGEXP_SUBSTR( v_countries, '(.*?),(.*?),(.*?),(.*?),(.*?)(\||$)', 1, i, NULL, 4);
v_atin := REGEXP_SUBSTR( v_countries, '(.*?),(.*?),(.*?),(.*?),(.*?)(\||$)', 1, i, NULL, 5);
DBMS_OUTPUT.PUT_LINE('Country ID: ' || COALESCE(v_country_id, 'NULL'));
DBMS_OUTPUT.PUT_LINE('EIN: ' || COALESCE(v_ein, 'NULL'));
DBMS_OUTPUT.PUT_LINE('SSN: ' || COALESCE(v_ssn, 'NULL'));
DBMS_OUTPUT.PUT_LINE('ITIN: ' || COALESCE(v_itin, 'NULL'));
DBMS_OUTPUT.PUT_LINE('ATIN: ' || COALESCE(v_atin, 'NULL'));
END LOOP;
END;
/
Which outputs the same.
If you want it as a query then:
WITH data (value) AS (
SELECT '280,1,2,3,3 | 120,,0,2,3 | 280,1,2,3,3' FROM DUAL
),
countries (value, i, max_i, country_id, ein, ssn, itin, atin) AS (
SELECT value,
1,
REGEXP_COUNT(value, '(.*?),(.*?),(.*?),(.*?),(.*?)(\||$)'),
REGEXP_SUBSTR(value, '(.*?),(.*?),(.*?),(.*?),(.*?)(\||$)', 1, 1, NULL, 1),
REGEXP_SUBSTR(value, '(.*?),(.*?),(.*?),(.*?),(.*?)(\||$)', 1, 1, NULL, 2),
REGEXP_SUBSTR(value, '(.*?),(.*?),(.*?),(.*?),(.*?)(\||$)', 1, 1, NULL, 3),
REGEXP_SUBSTR(value, '(.*?),(.*?),(.*?),(.*?),(.*?)(\||$)', 1, 1, NULL, 4),
REGEXP_SUBSTR(value, '(.*?),(.*?),(.*?),(.*?),(.*?)(\||$)', 1, 1, NULL, 5)
FROM data
UNION ALL
SELECT value,
i + 1,
max_i,
REGEXP_SUBSTR(value, '(.*?),(.*?),(.*?),(.*?),(.*?)(\||$)', 1, i + 1, NULL, 1),
REGEXP_SUBSTR(value, '(.*?),(.*?),(.*?),(.*?),(.*?)(\||$)', 1, i + 1, NULL, 2),
REGEXP_SUBSTR(value, '(.*?),(.*?),(.*?),(.*?),(.*?)(\||$)', 1, i + 1, NULL, 3),
REGEXP_SUBSTR(value, '(.*?),(.*?),(.*?),(.*?),(.*?)(\||$)', 1, i + 1, NULL, 4),
REGEXP_SUBSTR(value, '(.*?),(.*?),(.*?),(.*?),(.*?)(\||$)', 1, i + 1, NULL, 5)
FROM countries
WHERE i < max_i
)
SELECT i, country_id, ein, ssn, itin, atin
FROM countries;
Which outputs:
I | COUNTRY_ID | EIN | SSN | ITIN | ATIN |
---|---|---|---|---|---|
1 | 280 | 1 | 2 | 3 | 3 |
2 | 120 | null | 0 | 2 | 3 |
3 | 280 | 1 | 2 | 3 | 3 |