Need to separate the following long string using oracle regexp -
Row1 has following value- 'van dam is brother of Prince Charles(12345). Mathew Perker is son of Prince Charles(12345).'
Row2- 'Madam Currie is grandmother of Albert Eistine(56789). Pieer Currie is grandfather of Albert Eistine(56789). CV Raman is friend of Albert Eistine(56789).'
Now by split I need the following separate string -
From Row1 - 'van dam is brother of Prince Charles(12345).' 'Mathew Perker is son of Prince Charles(12345).'
From Row2- 'Madam Currie is grandmother of Albert Eistine(56789).' 'Pieer Currie is grandfather of Albert Eistine(56789).' 'CV Raman is friend of Albert Eistine(56789).'
These separate strings can be presented in separate column. The numbers in brackets are actually ID stored in ID field of the table.
Is it possible to achieve such split using Oracle regexp?
You can use:
SELECT REGEXP_SUBSTR(
column_name,
'(.*?) is (.*?) of (.*?)\((\d+)\)\.\s*',
1,
1
) AS relationship1,
REGEXP_SUBSTR(
column_name,
'(.*?) is (.*?) of (.*?)\((\d+)\)\.\s*',
1,
2
) AS relationship2,
REGEXP_SUBSTR(
column_name,
'(.*?) is (.*?) of (.*?)\((\d+)\)\.\s*',
1,
3
) AS relationship3,
REGEXP_SUBSTR(
column_name,
'(.*?) is (.*?) of (.*?)\((\d+)\)\.\s*',
1,
4
) AS relationship4
FROM table_name
Which, for the sample data:
CREATE TABLE table_name(column_name) AS
SELECT 'van dam is brother of Prince Charles(12345). Mathew Perker is son of Prince Charles(12345).' FROM DUAL UNION ALL
SELECT 'Madam Currie is grandmother of Albert Eistine(56789). Pieer Currie is grandfather of Albert Eistine(56789). CV Raman is friend of Albert Eistine(56789).' FROM DUAL;
Outputs:
RELATIONSHIP1 | RELATIONSHIP2 | RELATIONSHIP3 | RELATIONSHIP4 |
---|---|---|---|
van dam is brother of Prince Charles(12345). | Mathew Perker is son of Prince Charles(12345). | null | null |
Madam Currie is grandmother of Albert Eistine(56789). | Pieer Currie is grandfather of Albert Eistine(56789). | CV Raman is friend of Albert Eistine(56789). | null |
If you want a more detailed breakdown, you can extract the sub-groups from the expression:
SELECT REGEXP_SUBSTR( column_name, '(.*?) is (.*?) of (.*?)\((\d+)\)\.\s*', 1, 1, NULL, 1) AS from1,
REGEXP_SUBSTR( column_name, '(.*?) is (.*?) of (.*?)\((\d+)\)\.\s*', 1, 1, NULL, 2) AS relationship1,
REGEXP_SUBSTR( column_name, '(.*?) is (.*?) of (.*?)\((\d+)\)\.\s*', 1, 1, NULL, 3) AS to1,
REGEXP_SUBSTR( column_name, '(.*?) is (.*?) of (.*?)\((\d+)\)\.\s*', 1, 1, NULL, 4) AS id1,
REGEXP_SUBSTR( column_name, '(.*?) is (.*?) of (.*?)\((\d+)\)\.\s*', 1, 2, NULL, 1) AS from2,
REGEXP_SUBSTR( column_name, '(.*?) is (.*?) of (.*?)\((\d+)\)\.\s*', 1, 2, NULL, 2) AS relationship2,
REGEXP_SUBSTR( column_name, '(.*?) is (.*?) of (.*?)\((\d+)\)\.\s*', 1, 2, NULL, 3) AS to2,
REGEXP_SUBSTR( column_name, '(.*?) is (.*?) of (.*?)\((\d+)\)\.\s*', 1, 2, NULL, 4) AS id2,
REGEXP_SUBSTR( column_name, '(.*?) is (.*?) of (.*?)\((\d+)\)\.\s*', 1, 3, NULL, 1) AS from3,
REGEXP_SUBSTR( column_name, '(.*?) is (.*?) of (.*?)\((\d+)\)\.\s*', 1, 3, NULL, 2) AS relationship3,
REGEXP_SUBSTR( column_name, '(.*?) is (.*?) of (.*?)\((\d+)\)\.\s*', 1, 3, NULL, 3) AS to3,
REGEXP_SUBSTR( column_name, '(.*?) is (.*?) of (.*?)\((\d+)\)\.\s*', 1, 3, NULL, 4) AS id3,
REGEXP_SUBSTR( column_name, '(.*?) is (.*?) of (.*?)\((\d+)\)\.\s*', 1, 4, NULL, 1) AS from4,
REGEXP_SUBSTR( column_name, '(.*?) is (.*?) of (.*?)\((\d+)\)\.\s*', 1, 4, NULL, 2) AS relationship4,
REGEXP_SUBSTR( column_name, '(.*?) is (.*?) of (.*?)\((\d+)\)\.\s*', 1, 4, NULL, 3) AS to4,
REGEXP_SUBSTR( column_name, '(.*?) is (.*?) of (.*?)\((\d+)\)\.\s*', 1, 4, NULL, 4) AS id4
FROM table_name
Which outputs:
FROM1 | RELATIONSHIP1 | TO1 | ID1 | FROM2 | RELATIONSHIP2 | TO2 | ID2 | FROM3 | RELATIONSHIP3 | TO3 | ID3 | FROM4 | RELATIONSHIP4 | TO4 | ID4 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
van dam | brother | Prince Charles | 12345 | Mathew Perker | son | Prince Charles | 12345 | null | null | null | null | null | null | null | null |
Madam Currie | grandmother | Albert Eistine | 56789 | Pieer Currie | grandfather | Albert Eistine | 56789 | CV Raman | friend | Albert Eistine | 56789 | null | null | null | null |
If you want it to have a dynamic number of matches then output the data in rows, not columns:
SELECT item,
REGEXP_SUBSTR( column_name, '(.*?) is (.*?) of (.*?)\((\d+)\)\.\s*', 1, item ) AS relationship
FROM table_name
CROSS APPLY (
SELECT LEVEL AS item
FROM DUAL
CONNECT BY LEVEL <= REGEXP_COUNT(column_name, '(.*?) is (.*?) of (.*?)\((\d+)\)\.\s*')
)
or:
SELECT item,
REGEXP_SUBSTR( column_name, '(.*?) is (.*?) of (.*?)\((\d+)\)\.\s*', 1, item, NULL, 1) AS from_name,
REGEXP_SUBSTR( column_name, '(.*?) is (.*?) of (.*?)\((\d+)\)\.\s*', 1, item, NULL, 2) AS relationship,
REGEXP_SUBSTR( column_name, '(.*?) is (.*?) of (.*?)\((\d+)\)\.\s*', 1, item, NULL, 3) AS to_name,
REGEXP_SUBSTR( column_name, '(.*?) is (.*?) of (.*?)\((\d+)\)\.\s*', 1, item, NULL, 4) AS id
FROM table_name
CROSS APPLY (
SELECT LEVEL AS item
FROM DUAL
CONNECT BY LEVEL <= REGEXP_COUNT(column_name, '(.*?) is (.*?) of (.*?)\((\d+)\)\.\s*')
)
Which the latter outputs:
ITEM | FROM_NAME | RELATIONSHIP | TO_NAME | ID |
---|---|---|---|---|
1 | van dam | brother | Prince Charles | 12345 |
2 | Mathew Perker | son | Prince Charles | 12345 |
1 | Madam Currie | grandmother | Albert Eistine | 56789 |
2 | Pieer Currie | grandfather | Albert Eistine | 56789 |
3 | CV Raman | friend | Albert Eistine | 56789 |