regexoracle-databasereplaceregexp-replace

How to find the exact match of a string and replace in Oracle?


I am trying to replace the words in a sentence if the word exits in "words" column with a hyperlink along with it's word and id. The table contains the column id word and sentence. Below code I got it from one of the helpful fellow mate here.Thank you.

https://dbfiddle.uk/?rdbms=oracle_18&fiddle=8fe103264dc650ad4bd87b20f9c6931a

Create table temp(
  id       NUMBER,
  word     VARCHAR2(1000),
  Sentence VARCHAR2(2000)
);

insert into temp
SELECT 1,'automation testing', 'automtestingation testing is popular kind of testing' FROM DUAL UNION ALL
SELECT 2,'testing','manual testing' FROM DUAL UNION ALL
SELECT 3,'manual testing','this is an old method of testing' FROM DUAL UNION ALL
SELECT 5,'B-number analysis','B-number analysis table' FROM DUAL UNION ALL
SELECT 6,'B-number analysis table','testing B-number analysis' FROM DUAL;

MERGE INTO temp dst
USING (
  WITH ordered_words ( rn, id, word ) AS (
    SELECT ROW_NUMBER() OVER ( ORDER BY LENGTH( word ) ASC, word DESC ),
           id,
           word
    FROM   temp
  ),
  sentences ( rid, sentence, rn ) AS (
    SELECT ROWID,
           sentence,
           COUNT(*) OVER () + 1
    FROM temp
  UNION ALL
    SELECT s.rid,
           REGEXP_REPLACE(
             REGEXP_REPLACE(
               s.sentence,
               '(^|[^a-z])' || w.word || '($|[^a-z])',
               '\1' || 'http://localhost/'|| w.id ||'/<u>'||w.word ||'<u>' || '\2',
               1,
               0,
               'i'
              ),
             '(^|[^a-z])' || w.word || '($|[^a-z])',
             '\1' || w.word || '\2',
               1,
               0,
               'i'
              ),
           s.rn - 1
    FROM   sentences s
           INNER JOIN ordered_words w
           ON ( s.rn - 1 = w.rn ) 
  )
  SELECT rid, sentence
  FROM   sentences
  WHERE  rn = 1
) src
ON ( dst.ROWID = src.RID )
WHEN MATCHED THEN
  UPDATE
  SET    sentence = src.sentence;

The value to be replaced is https://localhost/"id"/"word" If you see the value for id = 5 (B-number analysis) the sentence is https://localhost/6/localhost/5/B-number analysis table But the actual value is supposed to be https://localhost/6/B-number analysis table.

Current output:

ID   WORD                SENTENCE
1   automation testing  automtestingation http://localhost/2/<u>testing<u> 
                        is popular kind of http://localhost/2/<u>testing<u>
2   testing             http://localhost/3/<u>manual 
                        http://localhost/2/<u>testing<u><u>
3   manual testing      this is an old method of 
                        http://localhost/2/<u>testing<u>
5   B-number analysis   http://localhost/6/<u>http://localhost/5/<u>B- 
                        number analysis<u> table<u>
6   B-number analysis table http://localhost/2/<u>testing<u> 
                        http://localhost/5/<u>B-number analysis<u>

Solution

  • Do it in two steps:

    1. First replace the strings with the ids in some wrapper that is not going to appear in your text (i.e. testing maps to ${2})
    2. Then, once all the replacements have been done, replace the wrapped ids with the urls (i.e. ${2} maps to http://localhost/2/<u>testing</u>)

    Oracle Setup:

    Create table temp(
      id       NUMBER,
      word     VARCHAR2(1000),
      Sentence VARCHAR2(2000)
    );
    
    insert into temp
    SELECT 1,'automation testing', 'automtestingation testing is popular kind of testing' FROM DUAL UNION ALL
    SELECT 2,'testing','manual testing' FROM DUAL UNION ALL
    SELECT 3,'manual testing','this is an old method of testing' FROM DUAL UNION ALL
    SELECT 4,'punctuation','automation testing,manual testing,punctuation,automanual testing-testing' FROM DUAL UNION ALL
    SELECT 5,'B-number analysis','B-number analysis table' FROM DUAL UNION ALL
    SELECT 6,'B-number analysis table','testing B-number analysis' FROM DUAL UNION ALL
    SELECT 7,'Not Matched','testing testing testing' FROM DUAL;
    

    Merge:

    MERGE INTO temp dst
    USING (
      WITH ordered_words ( rn, id, word ) AS (
        SELECT ROW_NUMBER() OVER ( ORDER BY LENGTH( word ) ASC, word DESC ),
               id,
               word
        FROM   temp
      ),
      sentences_with_ids ( rid, sentence, rn ) AS (
        SELECT ROWID,
               sentence,
               ( SELECT COUNT(*) + 1 FROM ordered_words )
        FROM   temp
      UNION ALL
        SELECT s.rid,
               REGEXP_REPLACE(
                 REGEXP_REPLACE(
                   s.sentence,
                   '(^|\W)' || w.word || '($|\W)',
                   '\1${'|| w.id ||'}\2'
                  ),
                 '(^|\W)' || w.word || '($|\W)',
                 '\1${' || w.id || '}\2'
               ),
               s.rn - 1
        FROM   sentences_with_ids s
               INNER JOIN ordered_words w
               ON ( s.rn - 1 = w.rn ) 
      ),
      sentences_with_words ( rid, sentence, rn ) AS (
        SELECT rid,
               sentence,
               ( SELECT COUNT(*) + 1 FROM ordered_words )
        FROM   sentences_with_ids
        WHERE  rn = 1
      UNION ALL
        SELECT s.rid,
               REPLACE(
                 s.sentence,
                 '${' || w.id || '}',
                 'http://localhost/' || w.id || '/<u>' || w.word || '</u>'
               ),
               s.rn - 1
        FROM   sentences_with_words s
               INNER JOIN ordered_words w
               ON ( s.rn - 1 = w.rn ) 
      )
      SELECT rid, sentence
      FROM   sentences_with_words
      WHERE  rn = 1
    ) src
    ON ( dst.ROWID = src.RID )
    WHEN MATCHED THEN
      UPDATE
      SET    sentence = src.sentence;
    

    Output:

    ID | WORD                    | SENTENCE                                                                                                                                                                                                  
    -: | :---------------------- | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
     1 | automation testing      | automtestingation http://localhost/2/<u>testing</u> is popular kind of http://localhost/2/<u>testing</u>                                                                                                  
     2 | testing                 | http://localhost/3/<u>manual testing</u>                                                                                                                                                                  
     3 | manual testing          | this is an old method of http://localhost/2/<u>testing</u>                                                                                                                                                
     4 | punctuation             | http://localhost/1/<u>automation testing</u>,http://localhost/3/<u>manual testing</u>,http://localhost/4/<u>punctuation</u>,automanual http://localhost/2/<u>testing</u>-http://localhost/2/<u>testing</u>
     5 | B-number analysis       | http://localhost/6/<u>B-number analysis table</u>                                                                                                                                                         
     6 | B-number analysis table | http://localhost/2/<u>testing</u> http://localhost/5/<u>B-number analysis</u>                                                                                                                             
     7 | Not Matched             | http://localhost/2/<u>testing</u> http://localhost/2/<u>testing</u> http://localhost/2/<u>testing</u>                                                                                                     
    

    db<>fiddle here


    Update:

    Escape any special regular expression characters in the words:

    MERGE INTO temp dst
    USING (
      WITH ordered_words ( rn, id, word, regex_safe_word ) AS (
        SELECT ROW_NUMBER() OVER ( ORDER BY LENGTH( word ) ASC, word DESC ),
               id,
               word,
               REGEXP_REPLACE( word, '([][)(}{|^$\.*+?])', '\\\1' )
        FROM   temp
      ),
      sentences_with_ids ( rid, sentence, rn ) AS (
        SELECT ROWID,
               sentence,
               ( SELECT COUNT(*) + 1 FROM ordered_words )
        FROM   temp
      UNION ALL
        SELECT s.rid,
               REGEXP_REPLACE(
                 REGEXP_REPLACE(
                   s.sentence,
                   '(^|\W)' || w.regex_safe_word || '($|\W)',
                   '\1${'|| w.id ||'}\2'
                  ),
                 '(^|\W)' || w.regex_safe_word || '($|\W)',
                 '\1${' || w.id || '}\2'
               ),
               s.rn - 1
        FROM   sentences_with_ids s
               INNER JOIN ordered_words w
               ON ( s.rn - 1 = w.rn ) 
      ),
      sentences_with_words ( rid, sentence, rn ) AS (
        SELECT rid,
               sentence,
               ( SELECT COUNT(*) + 1 FROM ordered_words )
        FROM   sentences_with_ids
        WHERE  rn = 1
      UNION ALL
        SELECT s.rid,
               REPLACE(
                 s.sentence,
                 '${' || w.id || '}',
                 'http://localhost/' || w.id || '/<u>' || w.word || '</u>'
               ),
               s.rn - 1
        FROM   sentences_with_words s
               INNER JOIN ordered_words w
               ON ( s.rn - 1 = w.rn ) 
      )
      SELECT rid, sentence
      FROM   sentences_with_words
      WHERE  rn = 1
    ) src
    ON ( dst.ROWID = src.RID )
    WHEN MATCHED THEN
      UPDATE
      SET    sentence = src.sentence;
    

    Output:

    ID | WORD                    | SENTENCE                                                                                                                                                                                                  
    -: | :---------------------- | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
     1 | automation testing      | automtestingation http://localhost/2/<u>testing</u> is popular kind of http://localhost/2/<u>testing</u>                                                                                                  
     2 | testing                 | http://localhost/3/<u>manual testing</u>                                                                                                                                                                  
     3 | manual testing          | this is an old method of http://localhost/2/<u>testing</u>                                                                                                                                                
     4 | punctuation             | http://localhost/1/<u>automation testing</u>,http://localhost/3/<u>manual testing</u>,http://localhost/4/<u>punctuation</u>,automanual http://localhost/2/<u>testing</u>-http://localhost/2/<u>testing</u>
     5 | B-number analysis       | http://localhost/6/<u>B-number analysis table</u>                                                                                                                                                         
     6 | B-number analysis table | http://localhost/2/<u>testing</u> http://localhost/5/<u>B-number analysis</u>                                                                                                                             
     7 | Not Matched             | http://localhost/2/<u>testing</u> http://localhost/2/<u>testing</u> http://localhost/2/<u>testing</u>                                                                                                     
     8 | ^[($                    | http://localhost/2/<u>testing</u> characters http://localhost/8/<u>^[($</u> that need escaping in a regular expression                                                                                    
    

    db<>fiddle here