c pcre pcre2

Replace all matches with pcre2_substitute in C


I can replace the first occurrence of the match with pcre2_substitute, using the following code:

#define PCRE2_CODE_UNIT_WIDTH 8

#include <stdio.h>
#include <string.h>
#include <pcre2.h>
int main(int argc, char **argv)
{
    PCRE2_SPTR pattern = "(\\d+)";
    PCRE2_SPTR subject = "1 something 849 for 993";
    PCRE2_SPTR replacement = "XXX";

    pcre2_code *re;
    int errornumber;
    int i;
    int rc;

    PCRE2_SIZE erroroffset;
    PCRE2_SIZE *ovector;

    size_t subject_length;
    size_t replacement_length = strlen((char *)replacement);

    pcre2_match_data *match_data;

    subject_length = strlen((char *)subject);

    PCRE2_UCHAR output[1024] = "";
    PCRE2_SIZE outlen = sizeof(output) / sizeof(PCRE2_UCHAR);

    re = pcre2_compile(pattern, PCRE2_ZERO_TERMINATED, 0, &errornumber, &erroroffset, NULL);
    if (re == NULL)
    {
        PCRE2_UCHAR buffer[256];
        pcre2_get_error_message(errornumber, buffer, sizeof(buffer));
        printf("PCRE2 compilation failed at offset %d: %s\n", (int)erroroffset, buffer);
    }

    match_data = pcre2_match_data_create_from_pattern(re, NULL);
    rc = pcre2_substitute(re, subject, subject_length, 0, 0, match_data, NULL, replacement, 
         replacement_length, output, &outlen);
    printf("Output: %s", output);
    return 0;
}

I know that I could call pcre2_substitute in a loop to replace each subsequent match, but I am not sure about the safest way to feed the output back in as the subject of the next step.


Solution

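  • You can replace all matches with a single call to pcre2_substitute by passing the PCRE2_SUBSTITUTE_GLOBAL option (PCRE2_SUBSTITUTE_EXTENDED, also set below, additionally enables extended processing of the replacement string):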

    /* PCRE2_SUBSTITUTE_GLOBAL makes this one call replace every match, not just the first. */
    rc = pcre2_substitute(re, subject, subject_length, 0,
        PCRE2_SUBSTITUTE_GLOBAL | PCRE2_SUBSTITUTE_EXTENDED,
        match_data, NULL, replacement, replacement_length, output, &outlen);