I'm running into multiple issues with the following code:
while
loop runs indefinitely.matches[count] = malloc(sizeof(char) * match_length);
and matches[count][match_length] = 0;
are Invalid size 1 write. I think my allocation code sucks (sizeof(matches) / sizeof(matches[0])
after my while
loops compute to 1
).strncpy
would add a null byte at the end of my string automatically but it appears it doesn't 🤷while
loops, if I print each element of the array, they contains a newline, which shouldn't be the case, only way to get rid of those is to run matches[count][match_length] = 0;
I think I misunderstood lots of part about regex, strncpy
and malloc
. I haven't done C programming since college and wanted to get back at it.
char **matches = (char **)malloc(sizeof(char *));
char *line = NULL;
size_t l = 0, count = 0, size = 1;
ssize_t read = 0;
regex_t rgx;
if (regcomp(&rgx, "((gopher|https?)://[[:alnum:][:punct:]]+)", REG_ICASE | REG_EXTENDED)) {
return 0;
}
while ((read = getline(&line, &l, file)) != -1) {
regmatch_t match = { 0, 0 };
int match_length, error;
if ((error = regexec(&rgx, line + match.rm_eo, 1, &match, 0)) != 0)
continue;
do {
printf("Error: %d, Line: %s\n", error, line + match.rm_eo);
match_length = match.rm_eo - match.rm_so;
matches[count] = malloc(sizeof(char) * match_length);
strncpy(matches[count], line + match.rm_so, match_length);
matches[count][match_length] = 0;
if (++count == size) {
size *= 2;
matches = realloc(matches, sizeof(char *) * size);
if (!matches) {
regfree(&rgx);
free(line);
return 0;
}
}
} while ((error = regexec(&rgx, line + match.rm_eo + 1, 1, &match, REG_NOTBOL)) == 0);
}
There are multiple problems:
you should allocate one more byte in matches[count] = malloc(sizeof(char) * match_length);
for the null terminator.
strncpy
does not set a null byte at the end of the destination array if the source string has a length greater or equal to the third length argument. This function semantics are confusing and error prone, learn why you should stop using strncpy
already!
instead of strncpy(matches[count], line + match.rm_so, match_length);
you should use:
memcpy(matches[count], line + match.rm_so, match_length);
matches[count][match_length] = '\0';
Note that allocating and copying can be achieved with:
matches[count] = strndup(line + match.rm_so, match_length);
matches[count][match_length] = 0;
is causing the valgrind error message as you write beyond the allocation length of match_length
.
matches = realloc(matches, sizeof(char *) * size);
causes a memory leak if realloc
fails: the original pointer matches
is lost and the memory it pointed to is still allocated.
you can strip the newline at the end of line
before applying the regex matcher.
the positions stored in the match
structure are relative to the beginning of the string argument to regexec
, but you pass line + match.rm_so
and line + match.rm_eo + 1
so the offsets are not relative to the line
array and cannot be used.
after the loop, you cannot use (sizeof(matches) / sizeof(matches[0])
to determine the number of entries in the array. Both matches
and matches[0]
are pointers, hence the value 1
. You must pass count
back to the caller along with the matches
pointer.
Here is a modified version:
char **filter_urls(FILE *file, size_t *countp) {
size_t count = 0, size = 1;
char **matches = (char **)malloc(sizeof(char *) * size);
char *line = NULL;
size_t line_size = 0;
int fail = 0;
ssize_t len;
regex_t rgx;
if (matches == NULL)
return NULL;
if (regcomp(&rgx, "((gopher|https?)://[[:alnum:][:punct:]]+)", REG_ICASE | REG_EXTENDED)) {
free(matches);
return NULL;
}
while ((len = getline(&line, &line_size, file)) != -1) {
// strip the trailing newline if any (optional)
//if (len > 0 && line[len - 1] == '\n') {
// line[--len] == '\0'
//}
regmatch_t match = { 0, 0 };
int flags = 0;
char *ptr = line;
while (regexec(&rgx, ptr, 1, &match, flags) != 0) {
size_t match_length = match.rm_eo - match.rm_so;
matches[count] = strndup(ptr + match.rm_so, match_length);
if (matches[count] == NULL) {
fail = 1;
break;
}
if (++count == size) {
size_t new_size = size * 2;
char **new_matches = realloc(matches, sizeof(char *) * new_size);
if (new_matches == NULL) {
fail = 1;
break;
}
size = new_size;
matches = new_matches;
}
ptr += match.rm_eo;
flags = REG_NOTBOL;
}
}
regfree(&rgx);
free(line);
if (fail) {
// allocation failure: free all
while (count > 0) {
free(matches[--count]);
}
free(matches);
return NULL;
}
matches[count] = NULL;
*countp = count;
return matches;
}