I am trying to build a runtime command line tool and don't get why I get a segmentation fault when I try to tokenize the string input by the user.
In the str_utils.c
file, the expression printf("token: %s\n", tokens[i]);
is evaluated properly and the content of the array printed, but in the user_input.c
file, if I print the pointer with %p
, everything is fine, but as soon as try to print with %s
, I get a segmentation fault. So it looks like I am failing at storing the individual token in the array tokens
.
Here is the code:
// str_utils.c
int str_utils_split(char *str, char *delimiter, char **tokens,
int *tokens_count) {
char *local_str = strdup(str);
for (char *token = strtok(local_str, delimiter); token != NULL;
token = strtok(NULL, delimiter)) {
tokens = realloc(tokens, sizeof(char *) * (++(*tokens_count)));
if (tokens == NULL) {
fprintf(stderr, "realloc failed! Exiting!");
return 1;
}
/* Am doing something wrong here? I ended up doing this because
I read strtok was not doing any allocation
*/
tokens[*tokens_count - 1] =
strcpy((char *)calloc(strlen(token) + 1, sizeof(char)), token);
}
// or there?
tokens = realloc(tokens, sizeof(char *) * (*tokens_count + 1));
tokens[*tokens_count + 1] = 0;
for (int i = 0; i < *tokens_count; i++) {
printf("tokenin called fn: %s\n", tokens[i]);
}
free(local_str);
return 0;
}
// user_input.c
char command_line_buffer[100];
void ui_parse_command(char *line, ui_command_t *command_obj) {
char **cmd_tokens = malloc(sizeof(char *));
char *delimiter = " ";
int tokens_count = 0;
str_utils_split(line, delimiter, cmd_tokens, &tokens_count);
for (int i = 0; i < tokens_count; i++) {
printf("token in calling fn: %p\n", cmd_tokens[i]); // OK
printf("token in calling fn: %s\n", cmd_tokens[i]); // seg fault
}
}
void ui_start_prompt(void (*callback)(ui_command_t *)) {
printf("auralizer> ");
ui_command_t command_obj;
while (fgets(command_line_buffer, sizeof(command_line_buffer), stdin) !=
NULL) {
ui_parse_command(command_line_buffer, &command_obj);
callback(&command_obj);
printf("auralizer> ");
}
}
I tried with and without local copies, with and without a local tmp_tokens
array in the split function, I tried to start from a NULL pointer in user_input.c
, I turned it in all directions and it always compiles, but always goes into a segfault at the same place.
The correct output would be (the correct output just happened once and crashed again after I restarted the program):
auralizer> tralala lala
token in called fn: tralala
token in called fn: lala
token in calling fn: tralala
token in calling fn: lala
And after re-executing the program (printf("%s")
):
auralizer> tralala lala
token in called fn: tralala
token in called fn: lala
[1] 27102 segmentation fault (core dumped) ./bin/auralizer
Printing the pointers (printf("%p")
):
auralizer> tralala lala
token in called fn: tralala
token in called fn: lala
token in calling fn: 0x557717df4cb0
token in calling fn: 0x557717df4cd0
A few style notes.
strdup
To me this is suspect:
tokens[*tokens_count - 1] = strcpy((char *)calloc(strlen(token) + 1, sizeof(char)), token);
calloc
. Relevant reading: Should I cast the result of malloc (in C)?calloc
succeeded.strdup
once. Why not here? tokens[*tokens_count - 1] = strdup(token);
You've added a lot of complexity to your code to manage a growing dynamic array. You could simply work with a fixed size array and pass a maximum size parameter to your tokenizing function, avoiding this issue entirely, or you could develop a data type and accompanying functions to handle a growing dynamic array (or "vector") so that you don't need to mingle that logic with the tokenizing logic. This will aid in debugging.
Note that growing such a data structure by a single element is inefficient and involves many more realloc
calls, which are comparatively expensive. Better to grow by a factor of 2 or 1.5. The optimum growth factor is a subject of debate and veers heavily into opinion territory. You would likely also want sv_make_empty
to start a vector with a capacity of more than 1
.
Such an approach might look like the following, where the tokenizing logic is simply in main
.
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
typedef struct StringVec {
size_t cap;
size_t sz;
char **data;
} StringVec;
StringVec *sv_make_empty(void);
StringVec *sv_grow(StringVec *);
StringVec *sv_push(StringVec *, char *);
void sv_destroy(StringVec *);
int main(void) {
char line[] = "Hello world foo bar";
char *delim = " ";
StringVec *vec = sv_make_empty();
if (!vec) {
fprintf(stderr, "Allocation of string vector failed.\n");
return 1;
}
char *tok = strtok(line, delim);
while (tok) {
if (!sv_push(vec, strdup(tok))) {
fprintf(stderr, "Pushing onto vector failed.\n");
sv_destroy(vec);
return 1;
}
tok = strtok(NULL, delim);
}
for (size_t i = 0; i < vec->sz; i++) {
printf("%s\n", vec->data[i]);
}
sv_destroy(vec);
}
StringVec *sv_make_empty(void) {
StringVec *vec = malloc(sizeof(*vec));
if (!vec) return NULL;
vec->cap = 1;
vec->sz = 0;
vec->data = malloc(vec->cap * sizeof(char *));
if (!vec->data) {
free(vec);
return NULL;
}
return vec;
}
StringVec *sv_grow(StringVec *vec) {
size_t growth_factor = 2;
char **new_data = realloc(vec->data, vec->cap * growth_factor);
if (!new_data) {
return NULL;
}
vec->data = new_data;
vec->cap *= growth_factor;
return vec;
}
StringVec *sv_push(StringVec *vec, char *str) {
if (!vec) return NULL;
if (vec->sz == vec->cap && !sv_grow(vec)) return NULL;
vec->data[vec->sz++] = str;
return vec;
}
void sv_destroy(StringVec *vec) {
if (!vec) return;
free(vec->data);
free(vec);
}
Now with the vector growing the way it does, it's possible a function returning a StringVec
is keeping more memory alive than need be. You can keep the memory under control without having to do the costly grow-by-one method by implementing a sv_shrink
function.
StringVec *sv_shrink(StringVec *vec) {
if (!vec) return NULL;
if (vec->cap <= 1 || vec->cap == vec->sz) return vec;
char **new_data = realloc(vec->data, sizeof(char *) * vec->sz);
if (!new_data) return NULL;
vec->data = new_data;
vec->cap = vec->sz;
return vec;
}
While passing a char ***
argument also works, being a three star programmer is not a good thing.