cmemory-managementmallocmemset

Using memset() after malloc() causes assertion


I have a struct like this:

struct state {
    bool isfinal;
    bool *isfull;
    char **board;
};

isfull is an array and board is a 2D array.

I can allocate memory for the arrays with this function:

struct state new_state(struct state state)
{
    int i;
    struct state new_state = {};
    new_state.isfull = (bool *)malloc(BOARD_WIDTH * sizeof(bool));
    new_state.board = (char **)malloc(BOARD_HIGHT * sizeof(char *));
    for (i = 0; i < BOARD_HIGHT; ++i)
        new_state.board[i] = (char *)malloc(BOARD_WIDTH * sizeof(char));
    if (state.board) {
        memcpy(new_state.isfull, state.isfull, BOARD_WIDTH * sizeof(bool));
        for (i = 0; i < BOARD_HIGHT; ++i)
            memcpy(new_state.board[i], state.board[i], BOARD_WIDTH * sizeof(char));
    } else {
        getchar();
        memset(new_state.isfull, 0, BOARD_WIDTH * sizeof(bool));
        for (i = 0; i < BOARD_HIGHT; ++i)
            memset(new_state.board[i], 0, BOARD_WIDTH * sizeof(char *));
    }
    return new_state;
}

First time this function is called with 0 and the else block will be executed. This results in an assertion failure.
Also in case of assertion, the function executes until the return statement and only then the program breaks.

I've solved the problem by using calloc() instead.
this works:

struct state new_state(struct state state)
{
    int i;
    struct state new_state = {};
    new_state.isfull = (bool *)calloc(BOARD_WIDTH, sizeof(bool));
    new_state.board = (char **)calloc(BOARD_HIGHT, sizeof(char *));
    for (i = 0; i < BOARD_HIGHT; ++i)
        new_state.board[i] = (char *)calloc(BOARD_WIDTH, sizeof(char));
    if (state.board) {
        memcpy(new_state.isfull, state.isfull, BOARD_WIDTH * sizeof(bool));
        for (i = 0; i < BOARD_HIGHT; ++i)
            memcpy(new_state.board[i], state.board[i], BOARD_WIDTH * sizeof(char));
    } else {
        // memset(new_state.isfull, 0, BOARD_WIDTH * sizeof(bool));
        // for (i = 0; i < BOARD_HIGHT; ++i)
        //     memset(new_state.board[i], 0, BOARD_WIDTH * sizeof(char *));
    }
    return new_state;
}

and interestingly using a getchar() function as a form of delay also works!
like this:

struct state new_state(struct state state)
{
    int i;
    struct state new_state = {};
    new_state.isfull = (bool *)malloc(BOARD_WIDTH * sizeof(bool));
    new_state.board = (char **)malloc(BOARD_HIGHT * sizeof(char *));
    for (i = 0; i < BOARD_HIGHT; ++i)
        new_state.board[i] = (char *)malloc(BOARD_WIDTH * sizeof(char));
    if (state.board) {
        memcpy(new_state.isfull, state.isfull, BOARD_WIDTH * sizeof(bool));
        for (i = 0; i < BOARD_HIGHT; ++i)
            memcpy(new_state.board[i], state.board[i], BOARD_WIDTH * sizeof(char));
    } else {
        getchar();
        memset(new_state.isfull, 0, BOARD_WIDTH * sizeof(bool));
        for (i = 0; i < BOARD_HIGHT; ++i)
            memset(new_state.board[i], 0, BOARD_WIDTH * sizeof(char *));
    }
    return new_state;
}

searching the internet, I've found using memset() after malloc() is a common practice.
so first question: why doesn't the first version of my function work?
and second: why using getchar() make it work?


Solution

  • The problem is an incorrect size passed to memset in:

    memset(new_state.board[i], 0, BOARD_WIDTH * sizeof(char *));
    

    You mean to clear the char array allocated for new_state.board[i] to all bits zero, but you compute the number of bytes using the size of a char * instead of a char, which produces a much larger size, causing memset to overwrite bytes beyond the end of the allocated objects. As written, the code has undefined behavior, and in your case it causes an assertion violation later, probably because you overwrote internal data structures used by malloc to keep track of the memory allocation state, which a later call to malloc, realloc or free detects and reports.

    Your second version using calloc() is fine because you removed the bogus code. Using calloc() is a good approach because the memory returned by the C library call is in a consistent known state, at little or no penalty, which will prevent many potential cases of unpredictable behavior.

    The third example, you have the same undefined behavior as the initial code, but because of differing circumstances (an extra call to getchar() that might allocate memory for its own purpose), the undefined behavior has different consequences... This is inherent to undefined behavior: anything can happen and it is impossible to predict if, when and how undesired side effects might manifest.

    Here are basic recommendations to try and avoid undefined behavior:

    Also note these remarks:

    Here is a modified version:

    #include <stdlib.h>
    
    // assuming BOARD_WIDTH and BOARD_HEIGHT have been defined as constant expressions
    
    struct state {
        bool isfinal;
        bool isfull[BOARD_WIDTH];
        char board[BOARD_HEIGHT][BOARD_WIDTH];
    };
    
    /* allocate a copy of a state */ 
    struct state *new_state(const struct state *state)
    {
        struct state *new_state = calloc(1, sizeof(*new_state));
        if (new_state && state) {
            *new_state = *state;
            new_state->isfinal = false;
        }
        return new_state;
    }