cgdbbinary-treevalgrindwatchpoint

Why does a GDB watchpoint stop on an irrelevant line when swapping binary tree nodes?


I am trying to swap two nodes A and B in a binary tree so that the places they are actually stored in memory change but the tree topology is not changed. I added special handling for swapping a node with its parent, but it still seems that it doesn't work. I'm using Valgrind with vgdb so that I can catch memory errors and also get consistent addresses. If I have a tree like

78
  \
   40
  /  \
5c   c5

And then I try to swap A=40 and B=5c, the links get messed up. Specifically, 40->right. Setting a watchpoint on it (watch -l), I found that 40->right is being set to 5c->right (NULL) by memcpy as it should be, but then also that it is being changed to A later by if(a_l.left == b){ which is clearly impossible. I've had a watchpoint report the wrong line like this before when I was using movq instead of movb in assembly, but I'm pretty sure I have the sizes right this time because I didn't at first and it didn't make it through any swaps but I fixed it and now it makes it through around a dozen. I sanity check the tree after every operation so the error is here. Here is the simplest demonstration I could manage:

#include <stdlib.h>
#include <string.h>
#include <assert.h>

typedef struct avl_node avl_node;
struct avl_node{
    avl_node *left, *right, *parent;
    signed char balance;
    char data[];
};

avl_node *avl_root(avl_node *n){
    while(n && n->parent){
        n = n->parent;
    }
    return n;
}

inline static int avl_check_links(avl_node *n){
    if(!n)return 1;
    if(n->left){
        if(n->left->parent != n){
            return 0;
        }
        if(!avl_check_links(n->left)){
            return 0;
        }
    }
    if(n->right){
        if(n->right->parent != n){
            return 0;
        }
        if(!avl_check_links(n->right)){
            return 0;
        }
    }
    return 1;
}

void avl_swap_nodes(avl_node *a, avl_node *b, size_t size){
    avl_node a_l = *a, b_l = *b;
    char tmp[sizeof(avl_node) + size];
    memcpy(tmp, a, sizeof(avl_node) + size);
    memcpy(a, b, sizeof(avl_node) + size);
    memcpy(b, tmp, sizeof(avl_node) + size);
    if(a_l.left){
        a_l.left->parent = b;
    }
    if(a_l.right){
        a_l.right->parent = b;
    }
    if(b_l.left){
        b_l.left->parent = a;
    }
    if(b_l.right){
        b_l.right->parent = a;
    }
    if(a_l.parent){
        if(a_l.parent->left == a){
            a_l.parent->left = b;
        }else{
            a_l.parent->right = b;
        }
    }
    if(b_l.parent){
        if(b_l.parent->left == b){
            b_l.parent->left = a;
        }else{
            b_l.parent->right = a;
        }
    }
    if(a_l.parent == b){
        if(b_l.left == a){
            b->left = a_l.left;
            a->left = b;
        }else{
            b->right = a_l.right;
            a->right = b;
        }
        a->parent = b_l.parent;
        b->parent = a;
    }else if(b_l.parent == a){//GDB stops here on a watch -l a->right
        if(a_l.left == b){
            a->left = b_l.left;
            b->left = a;
        }else{
            a->right = b_l.right;
            b->right = a;
        }
        b->parent = a_l.parent;
        a->parent = b;
    }
    assert(avl_check_links(avl_root(a)));
    assert(avl_check_links(avl_root(b)));
}

int main(void){
    avl_node a, b, c, d;
    a = (avl_node){.right=&b};
    b = (avl_node){.left=&c, .right=&d, .parent=&a};
    c = (avl_node){.parent=&b};
    d = (avl_node){.parent=&b};
    assert(avl_check_links(avl_root(&a)));
    avl_swap_nodes(&b, &c, 0);
}

Why does GDB stop on the wrong line? I think it may have to do with the fact that I am using vgdb: it also skips some lines when I single step. Also why is a->right changed a second time at all? Thank you.

You can get this file to run with reasonably recent versions of gcc, gdb, and valgrind by doing gcc -g -o main main.c, valgrind --vgdb=yes --vgdb-error=0 ./main&, gdb main, tar rem | vgdb, b avl_swap_nodes, c, watch -l a->right, and then get rid of the vgdb process neatly by doing c repeatedly and then Ctrl-d or kill and then Ctrl-d.


Solution

  • I figured this out and it isn't fun so I'm going to answer my own question. The node swapping code is wrong. Here is a version that works

    #include <stddef.h>
    void avl_swap_nodes(avl_node *a, avl_node *b, size_t size){
        avl_node a_l = *a, b_l = *b;
        char tmp[offsetof(avl_node, data) + size];
        memcpy(tmp, a, offsetof(avl_node, data) + size);
        memcpy(a, b, offsetof(avl_node, data) + size);
        memcpy(b, tmp, offsetof(avl_node, data) + size);
        if(a_l.parent == b){
            if(b_l.left == a){
                a->left = b;
            }else{
                a->right = b;
            }
            b->parent = a;
            if(a->parent){
                if(a->parent->left == b){
                    a->parent->left = a;
                }else{
                    a->parent->right = a;
                }
            }
        }else if(b_l.parent == a){
            if(a_l.left == b){
                b->left = a;
            }else{
                b->right = a;
            }
            a->parent = b;
            if(b->parent){
                if(b->parent->left == a){
                    b->parent->left = b;
                }else{
                    b->parent->right = b;
                }
            }
        }else{
            if(a->parent){
                if(b->parent == a->parent){
                    if(a->parent->left == b){
                        a->parent->left = a;
                        b->parent->right = b;
                    }else{
                        a->parent->right = a;
                        b->parent->left = b;
                    }
                }else{
                    if(a->parent->left == b){
                        a->parent->left = a;
                    }else{
                        a->parent->right = a;
                    }
                }
            }
            if(b->parent && b->parent != a->parent){
                if(b->parent->left == a){
                    b->parent->left = b;
                }else{
                    b->parent->right = b;
                }
            }
        }
        if(a->left){
            a->left->parent = a;
        }
        if(a->right){
            a->right->parent = a;
        }
        if(b->left){
            b->left->parent = b;
        }
        if(b->right){
            b->right->parent = b;
        }
        ASSERT_ALL(avl_root(a));
        ASSERT_ALL(avl_root(b));
    }
    

    The reason why GDB is reporting the watchpoint on the wrong line is because a previous memory write overflows. This can happen for example when you use movq instead of movb in assembly, or when you do char a; ((int*)&a) = (int)0; in C, or when you memcpy more than you meant to. This last one is what is causing problems in my code. Consider the struct struct A{int a; char b[];);. sizeof(struct A) is probably 8 because of structure padding, but offsetof(struct A, b) is probably 4. Therefore if we calculate the size of the struct A together with the data in the flexible array at the end by adding the data size to sizeof(struct A), we will calculate a value 4 bytes greater than it should be. The solution is to use offsetof(struct A, b); instead.

    The reason why GDB is skipping lines is because I was using valgrind --vgdb=yes instead of valgrind --vgdb=full.