I am trying to swap two nodes A
and B
in a binary tree so that the places they are actually stored in memory change but the tree topology is not changed. I added special handling for swapping a node with its parent, but it still seems that it doesn't work. I'm using Valgrind with vgdb so that I can catch memory errors and also get consistent addresses. If I have a tree like
78
\
40
/ \
5c c5
And then I try to swap A=40
and B=5c
, the links get messed up. Specifically, 40->right
. Setting a watchpoint on it (watch -l
), I found that 40->right
is being set to 5c->right
(NULL
) by memcpy
as it should be, but then also that it is being changed to A
later by if(a_l.left == b){
which is clearly impossible. I've had a watchpoint report the wrong line like this before when I was using movq
instead of movb
in assembly, but I'm pretty sure I have the sizes right this time because I didn't at first and it didn't make it through any swaps but I fixed it and now it makes it through around a dozen. I sanity check the tree after every operation so the error is here. Here is the simplest demonstration I could manage:
#include <stdlib.h>
#include <string.h>
#include <assert.h>
typedef struct avl_node avl_node;
struct avl_node{
avl_node *left, *right, *parent;
signed char balance;
char data[];
};
avl_node *avl_root(avl_node *n){
while(n && n->parent){
n = n->parent;
}
return n;
}
inline static int avl_check_links(avl_node *n){
if(!n)return 1;
if(n->left){
if(n->left->parent != n){
return 0;
}
if(!avl_check_links(n->left)){
return 0;
}
}
if(n->right){
if(n->right->parent != n){
return 0;
}
if(!avl_check_links(n->right)){
return 0;
}
}
return 1;
}
void avl_swap_nodes(avl_node *a, avl_node *b, size_t size){
avl_node a_l = *a, b_l = *b;
char tmp[sizeof(avl_node) + size];
memcpy(tmp, a, sizeof(avl_node) + size);
memcpy(a, b, sizeof(avl_node) + size);
memcpy(b, tmp, sizeof(avl_node) + size);
if(a_l.left){
a_l.left->parent = b;
}
if(a_l.right){
a_l.right->parent = b;
}
if(b_l.left){
b_l.left->parent = a;
}
if(b_l.right){
b_l.right->parent = a;
}
if(a_l.parent){
if(a_l.parent->left == a){
a_l.parent->left = b;
}else{
a_l.parent->right = b;
}
}
if(b_l.parent){
if(b_l.parent->left == b){
b_l.parent->left = a;
}else{
b_l.parent->right = a;
}
}
if(a_l.parent == b){
if(b_l.left == a){
b->left = a_l.left;
a->left = b;
}else{
b->right = a_l.right;
a->right = b;
}
a->parent = b_l.parent;
b->parent = a;
}else if(b_l.parent == a){//GDB stops here on a watch -l a->right
if(a_l.left == b){
a->left = b_l.left;
b->left = a;
}else{
a->right = b_l.right;
b->right = a;
}
b->parent = a_l.parent;
a->parent = b;
}
assert(avl_check_links(avl_root(a)));
assert(avl_check_links(avl_root(b)));
}
int main(void){
avl_node a, b, c, d;
a = (avl_node){.right=&b};
b = (avl_node){.left=&c, .right=&d, .parent=&a};
c = (avl_node){.parent=&b};
d = (avl_node){.parent=&b};
assert(avl_check_links(avl_root(&a)));
avl_swap_nodes(&b, &c, 0);
}
Why does GDB stop on the wrong line? I think it may have to do with the fact that I am using vgdb: it also skips some lines when I single step. Also why is a->right
changed a second time at all? Thank you.
You can get this file to run with reasonably recent versions of gcc, gdb, and valgrind by doing gcc -g -o main main.c
, valgrind --vgdb=yes --vgdb-error=0 ./main&
, gdb main
, tar rem | vgdb
, b avl_swap_nodes
, c
, watch -l a->right
, and then get rid of the vgdb process neatly by doing c
repeatedly and then Ctrl-d
or kill
and then Ctrl-d
.
I figured this out and it isn't fun so I'm going to answer my own question. The node swapping code is wrong. Here is a version that works
#include <stddef.h>
void avl_swap_nodes(avl_node *a, avl_node *b, size_t size){
avl_node a_l = *a, b_l = *b;
char tmp[offsetof(avl_node, data) + size];
memcpy(tmp, a, offsetof(avl_node, data) + size);
memcpy(a, b, offsetof(avl_node, data) + size);
memcpy(b, tmp, offsetof(avl_node, data) + size);
if(a_l.parent == b){
if(b_l.left == a){
a->left = b;
}else{
a->right = b;
}
b->parent = a;
if(a->parent){
if(a->parent->left == b){
a->parent->left = a;
}else{
a->parent->right = a;
}
}
}else if(b_l.parent == a){
if(a_l.left == b){
b->left = a;
}else{
b->right = a;
}
a->parent = b;
if(b->parent){
if(b->parent->left == a){
b->parent->left = b;
}else{
b->parent->right = b;
}
}
}else{
if(a->parent){
if(b->parent == a->parent){
if(a->parent->left == b){
a->parent->left = a;
b->parent->right = b;
}else{
a->parent->right = a;
b->parent->left = b;
}
}else{
if(a->parent->left == b){
a->parent->left = a;
}else{
a->parent->right = a;
}
}
}
if(b->parent && b->parent != a->parent){
if(b->parent->left == a){
b->parent->left = b;
}else{
b->parent->right = b;
}
}
}
if(a->left){
a->left->parent = a;
}
if(a->right){
a->right->parent = a;
}
if(b->left){
b->left->parent = b;
}
if(b->right){
b->right->parent = b;
}
ASSERT_ALL(avl_root(a));
ASSERT_ALL(avl_root(b));
}
The reason why GDB is reporting the watchpoint on the wrong line is because a previous memory write overflows. This can happen for example when you use movq
instead of movb
in assembly, or when you do char a; ((int*)&a) = (int)0;
in C, or when you memcpy
more than you meant to. This last one is what is causing problems in my code. Consider the struct struct A{int a; char b[];);
. sizeof(struct A)
is probably 8 because of structure padding, but offsetof(struct A, b)
is probably 4. Therefore if we calculate the size of the struct A
together with the data in the flexible array at the end by adding the data size to sizeof(struct A)
, we will calculate a value 4 bytes greater than it should be. The solution is to use offsetof(struct A, b);
instead.
The reason why GDB is skipping lines is because I was using valgrind --vgdb=yes
instead of valgrind --vgdb=full
.