cdata-structuresbinary-treeavl-treeinsertion

Debugging an erroneous AVL tree 'insert' operation


I am attempting to create an AVL tree data structure which can handle duplicate element keys. I have based my insertion algorithm on the following: https://www.sanfoundry.com/c-program-implement-avl-tree/#google_vignette.

However, my avl_tree_insert implementation cannot seem to handle certain duplicate keys, and I do not know why; basically, I have determined that the insertion works unless all the following conditions are met* (*This comes from my own understanding of the checks performed by my code, combined with the debug output. It may be faulty logic, as noted by at least one comment. Read the code for actual details):

  1. The key being inserted already exists within the tree.
  2. The root node has null left and right child fields.
  3. The root node is unbalanced (meaning a right rotation on the root node is required).
  4. The key being inserted is greater than or equal to the key to the left of the root (meaning a left rotation on the left node is required).

When the code checks for 4, a left rotation on the left node is invoked, which crashes because of a null pointer dereference (because ( *root ).left and ( *root ).right are null, see 2).

What confuses me is how to figure out how this tree state actually comes about, as well as how to fix it; for example, invoking avl_tree_insert with random keys usually works hundreds or thousands of times, for many different randomized states of the tree, but then suddenly the tree reaches a state like the one described above, and everything fails abruptly; this is what I do not know how to debug.

Below is some test driver code which will cause a crash using avl_tree_insert operations. I attempted to include all relevant parts of the tree data structure and insertion algorithm below, while omitting anything extraneous, and also included some debug print statements.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

// Adjust for your hardware.
typedef unsigned long long int  u64;
typedef signed long long int    i64;
typedef signed short int        i16;

/** @brief Type and instance definitions for generic boolean type. */
typedef _Bool bool;
#define true    ( ( bool ) 1 )
#define false   ( ( bool ) 0 )

/** @brief Type definition for an AVL tree node. */
typedef struct node_t
{
    i64             key;
    u64             depth;

    struct node_t*  left;
    struct node_t*  right;

    void*           content;
}
node_t;

/** @brief Type definition for an AVL tree. */
typedef struct
{
    u64         element_width;
    u64         element_count;

    u64         node_count;
    node_t*     root;
}
avl_tree_t;

// Global op count.
static u64 op_count = 0;

// Global debug flag.
static bool debug_flag;

bool
avl_tree_create
(   u64             element_width
,   avl_tree_t**    tree_
)
{
    if ( !element_width )
    {
        printf ( "avl_tree_create: Value of element_width argument must be non-zero.\n" );
        return false;
    }
    if ( !tree_ )
    {
        printf ( "avl_tree_create: Missing argument: tree (output buffer).\n" );
        return false;
    }

    avl_tree_t* tree = malloc ( sizeof ( avl_tree_t ) );
    tree->element_width = element_width;
    tree->element_count = 0;
    tree->node_count = 0;
    tree->root = 0;
    
    *tree_ = tree;
    return true;
}

u64
avl_tree_recompute_depth
(   node_t* root
)
{
    u64 left;
    u64 right;
    
    if ( !root )
    {
        return 0;
    }

    if ( !root->left )
    {
        left = 0;
    }
    else
    {
        left = 1 + root->left->depth;
    }

    if ( !root->right )
    {
        right = 0;
    }
    else
    {
        right = 1 + root->right->depth;
    }

    return ( left > right ) ? left : right;
}

i64
avl_tree_balance_factor
(   node_t* root
)
{
    u64 left;
    u64 right;
    
    if ( !root )
    {
        return 0;
    }

    if ( !root->left )
    {
        left = 0;
    }
    else
    {
        left = 1 + root->left->depth;
    }

    if ( !root->right )
    {
        right = 0;
    }
    else
    {
        right = 1 + root->right->depth;
    }

    return left - right;
}

node_t*
avl_tree_rotate_left
(   node_t* root
)
{
    // Rotate left.
    node_t* right = root->right;
    if ( !right ) printf ( "Failure on append #%llu. No node to the right of %i.\n" , op_count , root->key );
    root->right = right->left;
    right->left = root;

    // Update depth.
    root->depth = avl_tree_recompute_depth ( root );
    right->depth = avl_tree_recompute_depth ( right );

    return right;
}

node_t*
avl_tree_rotate_right
(   node_t* root
)
{
    // Rotate right.
    node_t* left = root->left;
    if ( !left ) printf ( "Failure on append #%llu. No node to the left of %i.\n" , op_count , root->key );
    root->left = left->right;
    left->right = root;

    // Update depth.
    root->depth = avl_tree_recompute_depth ( root );
    left->depth = avl_tree_recompute_depth ( left );

    return left;
}

node_t*
_avl_tree_insert
(   avl_tree_t* tree
,   node_t*     root
,   const void* src
,   const i64   key
)
{
    // CASE: End of branch.
    if ( !root )
    {
        // Initialize a new element node.
        node_t* node = malloc ( sizeof ( node_t ) + tree->element_width );
        memset ( node , 0 , sizeof ( node_t ) );
        ( *node ).key = key;
        ( *node ).content = ( void* )( ( ( u64 ) node ) + sizeof ( node_t ) );
        memcpy ( node->content , src , tree->element_width );

        // Insert the node into the tree.
        root = node;

        // Update state.
        tree->node_count += 1;
        tree->element_count += 1;
    }

    // CASE: Key > root key (recurse right).
    else if ( key > root->key )
    {
        root->right = _avl_tree_insert ( tree
                                       , root->right
                                       , src
                                       , key
                                       );

        // Rebalance? Y/N
        if ( avl_tree_balance_factor ( root ) < -1 )
        {
            if ( key <= root->right->key )
            {
                root->right = avl_tree_rotate_right ( root->right );
            }
            root = avl_tree_rotate_left ( root );
        }
    }

    // CASE: Key <= root key (recurse left).
    else
    {
        if ( key == root->key )
        {
            debug_flag = true;
            printf ( "Keys match: %i, left key: %i, right key: %i\n"
                   , key
                   , ( root->left ? root->left->key : -1 )
                   , ( root->right ? root->right->key : -1 )
                   );
        }
        root->left = _avl_tree_insert ( tree
                                      , root->left
                                      , src
                                      , key
                                      );

        // Rebalance? Y/N
        if ( avl_tree_balance_factor ( root ) > 1 )
        {
            if ( key >= root->left->key )
            {
                if ( debug_flag ) printf ("Rotate left required.\n" );
                root->left = avl_tree_rotate_left ( root->left );
            }
            root = avl_tree_rotate_right ( root );
        }
    }
    
    root->depth = avl_tree_recompute_depth ( root );
    return root;
}

void
avl_tree_insert
(   avl_tree_t* tree
,   const void* src
,   const i64   key
)
{
    debug_flag = false;
    tree->root = _avl_tree_insert ( tree
                                  , tree->root
                                  , src
                                  , key
                                  );
}

// Test driver.
int
main
( void )
{
    srand ( time ( 0 ) );

    avl_tree_t* tree;
    avl_tree_create ( sizeof ( i16 ) , &tree );

    for ( u64 i = 0; i < 1000000; ++i )
    {
        i16 key;
        do
        {
            key = rand ();
        }
        while ( key == -1 );
        op_count += 1;
        avl_tree_insert ( tree , &key , key ); // Value can just be same as key here.
    }
    printf ( "Num elements in tree: %llu\n" , tree->element_count );

    return 0;
}

Sample output (-1 indicates null key):

Keys match: 2214, left key: -1, right key: -1
Keys match: 5145, left key: -1, right key: -1
Keys match: 7141, left key: -1, right key: -1
Keys match: 16453, left key: -1, right key: -1
Keys match: 15365, left key: -1, right key: -1
Keys match: 22779, left key: 22764, right key: 22816
Keys match: 11855, left key: -1, right key: -1
Rotate left required.
Failure on append #857. No node to the right of 11855.
// (OS crashes the program)

Solution

  • This could be reproduced with inserting just the following three values: 2, 1, 1.

    The immediate cause of the problem is with how you determine that a double rotation is needed. The condition you have for performing a right-then-left rotation is:

    if ( key <= ( *( ( *root ).right ) ).key )
    

    ...and for the mirror case you have:

    if ( key >= ( *( ( *root ).left ) ).key )
    

    That is wrong. The condition for a double rotation is the presence of an inner grandchild. So correct these two conditions with the following:

    if (root->right->left)
    

    and

    if (root->left->right)
    

    I didn't test your code beyond this point, but this was at least the cause of the problem you encountered.