visual-c++b-tree

B-tree insertion problem, error: vector subscript out of range


I'm implementing a B-tree insert function.

But it gets into problems. For inserting 50 elements and below it seems to work fine, but inserting 100 elements yields the error:

vector subscript out of range

Here is the code and the debug outputs that I tried to implement in my program:

#include <iostream>
#include <vector>
#include <algorithm>
#include <queue>
#include <stack>
#include <ctime>
using namespace std;
struct Node {
    vector<int>key;
    vector<Node*>children;
    Node* parent = NULL;
    int index = 0;
    bool is_leaf()
    {
        if (children.size() == 0)
        {
            return true;
        }
        return false;
    }
    Node(const vector<int>&copyvect)
    {
        key = copyvect;
    }
    Node(int data)
    {
        key.push_back(data);
    }
    Node()
    {

    }
};
int binarySearch(vector<int>&arr, int x)
{
    int l = 0;
    int r = arr.size() - 1;
    if (arr.size() == 0)
    {
        return -1;
    }
    int mid = 0;
    while (l <= r)
    {
        mid = (l + r) / 2;
        if (arr[mid] < x)
        {
            l = mid + 1;
        }
        else if (arr[mid] > x)
        {
            r = mid - 1;
        }
        else {
            return mid;
        }
    }
    return mid;
}
Node* insert_node(Node* root, int key, int order)
{
    int minkey = order / 2 - 1;
    int maxkey = order - 1;
    int maxchild = order;

    if (root->key.size() == 0)
    {
        root->key.push_back(key);
        return root;
    }
    else
    {
        Node* p = root;
        int searchindex = 0;

        while (!p->is_leaf())
        {
            searchindex = binarySearch(p->key, key);
            cout << "searchindex: " << searchindex << endl;

            if (p->key[searchindex] < key)
            {
                p->children[searchindex + 1]->index = searchindex + 1;
                p = p->children[searchindex + 1];
            }
            else
            {
                p->children[searchindex]->index = searchindex;
                p = p->children[searchindex];
            }
        }

        int searchindex2 = binarySearch(p->key, key);
        cout << "searchindex2: " << searchindex2 << endl;

        if (p->key[searchindex2] < key)
        {
            p->key.insert(p->key.begin() + searchindex2 + 1, key);
        }
        else
        {
            p->key.insert(p->key.begin() + searchindex2, key);
        }

        while (p != NULL)
        {
            if (p->key.size() == maxkey)
            {
                int medianIndex = p->key.size() / 2;
                int mediankey = p->key[medianIndex];
                Node* newNode = new Node;
                newNode->parent = p->parent;
                newNode->key.assign(p->key.begin(), p->key.begin() + medianIndex);
                cout << "newnode key size: " << newNode->key.size() << endl;
                p->key.erase(p->key.begin(), p->key.begin() + medianIndex + 1);
                cout << "p->key size after erase: " << p->key.size() << endl;

                if (!p->is_leaf())
                {
                    newNode->children.assign(p->children.begin(), p->children.begin() + p->children.size() / 2 + 1);
                    cout << "newNode children size: " << newNode->children.size() << endl;
                    p->children.erase(p->children.begin(), p->children.begin() + p->children.size() / 2 + 1);
                    cout << "p children size after erase: " << p->children.size() << endl;
                }

                if (p->parent != NULL)
                {
                    cout << "index is: " << p->index << endl;
                    p->parent->children.insert(p->parent->children.begin() + p->index, newNode);
                    cout << "p->parent->children size: " << p->parent->children.size() << endl;
                    p->parent->key.insert(p->parent->key.begin() + p->index, mediankey);
                    cout << "p->parent->key size: " << p->parent->key.size() << endl;
                }
                else
                {
                    Node* newParent = new Node(mediankey);
                    newParent->children.push_back(newNode);
                    newParent->children.push_back(p);
                    newNode->parent = newParent;
                    p->parent = newParent;
                    return newParent;
                }
            }
            if (p->parent == NULL)
            {
                return p;
            }
            p = p->parent;
        }
        return root;
    }
}

void levelorder(Node* root)
{
    if (root == nullptr)
        return;

    queue<Node*> q;
    q.push(root);

    while (!q.empty())
    {
        int countNode = q.size();
        while (countNode > 0)
        {
            Node* current = q.front();
            q.pop();
            for (int i = 0; i < current->key.size(); i++)
            {
                cout << current->key[i] << " ";
                if (!current->is_leaf())
                {
                    q.push(current->children[i]);
                    if (i == current->key.size() - 1)
                    {
                        q.push(current->children.back());
                    }
                }
            }
            cout << " ";
            countNode--;
        }
        cout << '\n';
    }
}

int main()
{   
    srand(time(NULL));
    int n;
    cin >> n;
    Node* root = new Node;
    for (int i = 0; i < n; i++)
    {   
        root = insert_node(root, rand(), 6);
    }
    cout << '\n';
    levelorder(root);
}

I tried debugging it by printing all the things that could potentially cause this error. But upon examining it, I didn't know why it is acting like that.

Here is the output:

searchindex2: 0
searchindex2: 1
searchindex2: 2
searchindex2: 3
newnode key size: 2
p->key size after erase: 2
searchindex: 0
searchindex2: 1
searchindex: 0
searchindex2: 2
searchindex: 0
searchindex2: 0
newnode key size: 2
p->key size after erase: 2
index is: 1
p->parent->children size: 3
p->parent->key size: 2
searchindex: 1
searchindex2: 0
searchindex: 1
searchindex2: 1
searchindex: 1
searchindex2: 2
searchindex: 1
searchindex2: 2
searchindex: 1
searchindex2: 2
newnode key size: 2
p->key size after erase: 2
index is: 2
p->parent->children size: 4
p->parent->key size: 3
searchindex: 0
searchindex2: 3
newnode key size: 2
p->key size after erase: 2
index is: 1
p->parent->children size: 5
p->parent->key size: 4
searchindex: 3
searchindex2: 1
searchindex: 3
searchindex2: 1
searchindex: 0
searchindex2: 0
searchindex: 3
searchindex2: 2
searchindex: 2
searchindex2: 0
searchindex: 0
searchindex2: 1
searchindex: 3
searchindex2: 3
newnode key size: 2
p->key size after erase: 2
index is: 3
p->parent->children size: 6
p->parent->key size: 5
newnode key size: 2
p->key size after erase: 2
newNode children size: 4
p children size after erase: 2
searchindex: 0
searchindex: 1

What am I doing wrong?


Solution

  • There are several issues, but before getting into them, I would like to make some suggestions to debug more efficiently:

    Functions to help in debugging

    To make the debugging output more "visual", I would write a function that can print the tree in a way that indicates which keys appear in which nodes and which nodes are children of which other nodes. The output generated by levelOrder leaves some doubt about those aspects as soon as you get a tree with three or more layers. You get easily lost in that output. Yet you would need to see what the structure is. An easy output format is printing the tree 90° rotated, so that the root node appears at the left (column), its children at the right of it, ...etc. For instance, imagine the following output:

        98
        96
      95
        94
        89
        86
      77
        76
        75
        74
    73
        72
        71
        69
      67
        66
        64
      63
        56
        55
    50
        42
        41
      40
        24
        20
      19
        18
        17
        15
    

    So here the root node has two keys: 50 and 73. It has three children. The first child has the keys 19 and 40. The second child has keys 63 and 67, and the last child of the root has keys 77 and 95. You get the idea...

    Here is a function for that:

    // Helper function that gets a second argument that helps indenting the output
    void printTree(Node *current, string tab) {
        for (int i = current->key.size(); i >=0; i--) {
            if (!current->is_leaf()) {
                printTree(current->children[i], tab + "  ");
            }
            if (i == 0) break;
            cout << tab << current->key[i-1] << "\n";
        }
    }
    
    void printTree(Node *root) {
        cout << "TREE:\n";
        printTree(root, "");
        cout << "\n";
    }
    

    Secondly, you'll want to check the consistency of your tree after every insertion. You could check things like:

    If you call such a function at a moment that the tree should be in a consistent state, you'll detect errors early.

    Here is a function you could use for that purpose:

    #include <climits>
    
    // Helper function that gets the window of allowed keys and expected parent
    void verifyTree(Node *current, int low, int high, Node *parent) {
        // Verify parent back-reference
        if (current->parent != parent) {
            cout << "parent inconsistency at " << current->key[0] << " and its parent\n";
            exit(1);
        }
        bool isLeaf = current->is_leaf();
        int numKeys = current->key.size();
        // Verify this node has the expected number of children
        int expectedChildren = isLeaf ? 0 : numKeys + 1;
        if (expectedChildren != current->children.size()) {
            cout << "inconcisitency in number of children at parent of "
                << current->key[0] << ". Expected " << expectedChildren
                << " but got " << current->children.size() << "\n";
            exit(1);
        }
        // Verify the order of the keys
        int prevKey = low;
        for (int i = 0; i < numKeys; i++) {
            int key = current->key[i];
            if (key < prevKey) {
                cout << "wrong key order: " << prevKey << " and " << key << "\n";
                exit(1);
            }
            if (!isLeaf) {
                verifyTree(current->children[i], prevKey, key, current);
            }
            prevKey = key;
        }
        if (!isLeaf) {
            verifyTree(current->children[numKeys], prevKey, high, current);
        }
    }
    
    void verifyTree(Node *root) {
        verifyTree(root, INT_MIN, INT_MAX, nullptr);
    }
    

    Finally, to find smaller inputs for which things go wrong, don't pass 6 as value for the order parameter, but 4. And don't produce such large integers, but limited to two digits (using rand() % 100).

    With this configuration it soon turned out that there were inputs with only 9 values that would trigger an inconsistency, for example with these input values:

     6, 3, 5, 4, 1, 8, 7, 2, 9
    

    Issues in your code

    With the above functions it becomes much easier to pinpoint where things go wrong:

    These bugfixes will be enough to make it run correctly.

    Still, there are some other remarks: