pythonheapmax-heap

Python MaxHeap Bubble_Down method


I am trying to write a bubble_down method for a MaxHeap class and am having trouble with, what looks like, indexing problems. I think what is mainly confusing me, is that it seems like the indexing in this problem is having to start at 1, which doesn't make sense on how that works if python indexing starts with 0. This is the code for the class...

class MaxHeap:
    def __init__(self):
        self.H = [None]
        
    def size(self):
        return len(self.H)-1
    
    def __repr__(self):
        return str(self.H[1:])
        
    def satisfies_assertions(self):
        for i in range(2, len(self.H)):
            assert self.H[i] <= self.H[i//2],  f'Maxheap property fails at position {i//2}, parent elt: {self.H[i//2]}, child elt: {self.H[i]}'
    
    def max_element(self):
        return self.H[1]
    
    def bubble_up(self, index):
        # your code here
        assert index >= 1
        if index == 1: 
            return 
        parent_index = index // 2
        if self.H[parent_index] > self.H[index]:
            return 
        else:
            self.H[parent_index], self.H[index] = self.H[index], self.H[parent_index]
            self.bubble_up(parent_index)
            
    
    def bubble_down(self, index):
        assert index >= 1 and index < len(self.H)
        lchild_index = 2 * index
        rchild_index = 2 * index + 1
        # set up the value of left child to the element at that index if valid, or else make it +Infinity
        lchild_value = self.H[lchild_index] if lchild_index < len(self.H) else float('inf')
        # set up the value of right child to the element at that index if valid, or else make it +Infinity
        rchild_value = self.H[rchild_index] if rchild_index < len(self.H) else float('inf')
        
        if self.H[index] >= max(lchild_value, rchild_value):
            return 
        # Otherwise, find the index and value of the greater of the two children.
         
        max_child_value, max_child_index = max ((lchild_value, lchild_index), (rchild_value, rchild_index))
        # Swap the current index with the max of its two children
        self.H[index], self.H[max_child_index] = self.H[max_child_index], self.H[index]
        # Bubble down on the maximum child index
        self.bubble_down(max_child_index)
        
        
    
    # Function: insert
    # Insert elt into minheap
    # Use bubble_up/bubble_down function
    def insert(self, elt):
        # your code here
        self.H.append(elt)
        self.bubble_up(self.size())
        
    # Function: delete_max
    # delete the largest element in the heap. Use bubble_up/bubble_down
    def delete_max(self):
        # your code here
        if self.size() > 1:
            max_index = self.H.index(self.max_element())
            self.H[max_index] = self.H[self.size()]
            del self.H[-1]
            self.bubble_down(1)
        else:
            self.H = []

Here is the code for testing....

h = MaxHeap()
print('Inserting: 5, 2, 4, -1 and 7 in that order.')
h.insert(5)
print(f'\t Heap = {h}')
assert(h.max_element() == 5)
h.insert(2)
print(f'\t Heap = {h}')
assert(h.max_element() == 5)
h.insert(4)
print(f'\t Heap = {h}')
assert(h.max_element() == 5)
h.insert(-1)
print(f'\t Heap = {h}')
assert(h.max_element() == 5)
h.insert(7)
print(f'\t Heap = {h}')
assert(h.max_element() == 7)
h.satisfies_assertions()

print('Deleting maximum element')
h.delete_max()
print(f'\t Heap = {h}')
assert(h.max_element() == 5)
h.delete_max()
print(f'\t Heap = {h}')
assert(h.max_element() == 4)
h.delete_max()
print(f'\t Heap = {h}')
assert(h.max_element() == 2)
h.delete_max()
print(f'\t Heap = {h}')
assert(h.max_element() == -1)
# Test delete_max on heap of size 1, should result in empty heap.
h.delete_max()
print(f'\t Heap = {h}')
print('All tests passed')

The error i'm getting is an index error...

IndexError                                Traceback (most recent call last)
<ipython-input-18-93fd0a645314> in <module>
     19 
     20 print('Deleting maximum element')
---> 21 h.delete_max()
     22 print(f'\t Heap = {h}')
     23 assert(h.max_element() == 5)

<ipython-input-17-b6b6738e88d1> in delete_max(self)
     66             self.H[max_index] = self.H[self.size()]
     67             del self.H[-1]
---> 68             self.bubble_down(1)
     69         else:
     70             self.H = []

<ipython-input-17-b6b6738e88d1> in bubble_down(self, index)
     46         self.H[index], self.H[max_child_index] = self.H[max_child_index], self.H[index]
     47         # Bubble down on the minimum child index
---> 48         self.bubble_down(max_child_index)
     49 
     50 

<ipython-input-17-b6b6738e88d1> in bubble_down(self, index)
     44         max_child_value, max_child_index = max ((lchild_value, lchild_index), (rchild_value, rchild_index))
     45         # Swap the current index with the least of its two children
---> 46         self.H[index], self.H[max_child_index] = self.H[max_child_index], self.H[index]
     47         # Bubble down on the minimum child index
     48         self.bubble_down(max_child_index)

IndexError: list index out of range

Solution

  • The cause of the problem is in these lines:

    lchild_value = self.H[lchild_index] if lchild_index < len(self.H) else float('inf')
    rchild_value = self.H[rchild_index] if rchild_index < len(self.H) else float('inf')
    

    If a node does not have both children, then this code will use Infinity as default value, but then the next if can never be true:

    if self.H[index] >= max(lchild_value, rchild_value):
        return 
    

    That is wrong.

    This can be fixed by using -Infinity instead:

    lchild_value = self.H[lchild_index] if lchild_index < len(self.H) else float('-inf')
    rchild_value = self.H[rchild_index] if rchild_index < len(self.H) else float('-inf')
    

    Other remarks

    I didn't check all your code, and only focused on the problem at hand, but I couldn't help noticing two issues in delete_max:

    First, there is a useless call if index. This method should not be used in a heap, as it has O(n) time complexity and would kill all the performance benefits that a heap has to offer. Moreover, the index of the max element is known. So change this:

    max_index = self.H.index(self.max_element())
    

    to:

    max_index = 1
    

    Secondly, the else block resets the H list to an empty list, but that is wrong. You always need to keep that first dummy element at index 0. So change:

    else:
        self.H = []
    

    to:

    else:
        self.H = [None]