There was a task in school: to implement a dictionary based on an AVL-balanced search tree, in which data is stored in leaves.
I wrote a dictionary class: an AVL-balanced search tree in which values are stored in nodes. How can I correct it correctly so that the values are stored only in the leaves? I would appreciate any help.
class Node:
def __init__(self, key, value=None, left=None, right=None, inv=False):
if inv:
left, right = right, left
self.key = key
self.value = value
self.left = left
self.right = right
self.exist = 1
if left is not None and right is not None:
self.height = 1 + max(self.left.height, self.right.height)
elif left is not None:
self.height = 1 + self.left.height
elif right is not None:
self.height = 1 + self.right.height
else:
self.height = 1
class Dict:
def __init__(self):
self.root = None
self.len = 0
def __len__(self):
return self.len
def __getitem__(self, key):
if not self.root:
raise KeyError()
else:
return self.get(self.root, key)
def __contains__(self, key):
try:
self.__getitem__(self.root, key)
return True
except:
return False
def __setitem__(self, key, value):
if not self.root:
self.root = Node(key, value)
else:
self.root = self.put(self.root, key, value)
self.len += 1
def __delitem__(self, key):
if key not in self:
raise KeyError()
else:
self.delete(self.root, key)
self.len -= 1
def delete(self, tree, key):
if key == tree.key:
tree.exist = 0
return
return self.delete(self.children(tree, key < tree.key)[1], key)
def height(self, tree): return 0 if tree is None else tree.height
def children(self, tree, inv): return (tree.right, tree.left) if inv else (tree.left, tree.right)
def reassoc(self, tree, inv):
l, r = self.children(tree, inv)
rl, rr = self.children(r, inv)
return Node(r.key, r.value, Node(tree.key, tree.value, l, rl, inv), rr, inv)
def avl(self, tree, inv):
l, r = self.children(tree, inv)
if self.height(r) - self.height(l) < 2:
return tree
rl, rr = self.children(r, inv)
if self.height(rl) - self.height(rr) == 1:
r = self.reassoc(r, not inv)
return self.reassoc(Node(tree.key, tree.value, l, r, inv), inv)
def put(self, tree, key, value):
if tree is None:
return Node(key, value, None, None)
if tree.key == key:
self.len -= 1
if tree.exist==0:self.len+=1
return Node(key, value, tree.left, tree.right)
inv = key < tree.key
left, right = self.children(tree, inv)
return self.avl(Node(tree.key, tree.value, left,
self.put(right, key, value), inv), inv)
def get(self, tree, key):
if tree is None:
raise KeyError()
if key == tree.key and tree.exist == 0:
raise KeyError()
elif key == tree.key and tree.exist != 0:
return tree.value
return self.get(self.children(tree, key < tree.key)[1], key)
def print_tree(tree, indent = 0):
if tree == None: print()
print(' '*indent + str(tree.key) + ' -> ' + str(tree.value))
if tree.left: print_tree(tree.left, indent + 2)
if tree.right: print_tree(tree.right, indent + 2)
t = Dict()
for v, k in enumerate([5,7,2,1,3,6,2,7]):
t.__setitem__(k, v)
print_tree(t.root)
print()
To have the values in the leaves, you would first find the leaf node where the insertion should happen, and replace it with a value-less parent with two children: the original leaf, and the new node.
This means that the tree will always be full, i.e. all internal nodes have two children, never one. Rotations -- when needed to rebalance the tree -- will not affect this property.
There is a slight challenge however with finding the value for a key, as now the key could be found in an internal node, and it could be unclear in which subtree the leaf with that key should be found. Although the insertion algorithm could initially agree to always put such a leaf in the left subtree, rotations will not maintain this property, so that in general the targeted leaf could be in either subtree.
There are several ways to solve this. One naive way is to add a dictionary to your tree that maps each key to a node. This would make several methods like __contains__
, __getitem__
and __delitem__
quite efficient as they don't actually need to traverse the tree. But if this is not an acceptable solution, then maybe it would be acceptable to reuse the value
attribute of internal nodes, as they no longer have a use for this attribute. Instead of holding a value, they could have a reference to the leaf node having the same key. For this to work, it is essential that leaf nodes are never discarded (as being replaced by a new leaf for the same key) but are updated with the new value.
Here is an implementation of put
that realises this idea:
def put(self, tree, key, value):
# Make a leaf node the base case for the recursion pattern
if not tree.left: # Is leaf? (Note that each node has 2 or no children)
if key == tree.key:
self.len -= tree.exist
# Mutate leaf so references to it remain valid
tree.exist = 1
tree.value = value
return tree
# Otherwise this leaf node becomes an internal node:
# Set its value attribute to reference the newly created leaf
elif key < tree.key:
return Node(tree.key, tree, Node(key, value), tree)
else:
return Node(tree.key, tree, tree, Node(key, value))
if tree.key == key: # Key matches internal node's key
# Follow the internal node's reference to the leaf having the same key
self.put(tree.value, key, value)
return tree
inv = key < tree.key
left, right = self.children(tree, inv)
return self.avl(Node(tree.key, tree.value, left,
self.put(right, key, value), inv), inv)
Similarly, get
needs to be updated so it uses that internal reference:
def get(self, tree, key):
if not tree.left: # It is a leaf
if key != tree.key or not tree.exist:
raise KeyError()
return tree.value
if key == tree.key:
# Follow the internal node's reference to leaf
return self.get(tree.value, key)
return self.get(self.children(tree, key < tree.key)[1], key)
And you would want to avoid that the value attribute of internal nodes is printed by the print_tree
function. I personally prefer an in-order traversal so that the output looks like a 90° rotated tree:
def print_tree(tree, indent = ""):
if tree.right:
print_tree(tree.right, indent + " ")
if tree.left == tree.right: # Is leaf
print(f"{indent}{tree.key} -> {tree.value}")
else: # Is internal (then don't print value attribute)
print(f"{indent}{tree.key}")
if tree.value is None:
raise ValueError("unexpected None value")
if tree.left:
print_tree(tree.left, indent + " ")