I wrote a dictionary class of an avl search tree with values in leaves, but it is necessary to add immutability to it, that is, when deleting and adding anything (whether it is changing an element with an existing key or adding a new element), a new tree should be created based on an existing one. I can't figure out how to write the delete function. It should return a new tree based on the old one, but without the deleted element. I will be very grateful for any help(question of life and death)
class Node:
def __init__(self,key,value=None,left = None, right = None, inv = False):
self.key = key
self.value = value
if inv:
left,right = right,left
if left and right:
self.height = max(left.height,right.height)+1
if left:
self.height = left.height+1
if right:
self.height = right.height+1
else:
self.height = 1
self.left = left
self.right = right
class Dict:
def __init__(self):
self.root = None
self.len = 0
def __len__(self):
return self.len
def __getitem__(self,key):
if self.root:
return self.get(self.root,key)
raise KeyError()
def __contains__(self,key):
try:
self.__getitem__(key)
return True
except:
return False
def __setitem__(self,key,value):
if not self.root:
self.root = Node(key,value)
else:
self.root = self.put(self.root,key,value)
self.len+=1
def __delitem__(self,key):
if key not in self:
raise KeyError()
else:
self.root = self.delete(self.root,key)
self.len-=1
#def delete(self,tree,key):
#???
def height(self,tree): return tree.height if tree else 0
def children(self,tree, inv): return (tree.right,tree.left) if inv else (tree.left,tree.right)
def put(self,tree,key,value):
if not tree.left:
if key == tree.key:
self.len-=1
return Node(key,value,tree.left,tree.right)
elif key < tree.key:
return Node(tree.key,tree,Node(key,value),tree)
else:
return Node(tree.key, tree, tree, Node(key, value))
if tree.key == key:
self.put(tree.value,key,value)
return tree
inv = key < tree.key
left, right = self.children(tree, inv)
return self.avl(Node(tree.key, tree.value, left, self.put(right, key, value), inv), inv)
def get(self,tree,key):
if not tree.left:
if key != tree.key:
raise KeyError()
return tree.value
if key == tree.key:
return self.get(tree.value,key)
return self.get(self.children(tree, key < tree.key)[1], key)
def avl(self,tree,inv):
l,r = self.children(tree,inv)
if self.height(r) - self.height(l) < 2:
return tree
rl, rr = self.children(r, inv)
if self.height(rl) - self.height(rr) == 1:
r = self.reassoc(r, not inv)
return self.reassoc(Node(tree.key, tree.value, l, r, inv), inv)
def reassoc(self, tree, inv):
l, r = self.children(tree, inv)
rl, rr = self.children(r, inv)
return Node(r.key, r.value, Node(tree.key, tree.value, l, rl, inv), rr, inv)
def print_tree(tree, indent = ""):
if tree.right:
print_tree(tree.right, indent + " ")
if tree.left == tree.right:
print(f"{indent}{tree.key} -> {tree.value}")
else:
print(f"{indent}{tree.key}")
if tree.value is None:
raise ValueError
if tree.left:
print_tree(tree.left, indent + " ")
t = Dict()
for v, k in enumerate([5,7,2,1,3,6,2,7]):
t.__setitem__(k, v)
print_tree(t.root)
print()
The put
method needs correction:
When a leaf node with the same key is replaced, this new node does not get referenced by a new parent node. If you would get it referenced by a parent node, you would also need to adjust the value
references to that leaf.
This means that value
reference in internal nodes loses a bit of its usefulness. I would therefore go for another approach where the value
attribute of internal nodes will always be None
. When the targeted key is found in an internal node, then let the drill down continue in both left and right directions. One of these will successfully find the leaf node with that key, and the other not.
To make sure that nodes are not replaced when they occur in the subtree that does not change -- as this is an unnecessary burden for memory management -- you could write a getNode
method that would take the existing node, and the parameters for which you are about to create a new node. It would only create a node when the parameters (key, value, ...etc) are not exactly those of the node that already exists. Otherwise it will return the node as-is.
The delete operation is not too difficult. Since your tree has all the values in its leaves, apply this procedure:
None
to the parent of this node, to indicate its deletionSo, this makes a leaf from a previous parent of (two) leaves.
Another challenge is what to do with a key that is deleted, but still occurs in internal nodes. In principle this is not a problem, but for the put
algorithm it is easier if the presence of a key in an internal node is a guarantee that it exists in a leaf too. Surely it could be solved, but I went for an approach where internal keys are replaced when the corresponding key was deleted from the tree. This replacement key could bubble up from recursion too, as it will be the key of the sibling leaf, next to the deleted leaf.
Apply the avl
logic as you did with put
.
Here is your adapted code. Comments indicate where the most important changes were applied:
class Node:
def __init__(self,key, value=None, left=None, right=None, inv=False):
self.key = key
self.value = value
self.left, self.right = (right, left) if inv else (left, right)
self.height = max(left.height if left else 0, right.height if right else 0) + 1
class Dict:
def __init__(self):
self.root = None
self.len = 0
def __len__(self):
return self.len
def __getitem__(self, key):
if self.root:
return self.get(self.root, key)
raise KeyError()
def __contains__(self, key):
try:
self.__getitem__(key)
return True
except:
return False
def __setitem__(self, key, value):
self.root = self.put(self.root, key, value) if self.root else Node(key, value)
self.len += 1
def __delitem__(self, key):
if key not in self:
raise KeyError()
else:
self.root = self.delete(self.root, key)[0]
self.len -= 1
# This method intends to return a new node, except when the current node has exactly
# the same attributes: in that case it returns that node without creating a new one
# Using this is optional, but it can save on the number of nodes that is created
def getNode(self, tree, key, value, left=None, right=None, inv=False):
if inv:
left, right = right, left
if (tree.key, tree.value, tree.left, tree.right) != (key, value, left, right):
return Node(key, value, left, right)
return tree
# delete: returns the root of the given subtree after deletion, and also a new key that can be
# used in ancestor nodes that happen to have the key that was deleted. This way we guarantee
# that keys in internal nodes always correspond to a key that exist in a leaf node.
def delete(self, tree, key):
if not tree.left: # is leaf
if key == tree.key:
return None, None # indicate removal to parent, who will have to pull the sibling up
return tree, None # not found here: no removal (node will be found in an alternative path)
if key == tree.key:
# we assume this means the key is present as a leaf somewhere: try both sides
left, leftkey = self.delete(tree.left, key)
right, rightkey = self.delete(tree.right, key)
# If direct child was removed, pull up its sibling
# and use its key for ancestors that have old key
if not right:
return left, left.key
if not left:
return right, right.key
inv = leftkey is None
if inv:
left, right = right, left
treekey = newkey = leftkey
else:
inv = key > tree.key
left, right = self.children(tree, inv)
left, newkey = self.delete(left, key)
if not left: # this child was the node that was deleted
return right, right.key # pull up sibling
treekey = tree.key
return self.avl(self.getNode(tree, treekey, None, left, right, inv), inv), newkey
def height(self, tree):
return tree.height if tree else 0
def children(self, tree, inv):
return (tree.right, tree.left) if inv else (tree.left, tree.right)
def put(self, tree, key, value, onlyreplace=False):
if not tree.left: # It is a leaf
if key == tree.key: # Replacing
self.len -= 1
return self.getNode(tree, key, value)
if onlyreplace: # Not found
return tree # Don't create node
if key < tree.key:
return Node(tree.key, None, Node(key, value), tree)
return Node(tree.key, None, tree, Node(key, value))
if key == tree.key:
# we assume this means the key is present as a leaf somewhere
return self.getNode(tree, key, None, self.put(tree.left, key, value, True),
self.put(tree.right, key, value, True))
inv = key < tree.key
left, right = self.children(tree, inv)
return self.avl(self.getNode(tree, tree.key, None, left, self.put(right, key, value, onlyreplace), inv), inv)
def get(self,tree,key):
if not tree.left:
if key != tree.key:
raise KeyError()
return tree.value
if key == tree.key:
# try both sides...
try:
return self.get(tree.left, key)
except:
return self.get(tree.right, key)
return self.get(self.children(tree, key < tree.key)[1], key)
def avl(self,tree,inv):
l,r = self.children(tree,inv)
if self.height(r) - self.height(l) < 2:
return tree
rl, rr = self.children(r, inv)
if self.height(rl) - self.height(rr) == 1:
r = self.reassoc(r, not inv)
return self.reassoc(Node(tree.key, tree.value, l, r, inv), inv)
def reassoc(self, tree, inv):
l, r = self.children(tree, inv)
rl, rr = self.children(r, inv)
return Node(r.key, r.value, Node(tree.key, tree.value, l, rl, inv), rr, inv)