I am wondering how I can implement a size based LRU using OrderedDict. The part I am struggling with is to move the head of the linked list when I am calling __contains__
. The following implementation is working except the __contains__
method. It leads to infinite recursion. Any ideas how I can do that?
from collections import OrderedDict
class Cache(OrderedDict):
def __init__(self, *args, **kwds):
self.size_limit = kwds.pop("size_limit", None)
OrderedDict.__init__(self, *args, **kwds)
self.val_sum = 0
self.hit = 0
self.num_evicted = 0
self.total_req = 0
self._check_size_limit()
def __contains__(self, key):
self.total_req += 1
if OrderedDict.__contains__(self, key):
self.hit += 1
value = OrderedDict.__getitem__ (self,key)
self.move_item_to_the_top(key, value)
return True
else:
return False
def move_item_to_the_top(self, key, value):
OrderedDict.__setitem__(self, key, value)
def __setitem__(self, key, value):
OrderedDict.__setitem__(self, key, value)
self.val_sum += value
self._check_size_limit()
def _check_size_limit(self):
if self.size_limit is not None:
while self.val_sum > self.size_limit:
key, value = self.popitem(last=False)
self.val_sum -= value
self.num_evicted += 1
def get_cache_size(self):
return self.val_sum
def get_number_evicted(self):
return self.num_evicted
def get_hit_ratio(self):
return 1.00 * self.hit / self.total_req
def get_total_req(self):
return self.total_req
def get_hits(self):
return self.hit
This is how I am using this:
if __name__ == "__main__":
cache_size_B = 10
cache = Cache(size_limit=cache_size_B)
items = [(1,3), (2,3), (1,3), (3,4), (1,3), (5,5)]
for item in items:
cache_key = str(item[0])
obj_size = item[1]
print item
if cache_key not in cache:
cache[cache_key] = int(obj_size)
print cache
Running your code I get the following error:
python cache.py
(1, 3)
(2, 3)
(1, 3)
Traceback (most recent call last):
File "cache.py", line 68, in <module>
if cache_key not in cache:
File "cache.py", line 20, in __contains__
self.move_item_to_the_top(key, value)
File "cache.py", line 26, in move_item_to_the_top
OrderedDict.__setitem__(self, key, value)
File "/usr/lib/python2.7/collections.py", line 75, in __setitem__
if key not in self:
File "cache.py", line 20, in __contains__
self.move_item_to_the_top(key, value)
File "cache.py", line 26, in move_item_to_the_top
OrderedDict.__setitem__(self, key, value)
File "/usr/lib/python2.7/collections.py", line 75, in __setitem__
if key not in self:
[...]
File "cache.py", line 26, in move_item_to_the_top
OrderedDict.__setitem__(self, key, value)
File "/usr/lib/python2.7/collections.py", line 75, in __setitem__
if key not in self:
File "cache.py", line 20, in __contains__
self.move_item_to_the_top(key, value)
File "cache.py", line 26, in move_item_to_the_top
OrderedDict.__setitem__(self, key, value)
RuntimeError: maximum recursion depth exceeded in __instancecheck__
Looking at line 75 of collections.py
it reveals that your calling back Cache.__contains__
which leads to the infinite loop.
You could rewrite the Cache
class without overiding __contains__
and instead use Cache.__getitem__
to track access to the cache:
from collections import OrderedDict
class Cache(OrderedDict):
def __init__(self, *args, **kwds):
self.size_limit = kwds.pop("size_limit", None)
OrderedDict.__init__(self, *args, **kwds)
self.val_sum = 0
self.hit = 0
self.num_evicted = 0
self.total_req = 0
self._check_size_limit()
def move_item_to_the_top(self, key, value):
del self[key]
OrderedDict.__setitem__(self, key, value)
def __getitem__(self, key):
self.total_req += 1
try:
value = OrderedDict.__getitem__(self, key)
except KeyError:
raise
else:
self.hit += 1
self.move_item_to_the_top(key, value)
return value
def __setitem__(self, key, value):
OrderedDict.__setitem__(self, key, value)
self.val_sum += value
self._check_size_limit()
def _check_size_limit(self):
if self.size_limit is not None:
while self.val_sum > self.size_limit:
key, value = self.popitem(last=False)
self.val_sum -= value
self.num_evicted += 1
def get_cache_size(self):
return self.val_sum
def get_number_evicted(self):
return self.num_evicted
def get_hit_ratio(self):
return 1.00 * self.hit / self.total_req
def get_total_req(self):
return self.total_req
def get_hits(self):
return self.hit
if __name__ == "__main__":
cache_size_B = 10
cache = Cache(size_limit=cache_size_B)
items = [(1,3), (2,3), (1,3), (3,4), (1,3), (5,5)]
for item in items:
cache_key = str(item[0])
obj_size = item[1]
print item
try:
cache[cache_key]
except KeyError:
cache[cache_key] = int(obj_size)
print cache
You can still use foo not in cache
but this will not count as a miss or hit. If you want to count any access use the prefered syntax try/except
[1] , e.g.:
if __name__ == "__main__":
cache_size_B = 10
cache = Cache(size_limit=cache_size_B)
items = [(1,3), (2,3), (1,3), (3,4), (1,3), (5,5)]
for item in items:
cache_key = str(item[0])
obj_size = item[1]
print item
try:
cache[cache_key]
except KeyError:
cache[cache_key] = int(obj_size)
print cache
[1] This is the prefered syntax to conditionaly do something based on the existance or not of an item in a list or dict because it requires only a single access to the container.