pythondictionaryordereddictionarylru

Python using OrderedDict for sized based caching with LRU


I am wondering how I can implement a size based LRU using OrderedDict. The part I am struggling with is to move the head of the linked list when I am calling __contains__. The following implementation is working except the __contains__ method. It leads to infinite recursion. Any ideas how I can do that?

from collections import OrderedDict

class Cache(OrderedDict):
  def __init__(self, *args, **kwds):
    self.size_limit = kwds.pop("size_limit", None)
    OrderedDict.__init__(self, *args, **kwds)
    self.val_sum = 0
    self.hit = 0
    self.num_evicted = 0
    self.total_req = 0
    self._check_size_limit()

  def __contains__(self, key):
    self.total_req += 1
    if OrderedDict.__contains__(self, key):
       self.hit += 1
       value = OrderedDict.__getitem__ (self,key)
       self.move_item_to_the_top(key, value)
       return True
    else:
       return False

  def move_item_to_the_top(self, key, value):
    OrderedDict.__setitem__(self, key, value)

  def __setitem__(self, key, value):
    OrderedDict.__setitem__(self, key, value)
    self.val_sum += value
    self._check_size_limit()

  def _check_size_limit(self):
    if self.size_limit is not None:
      while self.val_sum > self.size_limit:
        key, value = self.popitem(last=False)
        self.val_sum -= value 
        self.num_evicted += 1

  def get_cache_size(self):
    return self.val_sum

  def get_number_evicted(self):
    return self.num_evicted

  def get_hit_ratio(self):
    return 1.00 * self.hit / self.total_req

  def get_total_req(self):
    return self.total_req

  def get_hits(self):
    return self.hit

This is how I am using this:

if __name__ == "__main__":


  cache_size_B = 10
  cache = Cache(size_limit=cache_size_B)

  items = [(1,3), (2,3), (1,3), (3,4), (1,3), (5,5)]


  for item in items:

    cache_key = str(item[0])
    obj_size = item[1]
    print item

    if cache_key not in cache:
        cache[cache_key] = int(obj_size)

    print cache

Solution

  • Running your code I get the following error:

    python cache.py
    (1, 3)
    (2, 3)
    (1, 3)
    Traceback (most recent call last):
      File "cache.py", line 68, in <module>
        if cache_key not in cache:
      File "cache.py", line 20, in __contains__
        self.move_item_to_the_top(key, value)
      File "cache.py", line 26, in move_item_to_the_top
        OrderedDict.__setitem__(self, key, value)
      File "/usr/lib/python2.7/collections.py", line 75, in __setitem__
        if key not in self:
      File "cache.py", line 20, in __contains__
        self.move_item_to_the_top(key, value)
      File "cache.py", line 26, in move_item_to_the_top
        OrderedDict.__setitem__(self, key, value)
      File "/usr/lib/python2.7/collections.py", line 75, in __setitem__
        if key not in self:
    
    [...]
    
      File "cache.py", line 26, in move_item_to_the_top
        OrderedDict.__setitem__(self, key, value)
      File "/usr/lib/python2.7/collections.py", line 75, in __setitem__
        if key not in self:
      File "cache.py", line 20, in __contains__
        self.move_item_to_the_top(key, value)
      File "cache.py", line 26, in move_item_to_the_top
        OrderedDict.__setitem__(self, key, value)
    RuntimeError: maximum recursion depth exceeded in __instancecheck__
    

    Looking at line 75 of collections.py it reveals that your calling back Cache.__contains__ which leads to the infinite loop.

    You could rewrite the Cache class without overiding __contains__ and instead use Cache.__getitem__ to track access to the cache:

    from collections import OrderedDict
    
    
    class Cache(OrderedDict):
    
        def __init__(self, *args, **kwds):
            self.size_limit = kwds.pop("size_limit", None)
            OrderedDict.__init__(self, *args, **kwds)
            self.val_sum = 0
            self.hit = 0
            self.num_evicted = 0
            self.total_req = 0
            self._check_size_limit()
    
        def move_item_to_the_top(self, key, value):
            del self[key]
            OrderedDict.__setitem__(self, key, value)
    
        def __getitem__(self, key):
            self.total_req += 1
            try:
                value = OrderedDict.__getitem__(self, key)
            except KeyError:
                raise
            else:
                self.hit += 1
                self.move_item_to_the_top(key, value)
                return value
    
        def __setitem__(self, key, value):
            OrderedDict.__setitem__(self, key, value)
            self.val_sum += value
            self._check_size_limit()
    
        def _check_size_limit(self):
            if self.size_limit is not None:
                while self.val_sum > self.size_limit:
                    key, value = self.popitem(last=False)
                    self.val_sum -= value 
                    self.num_evicted += 1
    
        def get_cache_size(self):
            return self.val_sum
    
        def get_number_evicted(self):
            return self.num_evicted
    
        def get_hit_ratio(self):
            return 1.00 * self.hit / self.total_req
    
        def get_total_req(self):
            return self.total_req
    
        def get_hits(self):
            return self.hit
    
    
    if __name__ == "__main__":
        cache_size_B = 10
        cache = Cache(size_limit=cache_size_B)
    
        items = [(1,3), (2,3), (1,3), (3,4), (1,3), (5,5)]
    
        for item in items:
    
            cache_key = str(item[0])
            obj_size = item[1]
            print item
    
            try:
                cache[cache_key]
            except KeyError:
                cache[cache_key] = int(obj_size)
    
        print cache
    

    You can still use foo not in cache but this will not count as a miss or hit. If you want to count any access use the prefered syntax try/except [1] , e.g.:

    if __name__ == "__main__":
        cache_size_B = 10
        cache = Cache(size_limit=cache_size_B)
    
        items = [(1,3), (2,3), (1,3), (3,4), (1,3), (5,5)]
    
        for item in items:
    
            cache_key = str(item[0])
            obj_size = item[1]
            print item
    
            try:
                cache[cache_key]
            except KeyError:
                cache[cache_key] = int(obj_size)
    
        print cache
    

    [1] This is the prefered syntax to conditionaly do something based on the existance or not of an item in a list or dict because it requires only a single access to the container.