Rustaceans. when I start to write a BloomFilter example in rust. I found I have serveral problems have to solve. I struggle to solve them but no progress in a day. I need help, any suggestion will help me a lot, Thanks.
// let bits = self.hash(value); // how to solve such lifetime error without use 'static storage?
// Below is a workaround code but need to computed in advanced.
let bits = Box::new(self.hash(value).collect::<Vec<u64>>().into_iter());
self.0.set(bits);
bloom_filter
?// cyclic-dependency:
// RedisCache -> BloomFilter -> Storage
// | ^
// ------------<impl>------------
//
// v--- cache ownership has moved here
let filter = BloomFilter::by(Box::new(cache));
cache.1.replace(filter);
null
value, How can I solve the cyclic-dependency initialization without any stubs?let mut cache = RedisCache(
Client::open("redis://localhost").unwrap(),
// I found can use Weak::new() to solve it,but need to downgrade a Rc reference.
// v-- need a BloomFilter stub to create RedisCache
RefCell::new(BloomFilter::new()),
);
#![allow(unused)]
mod bloom_filter {
use std::{hash::Hash, marker::PhantomData};
pub type BitsIter = Box<dyn Iterator<Item = u64>>;
pub trait Storage {
fn set(&mut self, bits: BitsIter);
fn contains_all(&self, bits: BitsIter) -> bool;
}
pub struct BloomFilter<T: Hash>(Box<dyn Storage>, PhantomData<T>);
impl<T: Hash> BloomFilter<T> {
pub fn new() -> BloomFilter<T> {
return Self::by(Box::new(ArrayStorage([0; 5000])));
struct ArrayStorage<const N: usize>([u8; N]);
impl<const N: usize> Storage for ArrayStorage<N> {
fn set(&mut self, bits: BitsIter) {
let size = self.0.len() as u64;
bits.map(|bit| (bit % size) as usize)
.for_each(|index| self.0[index] = 1);
}
fn contains_all(&self, bits: BitsIter) -> bool {
let size = self.0.len() as u64;
bits.map(|bit| (bit % size) as usize)
.all(|index| self.0[index] == 1)
}
}
}
pub fn by(storage: Box<dyn Storage>) -> BloomFilter<T> {
BloomFilter(storage, PhantomData)
}
pub fn add(&mut self, value: T) {
// let bits = self.hash(value); // how to solve such lifetime error?
let bits = Box::new(self.hash(value).collect::<Vec<u64>>().into_iter());
self.0.set(bits);
}
pub fn contains(&self, value: T) -> bool {
// lifetime problem same as Self::add(T)
let bits = Box::new(self.hash(value).collect::<Vec<u64>>().into_iter());
self.0.contains_all(bits)
}
fn hash<'a, H: Hash + 'a>(&self, _value: H) -> Box<dyn Iterator<Item = u64> + 'a> {
todo!()
}
}
}
mod spi {
use super::bloom_filter::*;
use redis::{Client, Commands, RedisResult};
use std::{
cell::RefCell,
rc::{Rc, Weak},
};
pub struct RedisCache<'a>(Client, RefCell<BloomFilter<&'a str>>);
impl<'a> RedisCache<'a> {
pub fn new() -> RedisCache<'a> {
let mut cache = RedisCache(
Client::open("redis://localhost").unwrap(),
// v-- need a BloomFilter stub to create RedisCache
RefCell::new(BloomFilter::new()),
);
// v--- cache ownership has moved here
let filter = BloomFilter::by(Box::new(cache));
cache.1.replace(filter);
return cache;
}
pub fn get(&mut self, key: &str, load_value: fn() -> Option<String>) -> Option<String> {
let filter = self.1.borrow();
if filter.contains(key) {
if let Ok(value) = self.0.get::<&str, String>(key) {
return Some(value);
}
if let Some(actual_value) = load_value() {
let _: () = self.0.set(key, &actual_value).unwrap();
return Some(actual_value);
}
}
return None;
}
}
impl<'a> Storage for RedisCache<'a> {
fn set(&mut self, bits: BitsIter) {
todo!()
}
fn contains_all(&self, bits: BitsIter) -> bool {
todo!()
}
}
}
First, thanks @Colonel Thirty Two give me a lot of information that I haven't mastered and help me fixed the problem of the iterator lifetime.
The cyclic-dependency I have solved by break the responsibility of the Storage
into another struct RedisStorage
without modify the bloom_filter
module, but make the example bloated. Below is their relationships:
RedisCache -> BloomFilter -> Storage <---------------
| |
|-------> redis::Client <- RedisStorage ---<impl>---
I realized the ownership & lifetime system is not only used by borrow checker, but also Rustaceans need a bigger front design to obey the rules than in a GC language, e.g: java. Am I right?
mod bloom_filter {
use std::{
hash::{Hash, Hasher},
marker::PhantomData,
};
pub type BitsIter<'a> = Box<dyn Iterator<Item = u64> + 'a>;
pub trait Storage {
fn set(&mut self, bits: BitsIter);
fn contains_all(&self, bits: BitsIter) -> bool;
}
pub struct BloomFilter<T: Hash>(Box<dyn Storage>, PhantomData<T>);
impl<T: Hash> BloomFilter<T> {
#[allow(unused)]
pub fn new() -> BloomFilter<T> {
return Self::by(Box::new(ArrayStorage([0; 5000])));
struct ArrayStorage<const N: usize>([u8; N]);
impl<const N: usize> Storage for ArrayStorage<N> {
fn set(&mut self, bits: BitsIter) {
let size = self.0.len() as u64;
bits.map(|bit| (bit % size) as usize)
.for_each(|index| self.0[index] = 1);
}
fn contains_all(&self, bits: BitsIter) -> bool {
let size = self.0.len() as u64;
bits.map(|bit| (bit % size) as usize)
.all(|index| self.0[index] == 1)
}
}
}
pub fn by(storage: Box<dyn Storage>) -> BloomFilter<T> {
BloomFilter(storage, PhantomData)
}
pub fn add(&mut self, value: T) {
self.0.set(self.hash(value));
}
pub fn contains(&self, value: T) -> bool {
self.0.contains_all(self.hash(value))
}
fn hash<'a, H: Hash + 'a>(&self, value: H) -> BitsIter<'a> {
Box::new(
[3, 11, 31, 71, 131]
.into_iter()
.map(|salt| SimpleHasher(0, salt))
.map(move |mut hasher| hasher.hash(&value)),
)
}
}
struct SimpleHasher(u64, u64);
impl SimpleHasher {
fn hash<H: Hash>(&mut self, value: &H) -> u64 {
value.hash(self);
self.finish()
}
}
impl Hasher for SimpleHasher {
fn finish(&self) -> u64 {
self.0
}
fn write(&mut self, bytes: &[u8]) {
self.0 += bytes.iter().fold(0u64, |acc, k| acc * self.1 + *k as u64)
}
}
}
mod spi {
use super::bloom_filter::*;
use redis::{Client, Commands};
use std::{cell::RefCell, rc::Rc};
pub struct RedisCache<'a>(Rc<RefCell<Client>>, BloomFilter<&'a str>);
impl<'a> RedisCache<'a> {
pub fn new(client: Rc<RefCell<Client>>, filter: BloomFilter<&'a str>) -> RedisCache<'a> {
RedisCache(client, filter)
}
pub fn get<'f>(
&mut self,
key: &str,
load_value: fn() -> Option<&'f str>,
) -> Option<String> {
if self.1.contains(key) {
let mut redis = self.0.as_ref().borrow_mut();
if let Ok(value) = redis.get::<&str, String>(key) {
return Some(value);
}
if let Some(actual_value) = load_value() {
let _: () = redis.set(key, &actual_value).unwrap();
return Some(actual_value.into());
}
}
return None;
}
}
struct RedisStorage(Rc<RefCell<Client>>);
const BLOOM_FILTER_KEY: &str = "bloom_filter";
impl Storage for RedisStorage {
fn set(&mut self, bits: BitsIter) {
bits.for_each(|slot| {
let _: bool = self
.0
.as_ref()
.borrow_mut()
.setbit(BLOOM_FILTER_KEY, slot as usize, true)
.unwrap();
})
}
fn contains_all(&self, mut bits: BitsIter) -> bool {
bits.all(|slot| {
self.0
.as_ref()
.borrow_mut()
.getbit(BLOOM_FILTER_KEY, slot as usize)
.unwrap()
})
}
}
#[test]
fn prevent_cache_penetration_by_bloom_filter() {
let client = Rc::new(RefCell::new(Client::open("redis://localhost").unwrap()));
redis::cmd("FLUSHDB").execute(&mut *client.as_ref().borrow_mut());
let mut filter: BloomFilter<&str> = BloomFilter::by(Box::new(RedisStorage(client.clone())));
assert!(!filter.contains("Rust"));
filter.add("Rust");
assert!(filter.contains("Rust"));
let mut cache = RedisCache::new(client, filter);
assert_eq!(
cache.get("Rust", || Some("System Language")),
Some("System Language".to_string())
);
assert_eq!(
cache.get("Rust", || panic!("must never be called after cached")),
Some("System Language".to_string())
);
assert_eq!(
cache.get("Go", || panic!("reject to loading `Go` from external storage")),
None
);
}
}
pub type BitsIter = Box<dyn Iterator<Item = u64>>;
In this case, the object in the box must be valid for the 'static
lifetime. This isn't the case for the iterator returned by hash
- its limited to the lifetime of self
.
Try replacing with:
pub type BitsIter<'a> = Box<dyn Iterator<Item = u64> + 'a>;
Or using generics instead of boxed trait objects.
So your RedisClient
needs a BloomFilter
, but the BloomFilter
also needs the RedisClient
?
Your BloomFilter
should not use the RedisCache
that itself uses the BloomFilter
- that's a recipe for infinitely recursing calls (how do you know what calls to RedisCache::add
should update the bloom filter and which calls are from the bloom filter?).
If you really have to, you need some form of shared ownership, like Rc
or Arc
. Your BloomFilter
will also need to use a weak reference, or else the two objects will refer to each other and will never free.