Here is recursive function written in Rust. It is aimed to search complex chemical reactions mechanisms using a DB. The core function of the code is here:
fn parse_database<'a >(
db_object: &'a HashMap<&str, HashMap<&str,HashSet<&str> > >,
mechanism: &'a mut HashSet<&str>,
reactants: &'a mut HashSet<&str>,
) -> (HashSet<String>, HashSet<String>) {
let mut all_reactions: HashSet<&str> = db_object.keys().cloned().collect();
all_reactions.retain(|r_id| !mechanism.contains(r_id));
let mut found_reactants: HashSet<&str> = HashSet::new();
let mut found_reactions: HashSet<&str> = HashSet::new();
for r_id in all_reactions {
let reactants_ = &mut db_object[&r_id]["reagents"].clone();
if reactants_.is_subset(reactants) {
println!("{:?}", reactants_);
found_reactants.extend(db_object[&r_id]["products"].iter().cloned());
found_reactions.insert(r_id);
}
}
if !found_reactions.is_empty() {
println!("go to next iteration");
let next_itet_mechanism = &mut mechanism.union(&found_reactions).cloned().collect::<HashSet<_>>();
let next_itet_reactants = &mut reactants.union(&found_reactants).cloned().collect::<HashSet<_>>();
return parse_database(
db_object,
next_itet_mechanism,
next_itet_reactants
);
} else {
println!("last iteration");
println!("{:?}, {:?}", reactants, mechanism);
return (
reactants.iter().cloned().map(String::from).collect(),
mechanism.iter().cloned().map(String::from).collect(),
);
}}
let mut mech: HashSet<String> = HashSet::new();
let mut reactants: HashSet<String> =... some task;
let reaction_db:HashMap<&str, HashMap<&str, HashMap<&str, HashSet<&str>>>> = ... some serde magic and convertion to &str
//
let (reactants, mechanism) = parse_database(
&reaction_db[big_mech],
&mut mech,
&mut reactants,
and here is realisation in Py
def get_mechanism(big_mech, search_s):
search_s = set(search_s)
Data_for_matrix = dict()
def parse_database(db_object : Dict, mechanism : Set, reactants : Set):
all_reactions = set(db_object.keys())
all_reactions.difference_update(mechanism) # removes all seen reactions from the search
found_reactants = set()
found_reactions = set()
for r_id in all_reactions:
reactants_ = set(db_object[r_id]['reagents'])
reactants_ = filter_set(reactants_)
if reactants_.issubset(reactants):
found_reactants.update(set(db_object[r_id]['products']))
found_reactions.add(r_id)
Data_for_matrix[int(r_id) ]=dict()
Data_for_matrix[int(r_id) ]=[list(db_object[r_id]['products']),list(reactants_) ]
if len(found_reactions) != 0:
print('next iter')
return parse_database(db_object=db_object,
mechanism=mechanism.union(found_reactions),
reactants=reactants.union(found_reactants))
else:
print('last iter')
return reactants, mechanism, Data_for_matrix
and it is 20 times faster then Rust version. I'm novice in Rust so any help would be appreciated.
I tried to use structure, like these
fn parse_database(
db_object: &HashMap<String, Database>,
mechanism: &mut HashSet<String>,
reactants: &mut HashSet<String>,
) -> (HashSet<String>, HashSet<String>)
and tried most naive realisation with String not &str
parse_database<'a>(
db_object: &HashMap<String, HashMap<String, Vec<String> > >,
mechanism: &mut HashSet<String>,
reactants: &mut HashSet<String>,
data_for_matrix: &mut HashMap<String, (Vec<String>, Vec<String>)>
there are some little difference in elapsed time of code but that was a little help.
It's hard to say without your dataset, but probably because of unneccessary clone()
s all over the place.
Here is a slightly cleaned up version of the code:
pub fn parse_database<'a>(
db_object: &'a HashMap<&str, HashMap<&str, HashSet<&str>>>,
mechanism: &mut HashSet<&'a str>,
reactants: &mut HashSet<&'a str>,
) {
let mut found_reactants: HashSet<&str> = HashSet::new();
let mut found_reactions: HashSet<&str> = HashSet::new();
loop {
for r_id in db_object.keys().filter(|r_id| !mechanism.contains(*r_id)) {
if db_object[r_id]["reagents"].is_subset(&reactants) {
found_reactants.extend(&db_object[r_id]["products"]);
found_reactions.insert(r_id);
}
}
if found_reactions.is_empty() {
return;
}
mechanism.extend(&found_reactions);
reactants.extend(&found_reactants);
found_reactants.clear();
found_reactions.clear();
}
}