pythonperformancerust

Why my Rust code is much slower than Python?


Here is recursive function written in Rust. It is aimed to search complex chemical reactions mechanisms using a DB. The core function of the code is here:

fn parse_database<'a >(
    db_object: &'a HashMap<&str, HashMap<&str,HashSet<&str>  > >,
    mechanism: &'a mut HashSet<&str>,
    reactants: &'a mut HashSet<&str>,
) -> (HashSet<String>, HashSet<String>) {
    let mut all_reactions: HashSet<&str> = db_object.keys().cloned().collect();
    all_reactions.retain(|r_id| !mechanism.contains(r_id));
    let mut found_reactants: HashSet<&str> = HashSet::new();
    let mut found_reactions: HashSet<&str> = HashSet::new();

    for r_id in all_reactions {
        let reactants_ = &mut db_object[&r_id]["reagents"].clone();
        if reactants_.is_subset(reactants) {
            println!("{:?}", reactants_);
            found_reactants.extend(db_object[&r_id]["products"].iter().cloned());
            found_reactions.insert(r_id);
        }
    }

    if !found_reactions.is_empty() {
        println!("go to next iteration");
        let next_itet_mechanism = &mut mechanism.union(&found_reactions).cloned().collect::<HashSet<_>>();
        let next_itet_reactants =  &mut reactants.union(&found_reactants).cloned().collect::<HashSet<_>>();
        return parse_database(
            db_object,
            next_itet_mechanism,
            next_itet_reactants
        );
    } else {
        println!("last iteration");
        println!("{:?}, {:?}", reactants, mechanism);
        return (
            reactants.iter().cloned().map(String::from).collect(),
            mechanism.iter().cloned().map(String::from).collect(),
        );
    }}
 
let mut mech: HashSet<String> = HashSet::new();
 let mut reactants: HashSet<String> =... some task;

let reaction_db:HashMap<&str, HashMap<&str, HashMap<&str, HashSet<&str>>>> = ... some serde magic and convertion to &str

    //
    let (reactants, mechanism) = parse_database(
        &reaction_db[big_mech],
        &mut mech,
        &mut reactants,

and here is realisation in Py

def get_mechanism(big_mech, search_s):
    search_s = set(search_s)
    Data_for_matrix = dict()
    def parse_database(db_object : Dict, mechanism : Set, reactants : Set):
        all_reactions = set(db_object.keys())
        all_reactions.difference_update(mechanism) # removes all seen reactions from the search
    
        found_reactants = set()
        found_reactions = set()
        for r_id in all_reactions:
            reactants_ = set(db_object[r_id]['reagents'])
            reactants_ = filter_set(reactants_) 
            if reactants_.issubset(reactants):
                found_reactants.update(set(db_object[r_id]['products']))
                found_reactions.add(r_id)
                Data_for_matrix[int(r_id) ]=dict()
                Data_for_matrix[int(r_id) ]=[list(db_object[r_id]['products']),list(reactants_)  ] 
        if len(found_reactions) != 0:
            print('next iter')
            return parse_database(db_object=db_object, 
                                  mechanism=mechanism.union(found_reactions), 
                                  reactants=reactants.union(found_reactants))
        else:
            print('last iter')  
            return reactants, mechanism,  Data_for_matrix 

and it is 20 times faster then Rust version. I'm novice in Rust so any help would be appreciated.

I tried to use structure, like these

fn parse_database(
    db_object: &HashMap<String, Database>,
    mechanism: &mut HashSet<String>,
    reactants: &mut HashSet<String>,

) -> (HashSet<String>, HashSet<String>)

and tried most naive realisation with String not &str

parse_database<'a>(
        db_object: &HashMap<String, HashMap<String, Vec<String>  > >,
        mechanism: &mut HashSet<String>,
        reactants: &mut HashSet<String>,
        data_for_matrix: &mut HashMap<String, (Vec<String>, Vec<String>)>

there are some little difference in elapsed time of code but that was a little help.


Solution

  • It's hard to say without your dataset, but probably because of unneccessary clone()s all over the place. Here is a slightly cleaned up version of the code:

    pub fn parse_database<'a>(
        db_object: &'a HashMap<&str, HashMap<&str, HashSet<&str>>>,
        mechanism: &mut HashSet<&'a str>,
        reactants: &mut HashSet<&'a str>,
    ) {
        let mut found_reactants: HashSet<&str> = HashSet::new();
        let mut found_reactions: HashSet<&str> = HashSet::new();
        loop {
            for r_id in db_object.keys().filter(|r_id| !mechanism.contains(*r_id)) {
                if db_object[r_id]["reagents"].is_subset(&reactants) {
                    found_reactants.extend(&db_object[r_id]["products"]);
                    found_reactions.insert(r_id);
                }
            }
            if found_reactions.is_empty() {
                return;
            }
            mechanism.extend(&found_reactions);
            reactants.extend(&found_reactants);
            found_reactants.clear();
            found_reactions.clear();
        }
    }