c++c++11boostindexingintrusive-containers

Two [or more] containers with same underlying data, but different views on the data


MyClass below represents a data structure that I need to be able to search very fast in two ways. So say I store the MyClass in and std::vector so that similar names in it can be quickly deleted and inserted continuously. But, I also need to be able to sort the contents of MyClass based on anInt. That is where an intrusive container (or Multimap) would sort the contents of a [very likely] unsorted std::vector. Two containers performing two very different duties on the same underlying items. Also, if I deleted the items from the std::vector they would also automatically disappear from the intrusive container.

Here is sort of the idea

#include <vector>
#include <iostream>
#include <vector>
#include <functional>

#include <boost/intrusive/unordered_set.hpp>

namespace bic = boost::intrusive;

std::hash<std::string> hash_fn;

struct MyClass : bic::unordered_set_base_hook<bic::link_mode<bic::auto_unlink>>
{
    std::string name;
    int anInt1;
    mutable bool bIsMarkedToDelete;

    MyClass(std::string name, int i) : name(name), anInt1(i), bIsMarkedToDelete(false) {}

    bool operator==(MyClass const& o) const
    {
        return anInt1 == o.anInt1; //  need the items in the intrusive container to sort on number
    }

    struct hasher
    {
        size_t operator()(MyClass const& o) const
        {
            //return hash_fn(o.name);
            return o.anInt1; //  need the items in the intrusive container to sort on number
        }
    };
};

std::ostream& operator << (std::ostream& out, const MyClass& ac)
{
    out << ac.name << " " << ac.anInt1;

    return out;
}

typedef bic::unordered_set<MyClass, bic::hash<MyClass::hasher>, bic::constant_time_size<false> > HashTable;

int main()
{
    std::vector<MyClass> values
    {
        MyClass { "John",     0 },
        MyClass { "Mike",     0 },
        MyClass { "Dagobart", 25 },
        MyClass { "John",     5 },
        MyClass { "Mike",     25 },
        MyClass { "Dagobart", 26 },
        MyClass { "John",     10 },
        MyClass { "Mike",     25 },
        MyClass { "Dagobart", 27 },
        MyClass { "John",     15 },
        MyClass { "Mike",     27 }
    };

    HashTable::bucket_type buckets[100];
    HashTable hashtable(values.begin(), values.end(), HashTable::bucket_traits(buckets, 100));

    std::cout << "\nContents of std::vector<MyClass> values\n";

    for(auto& e: values)
        std::cout << e << " ";

    std::cout << "\nContents of HashTable hashtable\n";

    for(auto& b : hashtable)
        std::cout << b << '\n';

std::cout << "\nContents of HashTable hashtable\n";

    for(auto& b : hashtable)
        std::cout << b << '\n';

#if 0 // This code won't compile since there is no operator [] for hashtable
    for(int bucket = 0; bucket < 27; bucket++)
    {
        auto hit(hashtable[bucket].rbegin());
        auto hite(hashtable[bucket].rend());

        while (hit != hite)
        {
            MyClass mc = *hit;

            std::cout << mc << " ";

            hit++;
        }

        std::cout << '\n';
    }
#endif // 0
    std::cout << '\n';
    std::cout << "values size first " << values.size() << '\n';
    std::cout << "hash size fist " << hashtable.size() << '\n';

    for(auto& e: values)
        e.bIsMarkedToDelete |= ("Mike" == e.name);

    std::cout << "removing all bIsMarkedToDelete";
    for(auto& e: values)
        if(e.bIsMarkedToDelete)
            std::cout << e << " ";

    std::cout << '\n';

    // Note how easy and performance fast it is to delete items from the std::vector view
    // I need the intrsive view to automatically update as well
    values.erase(
        std::remove_if(std::begin(values), std::end(values), std::mem_fn(&MyClass::bIsMarkedToDelete)),
                       std::end(values));

#if 0 // This code won't compile since there is no operator [] for hashtable
      // If I did this now, it should show the "Mike" itens gone and the 
      /// rest of the items hanging off the same bucket still there.
    for(int bucket = 0; bucket < 27; bucket++)
    {
        auto hit(hashtable[bucket].rbegin());
        auto hite(hashtable[bucket].rend());

        while (hit != hite)
        {
            MyClass mc = *hit;

            std::cout << mc << " ";

            hit++;
        }

        std::cout << '\n';
    }
#endif // 0

    std::cout << "values size now " << values.size() << '\n';
    std::cout << "hash size now " << hashtable.size() << '\n';

    std::cout << "Contents of value after removing elements " << '\n';
    for(auto& e: values)
        std::cout << e << " ";


    std::cout << '\n';

    values.clear();

    std::cout << values.size() << '\n';
    std::cout << hashtable.size() << '\n';

    std::cout << "Done\n";

    int j;
    std::cin >> j;
}

Solution

  • You need fast lookups using different indices. This makes me think of Boost MultiIndex.

    Next:

    So say I store the MyClass in and std::vector so that similar names in it can be quickly deleted and inserted continuously

    combined with

    Also, if I deleted the items from the std::vector they would also automatically disappear

    makes it clear you cannot escape the cost of keeping all indices up to date anyways. In which case, Boost Multi Index is as good as it gets.

    Here's a sample:

    Live On Coliru

    #include <iostream>
    #include <vector>
    #include <boost/tuple/tuple_comparison.hpp>
    #include <boost/range/iterator_range.hpp>
    
    #include <boost/multi_index_container.hpp>
    #include <boost/multi_index/member.hpp>
    #include <boost/multi_index/random_access_index.hpp>
    #include <boost/multi_index/hashed_index.hpp>
    #include <boost/multi_index/ordered_index.hpp>
    
    namespace bmi = boost::multi_index;
    
    struct MyClass
    {
        std::string name;
        int         anInt1;
    
        MyClass(std::string name, int i) : name(name), anInt1(i) {}
    
        //bool operator==(MyClass const& o) const { return boost::tie(name, anInt1) == boost::tie(o.name, o.anInt1); }
        //bool operator< (MyClass const& o) const { return boost::tie(name, anInt1) <  boost::tie(o.name, o.anInt1); }
    
        friend std::ostream& operator << (std::ostream& out, const MyClass& ac) {
            return out << ac.name << " " << ac.anInt1;
        }
    };
    
    using Table = bmi::multi_index_container<MyClass,
          bmi::indexed_by<
            bmi::random_access<bmi::tag<struct as_vector> >,
            bmi::ordered_non_unique<bmi::tag<struct by_number>,
                bmi::member<MyClass, int, &MyClass::anInt1>
            >,
            bmi::hashed_non_unique<bmi::tag<struct by_name>,
                bmi::member<MyClass, std::string, &MyClass::name>
            >
          >
        >;
    
    void alternative_remove_mikes(Table& values);
    
    int main()
    {
        Table values {
            { "John",     0 },
            { "Mike",     0 },
            { "Dagobart", 25 },
            { "John",     5 },
            { "Mike",     25 },
            { "Dagobart", 26 },
            { "John",     10 },
            { "Mike",     25 },
            { "Dagobart", 27 },
            { "John",     15 },
            { "Mike",     27 },
        };
    
        auto& name_idx = values.get<by_name>();
    
        std::cout << "\nBEFORE: Ordered by number:\n";
        for(auto& e: values.get<by_number>()) 
            std::cout << e << "\n";
    
        std::cout << "\nBEFORE: O(1) lookup by name:\n";
        for(auto& e : boost::make_iterator_range(name_idx.equal_range("Mike")))
            std::cout << e << '\n';
    
        std::cout << "removing all Mikes\n";
        name_idx.erase("Mike");
        // alternative_remove_mikes(values);
    
        std::cout << "\nAFTER: Ordered by number:\n";
        for(auto& e: values.get<by_number>()) 
            std::cout << e << "\n";
    
        std::cout << "\nAFTER: O(1) lookup by name:\n";
        for(auto& e : boost::make_iterator_range(name_idx.equal_range("Mike")))
            std::cout << e << '\n';
    
        values.clear();
    
        std::cout << "Done\n----------------------------\n";
    }
    

    If you want to keep the code to remove similar to the code you had (which is not optimal, but hey, just in case):

    void alternative_remove_mikes(Table& values) {
        // this uses the same `remove_if` approach together with the `rearrange()` feature of `random_access` index
        std::vector<boost::reference_wrapper<MyClass const> > refs(values.begin(), values.end());
    
        // remove_if is not good enough since it will leave the removed end unspecified
        auto it = std::partition(refs.begin(), refs.end(), [](MyClass const& mc) { return "Mike" != mc.name; });
    
        std::cout << "Removing " << std::distance(it, refs.end()) << " items:\n";
    
        values.rearrange(refs.begin());
    
        auto newend = values.begin() + std::distance(refs.begin(), it);
        for (auto& e : boost::make_iterator_range(newend, values.end()))
            std::cout << " -- removing " << e << "\n";
    
        values.erase(newend, values.end());
    }
    

    Output:

    BEFORE: Ordered by number:
    John 0
    Mike 0
    John 5
    John 10
    John 15
    Dagobart 25
    Mike 25
    Mike 25
    Dagobart 26
    Dagobart 27
    Mike 27
    
    BEFORE: O(1) lookup by name:
    Mike 27
    Mike 25
    Mike 25
    Mike 0
    removing all Mikes
    
    AFTER: Ordered by number:
    John 0
    John 5
    John 10
    John 15
    Dagobart 25
    Dagobart 26
    Dagobart 27
    
    AFTER: O(1) lookup by name:
    Done
    ----------------------------