c++c++11boosthashboost-multi-index

Find element in boost multi_index_container


In my code I need to have a functionality to iterate over all elements and check if there some element already exists possibly as soon as possible, so my choice fell on boost multi index container where I can use vector and unordered_set interface for my class Animal at the same time. The problem is that I am not able to find some element through unordered_set interface since I replaced key from std::string to std::array<char, 50> and adjusted the code, and I don't know what I am doing wrong ?

code: https://wandbox.org/permlink/dnCaEzYVdXkTFBGo

#include <array>
#include <algorithm>
#include <iostream>
#include <chrono>
#include <string>
#include <vector>
#include <list>
#include <map>
#include <set>
#include <unordered_map>
#include <unordered_set>
#include <memory>

#include <boost/multi_index_container.hpp>
#include <boost/multi_index/ordered_index.hpp>
#include <boost/multi_index/composite_key.hpp>
#include <boost/multi_index/hashed_index.hpp>
#include <boost/multi_index/sequenced_index.hpp>
#include <boost/multi_index/random_access_index.hpp>
#include <boost/multi_index/member.hpp>
#include <boost/multi_index/identity.hpp>

int constexpr elements_size{ 1'000'000 };

struct Animal
{
Animal(std::string name, std::string description, int leg, int age, double maxSpeed) noexcept :
    description_{std::move(description)}, leg_{leg}, age_{age}, maxSpeed_{maxSpeed}
{
    std::copy(name.begin(), name.end(), name_.data());
}

Animal(std::string const& name, std::string const& description) noexcept :
    description_{description}
{
    std::copy(name.begin(), name.end(), name_.data());
}

Animal(Animal&& animal) noexcept
{
    name_ = name_;
    description_ = std::move(animal).description_;
    leg_ = animal.leg_;
    age_ = animal.age_;
    maxSpeed_ = animal.maxSpeed_;
}

Animal(Animal const& animal) noexcept
{
    name_ = animal.name_;
    description_ = animal.description_;
    leg_ = animal.leg_;
    age_ = animal.age_;
    maxSpeed_ = animal.maxSpeed_;
}

Animal& operator=(Animal&& animal) noexcept
{
    name_ = name_;
    description_ = std::move(animal).description_;
    leg_ = animal.leg_;
    age_ = animal.age_;
    maxSpeed_ = animal.maxSpeed_;
    return *this;
}

Animal& operator=(Animal const& animal) noexcept
{
    name_ = animal.name_;
    description_ = animal.description_;
    leg_ = animal.leg_;
    age_ = animal.age_;
    maxSpeed_ = animal.maxSpeed_;
    return *this;
}

std::array<char, 50> name_;
std::string description_;
int leg_{0};
int age_{0};
double maxSpeed_{0.0};
};

struct Hasher
{
bool print_;
Hasher(bool print = false): print_{print} {}
std::size_t operator()(std::array<char, 50> const& name) const
{
    if (print_)
        std::cout << "array hash" << std::hash<std::string_view>{}(name.data()) << std::endl;
    return std::hash<std::string_view>{}(name.data());
}
std::size_t operator()(std::string const& name) const
{
    if (print_)
        std::cout << "string hash" << std::hash<std::string_view>{}(name.c_str()) << std::endl;
    return std::hash<std::string_view>{}(name.c_str());
}
std::size_t operator()(const char* name) const
{
    if (print_)
        std::cout << "char hash" << std::hash<std::string_view>{}(name) << std::endl;
    return std::hash<std::string_view>{}(name);
}
};

struct KeysComparator
{
bool operator()(std::array<char, 50> const& a1, std::array<char, 50> const& a2) const {return a1 == a2; }
template <typename T>
bool operator()(std::string const& n1, T const& t) const
{
    std::cout << "### value.name_" << t.value.name_.data() << ", n1: " << n1 << std::endl;
    return n1 == t.value.name_.data();
}
};

template<typename TimePoint>
std::string getElapsedTime(TimePoint const& start, TimePoint const& end)
{
auto micro = std::chrono::duration_cast<std::chrono::microseconds>(end - start);
auto milli = std::chrono::duration_cast<std::chrono::milliseconds>(micro);
auto sec = std::chrono::duration_cast<std::chrono::seconds>(milli);

return {std::to_string(micro.count()) + " µs, " + std::to_string(milli.count()) + " ms, " + std::to_string(sec.count()) + " s"};
}

template<typename TimePoint>
void printStatistics(TimePoint const& emplace_start, TimePoint const& emplace_end, TimePoint const& iterate_start, TimePoint const& iterate_end,
TimePoint const& find_start, TimePoint const& find_end, intmax_t const sum, std::string target)
{
std::cout << "Elapsed time emplace: "  << getElapsedTime(emplace_start, emplace_end)
    << "  |  iterate: " << getElapsedTime(iterate_start, iterate_end)
    << "  |  find: " << getElapsedTime(find_start, find_end)
    << ", sum:" << sum << " , calculation for " << target << std::endl;
}

void test()
{
using namespace boost::multi_index;
using Animal_multi = multi_index_container<Animal, indexed_by<
    random_access<>,
    hashed_unique<
        composite_key<Animal, member<Animal, std::array<char, 50>, &Animal::name_>>,
        composite_key_hash<Hasher>,
        composite_key_equal_to<KeysComparator>>
>>;

Animal_multi container;
auto emplace_start = std::chrono::steady_clock::now();
for (auto i = 0; i < elements_size; ++i)
    container.emplace_back("the really long name of some animal 12345678910_" + std::to_string(i),
                           "bla bla bla bla bla bla bla bla bla bla bla bla bla", 4, i, i + 2);
auto emplace_end = std::chrono::steady_clock::now();

intmax_t sum{0};
auto iterate_start = std::chrono::steady_clock::now();
for (auto const& e : container)
    sum += e.age_;
auto iterate_end = std::chrono::steady_clock::now();

KeysComparator key_comparator;
Hasher hasher{true};
auto find_start = std::chrono::steady_clock::now();
auto &container_interface = container.get<1>();
auto isSucceeded = container_interface.count("the really long name of some animal 12345678910_" + std::to_string(elements_size-1),
                                             hasher, key_comparator);
if (not isSucceeded)
    std::cout << "WARN: Element has not been found." << std::endl;
auto find_end = std::chrono::steady_clock::now();

printStatistics(emplace_start, emplace_end, iterate_start, iterate_end, find_start, find_end, sum, "Animal_multi (boost multi_index)");
}

int main()
{
    test();
    return 0;
}

Solution

  • There are a number of bugs like in the move constructor:

    name_ = name_; // oops this does nothing at all
    

    Just follow Rule Of Zero. This will also inform you that std::string copy/assignment are not noexcept.

    The name copy should probably be length-limited:

    std::copy_n(name.begin(), std::min(name.size(), name_.size()), name_.data());
    

    At this point I notice something that might explain your trouble: you don't NUL-terminate, nor make sure that the array is 0-initialized.

    BINGO

    Indeed, just a few lines down I spot:

    return std::hash<std::string_view>{}(name.data());
    

    That's... UB! Your string_view might contain indeterminate data, but what's worse, you would NEVER have copied the terminating NUL character. So, std::string_view will model a string with indeterminate length which WILL likely exceed 50.

    Read here about Nasal Demons (UB)

    Such are the perils of skipping standard library types for the old C craft.

    First Dig

    So, here's the entirety of the class with equal/better characteristics:

    using Name = std::array<char, 50>;
    
    struct Animal {
        Animal(std::string_view name, std::string description,
               int leg = 0, int age = 0, double maxSpeed = 0) noexcept 
            : name_{0}, // zero initialize!
              description_{std::move(description)},
              leg_{leg},
              age_{age},
              maxSpeed_{maxSpeed}
        {
            constexpr auto Capacity = std::tuple_size<Name>::value;
            constexpr auto MaxLen = Capacity - 1; // reserve NUL char
    
            assert(name.length() < MaxLen);
            std::copy_n(name.data(), std::min(name.length(), MaxLen), name_.data());
        }
    
        //Animal            ( Animal&&      animal  ) noexcept = default;
        //Animal            ( Animal const& animal  )          = default;
        //Animal& operator= ( Animal&&      animal  ) noexcept = default;
        //Animal& operator= ( Animal const& animal  )          = default;
    
        Name        name_;
        std::string description_;
        int         leg_{0};
        int         age_{0};
        double      maxSpeed_{0.0};
    };
    

    Improving: FixedString

    This just screams for a better Name type. How about, FixedString:

    template <size_t N> struct FixedString {
        static_assert(N > 1); // require space for NUL char
    
        FixedString(std::string_view s) : data_{0} {
            if (s.length() >= N)
                throw std::length_error("FixedString");
            std::copy_n(s.data(), std::min(s.length(), N - 1), data());
        }
    
        std::string_view str() const { return { data(), size() }; }
        operator std::string_view() const { return str(); }
    
        auto data()   const { return data_.data(); }
        auto data()         { return data_.data(); }
        auto c_str()  const { return data_.data(); }
        auto c_str()        { return data_.data(); }
    
        auto begin() const { return data_.begin(); }
        auto end()   const { return data_.end();   }
        auto begin()       { return data_.begin(); }
        auto end()         { return data_.end();   }
    
        size_t size() const {
            auto terminator = std::memchr(data(), 0, data_.max_size());
            return terminator
                ? static_cast<char const*>(terminator) - data()
                : data_.max_size();
        };
    
        bool operator<(FixedString const& rhs)       const { return str() <  rhs.str(); }
        bool operator==(FixedString const& rhs)      const { return str() == rhs.str(); }
        bool operator!=(FixedString const& rhs)      const { return str() != rhs.str(); }
        // optimizations:
        bool operator<(std::string_view const& rhs)  const { return str() <  rhs.substr(0, N-1); }
        bool operator==(std::string_view const& rhs) const { return str() == rhs.substr(0, N-1); }
        bool operator!=(std::string_view const& rhs) const { return str() != rhs.substr(0, N-1); }
    
      private:
        std::array<char, N> data_;
    };
    

    Now you can simply

    using Name = FixedString<50>;
    

    And all your Names will magically (and safely) convert to and from string views.

    using Name = FixedString<50>;
    
    struct Animal {
        Animal(std::string_view name, std::string description,
               int leg = 0, int age = 0, double maxSpeed = 0) noexcept 
            : name_{name}, description_{std::move(description)},
              leg_{leg}, age_{age}, maxSpeed_{maxSpeed}
        { }
    
        Name        name_;
        std::string description_;
        int         leg_{0};
        int         age_{0};
        double      maxSpeed_{0.0};
    };
    

    Everything Simplifies With The Right Abstraction

    This is the most important lesson I think I learned in my programming career: choosing the right abstraction leads to simplicity. Here, we evaporate two messy helpers:

    using Hasher         = std::hash<std::string_view>;
    using KeysComparator = std::equal_to<Name>;
    

    Boom. They do everything you had, but better.

    Now, The Missing Element

    After simplifying the whole thing to this it should become pretty obvious that a std::array<char, 50> can never correctly contain names longer than 50 characters. Indeed, checking the insertions:

    auto emplace_start = Now();
    
    size_t duplicates = 0;
    for (auto i = 0; i < elements_size; ++i) {
        auto [_, ok] = container.emplace_back(
            make_name(i), "bla bla bla bla bla bla bla bla bla bla bla bla bla",
            4, i, i + 2);
    
        if (!ok) ++duplicates;
    }
    if (duplicates) {
        std::cerr << "Oops, " << duplicates << " duplicate keys not inserted\n";
    }
    
    auto emplace_end = Now();
    

    Reveals that:

    Oops, 999990 duplicate keys not inserted
    Elapsed time emplace: 116.491ms iterate: 0.000145ms find: 0.000597ms, sum:45 , calculation for Animal_multi (boost multi_index)
    

    At least, now you replaced Undefined Behaviour with constraint checks.

    Of course, just increasing the name capacity fixes it: [https://wandbox.org/permlink/6AamJfXe76nYALfR)

    using Name = FixedString<60>;
    

    Prints:

    Elapsed time emplace: 594.475ms iterate: 18.6076ms find: 0.003138ms, sum:499999500000 , calculation for Animal_multi (boost multi_index)
    

    Alternatively you can throw on Name construction with an overly long name: Live On Wandbox

    FixedString(std::string_view s) : data_{0} {
        if (s.length() >= N)
            throw std::length_error("FixedString");
        std::copy_n(s.data(), std::min(s.length(), N - 1), data());
    }
    

    Which duly prints

    terminate called after throwing an instance of 'std::length_error'
      what():  FixedString
    

    Full Listing

    This demo uses FixedString<60> to avoid the key errors:

    #include <boost/multi_index_container.hpp>
    #include <boost/multi_index/hashed_index.hpp>
    #include <boost/multi_index/random_access_index.hpp>
    #include <boost/multi_index/member.hpp>
    #include <iostream>
    #include <iomanip>
    #include <chrono>
    using namespace std::chrono_literals;
    
    int constexpr elements_size{ 1'000'000 };
    
    template <size_t N> struct FixedString {
        static_assert(N > 1); // require space for NUL char
    
        FixedString(std::string_view s) : data_{0} {
            if (s.length() >= N)
                throw std::length_error("FixedString");
            std::copy_n(s.data(), std::min(s.length(), N - 1), data());
        }
    
        std::string_view str() const { return { data(), size() }; }
        operator std::string_view() const { return str(); }
    
        auto data()   const { return data_.data(); }
        auto data()         { return data_.data(); }
        auto c_str()  const { return data_.data(); }
        auto c_str()        { return data_.data(); }
    
        auto begin() const { return data_.begin(); }
        auto end()   const { return data_.end();   }
        auto begin()       { return data_.begin(); }
        auto end()         { return data_.end();   }
    
        size_t size() const {
            auto terminator = std::memchr(data(), 0, data_.max_size());
            return terminator
                ? static_cast<char const*>(terminator) - data()
                : data_.max_size();
        };
    
        bool operator<(std::string_view const& rhs)  const { return str() <  rhs.substr(0, N-1); }
        bool operator==(std::string_view const& rhs) const { return str() == rhs.substr(0, N-1); }
        bool operator!=(std::string_view const& rhs) const { return str() != rhs.substr(0, N-1); }
        bool operator<(FixedString const& rhs)       const { return str() <  rhs.str(); }
        bool operator==(FixedString const& rhs)      const { return str() == rhs.str(); }
        bool operator!=(FixedString const& rhs)      const { return str() != rhs.str(); }
    
      private:
        std::array<char, N> data_;
    };
    
    using Name = FixedString<60>;
    
    struct Animal {
        Animal(std::string_view name, std::string description,
               int leg = 0, int age = 0, double maxSpeed = 0) noexcept 
            : name_{name}, description_{std::move(description)},
              leg_{leg}, age_{age}, maxSpeed_{maxSpeed}
        { }
    
        Name        name_;
        std::string description_;
        int         leg_{0};
        int         age_{0};
        double      maxSpeed_{0.0};
    };
    
    using Hasher         = std::hash<std::string_view>;
    using KeysComparator = std::equal_to<Name>;
    
    using Clock     = std::chrono::steady_clock;
    using Duration  = Clock::duration;
    static auto Now = Clock::now;
    
    void printStatistics(Duration emplace, Duration iterate, Duration find,
                         intmax_t const sum, std::string target)
    {
        std::cout << "Elapsed time"
            << " emplace: " << (emplace/1.0ms) << "ms"
            << " iterate: " << (iterate/1.0ms) << "ms"
            << " find: " << (find/1.0ms) << "ms"
            << ", sum:" << sum
            << " , calculation for " << target
            << std::endl;
    }
    
    void test() {
        namespace bmi = boost::multi_index;
        using Animal_multi = bmi::multi_index_container<Animal,
          bmi::indexed_by<
            bmi::random_access<>,
            bmi::hashed_unique<
                bmi::tag<struct by_name>,
                bmi::member<Animal, Name, &Animal::name_>, Hasher, KeysComparator>
            >
        >;
    
        Animal_multi container;
    
        auto make_name = [](size_t id) {
            return "the really long name of some animal 12345678910_" + std::to_string(id);
        };
    
        auto emplace_start = Now();
    
        size_t duplicates = 0;
        for (auto i = 0; i < elements_size; ++i) {
            auto [_, ok] = container.emplace_back(
                make_name(i), "bla bla bla bla bla bla bla bla bla bla bla bla bla",
                4, i, i + 2);
    
            if (!ok) ++duplicates;
        }
        if (duplicates) {
            std::cerr << "Oops, " << duplicates << " duplicate keys not inserted\n";
        }
    
        auto emplace_end = Now();
    
        intmax_t sum{ 0 };
        auto iterate_start = Now();
        for (auto const& e : container) {
            sum += e.age_;
        }
        auto iterate_end = Now();
    
        auto find_start = Now();
        {
            auto& name_idx = container.get<by_name>();
            auto last_key = make_name(elements_size - 1);
            if (name_idx.count(std::string_view(last_key)) == 0u) {
                std::cout << "WARN: Element has not been found." << std::endl;
            }
        }
        auto find_end = Now();
    
        printStatistics(
                emplace_end - emplace_start,
                iterate_end - iterate_start,
                find_end - find_start, sum,
                "Animal_multi (boost multi_index)");
    }
    
    int main() { test(); }