In my code I need to have a functionality to iterate over all elements and check if there some element already exists possibly as soon as possible, so my choice fell on boost multi index container where I can use vector and unordered_set interface for my class Animal at the same time. The problem is that I am not able to find some element through unordered_set interface since I replaced key from std::string to std::array<char, 50> and adjusted the code, and I don't know what I am doing wrong ?
code: https://wandbox.org/permlink/dnCaEzYVdXkTFBGo
#include <array>
#include <algorithm>
#include <iostream>
#include <chrono>
#include <string>
#include <vector>
#include <list>
#include <map>
#include <set>
#include <unordered_map>
#include <unordered_set>
#include <memory>
#include <boost/multi_index_container.hpp>
#include <boost/multi_index/ordered_index.hpp>
#include <boost/multi_index/composite_key.hpp>
#include <boost/multi_index/hashed_index.hpp>
#include <boost/multi_index/sequenced_index.hpp>
#include <boost/multi_index/random_access_index.hpp>
#include <boost/multi_index/member.hpp>
#include <boost/multi_index/identity.hpp>
int constexpr elements_size{ 1'000'000 };
struct Animal
{
Animal(std::string name, std::string description, int leg, int age, double maxSpeed) noexcept :
description_{std::move(description)}, leg_{leg}, age_{age}, maxSpeed_{maxSpeed}
{
std::copy(name.begin(), name.end(), name_.data());
}
Animal(std::string const& name, std::string const& description) noexcept :
description_{description}
{
std::copy(name.begin(), name.end(), name_.data());
}
Animal(Animal&& animal) noexcept
{
name_ = name_;
description_ = std::move(animal).description_;
leg_ = animal.leg_;
age_ = animal.age_;
maxSpeed_ = animal.maxSpeed_;
}
Animal(Animal const& animal) noexcept
{
name_ = animal.name_;
description_ = animal.description_;
leg_ = animal.leg_;
age_ = animal.age_;
maxSpeed_ = animal.maxSpeed_;
}
Animal& operator=(Animal&& animal) noexcept
{
name_ = name_;
description_ = std::move(animal).description_;
leg_ = animal.leg_;
age_ = animal.age_;
maxSpeed_ = animal.maxSpeed_;
return *this;
}
Animal& operator=(Animal const& animal) noexcept
{
name_ = animal.name_;
description_ = animal.description_;
leg_ = animal.leg_;
age_ = animal.age_;
maxSpeed_ = animal.maxSpeed_;
return *this;
}
std::array<char, 50> name_;
std::string description_;
int leg_{0};
int age_{0};
double maxSpeed_{0.0};
};
struct Hasher
{
bool print_;
Hasher(bool print = false): print_{print} {}
std::size_t operator()(std::array<char, 50> const& name) const
{
if (print_)
std::cout << "array hash" << std::hash<std::string_view>{}(name.data()) << std::endl;
return std::hash<std::string_view>{}(name.data());
}
std::size_t operator()(std::string const& name) const
{
if (print_)
std::cout << "string hash" << std::hash<std::string_view>{}(name.c_str()) << std::endl;
return std::hash<std::string_view>{}(name.c_str());
}
std::size_t operator()(const char* name) const
{
if (print_)
std::cout << "char hash" << std::hash<std::string_view>{}(name) << std::endl;
return std::hash<std::string_view>{}(name);
}
};
struct KeysComparator
{
bool operator()(std::array<char, 50> const& a1, std::array<char, 50> const& a2) const {return a1 == a2; }
template <typename T>
bool operator()(std::string const& n1, T const& t) const
{
std::cout << "### value.name_" << t.value.name_.data() << ", n1: " << n1 << std::endl;
return n1 == t.value.name_.data();
}
};
template<typename TimePoint>
std::string getElapsedTime(TimePoint const& start, TimePoint const& end)
{
auto micro = std::chrono::duration_cast<std::chrono::microseconds>(end - start);
auto milli = std::chrono::duration_cast<std::chrono::milliseconds>(micro);
auto sec = std::chrono::duration_cast<std::chrono::seconds>(milli);
return {std::to_string(micro.count()) + " µs, " + std::to_string(milli.count()) + " ms, " + std::to_string(sec.count()) + " s"};
}
template<typename TimePoint>
void printStatistics(TimePoint const& emplace_start, TimePoint const& emplace_end, TimePoint const& iterate_start, TimePoint const& iterate_end,
TimePoint const& find_start, TimePoint const& find_end, intmax_t const sum, std::string target)
{
std::cout << "Elapsed time emplace: " << getElapsedTime(emplace_start, emplace_end)
<< " | iterate: " << getElapsedTime(iterate_start, iterate_end)
<< " | find: " << getElapsedTime(find_start, find_end)
<< ", sum:" << sum << " , calculation for " << target << std::endl;
}
void test()
{
using namespace boost::multi_index;
using Animal_multi = multi_index_container<Animal, indexed_by<
random_access<>,
hashed_unique<
composite_key<Animal, member<Animal, std::array<char, 50>, &Animal::name_>>,
composite_key_hash<Hasher>,
composite_key_equal_to<KeysComparator>>
>>;
Animal_multi container;
auto emplace_start = std::chrono::steady_clock::now();
for (auto i = 0; i < elements_size; ++i)
container.emplace_back("the really long name of some animal 12345678910_" + std::to_string(i),
"bla bla bla bla bla bla bla bla bla bla bla bla bla", 4, i, i + 2);
auto emplace_end = std::chrono::steady_clock::now();
intmax_t sum{0};
auto iterate_start = std::chrono::steady_clock::now();
for (auto const& e : container)
sum += e.age_;
auto iterate_end = std::chrono::steady_clock::now();
KeysComparator key_comparator;
Hasher hasher{true};
auto find_start = std::chrono::steady_clock::now();
auto &container_interface = container.get<1>();
auto isSucceeded = container_interface.count("the really long name of some animal 12345678910_" + std::to_string(elements_size-1),
hasher, key_comparator);
if (not isSucceeded)
std::cout << "WARN: Element has not been found." << std::endl;
auto find_end = std::chrono::steady_clock::now();
printStatistics(emplace_start, emplace_end, iterate_start, iterate_end, find_start, find_end, sum, "Animal_multi (boost multi_index)");
}
int main()
{
test();
return 0;
}
There are a number of bugs like in the move constructor:
name_ = name_; // oops this does nothing at all
Just follow Rule Of Zero. This will also inform you that std::string
copy/assignment are not noexcept
.
The name copy should probably be length-limited:
std::copy_n(name.begin(), std::min(name.size(), name_.size()), name_.data());
At this point I notice something that might explain your trouble: you don't NUL-terminate, nor make sure that the array is 0-initialized.
Indeed, just a few lines down I spot:
return std::hash<std::string_view>{}(name.data());
That's... UB! Your string_view might contain indeterminate data, but what's worse, you would NEVER have copied the terminating NUL character. So, std::string_view
will model a string with indeterminate length which WILL likely exceed 50.
Read here about Nasal Demons (UB)
Such are the perils of skipping standard library types for the old C craft.
So, here's the entirety of the class with equal/better characteristics:
using Name = std::array<char, 50>;
struct Animal {
Animal(std::string_view name, std::string description,
int leg = 0, int age = 0, double maxSpeed = 0) noexcept
: name_{0}, // zero initialize!
description_{std::move(description)},
leg_{leg},
age_{age},
maxSpeed_{maxSpeed}
{
constexpr auto Capacity = std::tuple_size<Name>::value;
constexpr auto MaxLen = Capacity - 1; // reserve NUL char
assert(name.length() < MaxLen);
std::copy_n(name.data(), std::min(name.length(), MaxLen), name_.data());
}
//Animal ( Animal&& animal ) noexcept = default;
//Animal ( Animal const& animal ) = default;
//Animal& operator= ( Animal&& animal ) noexcept = default;
//Animal& operator= ( Animal const& animal ) = default;
Name name_;
std::string description_;
int leg_{0};
int age_{0};
double maxSpeed_{0.0};
};
FixedString
This just screams for a better Name
type. How about, FixedString
:
template <size_t N> struct FixedString {
static_assert(N > 1); // require space for NUL char
FixedString(std::string_view s) : data_{0} {
if (s.length() >= N)
throw std::length_error("FixedString");
std::copy_n(s.data(), std::min(s.length(), N - 1), data());
}
std::string_view str() const { return { data(), size() }; }
operator std::string_view() const { return str(); }
auto data() const { return data_.data(); }
auto data() { return data_.data(); }
auto c_str() const { return data_.data(); }
auto c_str() { return data_.data(); }
auto begin() const { return data_.begin(); }
auto end() const { return data_.end(); }
auto begin() { return data_.begin(); }
auto end() { return data_.end(); }
size_t size() const {
auto terminator = std::memchr(data(), 0, data_.max_size());
return terminator
? static_cast<char const*>(terminator) - data()
: data_.max_size();
};
bool operator<(FixedString const& rhs) const { return str() < rhs.str(); }
bool operator==(FixedString const& rhs) const { return str() == rhs.str(); }
bool operator!=(FixedString const& rhs) const { return str() != rhs.str(); }
// optimizations:
bool operator<(std::string_view const& rhs) const { return str() < rhs.substr(0, N-1); }
bool operator==(std::string_view const& rhs) const { return str() == rhs.substr(0, N-1); }
bool operator!=(std::string_view const& rhs) const { return str() != rhs.substr(0, N-1); }
private:
std::array<char, N> data_;
};
Now you can simply
using Name = FixedString<50>;
And all your Name
s will magically (and safely) convert to and from string views.
using Name = FixedString<50>;
struct Animal {
Animal(std::string_view name, std::string description,
int leg = 0, int age = 0, double maxSpeed = 0) noexcept
: name_{name}, description_{std::move(description)},
leg_{leg}, age_{age}, maxSpeed_{maxSpeed}
{ }
Name name_;
std::string description_;
int leg_{0};
int age_{0};
double maxSpeed_{0.0};
};
This is the most important lesson I think I learned in my programming career: choosing the right abstraction leads to simplicity. Here, we evaporate two messy helpers:
using Hasher = std::hash<std::string_view>;
using KeysComparator = std::equal_to<Name>;
Boom. They do everything you had, but better.
After simplifying the whole thing to this it should become pretty obvious that a std::array<char, 50>
can never correctly contain names longer than 50 characters. Indeed, checking the insertions:
auto emplace_start = Now();
size_t duplicates = 0;
for (auto i = 0; i < elements_size; ++i) {
auto [_, ok] = container.emplace_back(
make_name(i), "bla bla bla bla bla bla bla bla bla bla bla bla bla",
4, i, i + 2);
if (!ok) ++duplicates;
}
if (duplicates) {
std::cerr << "Oops, " << duplicates << " duplicate keys not inserted\n";
}
auto emplace_end = Now();
Reveals that:
Oops, 999990 duplicate keys not inserted
Elapsed time emplace: 116.491ms iterate: 0.000145ms find: 0.000597ms, sum:45 , calculation for Animal_multi (boost multi_index)
At least, now you replaced Undefined Behaviour with constraint checks.
Of course, just increasing the name capacity fixes it: [https://wandbox.org/permlink/6AamJfXe76nYALfR)
using Name = FixedString<60>;
Prints:
Elapsed time emplace: 594.475ms iterate: 18.6076ms find: 0.003138ms, sum:499999500000 , calculation for Animal_multi (boost multi_index)
Alternatively you can throw on Name
construction with an overly long name: Live On Wandbox
FixedString(std::string_view s) : data_{0} {
if (s.length() >= N)
throw std::length_error("FixedString");
std::copy_n(s.data(), std::min(s.length(), N - 1), data());
}
Which duly prints
terminate called after throwing an instance of 'std::length_error'
what(): FixedString
This demo uses FixedString<60>
to avoid the key errors:
#include <boost/multi_index_container.hpp>
#include <boost/multi_index/hashed_index.hpp>
#include <boost/multi_index/random_access_index.hpp>
#include <boost/multi_index/member.hpp>
#include <iostream>
#include <iomanip>
#include <chrono>
using namespace std::chrono_literals;
int constexpr elements_size{ 1'000'000 };
template <size_t N> struct FixedString {
static_assert(N > 1); // require space for NUL char
FixedString(std::string_view s) : data_{0} {
if (s.length() >= N)
throw std::length_error("FixedString");
std::copy_n(s.data(), std::min(s.length(), N - 1), data());
}
std::string_view str() const { return { data(), size() }; }
operator std::string_view() const { return str(); }
auto data() const { return data_.data(); }
auto data() { return data_.data(); }
auto c_str() const { return data_.data(); }
auto c_str() { return data_.data(); }
auto begin() const { return data_.begin(); }
auto end() const { return data_.end(); }
auto begin() { return data_.begin(); }
auto end() { return data_.end(); }
size_t size() const {
auto terminator = std::memchr(data(), 0, data_.max_size());
return terminator
? static_cast<char const*>(terminator) - data()
: data_.max_size();
};
bool operator<(std::string_view const& rhs) const { return str() < rhs.substr(0, N-1); }
bool operator==(std::string_view const& rhs) const { return str() == rhs.substr(0, N-1); }
bool operator!=(std::string_view const& rhs) const { return str() != rhs.substr(0, N-1); }
bool operator<(FixedString const& rhs) const { return str() < rhs.str(); }
bool operator==(FixedString const& rhs) const { return str() == rhs.str(); }
bool operator!=(FixedString const& rhs) const { return str() != rhs.str(); }
private:
std::array<char, N> data_;
};
using Name = FixedString<60>;
struct Animal {
Animal(std::string_view name, std::string description,
int leg = 0, int age = 0, double maxSpeed = 0) noexcept
: name_{name}, description_{std::move(description)},
leg_{leg}, age_{age}, maxSpeed_{maxSpeed}
{ }
Name name_;
std::string description_;
int leg_{0};
int age_{0};
double maxSpeed_{0.0};
};
using Hasher = std::hash<std::string_view>;
using KeysComparator = std::equal_to<Name>;
using Clock = std::chrono::steady_clock;
using Duration = Clock::duration;
static auto Now = Clock::now;
void printStatistics(Duration emplace, Duration iterate, Duration find,
intmax_t const sum, std::string target)
{
std::cout << "Elapsed time"
<< " emplace: " << (emplace/1.0ms) << "ms"
<< " iterate: " << (iterate/1.0ms) << "ms"
<< " find: " << (find/1.0ms) << "ms"
<< ", sum:" << sum
<< " , calculation for " << target
<< std::endl;
}
void test() {
namespace bmi = boost::multi_index;
using Animal_multi = bmi::multi_index_container<Animal,
bmi::indexed_by<
bmi::random_access<>,
bmi::hashed_unique<
bmi::tag<struct by_name>,
bmi::member<Animal, Name, &Animal::name_>, Hasher, KeysComparator>
>
>;
Animal_multi container;
auto make_name = [](size_t id) {
return "the really long name of some animal 12345678910_" + std::to_string(id);
};
auto emplace_start = Now();
size_t duplicates = 0;
for (auto i = 0; i < elements_size; ++i) {
auto [_, ok] = container.emplace_back(
make_name(i), "bla bla bla bla bla bla bla bla bla bla bla bla bla",
4, i, i + 2);
if (!ok) ++duplicates;
}
if (duplicates) {
std::cerr << "Oops, " << duplicates << " duplicate keys not inserted\n";
}
auto emplace_end = Now();
intmax_t sum{ 0 };
auto iterate_start = Now();
for (auto const& e : container) {
sum += e.age_;
}
auto iterate_end = Now();
auto find_start = Now();
{
auto& name_idx = container.get<by_name>();
auto last_key = make_name(elements_size - 1);
if (name_idx.count(std::string_view(last_key)) == 0u) {
std::cout << "WARN: Element has not been found." << std::endl;
}
}
auto find_end = Now();
printStatistics(
emplace_end - emplace_start,
iterate_end - iterate_start,
find_end - find_start, sum,
"Animal_multi (boost multi_index)");
}
int main() { test(); }