c++unordered-set

How to declare/use an `unordered_set` for triplets (`tuple`) using custom comparator?


How to declare/use an unordered_set for triplets (tuple) using custom comparator?

I need to store triplets of float (handled as tuple) in a set to check for potential duplicates. As it's about float, I guess using regular compare with == will not work so customizing compare is needed.

This minimal code doesn't compile:

>> cat unordered_set_triplet.cpp 
#include <unordered_set>
#include <tuple>
#include <limits> // numeric_limits
#include <cmath> // fabs
#include <functional> // hash

using triplet = std::tuple<float, float, float>;
bool triplet_equal(triplet const & lhs, triplet const & rhs) {
  float const eps = std::numeric_limits<float>::epsilon();
  if (std::fabs(std::get<0>(lhs) - std::get<0>(rhs)) > eps) return false;
  if (std::fabs(std::get<1>(lhs) - std::get<1>(rhs)) > eps) return false;
  if (std::fabs(std::get<2>(lhs) - std::get<2>(rhs)) > eps) return false;
  return true;
}
using unordered_set_triplet = std::unordered_set<triplet,
                                                 std::hash<triplet>,
                                                 decltype(triplet_equal)>;

int main() {
  //unordered_set_triplet s; // Compilation: KO...
  unordered_set_triplet s(10, std::hash<triplet>, triplet_equal);
  s.insert({1.f, 2.f, 3.f});
}

I get:

>> g++ -std=c++20 -o unordered_set_triplet unordered_set_triplet.cpp 
In file included from /usr/include/c++/12/bits/hashtable.h:35,
                 from /usr/include/c++/12/unordered_set:46,
                 from unordered_set_triplet.cpp:1:
/usr/include/c++/12/bits/hashtable_policy.h: In instantiation of ‘struct std::__detail::_Hashtable_ebo_helper<0, bool(const std::tuple<float, float, float>&, const std::tuple<float, float, float>&), false>’:
/usr/include/c++/12/bits/hashtable_policy.h:1631:12:   required from ‘struct std::__detail::_Hashtable_base<std::tuple<float, float, float>, std::tuple<float, float, float>, std::__detail::_Identity, bool(const std::tuple<float, float, float>&, const std::tuple<float, float, float>&), std::hash<std::tuple<float, float, float> >, std::__detail::_Mod_range_hashing, std::__detail::_Default_ranged_hash, std::__detail::_Hashtable_traits<true, true, true> >’
/usr/include/c++/12/bits/hashtable.h:182:11:   required from ‘class std::_Hashtable<std::tuple<float, float, float>, std::tuple<float, float, float>, std::allocator<std::tuple<float, float, float> >, std::__detail::_Identity, bool(const std::tuple<float, float, float>&, const std::tuple<float, float, float>&), std::hash<std::tuple<float, float, float> >, std::__detail::_Mod_range_hashing, std::__detail::_Default_ranged_hash, std::__detail::_Prime_rehash_policy, std::__detail::_Hashtable_traits<true, true, true> >’
/usr/include/c++/12/bits/unordered_set.h:100:18:   required from ‘class std::unordered_set<std::tuple<float, float, float>, std::hash<std::tuple<float, float, float> >, bool(const std::tuple<float, float, float>&, const std::tuple<float, float, float>&)>’
unordered_set_triplet.cpp:21:26:   required from here
/usr/include/c++/12/bits/hashtable_policy.h:1204:11: error: data member ‘std::__detail::_Hashtable_ebo_helper<0, bool(const std::tuple<float, float, float>&, const std::tuple<float, float, float>&), false>::_M_tp’ invalidly declared function type
 1204 |       _Tp _M_tp{};
      |           ^~~~~
/usr/include/c++/12/bits/hashtable.h: In instantiation of ‘class std::_Hashtable<std::tuple<float, float, float>, std::tuple<float, float, float>, std::allocator<std::tuple<float, float, float> >, std::__detail::_Identity, bool(const std::tuple<float, float, float>&, const std::tuple<float, float, float>&), std::hash<std::tuple<float, float, float> >, std::__detail::_Mod_range_hashing, std::__detail::_Default_ranged_hash, std::__detail::_Prime_rehash_policy, std::__detail::_Hashtable_traits<true, true, true> >’:
/usr/include/c++/12/bits/unordered_set.h:100:18:   required from ‘class std::unordered_set<std::tuple<float, float, float>, std::hash<std::tuple<float, float, float> >, bool(const std::tuple<float, float, float>&, const std::tuple<float, float, float>&)>’
unordered_set_triplet.cpp:21:26:   required from here
/usr/include/c++/12/bits/hashtable.h:665:7: error: function returning a function
  665 |       key_eq() const
      |       ^~~~~~
In file included from /usr/include/c++/12/unordered_set:47:
/usr/include/c++/12/bits/unordered_set.h: In instantiation of ‘class std::unordered_set<std::tuple<float, float, float>, std::hash<std::tuple<float, float, float> >, bool(const std::tuple<float, float, float>&, const std::tuple<float, float, float>&)>’:
unordered_set_triplet.cpp:21:26:   required from here
/usr/include/c++/12/bits/unordered_set.h:632:7: error: function returning a function
  632 |       key_eq() const
      |       ^~~~~~
unordered_set_triplet.cpp: In function ‘int main()’:
unordered_set_triplet.cpp:21:49: error: expected primary-expression before ‘,’ token
   21 |   unordered_set_triplet s(10, std::hash<triplet>, triplet_equal);
      |                                                 ^

How to fix this?

EDIT

Doesn't work either using (ordered) set:

>> cat set_triplet.cpp 
#include <iostream>
#include <set>
#include <tuple>
#include <limits> // numeric_limits
#include <cmath> // fabs
#include <functional> // hash

using triplet = std::tuple<float, float, float>;
bool triplet_equal(triplet const & lhs, triplet const & rhs) {
  float const eps = std::numeric_limits<float>::epsilon();
  if (std::fabs(std::get<0>(lhs) - std::get<0>(rhs)) > eps) return false;
  if (std::fabs(std::get<1>(lhs) - std::get<1>(rhs)) > eps) return false;
  if (std::fabs(std::get<2>(lhs) - std::get<2>(rhs)) > eps) return false;
  return true;
}
using set_triplet = std::set<triplet, std::hash<triplet>, decltype(triplet_equal)>;

int main() {
  //set_triplet s; // Compilation: KO...
  set_triplet s(10, std::hash<triplet>, triplet_equal);
  s.insert({1.f, 2.f, 3.f});
  s.insert({1.0000001f, 2.0000001f, 3.0000001f});
  for (auto const & t : s) std::cout << std::get<0>(t) << ", " << std::get<1>(t) << ", " << std::get<2>(t) << std::endl;
}

What container could be appropriate to use? triplet can be seen a 3D point (XYZ): I need to handle/detect duplicate points.

SOLUTION: USING SET (NOT UNORDERED SET) AND INT (NOT FLOAT) WITHOUT IMPLEMENTING LESS

Using tuples made of integer i built this way i = (int) 1000000 * f from float f and using set (as operator< will respect strict ordering up to 6-digit precision after multiplication by 1000000).

>> cat set_uint32_triplet.cpp 
#include <iostream>
#include <set>
#include <tuple>

using triplet_uint32 = std::tuple<uint32_t, uint32_t, uint32_t>;
using triplet_float = std::tuple<float, float, float>;
triplet_uint32 convert(triplet_float const & f) {
  uint32_t precision = 1000000; // Allow for 6-digit precision.
  uint32_t x = (uint32_t) (std::get<0>(f) * precision);
  uint32_t y = (uint32_t) (std::get<1>(f) * precision);
  uint32_t z = (uint32_t) (std::get<2>(f) * precision);
  return {x, y, z};
}

int main() {
  triplet_float pt1 = {1.f, 2.f, 3.f};
  triplet_float pt2 = {1.0000001f, 2.0000001f, 3.0000001f}; // Considered     duplicate with pt1.
  triplet_float pt3 = {1.000001f,  2.000001f,  3.000001f};  // Considered NOT duplicate with pt1.

  std::set<triplet_uint32> s;
  s.insert(convert(pt1));
  s.insert(convert(pt2));
  s.insert(convert(pt3));
  std::cout << "set size " << s.size() << std::endl;
  for (auto const & t : s) std::cout << "set item " << std::get<0>(t) << ", " << std::get<1>(t) << ", " << std::get<2>(t) << ", " << std::endl;
}

>> g++ -std=c++20 -o set_uint32_triplet set_uint32_triplet.cpp

>> ./set_uint32_triplet 
set size 2
set item 1000000, 2000000, 3000000, 
set item 1000000, 2000001, 3000001, 

SOLUTION: USING SET (NOT UNORDERED SET) AND INT (NOT FLOAT) WITH IMPLEMENTING LESS

>> cat set_uint32_triplet_less.cpp 
#include <iostream>
#include <set>
#include <tuple>

#define FLOAT_TO_INT(x) ((x)>=0?(int32_t)((x)+0.5):(int32_t)((x)-0.5))

using triplet_int32 = std::tuple<int32_t, int32_t, int32_t>;
using triplet_float = std::tuple<float, float, float>;
triplet_int32 convert(triplet_float const & f) {
  int32_t precision = 1000; // Allow for 3-digit precision.
  int32_t x = FLOAT_TO_INT(std::get<0>(f) * precision);
  int32_t y = FLOAT_TO_INT(std::get<1>(f) * precision);
  int32_t z = FLOAT_TO_INT(std::get<2>(f) * precision);
  //std::cout.precision(10);
  //std::cout << "convert: " << std::get<0>(f) << ", " << std::get<1>(f) << ", " << std::get<2>(f);
  //std::cout << " - " << std::get<0>(f) * precision << ", " << std::get<1>(f) * precision << ", " << std::get<2>(f) * precision;
  //std::cout << " - " << FLOAT_TO_INT(std::get<0>(f) * precision) << ", " << FLOAT_TO_INT(std::get<1>(f) * precision) << ", " << FLOAT_TO_INT(std::get<2>(f) * precision);
  //std::cout << " - " << x << ", " << y << ", " << z << std::endl;
  return {x, y, z};
}
struct less {
  bool operator()(triplet_int32 const & lhs,
                  triplet_int32 const & rhs) const {
    bool res = true; // lhs < rhs.
    if      (std::get<0>(lhs) >= std::get<0>(rhs)) res = false;
    else if (std::get<1>(lhs) >= std::get<1>(rhs)) res = false;
    else if (std::get<2>(lhs) >= std::get<2>(rhs)) res = false;
    //std::cout << "  less: " << std::get<0>(lhs) << ", " << std::get<1>(lhs) << ", " << std::get<2>(lhs);
    //std::cout <<    " - " << std::get<0>(rhs) << ", " << std::get<1>(rhs) << ", " << std::get<2>(rhs);
    //std::cout << " - res " << res << std::endl;
    return res; // lhs < rhs.
  }
};

int main() {
  triplet_float pt1 = {1.f, 2.f, 3.f};
  triplet_float pt2 = {1.0001f, 2.0001f, 3.0001f}; // Considered     duplicate with pt1.
  triplet_float pt3 = {1.001f,  2.001f,  3.001f};  // Considered NOT duplicate with pt1.

  std::set<triplet_int32, less> s;
  s.insert(convert(pt1));
  s.insert(convert(pt2));
  s.insert(convert(pt3));
  std::cout << "set size " << s.size() << std::endl;
  for (auto const & t : s) std::cout << "set item " << std::get<0>(t) << ", " << std::get<1>(t) << ", " << std::get<2>(t) << ", " << std::endl;
}
>> g++ -std=c++20 -o set_uint32_triplet_less set_uint32_triplet_less.cpp
>> ./set_uint32_triplet_less 
set size 2
set item 1000, 2000, 3000, 
set item 1001, 2001, 3001, 

Solution

  • Problems with KeyEqual

    Firstly, the KeyEqual provided to the std::unordered_set can't be a function, and that's what you're trying to do with decltype(triplet_equal). However, it could be a function pointer. Normally, you should use a function object as follows:

    struct triplet_equal {
        // note: static constexpr only since C++23, otherwise remove those two
        static constexpr bool operator()(triplet const & lhs, triplet const & rhs) const {
            float const eps = std::numeric_limits<float>::epsilon();
            if (std::fabs(std::get<0>(lhs) - std::get<0>(rhs)) > eps) return false;
            if (std::fabs(std::get<1>(lhs) - std::get<1>(rhs)) > eps) return false;
            if (std::fabs(std::get<2>(lhs) - std::get<2>(rhs)) > eps) return false;
            return true;
        }
    };
    
    // ...
    std::unordered_set<triplet, std::hash<triplet>, triplet_equal> s(10);
    

    You don't have to provide any value for the hash or for triplet_equal to the constructor because they are default-constructible.

    Problems with Hash

    The next huge issue is that the standard library has no std::hash specialization for tuples. Look into Generic hash for tuples in unordered_map / unordered_set if you want to make your own.

    However, even if there was one, the problem remains that two equal tuples (where equality is determined by triplet_equal) must also have the same hash. You would have to specialize std::hash yourself so that two equal tuples always have the same hash, despite floating-point imprecision. You might be able to do it by quantizing floats, but I imagine it would be very difficult to do correctly.

    Alternative: use std::set and provide a Compare

    It would be much easier to use std::set, which only requires you to implement a less-than comparison:

    // checks whether x < y after quantization to a multiple of epsilon
    constexpr float eps_less_than(float x, float y) {
        constexpr float e = std::numeric_limits<float>::epsilon();
        // use simple comparison if numbers are far apart
        float d = x - y;
        if (std::fabs(d) >= 2 * e) {
            return d < 0;
        }
        return std::floor(x * (1 / e)) < std::floor(y * (1 / e));
    }
    
    // lexicographical comparison
    struct triplet_less {
        // constexpr since C++23
        constexpr bool operator()(triplet const & lhs, triplet const & rhs) const {
            if (eps_less_than(std::get<0>(lhs), std::get<0>(rhs))) return true;
            if (eps_less_than(std::get<0>(rhs), std::get<0>(lhs))) return false;
    
            if (eps_less_than(std::get<1>(lhs), std::get<1>(rhs))) return true;
            if (eps_less_than(std::get<1>(rhs), std::get<1>(lhs))) return false;
    
            if (eps_less_than(std::get<2>(lhs), std::get<2>(rhs))) return true;
            return false;
        }
    };
    
    int main() {
        std::set<triplet, triplet_less> s;
        s.insert({1.f, 2.f, 3.f});
    }
    

    See live example at Compiler Explorer

    Further Notes

    It would be much better not to use std::tuple, but use a simple aggregate type as follows:

    struct vec3 {
        float x, y, z;
        // C++20: default all comparison operators
        // (you still need a custom vec3_equal to deal with precision issues)
        friend auto constexpr operator<=>(vec3 const&, vec3 const&) = default;
    };
    

    With defaulted comparison operators, it's very easy to get all the functionality of std::tuple, and you can write lhs.x instead of needing std::get<0>(lhs) and other annoyances.