c++sortingstlstrict-weak-ordering

Using comparator to sort set of unique pairs with different criterias for uniqueness and less than


First of all I've tried to search for similar questions but I didn't find any response explaining what could my problem be.

The problem is the following: Given a set of N nodes with coordinates (x,y,z) sort them using a 4th value F as fast as possible.

I want to use a std::set with a custom comparator for this purpose because it has O(log(N)) complexity. I know I could also try a std::vector and a call to std::sort on std::vector but in theory is a slower operation.

Why this? Because I'm constantly inserting elements in the set, changing the F value (it means I change the value and to reorder the element in the container I erase and re-insert it) and I want to take the element with the less F value (that's the element at the front of the container).

But let's go with the std::set problem.

The coordinates define the uniqueness property, following the strict weak ordering rules,it means that a and b are the considered the same object if

!comp(a,b) && !comp(b,a)

The problem is related to define a uniqueness criteria based on the coordinates and a sorting criteria based on the F value. I don't want the set to store two elements with the same coordiantes, but I want it to be allow to store two elements with different coordinates but same F value

The comparator should also satisfais the following three properties:

  1. Irreflexivity x < x false
  2. Assymetry x < y true implies y < x false
  3. Transitivy x < y && y < z implies x < z true

So knowing all these properties I've been working around with the following example implementation:

Some definitions

class Node;
struct NodeComparator;
using NodePair = std::pair<Node *, int>;
using NodeSet  = std::set<NodePair, NodeComparator>;

Here I'm using pointers for convenience

Class Node

class Node
{

public:
    Node()
    {
    }
    Node(int _x, int _y, int _z, int _val) : x(_x), y(_y), z(_z), value(_val)
    {
    }

    int x, y, z;
    int value;

    friend inline std::ostream &operator<<(std::ostream &os, const Node &dt)
    {
        os << "[" << dt.x << ", " << dt.y << ", " << dt.z << "], [" << dt.value << "]";
        return os;
    }
    friend bool operator==(const Node &_lhs, const Node &_rhs){
        if( _lhs.x == _rhs.x &&
            _lhs.y == _rhs.y &&
            _lhs.z == _rhs.z ){
                return true;
            }
        return false;
    }
};

Here the operator << is overloaded only for debugging purposes

The comparator


struct NodeComparator
{
    bool operator()(const NodePair &_lhs, const NodePair &_rhs) const
    {
        if( _lhs.first == nullptr || _rhs.first == nullptr )
            return false;
        /*
        This first check implements uniqueness. 
        If _lhs == _rhs --> comp(_lhs,_rhs) == false && comp(_rhs, _lhs) == false
        So ( !comp(l,r) && !comp(r,l) ) == true
        */
        if( *_lhs.first == *_rhs.first) 
            return false;
        
        int ret = _lhs.second - _rhs.second;
        return ret < 0;
    }
};

I guess one problem could be the case of two nodes with different coordinates but same F value

Full example with concrete cases

Ìn this example I use the above classes to insert/find/erase some elements, but has it is show on the output, it's not behaving as expected:

#include <iostream>
#include <set>
#include <vector>
#include <algorithm>
#include <tuple>

class Node;
struct NodeComparator;
using NodePair = std::pair<Node *, int>;
using NodeSet  = std::set<NodePair, NodeComparator>;
class Node
{

public:
    Node()
    {
    }
    Node(int _x, int _y, int _z, int _val) : x(_x), y(_y), z(_z), value(_val)
    {
    }

    int x, y, z;
    int value;

    friend inline std::ostream &operator<<(std::ostream &os, const Node &dt)
    {
        os << "[" << dt.x << ", " << dt.y << ", " << dt.z << "], [" << dt.value << "]";
        return os;
    }
};

struct NodeComparator
{
    bool operator()(const NodePair &_lhs, const NodePair &_rhs) const
    {
        /*
        This first check implements uniqueness. 
        If _lhs == _rhs --> comp(_lhs,_rhs) == false && comp(_rhs, _lhs) == false
        So ( !comp(l,r) && !comp(r,l) ) == true
        */
        if(_lhs == _rhs) 
            return false;
        
        int ret = _lhs.second - _rhs.second;
        return ret < 0;
    }
};
int main(int argc, char **argv)
{
    Node n1(0, 2, 4, 12), 
         n2(2, 4, 5, 25), 
         n3(0, 1, 4, 34), 
         n4(0, 1, 4, 20), 
         n5(0, 1, 5, 20),
         n6(0, 2, 4, 112);

    NodeSet set;

    set.insert({&n1, n1.value});
    set.insert({&n2, n2.value});
    set.insert({&n3, n3.value});
    set.insert({&n4, n4.value}); //Should not be inserted because it already exists n3 with same coords
    set.insert({&n5, n5.value});

    //Try to insert multiple times a previously inserted node (n1 coords is == n6 coords)
    //It should not be inserted because it already exists one node with the same coords (n1)
    set.insert({&n6, n6.value});
    set.insert({&n6, n6.value});
    set.insert({&n6, n6.value});
    set.insert({&n6, n6.value});
    set.insert({&n6, 0});
    set.insert({&n6, 1});

    if (set.find({&n4, n4.value}) != set.end())
        std::cout << "Found n4" << std::endl;
    
    auto it = set.erase({&n4, 20});
    std::cout << "It value (elements erased): " << it << std::endl;

    if (set.find({&n4, n4.value}) != set.end())
        std::cout << "Found n4 after removal" << std::endl;
    
    std::cout << "Final Set content: " << std::endl;
    for (auto &it : set)
        std::cout << *it.first << std::endl;


    return 0;
}

To compile it with C++11 or above: g++ -o main main.cpp

Output:

Found n4
It value (elements erased): 1
Final Set content: 
[0, 2, 4], [12]
[2, 4, 5], [25]
[0, 1, 4], [34]
[0, 2, 4], [112]

**Expected Output: ** Correspond to elements n1, n5, n2, n3 ordered from the one with less F (n1) to the one with the higher F (n3).

Final Set content: 
[0, 2, 4], [12]
[0, 1, 5], [20]
[2, 4, 5], [25]
[0, 1, 4], [34]

I would appreciate a lot any help or ideas and alternatives of implementation. Thanks


Solution

  • Unfortunately, your requirements cannot be fulfilled by one std::set alone. The std::set uses the same comparator for sorting and uniqueness. The comparator has no state, meaning, you cannot compare one time with the first and the next time with a second condition. That will not work.

    So, you need to use 2 containers, like first a std::unordered_set using a comparator for equal coordinates and the a second container for the sorting, like a std::multiset..

    You could also use a std::unordered_map in conjunction with a std::multiset.

    Or you create your own container as a class and try to optimize performance.

    Let me show you an example using the combination of std::unordered_set and std::multiset. It will be fast, because the std::unordered_set uses hashes.

    #include <iostream>
    #include <unordered_set>
    #include <set>
    #include <array>
    #include <vector>
    
    using Coordinate = std::array<int, 3>;
    
    struct Node {
        Coordinate coordinate{};
        int value{};
        bool operator == (const Node& other) const { return coordinate == other.coordinate; }
        friend std::ostream& operator << (std::ostream& os, const Node& n) {
            return os << "[" << n.coordinate[0] << ", " << n.coordinate[1] << ", " << n.coordinate[2] << "], [" << n.value << "]"; }
    };
    struct CompareOnSecond { bool operator ()(const Node& n1, const Node& n2)const { return n1.value < n2.value; } };
    struct Hash {size_t operator()(const Node& n) const {return n.coordinate[0] ^ n.coordinate[1] ^ n.coordinate[2];} };
    
    using UniqueNodes = std::unordered_set<Node, Hash>;
    using Sorter = std::multiset<Node, CompareOnSecond>;
    
    int main() {
        // a vector with some test nodes
        std::vector<Node> testNodes{
        { {{0, 2, 4}}, 12 },
        { {{2, 4, 5}}, 25 },
        { {{0, 1, 4}}, 34 },
        { {{0, 1, 4}}, 20 },
        { {{0, 1, 5}}, 20 },
        { {{0, 2, 4}}, 112 } };
    
        // Here we will store the unique nodes
        UniqueNodes uniqueNodes{};
        for (const Node& n : testNodes) uniqueNodes.insert(n);
    
        // And now, do the sorting
        Sorter sortedNodes(uniqueNodes.begin(), uniqueNodes.end());
    
        // Some test functions
        std::cout << "\nSorted unique nodes:\n";
        for (const Node& n : sortedNodes) std::cout << n << '\n';
    
        // find a node
        if (sortedNodes.find({ {{0, 1, 4}}, 20 }) != sortedNodes.end())
            std::cout << "\nFound n4\n";
    
        // Erase a node
        auto it = sortedNodes.erase({ {{0, 1, 4}}, 20 });
        std::cout << "It value (elements erased): " << it << '\n';
    
        // Was it really erased?
        if (sortedNodes.find({ {{0, 1, 4}}, 20 }) != sortedNodes.end())
            std::cout << "\nFound n4 after removal\n";
    
        // Show final result
        std::cout << "\nFinal Set content:\n";
        for (const Node& n : sortedNodes) std::cout << n << '\n';
    }