When doing union by size, you compare the size of the nodes that you're trying to unify. I found this code on codeforces that shows how to build a DSU with path compression and union by size:
#include <bits/stdc++.h>
using namespace std;
class DSU
{
public:
unordered_map<int, int> parent;
unordered_map<int, int> size;
void make_set(int v)
{
parent[v] = v;
size[v] = 1;
}
int find_set(int v)
{
if (v == parent[v])
return v;
return parent[v] = find_set(parent[v]);
}
void union_sets(int a, int b)
{
a = find_set(a);
b = find_set(b);
if (a != b)
{
if (size[a] < size[b])
swap(a, b);
// a big, b small
// attach small tree to big tree
parent[b] = a;
size[a] += size[b];
}
}
};
The question is, don't you have to modify find_set
to decrease the size of the parent node since the parent node will be pointing to the root now and not to another child node ?
Wouldn't this be the correct implementation of find_set
?
int find_set(int v)
{
if (v == parent[v])
return v;
size[parent[v]] -= 1;
return parent[v] = find_set(parent[v]);
}
Yes, but you gain nothing since that parent is not the representative of the set.
That is,
int find_set(int v)
{
if (v == parent[v])
return v;
// This parent is not the representative, changing
// the size is superfluous as it will no longer
// be referenced again.
size[parent[v]] -= 1;
return parent[v] = find_set(parent[v]);
}