Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
87 changes: 87 additions & 0 deletions src/support/disjoint_sets.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
/*
* Copyright 2024 WebAssembly Community Group participants
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef wasm_support_disjoint_sets_h
#define wasm_support_disjoint_sets_h

#include <cassert>
#include <cstddef>
#include <utility>
#include <vector>

namespace wasm {

// A disjoint set forest (a.k.a. union-find) implementation. See
// https://en.wikipedia.org/wiki/Disjoint-set_data_structure.
struct DisjointSets {
struct ElemInfo {
  // Index of this element's parent in the forest. A root has no parent and
  // stores its own index here instead.
  size_t parent;
  // Upper bound on the height of the tree rooted at this element.
  size_t rank;
};
// Bookkeeping for every element, addressed by the element's index.
std::vector<ElemInfo> info;

// Create a new singleton set and return the index of its only element.
size_t addSet() {
  const size_t index = info.size();
  // A fresh element is its own parent (i.e. a root) and starts at rank zero.
  info.push_back({index, 0});
  return index;
}

// Look up the representative element of the set containing `elem`. As a side
// effect, every element visited on the way is re-parented directly onto that
// representative.
size_t getRoot(size_t elem) {
  assert(elem < info.size());
  // First pass: climb parent links until reaching an element that is its own
  // parent, which is the root.
  size_t root = elem;
  while (info[root].parent != root) {
    root = info[root].parent;
  }
  // Second pass: walk the same path again and point each element straight at
  // the root so that later lookups in this set are shorter.
  for (size_t curr = elem; curr != root;) {
    const size_t next = info[curr].parent;
    info[curr].parent = root;
    curr = next;
  }
  return root;
}

// Merge the set containing `elem1` with the set containing `elem2` and return
// the representative element of the combined set. If both elements are already
// in the same set, nothing is modified beyond the path compression done by
// getRoot, and the existing representative is returned.
size_t getUnion(size_t elem1, size_t elem2) {
assert(elem1 < info.size() && elem2 < info.size());
size_t root1 = getRoot(elem1);
size_t root2 = getRoot(elem2);
if (root1 == root2) {
// Already in the same set, so there is nothing to merge.
return root1;
}
// Canonicalize so that root1's rank is at least as large as root2's. When the
// ranks are equal no swap happens and root1 stays the root of elem1's set.
if (info[root1].rank < info[root2].rank) {
std::swap(root1, root2);
}
// Merge the trees by attaching the lower-ranked root beneath the other one,
// which keeps the resulting tree shallow.
info[root2].parent = root1;
// Only when both ranks were equal can the merged tree be taller than either
// input, so only then does the surviving root's rank grow.
if (info[root1].rank == info[root2].rank) {
++info[root1].rank;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
++info[root1].rank;
info[root1].rank++;

Unless there is some reason for it? To me the default ++ reads more clearly in general.

}
return root1;
}
};

} // namespace wasm

#endif // wasm_support_disjoint_sets_h
1 change: 1 addition & 0 deletions test/gtest/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ include_directories(../../src/wasm)
set(unittest_SOURCES
cfg.cpp
dfa_minimization.cpp
disjoint_sets.cpp
json.cpp
lattices.cpp
possible-contents.cpp
Expand Down
106 changes: 106 additions & 0 deletions test/gtest/disjoint_sets.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
/*
* Copyright 2024 WebAssembly Community Group participants
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "support/disjoint_sets.h"
#include "gtest/gtest.h"

using namespace wasm;

// Freshly added sets are distinct singletons, so each element is its own root.
TEST(DisjointSetsTest, NewSets) {
  DisjointSets sets;
  const size_t first = sets.addSet();
  const size_t second = sets.addSet();
  EXPECT_NE(first, second);

  // With no unions performed, the representative is the element itself.
  EXPECT_EQ(first, sets.getRoot(first));
  EXPECT_EQ(second, sets.getRoot(second));
}

// Joining two singletons yields one set whose root is one of the two elements.
TEST(DisjointSetsTest, Union) {
  DisjointSets sets;
  const size_t a = sets.addSet();
  const size_t b = sets.addSet();

  const size_t merged = sets.getUnion(a, b);
  // Which of the two becomes the representative is not pinned down here.
  EXPECT_TRUE(merged == a || merged == b);

  // Both members must now report the representative returned by getUnion.
  const size_t rootOfA = sets.getRoot(a);
  const size_t rootOfB = sets.getRoot(b);
  EXPECT_EQ(rootOfA, merged);
  EXPECT_EQ(rootOfB, merged);
}

// Two independent unions must produce two separate sets.
TEST(DisjointSetsTest, TwoUnions) {
  DisjointSets sets;
  const size_t first = sets.addSet();
  const size_t second = sets.addSet();
  const size_t third = sets.addSet();
  const size_t fourth = sets.addSet();

  // Pair up {first, third} and {second, fourth}.
  const size_t oddRoot = sets.getUnion(first, third);
  const size_t evenRoot = sets.getUnion(second, fourth);

  // Every element reports the root of its own pair...
  EXPECT_EQ(sets.getRoot(first), oddRoot);
  EXPECT_EQ(sets.getRoot(second), evenRoot);
  EXPECT_EQ(sets.getRoot(third), oddRoot);
  EXPECT_EQ(sets.getRoot(fourth), evenRoot);
  // ...and the two pairs stay disjoint.
  EXPECT_NE(oddRoot, evenRoot);
}

// Builds one set out of `count` elements by unioning each element with its
// predecessor, then checks that every element reports the same root.
TEST(DisjointSetsTest, UnionList) {
constexpr size_t count = 16;
DisjointSets sets;
size_t elems[count];
// Create `count` singleton sets and remember the index of each.
for (size_t i = 0; i < count; ++i) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also, can we standardize on i++ for such increments?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would prefer to standardize on ++i. For integers it obviously doesn't matter, but for nontrivial iterators it can make a big difference because postincrement requires making a copy of the iterator while preincrement does not.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In general that is true, but in a for loop like this, the output of ++i / i++ is not being read? The next time i is read is in the condition check, which reads the updated i anyhow.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Anyhow, I don't mean to block this PR on it, but I think it's separately worth making a choice here.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sg 👍

elems[i] = sets.addSet();
}

// Chain the elements together: each one is joined with the element before it,
// so after the loop all of them belong to a single set.
for (size_t i = 1; i < count; ++i) {
sets.getUnion(elems[i], elems[i - 1]);
}

// Every element must report the same root as the first element. The check is
// repeated so the second round runs after getRoot has already been called on
// every element and had the chance to compress their paths.
auto root = sets.getRoot(elems[0]);
for (size_t rep = 0; rep < 2; ++rep) {
for (size_t i = 0; i < count; ++i) {
auto currRoot = sets.getRoot(elems[i]);
EXPECT_EQ(currRoot, root);
}
}
}

// Builds one set out of `count` elements by merging equally sized groups in
// rounds (pairs, then groups of four, and so on), then checks that every
// element reports the same root.
TEST(DisjointSetsTest, UnionTree) {
  constexpr size_t count = 16;
  DisjointSets sets;
  size_t elems[count];
  for (size_t idx = 0; idx < count; ++idx) {
    elems[idx] = sets.addSet();
  }

  // In each round, the group starting at `left` is joined with the adjacent
  // group starting half a stride further along.
  for (size_t stride = 2; stride <= count; stride *= 2) {
    const size_t half = stride / 2;
    for (size_t left = 0; left < count; left += stride) {
      sets.getUnion(elems[left], elems[left + half]);
    }
  }

  // Query every element twice; the second pass runs after getRoot has already
  // been called on each element once.
  const size_t expected = sets.getRoot(elems[0]);
  for (size_t pass = 0; pass < 2; ++pass) {
    for (size_t elem : elems) {
      auto currRoot = sets.getRoot(elem);
      EXPECT_EQ(currRoot, expected);
    }
  }
}