Google OR-Tools v9.15
a fast and portable software suite for combinatorial optimization
Loading...
Searching...
No Matches
connected_components.h
Go to the documentation of this file.
1// Copyright 2010-2025 Google LLC
2// Licensed under the Apache License, Version 2.0 (the "License");
3// you may not use this file except in compliance with the License.
4// You may obtain a copy of the License at
5//
6// http://www.apache.org/licenses/LICENSE-2.0
7//
8// Unless required by applicable law or agreed to in writing, software
9// distributed under the License is distributed on an "AS IS" BASIS,
10// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11// See the License for the specific language governing permissions and
12// limitations under the License.
13
14// Finds the connected components in an undirected graph:
15// https://en.wikipedia.org/wiki/Connected_component_(graph_theory)
16//
17// If you have a fixed graph where the node are dense integers, use
18// GetConnectedComponents(): it's very fast and uses little memory.
19//
20// If you have a more dynamic scenario where you want to incrementally
21// add nodes or edges and query the connectivity between them, use the
22// [Dense]ConnectedComponentsFinder class, which uses the union-find algorithm
23// aka disjoint sets: https://en.wikipedia.org/wiki/Disjoint-set_data_structure.
24
25#ifndef UTIL_GRAPH_CONNECTED_COMPONENTS_H_
26#define UTIL_GRAPH_CONNECTED_COMPONENTS_H_
27
28#include <functional>
29#include <map>
30#include <memory>
31#include <set>
32#include <type_traits>
33#include <vector>
34
35#include "absl/container/flat_hash_map.h"
36#include "absl/container/flat_hash_set.h"
37#include "absl/hash/hash.h"
38#include "absl/meta/type_traits.h"
41
42namespace util {
43// Generic version of GetConnectedComponents() (see below) that supports other
44// integer types, e.g. int64_t for huge graphs with more than 2^31 nodes.
45template <class UndirectedGraph, class NodeType>
46std::vector<NodeType> GetConnectedComponentsTpl(NodeType num_nodes,
47 const UndirectedGraph& graph);
48
49// Finds the connected components of the graph, using BFS internally.
50// Works on any *undirected* graph class whose nodes are dense integers and that
51// supports the [] operator for adjacency lists: graph[x] must be an integer
52// container listing the nodes that are adjacent to node #x.
53// Example: std::vector<std::vector<int>>.
54//
55// "Undirected" means that for all y in graph[x], x is in graph[y].
56//
57// Returns the mapping from node to component index. The component indices are
58// deterministic: Component #0 will be the one that has node #0, component #1
59// the one that has the lowest-index node that isn't in component #0, and so on.
60//
61// Example on the following 6-node graph: 5--3--0--1 2--4
62// vector<vector<int>> graph = {{1, 3}, {0}, {4}, {0, 5}, {2}, {3}};
63// GetConnectedComponents(graph); // returns [0, 0, 1, 0, 1, 0].
64template <class UndirectedGraph>
65std::vector<int> GetConnectedComponents(int num_nodes,
66 const UndirectedGraph& graph) {
67 return GetConnectedComponentsTpl(num_nodes, graph);
68}
69} // namespace util
70
71// NOTE(user): The rest of the functions below should also be in namespace
72// util, but for historical reasons it hasn't been done yet.
73
74// A connected components finder that only works on dense ints.
76 public:
78
79 // We support copy and move construction.
81 default;
83 const DenseConnectedComponentsFinder&) = default;
86 default;
87
88 // The main API is the same as ConnectedComponentsFinder (below): see the
89 // homonymous functions there.
90 bool AddEdge(int node1, int node2);
91 bool Connected(int node1, int node2);
92 int GetSize(int node);
93 int GetNumberOfComponents() const { return num_components_; }
94 int GetNumberOfNodes() const { return parent_.size(); }
95
96 // Gets the current set of root nodes in sorted order. Runs in amortized
97 // O(#components) time.
98 const std::vector<int>& GetComponentRoots();
99
100 // Sets the number of nodes in the graph. The graph can only grow: this
101 // dies if "num_nodes" is lower or equal to any of the values ever given
102 // to AddEdge(), or lower than a previous value given to SetNumberOfNodes().
103 // You need this if there are nodes that don't have any edges.
104 void SetNumberOfNodes(int num_nodes);
105
106 // Returns the root of the set for the given node. node must be in
107 // [0;GetNumberOfNodes()-1].
108 // Non-const because it does path compression internally.
109 int FindRoot(int node);
110
111 // Returns the parent of the given node, in the chain towards the root of the
112 // set for this node. Returns the node itself if it is a root.
113 int GetParent(int node) const { return parent_[node]; }
114
115 // Returns the same as GetConnectedComponents().
116 std::vector<int> GetComponentIds();
117
118 private:
119 // parent[i] is the id of an ancestor for node i. A node is a root iff
120 // parent[i] == i.
121 std::vector<int> parent_;
122 // If i is a root, component_size_[i] is the number of elements in the
123 // component. If i is not a root, component_size_[i] is meaningless.
124 std::vector<int> component_size_;
125 // rank[i] is the depth of the tree.
126 std::vector<int> rank_;
127 // Number of connected components.
128 int num_components_ = 0;
129 // The current roots. This is maintained lazily by GetComponentRoots().
130 std::vector<int> roots_;
131 // The number of nodes that existed the last time GetComponentRoots() was
132 // called.
133 int num_nodes_at_last_get_roots_call_ = 0;
134};
135
136namespace internal {
137// A helper to deduce the type of map to use depending on whether CompareOrHashT
138// is a comparator or a hasher (prefer the latter).
139template <typename T, typename CompareOrHashT, typename Eq>
141 // SFINAE trait to detect hash functors and select unordered containers if so,
142 // and ordered containers otherwise (= by default).
143 template <typename U, typename V, typename E = void>
145 using Set = std::set<T, CompareOrHashT>;
146 using Map = std::map<T, int, CompareOrHashT>;
147 };
148
149 // Specialization for when U is a hash functor and Eq is void (no custom
150 // equality).
151 // The expression inside decltype is basically saying that "H(x)" is
152 // well-formed, where H is an instance of U and x is an instance of T, and is
153 // a value of integral type. That is, we are "duck-typing" on whether U looks
154 // like a hash functor.
155 template <typename U, typename V>
157 U, V,
158 absl::enable_if_t<std::is_integral<decltype(std::declval<const U&>()(
159 std::declval<const T&>()))>::value &&
160 std::is_same_v<V, void>>> {
161 using Set = absl::flat_hash_set<T, CompareOrHashT>;
162 using Map = absl::flat_hash_map<T, int, CompareOrHashT>;
163 };
164
165 // Specialization for when U is a hash functor and Eq is provided (not void).
166 template <typename U, typename V>
168 U, V,
169 absl::enable_if_t<std::is_integral<decltype(std::declval<const U&>()(
170 std::declval<const T&>()))>::value &&
171 !std::is_same_v<V, void>>> {
172 using Set = absl::flat_hash_set<T, CompareOrHashT, Eq>;
173 using Map = absl::flat_hash_map<T, int, CompareOrHashT, Eq>;
174 };
175
178};
179
180} // namespace internal
181
182// Usage:
183// ConnectedComponentsFinder<MyNodeType> cc;
184// cc.AddNode(node1);
185// cc.AddNode(node2);
186// cc.AddEdge(node1, node2);
187// ... repeating, adding nodes and edges as needed. Adding an edge
188// will automatically also add the two nodes at its ends, if they
189// haven't already been added.
190// vector<set<MyNodeType> > components;
191// cc.FindConnectedComponents(&components);
192// Each entry in components now contains all the nodes in a single
193// connected component.
194//
195// Protocol buffers can be used as the node type. Equality and hash functions
196// for protocol buffers can be found in ortools/base/message_hasher.h.
197//
198// Usage with flat_hash_set:
199// using ConnectedComponentType = flat_hash_set<MyNodeType>;
200// ConnectedComponentsFinder<ConnectedComponentType::key_type,
201// ConnectedComponentType::hasher,
202// ConnectedComponentType::key_equal>
203// cc;
204// ...
205// vector<ConnectedComponentType> components;
206// cc.FindConnectedComponents(&components);
207//
208// If you want to, you can continue adding nodes and edges after calling
209// FindConnectedComponents, then call it again later.
210//
211// If your node type isn't STL-friendly, then you can use pointers to
212// it instead:
213// ConnectedComponentsFinder<MySTLUnfriendlyNodeType*> cc;
214// cc.AddNode(&node1);
215// ... and so on...
216// Of course, in this usage, the connected components finder retains
217// these pointers through its lifetime (though it doesn't dereference them).
218template <typename T, typename CompareOrHashT = std::less<T>,
219 typename Eq = void>
221 public:
222 using Set =
223 typename internal::ConnectedComponentsTypeHelper<T, CompareOrHashT,
224 Eq>::Set;
225
226 // Constructs a connected components finder.
228
231 delete;
232
233 // Adds a node in the graph. It is OK to add the same node more than
234 // once; additions after the first have no effect.
235 void AddNode(T node) { LookupOrInsertNode<true>(node); }
236
237 // Adds an edge in the graph. Also adds both endpoint nodes as necessary.
238 // It is not an error to add the same edge twice. Self-edges are OK too.
239 // Returns true if the two nodes are newly connected, and false if they were
240 // already connected.
241 bool AddEdge(T node1, T node2) {
242 return delegate_.AddEdge(LookupOrInsertNode<false>(node1),
243 LookupOrInsertNode<false>(node2));
244 }
245
246 // Returns true iff both nodes are in the same connected component.
247 // Returns false if either node has not been already added with AddNode.
248 bool Connected(T node1, T node2) {
249 return delegate_.Connected(gtl::FindWithDefault(index_, node1, -1),
250 gtl::FindWithDefault(index_, node2, -1));
251 }
252
253 // Finds the connected component containing a node, and returns the
254 // total number of nodes in that component. Returns zero iff the
255 // node has not been already added with AddNode.
256 int GetSize(T node) {
257 return delegate_.GetSize(gtl::FindWithDefault(index_, node, -1));
258 }
259
260 // Finds all the connected components and assigns them to components.
261 // Components are ordered in the same way nodes were added, i.e. if node 'b'
262 // was added before node 'c', then either:
263 // - 'c' belongs to the same component as a node 'a' added before 'b', or
264 // - the component for 'c' comes after the one for 'b'.
265 // There are two versions:
266 // - The first one returns the result, and stores each component in a vector.
267 // This is the preferred version.
268 // - The second one populates the result, and stores each component in a set.
269 std::vector<std::vector<T>> FindConnectedComponents() {
270 const auto component_ids = delegate_.GetComponentIds();
271 std::vector<std::vector<T>> components(delegate_.GetNumberOfComponents());
272 for (const auto& elem_id : index_) {
273 components[component_ids[elem_id.second]].push_back(elem_id.first);
274 }
275 return components;
276 }
277 void FindConnectedComponents(std::vector<Set>* components) {
278 const auto component_ids = delegate_.GetComponentIds();
279 components->clear();
280 components->resize(delegate_.GetNumberOfComponents());
281 for (const auto& elem_id : index_) {
282 components->at(component_ids[elem_id.second]).insert(elem_id.first);
283 }
284 }
285
286 // Returns the current number of connected components.
287 // This number can change as the new nodes or edges are added.
289 return delegate_.GetNumberOfComponents();
290 }
291
292 // Returns the current number of added distinct nodes.
293 // This includes nodes added explicitly via the calls to AddNode() method
294 // and implicitly via the calls to AddEdge() method.
295 // Nodes that were added several times only count once.
296 int GetNumberOfNodes() const { return delegate_.GetNumberOfNodes(); }
297
298 private:
299 // Returns the index for the given node. If the node does not exist and
300 // update_delegate is true, explicitly add the node to the delegate.
301 template <bool update_delegate>
302 int LookupOrInsertNode(T node) {
303 const auto result = index_.emplace(node, index_.size());
304 const int node_id = result.first->second;
305 if (update_delegate && result.second) {
306 // A new index was created.
307 delegate_.SetNumberOfNodes(node_id + 1);
308 }
309 return node_id;
310 }
311
312 DenseConnectedComponentsFinder delegate_;
314 index_;
315};
316
317// =============================================================================
318// Implementations of the method templates
319// =============================================================================
320namespace util {
321template <class UndirectedGraph, typename NodeType>
322std::vector<NodeType> GetConnectedComponentsTpl(NodeType num_nodes,
323 const UndirectedGraph& graph) {
324 // We use 'num_nodes' as special component id meaning 'unknown', because
325 // it's of the right type, and -1 is tricky to use with unsigned ints.
326 std::vector<NodeType> component_of_node(num_nodes, num_nodes);
327 std::vector<NodeType> bfs_queue;
328 NodeType num_components = 0;
329 for (NodeType src = 0; src < num_nodes; ++src) {
330 if (component_of_node[src] != num_nodes) continue;
331 bfs_queue.push_back(src);
332 component_of_node[src] = num_components;
333 for (size_t num_visited = 0; num_visited < bfs_queue.size();
334 ++num_visited) {
335 const NodeType node = bfs_queue[num_visited];
336 for (const NodeType neighbor : graph[node]) {
337 if (component_of_node[neighbor] != num_nodes) continue;
338 component_of_node[neighbor] = num_components;
339 bfs_queue.push_back(neighbor);
340 }
341 }
342 ++num_components;
343 bfs_queue.clear();
344 }
345 return component_of_node;
346}
347
348} // namespace util
349
350#endif // UTIL_GRAPH_CONNECTED_COMPONENTS_H_
ConnectedComponentsFinder()=default
ConnectedComponentsFinder(const ConnectedComponentsFinder &)=delete
typename internal::ConnectedComponentsTypeHelper< T, CompareOrHashT, Eq >::Set Set
bool Connected(T node1, T node2)
void FindConnectedComponents(std::vector< Set > *components)
std::vector< std::vector< T > > FindConnectedComponents()
bool AddEdge(T node1, T node2)
ConnectedComponentsFinder & operator=(const ConnectedComponentsFinder &)=delete
bool Connected(int node1, int node2)
bool AddEdge(int node1, int node2)
DenseConnectedComponentsFinder(const DenseConnectedComponentsFinder &)=default
DenseConnectedComponentsFinder & operator=(DenseConnectedComponentsFinder &&)=default
DenseConnectedComponentsFinder & operator=(const DenseConnectedComponentsFinder &)=default
const std::vector< int > & GetComponentRoots()
DenseConnectedComponentsFinder(DenseConnectedComponentsFinder &&)=default
const MapUtilMappedT< Collection > & FindWithDefault(const Collection &collection, const KeyType &key, const MapUtilMappedT< Collection > &value)
Definition map_util.h:36
std::vector< int > GetConnectedComponents(int num_nodes, const UndirectedGraph &graph)
std::vector< NodeType > GetConnectedComponentsTpl(NodeType num_nodes, const UndirectedGraph &graph)
typename SelectContainer< CompareOrHashT, Eq >::Set Set
typename SelectContainer< CompareOrHashT, Eq >::Map Map