Google OR-Tools v9.11
a fast and portable software suite for combinatorial optimization
Loading...
Searching...
No Matches
connected_components.h
Go to the documentation of this file.
1// Copyright 2010-2024 Google LLC
2// Licensed under the Apache License, Version 2.0 (the "License");
3// you may not use this file except in compliance with the License.
4// You may obtain a copy of the License at
5//
6// http://www.apache.org/licenses/LICENSE-2.0
7//
8// Unless required by applicable law or agreed to in writing, software
9// distributed under the License is distributed on an "AS IS" BASIS,
10// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11// See the License for the specific language governing permissions and
12// limitations under the License.
13
14//
15// Licensed under the Apache License, Version 2.0 (the "License");
16// you may not use this file except in compliance with the License.
17// You may obtain a copy of the License at
18//
19// http://www.apache.org/licenses/LICENSE-2.0
20//
21// Unless required by applicable law or agreed to in writing, software
22// distributed under the License is distributed on an "AS IS" BASIS,
23// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
24// See the License for the specific language governing permissions and
25// limitations under the License.
26
27// Finds the connected components in an undirected graph:
28// https://en.wikipedia.org/wiki/Connected_component_(graph_theory)
29//
30// If you have a fixed graph where the node are dense integers, use
31// GetConnectedComponents(): it's very fast and uses little memory.
32//
33// If you have a more dynamic scenario where you want to incrementally
34// add nodes or edges and query the connectivity between them, use the
35// [Dense]ConnectedComponentsFinder class, which uses the union-find algorithm
36// aka disjoint sets: https://en.wikipedia.org/wiki/Disjoint-set_data_structure.
37
38#ifndef UTIL_GRAPH_CONNECTED_COMPONENTS_H_
39#define UTIL_GRAPH_CONNECTED_COMPONENTS_H_
40
41#include <functional>
42#include <map>
43#include <memory>
44#include <set>
45#include <type_traits>
46#include <vector>
47
48#include "absl/container/flat_hash_map.h"
49#include "absl/container/flat_hash_set.h"
50#include "absl/hash/hash.h"
51#include "absl/meta/type_traits.h"
54
55namespace util {
56// Generic version of GetConnectedComponents() (see below) that supports other
57// integer types, e.g. int64_t for huge graphs with more than 2^31 nodes.
58template <class UndirectedGraph, class NodeType>
59std::vector<NodeType> GetConnectedComponentsTpl(NodeType num_nodes,
60 const UndirectedGraph& graph);
61
62// Finds the connected components of the graph, using BFS internally.
63// Works on any *undirected* graph class whose nodes are dense integers and that
64// supports the [] operator for adjacency lists: graph[x] must be an integer
65// container listing the nodes that are adjacent to node #x.
66// Example: std::vector<std::vector<int>>.
67//
68// "Undirected" means that for all y in graph[x], x is in graph[y].
69//
70// Returns the mapping from node to component index. The component indices are
71// deterministic: Component #0 will be the one that has node #0, component #1
72// the one that has the lowest-index node that isn't in component #0, and so on.
73//
74// Example on the following 6-node graph: 5--3--0--1 2--4
75// vector<vector<int>> graph = {{1, 3}, {0}, {4}, {0, 5}, {2}, {3}};
76// GetConnectedComponents(graph); // returns [0, 0, 1, 0, 1, 0].
77template <class UndirectedGraph>
78std::vector<int> GetConnectedComponents(int num_nodes,
79 const UndirectedGraph& graph) {
80 return GetConnectedComponentsTpl(num_nodes, graph);
81}
82} // namespace util
83
84// NOTE(user): The rest of the functions below should also be in namespace
85// util, but for historical reasons it hasn't been done yet.
86
87// A connected components finder that only works on dense ints.
89 public:
91
92 // We support copy and move construction.
94 default;
96 const DenseConnectedComponentsFinder&) = default;
99 default;
100
101 // The main API is the same as ConnectedComponentsFinder (below): see the
102 // homonymous functions there.
103 bool AddEdge(int node1, int node2);
104 bool Connected(int node1, int node2);
105 int GetSize(int node);
106 int GetNumberOfComponents() const { return num_components_; }
107 int GetNumberOfNodes() const { return parent_.size(); }
108
109 // Gets the current set of root nodes in sorted order. Runs in amortized
110 // O(#components) time.
111 const std::vector<int>& GetComponentRoots();
112
113 // Sets the number of nodes in the graph. The graph can only grow: this
114 // dies if "num_nodes" is lower or equal to any of the values ever given
115 // to AddEdge(), or lower than a previous value given to SetNumberOfNodes().
116 // You need this if there are nodes that don't have any edges.
117 void SetNumberOfNodes(int num_nodes);
118
119 // Returns the root of the set for the given node. node must be in
120 // [0;GetNumberOfNodes()-1].
121 // Non-const because it does path compression internally.
122 int FindRoot(int node);
123
124 // Returns the same as GetConnectedComponents().
125 std::vector<int> GetComponentIds();
126
127 private:
128 // parent[i] is the id of an ancestor for node i. A node is a root iff
129 // parent[i] == i.
130 std::vector<int> parent_;
131 // If i is a root, component_size_[i] is the number of elements in the
132 // component. If i is not a root, component_size_[i] is meaningless.
133 std::vector<int> component_size_;
134 // rank[i] is the depth of the tree.
135 std::vector<int> rank_;
136 // Number of connected components.
137 int num_components_ = 0;
138 // The current roots. This is maintained lazily by GetComponentRoots().
139 std::vector<int> roots_;
140 // The number of nodes that existed the last time GetComponentRoots() was
141 // called.
142 int num_nodes_at_last_get_roots_call_ = 0;
143};
144
145namespace internal {
146// A helper to deduce the type of map to use depending on whether CompareOrHashT
147// is a comparator or a hasher (prefer the latter).
148template <typename T, typename CompareOrHashT, typename Eq>
150 // SFINAE trait to detect hash functors and select unordered containers if so,
151 // and ordered containers otherwise (= by default).
152 template <typename U, typename V, typename E = void>
154 using Set = std::set<T, CompareOrHashT>;
155 using Map = std::map<T, int, CompareOrHashT>;
156 };
157
158 // Specialization for when U is a hash functor and Eq is void (no custom
159 // equality).
160 // The expression inside decltype is basically saying that "H(x)" is
161 // well-formed, where H is an instance of U and x is an instance of T, and is
162 // a value of integral type. That is, we are "duck-typing" on whether U looks
163 // like a hash functor.
164 template <typename U, typename V>
166 U, V,
167 absl::enable_if_t<std::is_integral<decltype(std::declval<const U&>()(
168 std::declval<const T&>()))>::value &&
169 std::is_same_v<V, void>>> {
170 using Set = absl::flat_hash_set<T, CompareOrHashT>;
171 using Map = absl::flat_hash_map<T, int, CompareOrHashT>;
172 };
173
174 // Specialization for when U is a hash functor and Eq is provided (not void).
175 template <typename U, typename V>
177 U, V,
178 absl::enable_if_t<std::is_integral<decltype(std::declval<const U&>()(
179 std::declval<const T&>()))>::value &&
180 !std::is_same_v<V, void>>> {
181 using Set = absl::flat_hash_set<T, CompareOrHashT, Eq>;
182 using Map = absl::flat_hash_map<T, int, CompareOrHashT, Eq>;
183 };
184
187};
188
189} // namespace internal
190
191// Usage:
192// ConnectedComponentsFinder<MyNodeType> cc;
193// cc.AddNode(node1);
194// cc.AddNode(node2);
195// cc.AddEdge(node1, node2);
196// ... repeating, adding nodes and edges as needed. Adding an edge
197// will automatically also add the two nodes at its ends, if they
198// haven't already been added.
199// vector<set<MyNodeType> > components;
200// cc.FindConnectedComponents(&components);
201// Each entry in components now contains all the nodes in a single
202// connected component.
203//
204// Protocol buffers can be used as the node type. Equality and hash functions
205// for protocol buffers can be found in ortools/base/message_hasher.h.
206//
207// Usage with flat_hash_set:
208// using ConnectedComponentType = flat_hash_set<MyNodeType>;
209// ConnectedComponentsFinder<ConnectedComponentType::key_type,
210// ConnectedComponentType::hasher,
211// ConnectedComponentType::key_equal>
212// cc;
213// ...
214// vector<ConnectedComponentType> components;
215// cc.FindConnectedComponents(&components);
216//
217// If you want to, you can continue adding nodes and edges after calling
218// FindConnectedComponents, then call it again later.
219//
220// If your node type isn't STL-friendly, then you can use pointers to
221// it instead:
222// ConnectedComponentsFinder<MySTLUnfriendlyNodeType*> cc;
223// cc.AddNode(&node1);
224// ... and so on...
225// Of course, in this usage, the connected components finder retains
226// these pointers through its lifetime (though it doesn't dereference them).
227template <typename T, typename CompareOrHashT = std::less<T>,
228 typename Eq = void>
230 public:
231 using Set =
232 typename internal::ConnectedComponentsTypeHelper<T, CompareOrHashT,
233 Eq>::Set;
234
235 // Constructs a connected components finder.
237
240 delete;
241
242 // Adds a node in the graph. It is OK to add the same node more than
243 // once; additions after the first have no effect.
244 void AddNode(T node) { LookupOrInsertNode<true>(node); }
245
246 // Adds an edge in the graph. Also adds both endpoint nodes as necessary.
247 // It is not an error to add the same edge twice. Self-edges are OK too.
248 // Returns true if the two nodes are newly connected, and false if they were
249 // already connected.
250 bool AddEdge(T node1, T node2) {
251 return delegate_.AddEdge(LookupOrInsertNode<false>(node1),
252 LookupOrInsertNode<false>(node2));
253 }
254
255 // Returns true iff both nodes are in the same connected component.
256 // Returns false if either node has not been already added with AddNode.
257 bool Connected(T node1, T node2) {
258 return delegate_.Connected(gtl::FindWithDefault(index_, node1, -1),
259 gtl::FindWithDefault(index_, node2, -1));
260 }
261
262 // Finds the connected component containing a node, and returns the
263 // total number of nodes in that component. Returns zero iff the
264 // node has not been already added with AddNode.
265 int GetSize(T node) {
266 return delegate_.GetSize(gtl::FindWithDefault(index_, node, -1));
267 }
268
269 // Finds all the connected components and assigns them to components.
270 // Components are ordered in the same way nodes were added, i.e. if node 'b'
271 // was added before node 'c', then either:
272 // - 'c' belongs to the same component as a node 'a' added before 'b', or
273 // - the component for 'c' comes after the one for 'b'.
274 // There are two versions:
275 // - The first one returns the result, and stores each component in a vector.
276 // This is the preferred version.
277 // - The second one populates the result, and stores each component in a set.
278 std::vector<std::vector<T>> FindConnectedComponents() {
279 const auto component_ids = delegate_.GetComponentIds();
280 std::vector<std::vector<T>> components(delegate_.GetNumberOfComponents());
281 for (const auto& elem_id : index_) {
282 components[component_ids[elem_id.second]].push_back(elem_id.first);
283 }
284 return components;
285 }
286 void FindConnectedComponents(std::vector<Set>* components) {
287 const auto component_ids = delegate_.GetComponentIds();
288 components->clear();
289 components->resize(delegate_.GetNumberOfComponents());
290 for (const auto& elem_id : index_) {
291 components->at(component_ids[elem_id.second]).insert(elem_id.first);
292 }
293 }
294
295 // Returns the current number of connected components.
296 // This number can change as the new nodes or edges are added.
298 return delegate_.GetNumberOfComponents();
299 }
300
301 // Returns the current number of added distinct nodes.
302 // This includes nodes added explicitly via the calls to AddNode() method
303 // and implicitly via the calls to AddEdge() method.
304 // Nodes that were added several times only count once.
305 int GetNumberOfNodes() const { return delegate_.GetNumberOfNodes(); }
306
307 private:
308 // Returns the index for the given node. If the node does not exist and
309 // update_delegate is true, explicitly add the node to the delegate.
310 template <bool update_delegate>
311 int LookupOrInsertNode(T node) {
312 const auto result = index_.emplace(node, index_.size());
313 const int node_id = result.first->second;
314 if (update_delegate && result.second) {
315 // A new index was created.
316 delegate_.SetNumberOfNodes(node_id + 1);
317 }
318 return node_id;
319 }
320
323 index_;
324};
325
326// =============================================================================
327// Implementations of the method templates
328// =============================================================================
329namespace util {
330template <class UndirectedGraph, typename NodeType>
331std::vector<NodeType> GetConnectedComponentsTpl(NodeType num_nodes,
332 const UndirectedGraph& graph) {
333 // We use 'num_nodes' as special component id meaning 'unknown', because
334 // it's of the right type, and -1 is tricky to use with unsigned ints.
335 std::vector<NodeType> component_of_node(num_nodes, num_nodes);
336 std::vector<NodeType> bfs_queue;
337 NodeType num_components = 0;
338 for (NodeType src = 0; src < num_nodes; ++src) {
339 if (component_of_node[src] != num_nodes) continue;
340 bfs_queue.push_back(src);
341 component_of_node[src] = num_components;
342 for (size_t num_visited = 0; num_visited < bfs_queue.size();
343 ++num_visited) {
344 const NodeType node = bfs_queue[num_visited];
345 for (const NodeType neighbor : graph[node]) {
346 if (component_of_node[neighbor] != num_nodes) continue;
347 component_of_node[neighbor] = num_components;
348 bfs_queue.push_back(neighbor);
349 }
350 }
351 ++num_components;
352 bfs_queue.clear();
353 }
354 return component_of_node;
355}
356
357} // namespace util
358
359#endif // UTIL_GRAPH_CONNECTED_COMPONENTS_H_
ConnectedComponentsFinder(const ConnectedComponentsFinder &)=delete
typename internal::ConnectedComponentsTypeHelper< T, CompareOrHashT, Eq >::Set Set
bool Connected(T node1, T node2)
void FindConnectedComponents(std::vector< Set > *components)
std::vector< std::vector< T > > FindConnectedComponents()
ConnectedComponentsFinder()
Constructs a connected components finder.
bool AddEdge(T node1, T node2)
ConnectedComponentsFinder & operator=(const ConnectedComponentsFinder &)=delete
A connected components finder that only works on dense ints.
bool Connected(int node1, int node2)
bool AddEdge(int node1, int node2)
DenseConnectedComponentsFinder(const DenseConnectedComponentsFinder &)=default
We support copy and move construction.
std::vector< int > GetComponentIds()
Returns the same as GetConnectedComponents().
DenseConnectedComponentsFinder & operator=(DenseConnectedComponentsFinder &&)=default
DenseConnectedComponentsFinder & operator=(const DenseConnectedComponentsFinder &)=default
const std::vector< int > & GetComponentRoots()
DenseConnectedComponentsFinder(DenseConnectedComponentsFinder &&)=default
GraphType graph
const MapUtilMappedT< Collection > & FindWithDefault(const Collection &collection, const KeyType &key, const MapUtilMappedT< Collection > &value)
Definition map_util.h:36
A collections of i/o utilities for the Graph classes in ./graph.h.
std::vector< int > GetConnectedComponents(int num_nodes, const UndirectedGraph &graph)
std::vector< NodeType > GetConnectedComponentsTpl(NodeType num_nodes, const UndirectedGraph &graph)
typename SelectContainer< CompareOrHashT, Eq >::Set Set
typename SelectContainer< CompareOrHashT, Eq >::Map Map