Google OR-Tools v9.11
a fast and portable software suite for combinatorial optimization
Loading...
Searching...
No Matches
inclusion.h
Go to the documentation of this file.
1// Copyright 2010-2024 Google LLC
2// Licensed under the Apache License, Version 2.0 (the "License");
3// you may not use this file except in compliance with the License.
4// You may obtain a copy of the License at
5//
6// http://www.apache.org/licenses/LICENSE-2.0
7//
8// Unless required by applicable law or agreed to in writing, software
9// distributed under the License is distributed on an "AS IS" BASIS,
10// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11// See the License for the specific language governing permissions and
12// limitations under the License.
13
14#ifndef OR_TOOLS_SAT_INCLUSION_H_
15#define OR_TOOLS_SAT_INCLUSION_H_
16
17#include <stddef.h>
18#include <stdint.h>
19
20#include <algorithm>
21#include <cstdint>
22#include <functional>
23#include <limits>
24#include <tuple>
25#include <utility>
26#include <vector>
27
28#include "absl/log/check.h"
30
31namespace operations_research {
32namespace sat {
33
34// An helper class to process many sets of integer in [0, n] and detects all the
35// set included in each others. This is a common operations in presolve, and
36// while it can be slow the algorithm used here is pretty efficient in practice.
37//
38// The algorithm is based on the SAT preprocessing algorithm to detect clauses
39// that subsumes others. It uses a one-watcher scheme where each subset
40// candidate has only one element watched. To identify all potential subset of a
41// superset, one need to inspect the watch list for all element of the superset
42// candidate.
43//
44// The number n will be detected automatically but we allocate various vector
45// of size n, so avoid having large integer values in your sets.
46//
47// All set contents will be accessed via storage_[index].
48// This can be used with a vector<vector<>> or our CompactVectorVector that we
49// use in a few place. But it can also be anything that support:
50// - storage_.size()
51// - range iteration over storage_[index]
52// - storage_[index].size()
53template <class Storage>
55 public:
56 explicit InclusionDetector(const Storage& storage) : storage_(storage) {}
57
58 // Resets the class to an empty state.
59 void Reset() {
60 num_potential_subsets_ = 0;
61 num_potential_supersets_ = 0;
62 candidates_.clear();
63 }
64
65 // Adds a candidate set to consider for the next DetectInclusions() call.
66 // The argument is an index that will only be used via storage_[index] to get
67 // the content of the candidate set.
68 //
69 // Note that set with no element are just ignored and will never be returned
70 // as part of an inclusion.
71 void AddPotentialSubset(int index);
73 void AddPotentialSet(int index);
74
75 // By default we will detect all inclusions. It is possible to make sure we
76 // don't do more than O(work_limit) operations and eventually abort early by
77 // setting this. Note that we don't reset it on Reset().
78 //
79 // This is needed, because for m candidates of size n, we can have O(m ^ 2)
80 // inclusions, each requiring O(n) work to check.
81 void SetWorkLimit(uint64_t work_limit) { work_limit_ = work_limit; }
82
83 // Finds all subset included in a superset and call "process" on each of the
84 // detected inclusion. The std::function argument corresponds to indices
85 // passed to the Add*() calls.
86 //
87 // The order of detection will be by increasing superset size. For superset
88 // with the same size, the order will be deterministic but not specified. And
89 // similarly, for a given superset, the order of the included subsets is
90 // deterministic but not specified.
91 //
92 // Note that only the candidate marked as such can be a subset/superset.
93 // For the candidate than can be both and are duplicates (i.e. same set), only
94 // one pair will be returned. We will also never return identity inclusion and
95 // we always have subset != superset.
97 const std::function<void(int subset, int superset)>& process);
98
99 // Function that should only be used from within "process()".
100 // Returns the bitset corresponsing to the elements of the current superset
101 // passed to the process() function.
102 std::vector<bool> IsInSuperset() const { return is_in_superset_; }
103
104 // Function that should only be used from within "process()".
105 // Stop will abort the current search. The other two will cause the
106 // corresponding candidate set to never appear in any future inclusion.
107 void StopProcessingCurrentSubset() { stop_with_current_subset_ = true; }
108 void StopProcessingCurrentSuperset() { stop_with_current_superset_ = true; }
109 void Stop() {
110 stop_ = true;
111 signatures_.clear();
112 one_watcher_.clear();
113 is_in_superset_.clear();
114 }
115
116 // The algorithm here can detect many small set included in a big set while
117 // only scanning the superset once. So if we do scan the superset in the
118 // process function, we can do a lot more work. This is here to reuse the
119 // deterministic limit mechanism.
120 void IncreaseWorkDone(uint64_t increase) { work_done_ += increase; }
121
122 // Stats.
123 int num_potential_subsets() const { return num_potential_subsets_; }
124 int num_potential_supersets() const { return num_potential_supersets_; }
125 uint64_t work_done() const { return work_done_; }
126
127 private:
128 // Allows to access the elements of each candidates via storage_[index];
129 const Storage& storage_;
130
131 // List of candidates, this will be sorted.
132 struct Candidate {
133 int index; // Storage index.
134 int size;
135
136 // For identical sizes, we need this order for correctness
137 // 0: subset only, 1: both, 2: superset only.
138 int order = 1;
139
140 bool CanBeSubset() const { return order <= 1; }
141 bool CanBeSuperset() const { return order >= 1; }
142
143 // We use this with stable_sort, so no need to add the index.
144 bool operator<(const Candidate& other) const {
145 return std::tie(size, order) < std::tie(other.size, other.order);
146 }
147 };
148 std::vector<Candidate> candidates_;
149
150 int num_potential_subsets_ = 0;
151 int num_potential_supersets_ = 0;
152 uint64_t work_done_ = 0;
153 uint64_t work_limit_ = std::numeric_limits<uint64_t>::max();
154
155 // Temporary data only used by DetectInclusions().
156 bool stop_;
157 bool stop_with_current_subset_;
158 bool stop_with_current_superset_;
159 std::vector<uint64_t> signatures_;
160 std::vector<std::vector<int>> one_watcher_; // Index in candidates_.
161 std::vector<int> superset_elements_;
162 std::vector<bool> is_in_superset_;
163};
164
165// Deduction guide.
166template <typename Storage>
168
169template <typename Storage>
171 DCHECK_GE(index, 0);
172 DCHECK_LT(index, storage_.size());
173 const int num_elements = storage_[index].size();
174 if (num_elements == 0) return;
175
176 ++num_potential_subsets_;
177 ++num_potential_supersets_;
178 candidates_.push_back({index, num_elements, /*order=*/1});
179}
180
181template <typename Storage>
183 DCHECK_GE(index, 0);
184 DCHECK_LT(index, storage_.size());
185 const int num_elements = storage_[index].size();
186 if (num_elements == 0) return;
187
188 ++num_potential_subsets_;
189 candidates_.push_back({index, num_elements, /*order=*/0});
190}
191
192template <typename Storage>
194 DCHECK_GE(index, 0);
195 DCHECK_LT(index, storage_.size());
196 const int num_elements = storage_[index].size();
197 if (num_elements == 0) return;
198
199 DCHECK_GE(index, 0);
200 DCHECK_LT(index, storage_.size());
201 ++num_potential_supersets_;
202 candidates_.push_back({index, num_elements, /*order=*/2});
203}
204
205template <typename Storage>
207 const std::function<void(int subset, int superset)>& process) {
208 // No need to do any work in these cases.
209 if (candidates_.size() <= 1) return;
210 if (num_potential_subsets_ == 0) return;
211 if (num_potential_supersets_ == 0) return;
212
213 // Temp data must be ready to use.
214 stop_ = false;
215 DCHECK(is_in_superset_.empty());
216 DCHECK(signatures_.empty());
217 DCHECK(one_watcher_.empty());
218
219 // Main algo.
220 work_done_ = 0;
221 std::stable_sort(candidates_.begin(), candidates_.end());
222 for (const Candidate& candidate : candidates_) {
223 const auto& candidate_elements = storage_[candidate.index];
224 const int candidate_index = signatures_.size();
225
226 // Compute the signature and also resize vector if needed. We want a
227 // signature that is order invariant and is compatible with inclusion.
228 uint64_t signature = 0;
229 int max_element = 0;
230 for (const int e : candidate_elements) {
231 DCHECK_GE(e, 0);
232 max_element = std::max(max_element, e);
233 signature |= (int64_t{1} << (e & 63));
234 }
235 DCHECK_EQ(is_in_superset_.size(), one_watcher_.size());
236 if (max_element >= is_in_superset_.size()) {
237 is_in_superset_.resize(max_element + 1, false);
238 one_watcher_.resize(max_element + 1);
239 }
240 signatures_.push_back(signature);
241
242 stop_with_current_superset_ = false;
243 if (candidate.CanBeSuperset()) {
244 const Candidate& superset = candidate;
245
246 // Bitset should be cleared.
247 DCHECK(std::all_of(is_in_superset_.begin(), is_in_superset_.end(),
248 [](bool b) { return !b; }));
249
250 // Find any subset included in current superset.
251 work_done_ += 2 * superset.size;
252 if (work_done_ > work_limit_) return Stop();
253
254 // We make a copy because process() might alter the content of the
255 // storage when it returns "stop_with_current_superset_" and we need
256 // to clean is_in_superset_ properly.
257 //
258 // TODO(user): Alternatively, we could clean is_in_superset_ in the
259 // call to StopProcessingCurrentSuperset() and force client to call it
260 // before altering the superset content.
261 superset_elements_.assign(candidate_elements.begin(),
262 candidate_elements.end());
263 for (const int e : superset_elements_) {
264 is_in_superset_[e] = true;
265 }
266
267 const uint64_t superset_signature = signatures_.back();
268 for (const int superset_e : superset_elements_) {
269 for (int i = 0; i < one_watcher_[superset_e].size(); ++i) {
270 const int c_index = one_watcher_[superset_e][i];
271 const Candidate& subset = candidates_[c_index];
272 DCHECK_LE(subset.size, superset.size);
273
274 // Quick check with signature.
275 if ((signatures_[c_index] & ~superset_signature) != 0) continue;
276
277 // Long check with bitset.
278 bool is_included = true;
279 work_done_ += subset.size;
280 if (work_done_ > work_limit_) return Stop();
281 for (const int subset_e : storage_[subset.index]) {
282 if (!is_in_superset_[subset_e]) {
283 is_included = false;
284 break;
285 }
286 }
287 if (!is_included) continue;
288
289 stop_with_current_subset_ = false;
290 process(subset.index, superset.index);
291
292 if (stop_) return;
293 if (work_done_ > work_limit_) return Stop();
294
295 if (stop_with_current_subset_) {
296 // Remove from the watcher list.
297 std::swap(one_watcher_[superset_e][i],
298 one_watcher_[superset_e].back());
299 one_watcher_[superset_e].pop_back();
300 --i;
301 }
302 if (stop_with_current_superset_) break;
303 }
304 if (stop_with_current_superset_) break;
305 }
306
307 // Cleanup.
308 for (const int e : superset_elements_) {
309 is_in_superset_[e] = false;
310 }
311 }
312
313 // Add new subset candidate to the watchers.
314 //
315 // Tricky: If this was also a superset and has been removed, we don't want
316 // to watch it!
317 if (candidate.CanBeSubset() && !stop_with_current_superset_) {
318 // Choose to watch the one with smallest list.
319 int best_choice = -1;
320 work_done_ += candidate.size;
321 if (work_done_ > work_limit_) return Stop();
322 for (const int e : candidate_elements) {
323 DCHECK_GE(e, 0);
324 DCHECK_LT(e, one_watcher_.size());
325 if (best_choice == -1 ||
326 one_watcher_[e].size() < one_watcher_[best_choice].size()) {
327 best_choice = e;
328 }
329 }
330 DCHECK_NE(best_choice, -1);
331 one_watcher_[best_choice].push_back(candidate_index);
332 }
333 }
334
335 // Stop also performs some cleanup.
336 Stop();
337}
338
339} // namespace sat
340} // namespace operations_research
341
342#endif // OR_TOOLS_SAT_INCLUSION_H_
IntegerValue size
void IncreaseWorkDone(uint64_t increase)
Definition inclusion.h:120
void Reset()
Resets the class to an empty state.
Definition inclusion.h:59
std::vector< bool > IsInSuperset() const
Definition inclusion.h:102
void DetectInclusions(const std::function< void(int subset, int superset)> &process)
Definition inclusion.h:206
void SetWorkLimit(uint64_t work_limit)
Definition inclusion.h:81
InclusionDetector(const Storage &storage)
Definition inclusion.h:56
int index
InclusionDetector(const Storage &storage) -> InclusionDetector< Storage >
Deduction guide.
In SWIG mode, we don't want anything besides these top-level includes.