Google OR-Tools v9.12
a fast and portable software suite for combinatorial optimization
Loading...
Searching...
No Matches
inclusion.h
Go to the documentation of this file.
1// Copyright 2010-2025 Google LLC
2// Licensed under the Apache License, Version 2.0 (the "License");
3// you may not use this file except in compliance with the License.
4// You may obtain a copy of the License at
5//
6// http://www.apache.org/licenses/LICENSE-2.0
7//
8// Unless required by applicable law or agreed to in writing, software
9// distributed under the License is distributed on an "AS IS" BASIS,
10// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11// See the License for the specific language governing permissions and
12// limitations under the License.
13
14#ifndef OR_TOOLS_SAT_INCLUSION_H_
15#define OR_TOOLS_SAT_INCLUSION_H_
16
17#include <stddef.h>
18#include <stdint.h>
19
20#include <algorithm>
21#include <cstdint>
22#include <functional>
23#include <limits>
24#include <tuple>
25#include <utility>
26#include <vector>
27
28#include "absl/log/check.h"
29#include "absl/types/span.h"
31#include "ortools/sat/util.h"
32#include "ortools/util/bitset.h"
34
35namespace operations_research {
36namespace sat {
37
38// An helper class to process many sets of integer in [0, n] and detects all the
39// set included in each others. This is a common operations in presolve, and
40// while it can be slow the algorithm used here is pretty efficient in practice.
41//
42// The algorithm is based on the SAT preprocessing algorithm to detect clauses
43// that subsumes others. It uses a one-watcher scheme where each subset
44// candidate has only one element watched. To identify all potential subset of a
45// superset, one need to inspect the watch list for all element of the superset
46// candidate.
47//
48// The number n will be detected automatically but we allocate various vector
49// of size n, so avoid having large integer values in your sets.
50//
51// All set contents will be accessed via storage_[index].
52// This can be used with a vector<vector<>> or our CompactVectorVector that we
53// use in a few place. But it can also be anything that support:
54// - storage_.size()
55// - range iteration over storage_[index]
56// - storage_[index].size()
57template <class Storage>
59 public:
60 InclusionDetector(const Storage& storage, TimeLimit* time_limit)
61 : storage_(storage), time_limit_(time_limit) {}
62
63 // Resets the class to an empty state.
64 void Reset() {
65 num_potential_subsets_ = 0;
66 num_potential_supersets_ = 0;
67 candidates_.clear();
68 }
69
70 // Adds a candidate set to consider for the next DetectInclusions() call.
71 // The argument is an index that will only be used via storage_[index] to get
72 // the content of the candidate set.
73 //
74 // Note that set with no element are just ignored and will never be returned
75 // as part of an inclusion.
76 void AddPotentialSubset(int index);
77 void AddPotentialSuperset(int index);
78 void AddPotentialSet(int index);
79
80 // By default we will detect all inclusions. It is possible to make sure we
81 // don't do more than O(work_limit) operations and eventually abort early by
82 // setting this. Note that we don't reset it on Reset().
83 //
84 // This is needed, because for m candidates of size n, we can have O(m ^ 2)
85 // inclusions, each requiring O(n) work to check.
86 void SetWorkLimit(uint64_t work_limit) { work_limit_ = work_limit; }
87
88 // Finds all subset included in a superset and call "process" on each of the
89 // detected inclusion. The std::function argument corresponds to indices
90 // passed to the Add*() calls.
91 //
92 // The order of detection will be by increasing superset size. For superset
93 // with the same size, the order will be deterministic but not specified. And
94 // similarly, for a given superset, the order of the included subsets is
95 // deterministic but not specified.
96 //
97 // Note that only the candidate marked as such can be a subset/superset.
98 // For the candidate than can be both and are duplicates (i.e. same set), only
99 // one pair will be returned. We will also never return identity inclusion and
100 // we always have subset != superset.
101 void DetectInclusions(
102 const std::function<void(int subset, int superset)>& process);
103
104 // Function that should only be used from within "process()".
105 // Stop will abort the current search. The other two will cause the
106 // corresponding candidate set to never appear in any future inclusion.
107 void StopProcessingCurrentSubset() { stop_with_current_subset_ = true; }
108 void StopProcessingCurrentSuperset() { stop_with_current_superset_ = true; }
109 void Stop() {
110 stop_ = true;
111 signatures_.clear();
112 one_watcher_.clear();
113 is_in_superset_.resize(0);
114 }
115
116 // The algorithm here can detect many small set included in a big set while
117 // only scanning the superset once. So if we do scan the superset in the
118 // process function, we can do a lot more work. This is here to reuse the
119 // deterministic limit mechanism.
120 void IncreaseWorkDone(uint64_t increase) { work_done_ += increase; }
121
122 // Stats.
123 int num_potential_subsets() const { return num_potential_subsets_; }
124 int num_potential_supersets() const { return num_potential_supersets_; }
125 uint64_t work_done() const { return work_done_; }
126 bool Stopped() const { return stop_; }
127
128 private:
129 // Allows to access the elements of each candidates via storage_[index];
130 const Storage& storage_;
131
132 TimeLimit* time_limit_;
133
134 // List of candidates, this will be sorted.
135 struct Candidate {
136 int index; // Storage index.
137 int size;
138
139 // For identical sizes, we need this order for correctness
140 // 0: subset only, 1: both, 2: superset only.
141 int order = 1;
142
143 bool CanBeSubset() const { return order <= 1; }
144 bool CanBeSuperset() const { return order >= 1; }
145
146 // We use this with stable_sort, so no need to add the index.
147 bool operator<(const Candidate& other) const {
148 return std::tie(size, order) < std::tie(other.size, other.order);
149 }
150 };
151 std::vector<Candidate> candidates_;
152
153 int num_potential_subsets_ = 0;
154 int num_potential_supersets_ = 0;
155 uint64_t work_done_ = 0;
156 uint64_t work_limit_ = std::numeric_limits<uint64_t>::max();
157
158 bool stop_ = false;
159 bool stop_with_current_subset_ = false;
160 bool stop_with_current_superset_ = false;
161 std::vector<uint64_t> signatures_;
162 std::vector<std::vector<int>> one_watcher_; // Index in candidates_.
163 std::vector<int> superset_elements_;
164 Bitset64<int> is_in_superset_;
165};
166
167// Similar API and purpose to InclusionDetector. But this one is a bit simpler
168// and faster if it fit your usage. This assume an initial given set of
169// potential subsets, that will be queried against supersets one by one.
170template <class Storage>
172 public:
173 SubsetsDetector(const Storage& storage, TimeLimit* time_limit)
174 : storage_(storage), time_limit_(time_limit) {}
175
176 void SetWorkLimit(uint64_t work_limit) { work_limit_ = work_limit; }
177 void StopProcessingCurrentSubset() { stop_with_current_subset_ = true; }
178 void StopProcessingCurrentSuperset() { stop_with_current_superset_ = true; }
179 void Stop() {
180 stop_ = true;
181 one_watcher_.clear();
182 is_in_superset_.resize(0);
183 }
184
185 uint64_t work_done() const { return work_done_; }
186 bool Stopped() const { return stop_; }
187
188 // Different API than InclusionDetector.
189 // 1/ Add all potential subset to the storage_.
190 // 2/ Call IndexAllStorageAsSubsets()
191 // 3/ Call one or more time FindSubsets().
192 // - process() can call StopProcessingCurrentSuperset() to abort early
193 // - process() can call StopProcessingCurrentSubset() to never consider
194 // that subset again.
195 // 4/ Call Stop() to reclaim some memory.
196 //
197 // Optimization: next_index_to_try is an index in superset that can be used
198 // to skip some position for which we already called FindSubsets().
199 void IndexAllStorageAsSubsets();
200 void FindSubsets(absl::Span<const int> superset, int* next_index_to_try,
201 const std::function<void(int subset)>& process);
202
203 private:
204 // Allows to access the elements of each subsets via storage_[index];
205 const Storage& storage_;
206
207 TimeLimit* time_limit_;
208 uint64_t work_done_ = 0;
209 uint64_t work_limit_ = std::numeric_limits<uint64_t>::max();
210
211 struct OneWatcherData {
212 int index;
213 int other_element;
214 uint64_t signature;
215 };
216
217 bool stop_ = false;
218 bool stop_with_current_subset_ = false;
219 bool stop_with_current_superset_ = false;
221 Bitset64<int> is_in_superset_;
222};
223
224// Deduction guide.
225template <typename Storage>
227
228template <typename Storage>
229SubsetsDetector(const Storage& storage) -> SubsetsDetector<Storage>;
230
231template <typename Storage>
233 DCHECK_GE(index, 0);
234 DCHECK_LT(index, storage_.size());
235 const int num_elements = storage_[index].size();
236 if (num_elements == 0) return;
237
238 ++num_potential_subsets_;
239 ++num_potential_supersets_;
240 candidates_.push_back({index, num_elements, /*order=*/1});
241}
242
243template <typename Storage>
245 DCHECK_GE(index, 0);
246 DCHECK_LT(index, storage_.size());
247 const int num_elements = storage_[index].size();
248 if (num_elements == 0) return;
249
250 ++num_potential_subsets_;
251 candidates_.push_back({index, num_elements, /*order=*/0});
252}
253
254template <typename Storage>
256 DCHECK_GE(index, 0);
257 DCHECK_LT(index, storage_.size());
258 const int num_elements = storage_[index].size();
259 if (num_elements == 0) return;
260
261 DCHECK_GE(index, 0);
262 DCHECK_LT(index, storage_.size());
263 ++num_potential_supersets_;
264 candidates_.push_back({index, num_elements, /*order=*/2});
265}
266
267// Compute the signature and the maximum element. We want a
268// signature that is order invariant and is compatible with inclusion.
269inline std::pair<uint64_t, int> ComputeSignatureAndMaxElement(
270 absl::Span<const int> elements) {
271 uint64_t signature = 0;
272 int max_element = 0;
273 for (const int e : elements) {
274 DCHECK_GE(e, 0);
275 max_element = std::max(max_element, e);
276 signature |= (int64_t{1} << (e & 63));
277 }
278 return {signature, max_element};
279}
280
281template <typename Storage>
283 const std::function<void(int subset, int superset)>& process) {
284 // No need to do any work in these cases.
285 if (candidates_.size() <= 1) return;
286 if (num_potential_subsets_ == 0) return;
287 if (num_potential_supersets_ == 0) return;
288
289 // Temp data must be ready to use.
290 stop_ = false;
291 DCHECK(signatures_.empty());
292 DCHECK(one_watcher_.empty());
293
294 // We check each time our work_done_ has increased by more than this.
295 constexpr int64_t kCheckTimeLimitInterval = 1000;
296 int64_t next_time_limit_check = kCheckTimeLimitInterval;
297
298 // Main algo.
299 work_done_ = 0;
300 std::stable_sort(candidates_.begin(), candidates_.end());
301 for (const Candidate& candidate : candidates_) {
302 const auto& candidate_elements = storage_[candidate.index];
303 const int candidate_index = signatures_.size();
304
305 const auto [signature, max_element] =
306 ComputeSignatureAndMaxElement(candidate_elements);
307 signatures_.push_back(signature);
308 DCHECK_EQ(is_in_superset_.size(), one_watcher_.size());
309 if (max_element >= is_in_superset_.size()) {
310 is_in_superset_.resize(max_element + 1);
311 one_watcher_.resize(max_element + 1);
312 }
313
314 stop_with_current_superset_ = false;
315 if (candidate.CanBeSuperset()) {
316 const Candidate& superset = candidate;
317
318 // Bitset should be cleared.
319 DCHECK(std::all_of(is_in_superset_.begin(), is_in_superset_.end(),
320 [](bool b) { return !b; }));
321
322 // Find any subset included in current superset.
323 work_done_ += 2 * superset.size;
324 if (work_done_ > work_limit_) return Stop();
325 if (work_done_ > next_time_limit_check) {
326 if (time_limit_->LimitReached()) return Stop();
327 next_time_limit_check = work_done_ + kCheckTimeLimitInterval;
328 }
329
330 // We make a copy because process() might alter the content of the
331 // storage when it returns "stop_with_current_superset_" and we need
332 // to clean is_in_superset_ properly.
333 //
334 // TODO(user): Alternatively, we could clean is_in_superset_ in the
335 // call to StopProcessingCurrentSuperset() and force client to call it
336 // before altering the superset content.
337 superset_elements_.assign(candidate_elements.begin(),
338 candidate_elements.end());
339 for (const int e : superset_elements_) {
340 is_in_superset_.Set(e);
341 }
342
343 const uint64_t superset_signature = signatures_.back();
344 const auto is_in_superset_view = is_in_superset_.const_view();
345 for (const int superset_e : superset_elements_) {
346 for (int i = 0; i < one_watcher_[superset_e].size(); ++i) {
347 const int c_index = one_watcher_[superset_e][i];
348 const Candidate& subset = candidates_[c_index];
349 DCHECK_LE(subset.size, superset.size);
350
351 // Quick check with signature.
352 if ((signatures_[c_index] & ~superset_signature) != 0) continue;
353
354 // Long check with bitset.
355 bool is_included = true;
356 work_done_ += subset.size;
357 if (work_done_ > work_limit_) return Stop();
358 if (work_done_ > next_time_limit_check) {
359 if (time_limit_->LimitReached()) return Stop();
360 next_time_limit_check = work_done_ + kCheckTimeLimitInterval;
361 }
362 for (const int subset_e : storage_[subset.index]) {
363 if (!is_in_superset_view[subset_e]) {
364 is_included = false;
365 break;
366 }
367 }
368 if (!is_included) continue;
369
370 stop_with_current_subset_ = false;
371 process(subset.index, superset.index);
372
373 if (stop_) return;
374 if (work_done_ > work_limit_) return Stop();
375 if (work_done_ > next_time_limit_check) {
376 if (time_limit_->LimitReached()) return Stop();
377 next_time_limit_check = work_done_ + kCheckTimeLimitInterval;
378 }
379
380 if (stop_with_current_subset_) {
381 // Remove from the watcher list.
382 std::swap(one_watcher_[superset_e][i],
383 one_watcher_[superset_e].back());
384 one_watcher_[superset_e].pop_back();
385 --i;
386 }
387 if (stop_with_current_superset_) break;
388 }
389 if (stop_with_current_superset_) break;
390 }
391
392 // Cleanup.
393 for (const int e : superset_elements_) {
394 is_in_superset_.ClearBucket(e);
395 }
396 }
397
398 // Add new subset candidate to the watchers.
399 //
400 // Tricky: If this was also a superset and has been removed, we don't want
401 // to watch it!
402 if (candidate.CanBeSubset() && !stop_with_current_superset_) {
403 // Choose to watch the one with smallest list.
404 int best_choice = -1;
405 work_done_ += candidate.size;
406 if (work_done_ > work_limit_) return Stop();
407 for (const int e : candidate_elements) {
408 DCHECK_GE(e, 0);
409 DCHECK_LT(e, one_watcher_.size());
410 if (best_choice == -1 ||
411 one_watcher_[e].size() < one_watcher_[best_choice].size()) {
412 best_choice = e;
413 }
414 }
415 DCHECK_NE(best_choice, -1);
416 one_watcher_[best_choice].push_back(candidate_index);
417 }
418 }
419
420 // Stop also performs some cleanup.
421 Stop();
422}
423
424template <typename Storage>
426 stop_ = false;
427
428 // Flat representation of one_watcher_, we will fill it in one go from there.
429 std::vector<int> tmp_keys;
430 std::vector<OneWatcherData> tmp_values;
431 std::vector<int> element_to_num_watched;
432
433 work_done_ = 0;
434 for (int index = 0; index < storage_.size(); ++index) {
435 const auto& subset = storage_[index];
436 CHECK_GE(subset.size(), 2);
437
438 const auto [signature, max_element] = ComputeSignatureAndMaxElement(subset);
439 if (max_element >= is_in_superset_.size()) {
440 is_in_superset_.resize(max_element + 1);
441 }
442 if (max_element >= element_to_num_watched.size()) {
443 element_to_num_watched.resize(max_element + 1);
444 }
445
446 // Choose to watch the one with smallest list so far.
447 int best_choice = -1;
448 int best_value = -1;
449 int second_choice = -1;
450 int second_value = -1;
451 work_done_ += subset.size();
452 if (work_done_ > work_limit_) return Stop();
453 for (const int e : subset) {
454 DCHECK_GE(e, 0);
455 DCHECK_LT(e, element_to_num_watched.size());
456 const int value = element_to_num_watched[e];
457 if (value >= best_value) {
458 second_choice = best_choice;
459 second_value = best_value;
460 best_choice = e;
461 best_value = value;
462 } else if (value > second_value) {
463 second_choice = e;
464 second_value = value;
465 }
466 }
467 DCHECK_NE(best_choice, -1);
468 DCHECK_NE(second_choice, -1);
469 DCHECK_NE(best_choice, second_choice);
470
471 element_to_num_watched[best_choice]++;
472 tmp_keys.push_back(best_choice);
473 tmp_values.push_back({index, second_choice, signature});
474 }
475
476 one_watcher_.ResetFromFlatMapping(tmp_keys, tmp_values);
477}
478
479template <typename Storage>
481 absl::Span<const int> superset, int* next_index_to_try,
482 const std::function<void(int subset)>& process) {
483 // We check each time our work_done_ has increased by more than this.
484 constexpr int64_t kCheckTimeLimitInterval = 1000;
485 int64_t next_time_limit_check = kCheckTimeLimitInterval;
486
487 // Compute the signature and also resize vector if needed. We want a
488 // signature that is order invariant and is compatible with inclusion.
489 const auto [superset_signature, max_element] =
491 if (max_element >= is_in_superset_.size()) {
492 is_in_superset_.resize(max_element + 1);
493 }
494
495 // Find any subset included in current superset.
496 work_done_ += 2 * superset.size();
497 if (work_done_ > work_limit_) return Stop();
498 if (work_done_ > next_time_limit_check) {
499 if (time_limit_->LimitReached()) return Stop();
500 next_time_limit_check = work_done_ + kCheckTimeLimitInterval;
501 }
502
503 // Bitset should be cleared.
504 DCHECK(std::all_of(is_in_superset_.begin(), is_in_superset_.end(),
505 [](bool b) { return !b; }));
506 for (const int e : superset) {
507 is_in_superset_.Set(e);
508 }
509
510 stop_with_current_superset_ = false;
511 const auto is_in_superset_view = is_in_superset_.const_view();
512 for (; *next_index_to_try < superset.size(); ++*next_index_to_try) {
513 const int superset_e = superset[*next_index_to_try];
514 if (superset_e >= one_watcher_.size()) continue;
515 auto cached_span = one_watcher_[superset_e];
516 for (int i = 0; i < cached_span.size(); ++i) {
517 ++work_done_;
518
519 // Do a bunch of quick checks. The second one is optimized for size 2
520 // which happens a lot in our usage of merging clique with implications.
521 const auto [subset_index, other_e, subset_signature] = cached_span[i];
522 if ((subset_signature & ~superset_signature) != 0) continue;
523 if (!is_in_superset_view[other_e]) continue;
524
525 // Long check with bitset.
526 const absl::Span<const int> subset = storage_[subset_index];
527 if (subset.size() > superset.size()) continue;
528
529 // TODO(user): Technically we do not need to check the watched position or
530 // the "other element" position, we could do that by permuting them first
531 // or last and iterating on a subspan. However, in many slow situation, we
532 // have millions of size 2 sets, and the time is dominated by the first
533 // check.
534 bool is_included = true;
535 work_done_ += subset.size();
536 if (work_done_ > work_limit_) return Stop();
537 if (work_done_ > next_time_limit_check) {
538 if (time_limit_->LimitReached()) return Stop();
539 next_time_limit_check = work_done_ + kCheckTimeLimitInterval;
540 }
541 for (const int subset_e : subset) {
542 if (!is_in_superset_view[subset_e]) {
543 is_included = false;
544 break;
545 }
546 }
547 if (!is_included) continue;
548
549 stop_with_current_subset_ = false;
550 process(subset_index);
551
552 // TODO(user): Remove this and the more complex API need once we move
553 // class.
554 if (stop_) return;
555 if (work_done_ > work_limit_) return Stop();
556 if (work_done_ > next_time_limit_check) {
557 if (time_limit_->LimitReached()) return Stop();
558 next_time_limit_check = work_done_ + kCheckTimeLimitInterval;
559 }
560
561 if (stop_with_current_subset_) {
562 one_watcher_.RemoveBySwap(superset_e, i);
563 cached_span.remove_suffix(1);
564 --i;
565 }
566 if (stop_with_current_superset_) break;
567 }
568 if (stop_with_current_superset_) break;
569 }
570
571 // Cleanup.
572 for (const int e : superset) {
573 is_in_superset_.ClearBucket(e);
574 }
575}
576
577} // namespace sat
578} // namespace operations_research
579
580#endif // OR_TOOLS_SAT_INCLUSION_H_
void IncreaseWorkDone(uint64_t increase)
Definition inclusion.h:120
void Reset()
Resets the class to an empty state.
Definition inclusion.h:64
InclusionDetector(const Storage &storage, TimeLimit *time_limit)
Definition inclusion.h:60
void DetectInclusions(const std::function< void(int subset, int superset)> &process)
Definition inclusion.h:282
void SetWorkLimit(uint64_t work_limit)
Definition inclusion.h:86
void FindSubsets(absl::Span< const int > superset, int *next_index_to_try, const std::function< void(int subset)> &process)
Definition inclusion.h:480
void SetWorkLimit(uint64_t work_limit)
Definition inclusion.h:176
SubsetsDetector(const Storage &storage, TimeLimit *time_limit)
Definition inclusion.h:173
process() can call StopProcessingCurrentSuperset() to abort early - process() can call StopProcessingCurrentSubset() to never consider void IndexAllStorageAsSubsets()
time_limit
Definition solve.cc:22
std::pair< uint64_t, int > ComputeSignatureAndMaxElement(absl::Span< const int > elements)
Definition inclusion.h:269
InclusionDetector(const Storage &storage) -> InclusionDetector< Storage >
Deduction guide.
SubsetsDetector(const Storage &storage) -> SubsetsDetector< Storage >
In SWIG mode, we don't want anything besides these top-level includes.