Google OR-Tools v9.14
a fast and portable software suite for combinatorial optimization
Loading...
Searching...
No Matches
set_cover_heuristics.cc
Go to the documentation of this file.
1// Copyright 2010-2025 Google LLC
2// Licensed under the Apache License, Version 2.0 (the "License");
3// you may not use this file except in compliance with the License.
4// You may obtain a copy of the License at
5//
6// http://www.apache.org/licenses/LICENSE-2.0
7//
8// Unless required by applicable law or agreed to in writing, software
9// distributed under the License is distributed on an "AS IS" BASIS,
10// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11// See the License for the specific language governing permissions and
12// limitations under the License.
13
15
16#include <algorithm>
17#include <climits>
18#include <cstdint>
19#include <limits>
20#include <numeric>
21#include <utility>
22#include <vector>
23
24#include "absl/base/casts.h"
25#include "absl/log/check.h"
26#include "absl/log/log.h"
27#include "absl/numeric/bits.h"
28#include "absl/random/distributions.h"
29#include "absl/random/random.h"
30#include "absl/types/span.h"
36
37namespace operations_research {
38
39constexpr SubsetIndex kNotFound(-1);
40static constexpr Cost kMaxPossibleCost = std::numeric_limits<Cost>::max();
41static constexpr double kInfinity = std::numeric_limits<float>::infinity();
42
44
46 return inv_->CheckConsistency(consistency_level_);
47}
48
49// TrivialSolutionGenerator.
50
52 absl::Span<const SubsetIndex> focus) {
53 StopWatch stop_watch(&run_time_);
54 const SubsetIndex num_subsets(model()->num_subsets());
55 SubsetBoolVector choices(num_subsets, false);
56 for (const SubsetIndex subset : focus) {
57 choices[subset] = true;
58 }
59 inv()->LoadSolution(choices);
60 inv()->Recompute(CL::kCostAndCoverage);
61 return true;
62}
63
64// RandomSolutionGenerator.
65
67 absl::Span<const SubsetIndex> focus) {
68 StopWatch stop_watch(&run_time_);
69 inv()->ClearTrace();
70 std::vector<SubsetIndex> shuffled(focus.begin(), focus.end());
71 std::shuffle(shuffled.begin(), shuffled.end(), absl::BitGen());
72 for (const SubsetIndex subset : shuffled) {
73 if (inv()->is_selected()[subset]) continue;
74 if (inv()->num_free_elements()[subset] != 0) {
75 inv()->Select(subset, CL::kFreeAndUncovered);
76 }
77 }
78 inv()->CompressTrace();
79 DCHECK(inv()->CheckConsistency(CL::kFreeAndUncovered));
80 return true;
81}
82
83// GreedySolutionGenerator.
84
86 absl::Span<const SubsetIndex> focus) {
87 StopWatch stop_watch(&run_time_);
88 DCHECK(inv()->CheckConsistency(CL::kCostAndCoverage));
89 inv()->Recompute(CL::kFreeAndUncovered);
90 inv()->ClearTrace();
91 std::vector<std::pair<float, SubsetIndex::ValueType>> subset_priorities;
92 DVLOG(1) << "focus.size(): " << focus.size();
93 subset_priorities.reserve(focus.size());
94 const SubsetCostVector& costs = model()->subset_costs();
95 for (const SubsetIndex subset : focus) {
96 if (!inv()->is_selected()[subset] &&
97 inv()->num_free_elements()[subset] != 0) {
98 // NOMUTANTS -- reason, for C++
99 const float priority = inv()->num_free_elements()[subset] / costs[subset];
100 subset_priorities.push_back({priority, subset.value()});
101 }
102 }
104 const SparseColumnView& colunms = model()->columns();
105 const SparseRowView& rows = model()->rows();
106 // The priority queue maintains the maximum number of elements covered by unit
107 // of cost. We chose 16 as the arity of the heap after some testing.
108 // TODO(user): research more about the best value for Arity.
110 subset_priorities, num_subsets);
111 SubsetBoolVector subset_seen(num_subsets, false);
112 std::vector<SubsetIndex> subsets_to_remove;
113 subsets_to_remove.reserve(focus.size());
114 while (!pq.IsEmpty() || inv()->num_uncovered_elements() > 0) {
115 VLOG_EVERY_N_SEC(1, 5) << "Queue size: " << pq.heap_size()
116 << ", #uncovered elements: "
118 const SubsetIndex best_subset(pq.TopIndex());
119 pq.Pop();
120 inv()->Select(best_subset, CL::kFreeAndUncovered);
121 // NOMUTANTS -- reason, for C++
122 subset_seen[best_subset] = true;
123 subsets_to_remove.push_back(best_subset);
124 for (const ElementIndex element : colunms[best_subset]) {
125 for (const SubsetIndex subset : rows[element]) {
126 if (subset_seen[subset]) continue;
127 subset_seen[subset] = true;
128 const BaseInt marginal_impact(inv()->num_free_elements()[subset]);
129 if (marginal_impact > 0) {
130 const float priority = marginal_impact / costs[subset];
131 pq.Update({priority, subset.value()});
132 } else {
133 pq.Remove(subset.value());
134 }
135 subsets_to_remove.push_back(subset);
136 }
137 }
138 for (const SubsetIndex subset : subsets_to_remove) {
139 subset_seen[subset] = false;
140 }
141 subsets_to_remove.clear();
142 DVLOG(1) << "Cost = " << inv()->cost()
143 << " num_uncovered_elements = " << inv()->num_uncovered_elements();
144 }
145 inv()->CompressTrace();
146 // Don't expect pq to be empty, because of the break in the while loop.
147 DCHECK(inv()->CheckConsistency(CL::kFreeAndUncovered));
148 return true;
149}
150
151namespace {
152// This class gathers statistics about the usefulness of the ratio computation.
153class ComputationUsefulnessStats {
154 public:
155 // If is_active is true, the stats are gathered, otherwise there is no
156 // overhead, in particular no memory allocation.
157 explicit ComputationUsefulnessStats(const SetCoverInvariant* inv,
158 bool is_active)
159 : inv_(inv),
160 is_active_(is_active),
161 num_ratio_computations_(),
162 num_useless_computations_(),
164 if (is_active) {
165 BaseInt num_subsets = inv_->model()->num_subsets();
166 num_ratio_computations_.assign(num_subsets, 0);
167 num_useless_computations_.assign(num_subsets, 0);
168 num_free_elements_.assign(num_subsets, -1); // -1 means not computed yet.
169 }
170 }
171
172 // To be called each time a num_free_elements is computed.
173 void Update(SubsetIndex subset, BaseInt new_num_free_elements) {
174 if (is_active_) {
175 if (new_num_free_elements == num_free_elements_[subset]) {
176 ++num_useless_computations_[subset];
177 }
178 ++num_ratio_computations_[subset];
179 num_free_elements_[subset] = new_num_free_elements;
180 }
181 }
182
183 // To be called at the end of the algorithm.
184 void PrintStats() {
185 if (is_active_) {
186 BaseInt num_subsets_considered = 0;
187 BaseInt num_ratio_updates = 0;
188 BaseInt num_wasted_ratio_updates = 0;
189 for (const SubsetIndex subset : inv_->model()->SubsetRange()) {
190 if (num_ratio_computations_[subset] > 0) {
191 ++num_subsets_considered;
192 if (num_ratio_computations_[subset] > 1) {
193 num_ratio_updates += num_ratio_computations_[subset] - 1;
194 }
195 }
196 num_wasted_ratio_updates += num_useless_computations_[subset];
197 }
198 LOG(INFO) << "num_subsets_considered = " << num_subsets_considered;
199 LOG(INFO) << "num_ratio_updates = " << num_ratio_updates;
200 LOG(INFO) << "num_wasted_ratio_updates = " << num_wasted_ratio_updates;
201 }
202 }
203
204 private:
205 // The invariant on which the stats are performed.
206 const SetCoverInvariant* inv_;
207
208 // Whether the stats are active or not.
209 bool is_active_;
210
211 // Number of times the ratio was computed for a subset.
212 SubsetToIntVector num_ratio_computations_;
213
214 // Number of times the ratio was computed for a subset and was the same as the
215 // previous one.
216 SubsetToIntVector num_useless_computations_;
217
218 // The value num_free_elements_ for the subset the last time it was computed.
219 // Used to detect useless computations.
220 SubsetToIntVector num_free_elements_;
221};
222} // namespace
223
224namespace {
225// Clearly not the fastest radix sort, but its complexity is the right one.
226// Furthermore:
227// - it is as memory-safe as std::vectors can be (no pointers),
228// - no multiplication is performed,
229// - it is stable
230// - it handles the cases of signed and unsigned integers automatically,
231// - bounds on the keys are optional, or they can be computed automatically,
232// - based on those bounds, the number of passes is automatically computed,
233// - a payload is associated to each key, and it is sorted in the same way
234// as the keys. This payload can be a vector of integers or a vector of
235// pointers to larger objects.
236// TODO(user): Make it an independent library.
237// - add support for decreasing counting sort,
238// - make payloads optional,
239// - support floats and doubles,
240// - improve performance.
241// - use vectorized code.
242namespace internal {
243uint32_t RawBits(uint32_t x) { return x; } // NOLINT
244uint32_t RawBits(int x) { return absl::bit_cast<uint32_t>(x); } // NOLINT
245uint32_t RawBits(float x) { return absl::bit_cast<uint32_t>(x); } // NOLINT
246uint64_t RawBits(uint64_t x) { return x; } // NOLINT
247uint64_t RawBits(int64_t x) { return absl::bit_cast<uint64_t>(x); } // NOLINT
248uint64_t RawBits(double x) { return absl::bit_cast<uint64_t>(x); } // NOLINT
249
250inline uint32_t Bucket(uint32_t x, uint32_t shift, uint32_t radix) {
251 DCHECK_EQ(0, radix & (radix - 1)); // Must be a power of two.
252 // NOMUTANTS -- a way to compute the remainder of a division when radix is a
253 // power of two.
254 return (RawBits(x) >> shift) & (radix - 1);
255}
256
257template <typename T>
258int NumBitsToRepresent(T value) {
259 DCHECK_LE(absl::countl_zero(RawBits(value)), sizeof(T) * CHAR_BIT);
260 return sizeof(T) * CHAR_BIT - absl::countl_zero(RawBits(value));
261}
262
263template <typename Key, typename Counter>
264void UpdateCounters(uint32_t radix, int shift, std::vector<Key>& keys,
265 std::vector<Counter>& counts) {
266 std::fill(counts.begin(), counts.end(), 0);
267 DCHECK_EQ(counts[0], 0);
268 DCHECK_EQ(0, radix & (radix - 1)); // Must be a power of two.
269 const auto num_keys = keys.size();
270 for (int64_t i = 0; i < num_keys; ++i) {
271 ++counts[Bucket(keys[i], shift, radix)];
272 }
273 // Now the counts will contain the sum of the sizes below and including each
274 // bucket.
275 for (uint64_t i = 1; i < radix; ++i) {
276 counts[i] += counts[i - 1];
277 }
278}
279
280template <typename Key, typename Payload, typename Counter>
281void IncreasingCountingSort(uint32_t radix, int shift, std::vector<Key>& keys,
282 std::vector<Payload>& payloads,
283 std::vector<Key>& scratch_keys,
284 std::vector<Payload>& scratch_payloads,
285 std::vector<Counter>& counts) {
286 DCHECK_EQ(0, radix & (radix - 1)); // Must be a power of two.
287 UpdateCounters(radix, shift, keys, counts);
288 const auto num_keys = keys.size();
289 // In this order for stability.
290 for (int64_t i = num_keys - 1; i >= 0; --i) {
291 Counter c = --counts[Bucket(keys[i], shift, radix)];
292 scratch_keys[c] = keys[i];
293 scratch_payloads[c] = payloads[i];
294 }
295 std::swap(keys, scratch_keys);
296 std::swap(payloads, scratch_payloads);
297}
298} // namespace internal
299
300template <typename Key, typename Payload>
301void RadixSort(int radix_log, std::vector<Key>& keys,
302 std::vector<Payload>& payloads, Key /*min_key*/, Key max_key) {
303 // range_log is the number of bits necessary to represent the max_key
304 // We could as well use max_key - min_key, but it is more expensive to
305 // compute.
306 const int range_log = internal::NumBitsToRepresent(max_key);
307 DCHECK_EQ(internal::NumBitsToRepresent(0), 0);
308 DCHECK_LE(internal::NumBitsToRepresent(std::numeric_limits<Key>::max()),
309 std::numeric_limits<Key>::digits);
310 const int radix = 1 << radix_log; // By definition.
311 std::vector<uint32_t> counters(radix, 0);
312 std::vector<Key> scratch_keys(keys.size());
313 std::vector<Payload> scratch_payloads(payloads.size());
314 for (int shift = 0; shift < range_log; shift += radix_log) {
315 DCHECK_LE(1 << shift, max_key);
316 internal::IncreasingCountingSort(radix, shift, keys, payloads, scratch_keys,
317 scratch_payloads, counters);
318 }
319}
320
321// TODO(user): Move this to SetCoverInvariant.
322
323std::vector<ElementIndex> GetUncoveredElementsSortedByDegree(
324 const SetCoverInvariant* const inv) {
325 const BaseInt num_elements = inv->model()->num_elements();
326 std::vector<ElementIndex> degree_sorted_elements; // payloads
327 degree_sorted_elements.reserve(num_elements);
328 std::vector<BaseInt> keys;
329 keys.reserve(num_elements);
330 const SparseRowView& rows = inv->model()->rows();
331 BaseInt max_degree = 0;
332 for (const ElementIndex element : inv->model()->ElementRange()) {
333 // Already covered elements should not be considered.
334 if (inv->coverage()[element] != 0) continue;
335 degree_sorted_elements.push_back(element);
336 const BaseInt size = rows[element].size();
337 max_degree = std::max(max_degree, size);
338 keys.push_back(size);
339 }
340 RadixSort(11, keys, degree_sorted_elements, 1, max_degree);
341#ifndef NDEBUG
342 BaseInt prev_key = -1;
343 for (const auto key : keys) {
344 DCHECK_LE(prev_key, key);
345 prev_key = key;
346 }
347#endif
348 return degree_sorted_elements;
349}
350
351// Computes: d = c1 * n2 - c2 * n1. This is an easy way to compare two ratios
352// without having to use a full division.
353// If d < 0 then c1 / n1 < c2 / n2,
354// If d == 0 then c1 / n1 == c2 / n2, etc...
355// NOTE(user): This can be implemented using SSE2 with a gain of 5-10%.
356double Determinant(Cost c1, BaseInt n1, Cost c2, BaseInt n2) {
357 return c1 * n2 - n1 * c2;
358}
359} // namespace
360
361// ElementDegreeSolutionGenerator.
362// There is no need to use a priority queue here, as the ratios are computed
363// on-demand. Also elements are sorted based on degree once and for all and
364// moved past when the elements become already covered.
365
367 const SubsetBoolVector& in_focus) {
368 StopWatch stop_watch(&run_time_);
369 DVLOG(1) << "Entering ElementDegreeSolutionGenerator::NextSolution";
370 inv()->Recompute(CL::kFreeAndUncovered);
371 // Create the list of all the indices in the problem.
372 std::vector<ElementIndex> degree_sorted_elements =
373 GetUncoveredElementsSortedByDegree(inv());
374 ComputationUsefulnessStats stats(inv(), false);
375 const SparseRowView& rows = model()->rows();
376 const SubsetCostVector& costs = model()->subset_costs();
377 for (const ElementIndex element : degree_sorted_elements) {
378 // No need to cover an element that is already covered.
379 if (inv()->coverage()[element] != 0) continue;
380 SubsetIndex best_subset(-1);
381 Cost best_subset_cost = 0.0;
382 BaseInt best_subset_num_free_elts = 0;
383 for (const SubsetIndex subset : rows[element]) {
384 if (!in_focus[subset]) continue;
385 const BaseInt num_free_elements = inv()->num_free_elements()[subset];
386 stats.Update(subset, num_free_elements);
387 const Cost det = Determinant(costs[subset], num_free_elements,
388 best_subset_cost, best_subset_num_free_elts);
389 // Compare R = costs[subset] / num_free_elements with
390 // B = best_subset_cost / best_subset_num_free_elts.
391 // If R < B, we choose subset.
392 // If the ratios are the same, we choose the subset with the most free
393 // elements.
394 // TODO(user): What about adding a tolerance for equality, which could
395 // further favor larger columns?
396 if (det < 0 ||
397 (det == 0 && num_free_elements > best_subset_num_free_elts)) {
398 best_subset = subset;
399 best_subset_cost = costs[subset];
400 best_subset_num_free_elts = num_free_elements;
401 }
402 }
403 if (best_subset.value() == -1) {
404 LOG(WARNING) << "Best subset not found. Algorithmic error or invalid "
405 "input.";
406 continue;
407 }
408 DCHECK_NE(best_subset.value(), -1);
409 inv()->Select(best_subset, CL::kFreeAndUncovered);
410 DVLOG(1) << "Cost = " << inv()->cost()
411 << " num_uncovered_elements = " << inv()->num_uncovered_elements();
412 }
413 inv()->CompressTrace();
414 stats.PrintStats();
415 DCHECK(inv()->CheckConsistency(CL::kFreeAndUncovered));
416 return true;
417}
418
419// LazyElementDegreeSolutionGenerator.
420// There is no need to use a priority queue here, as the ratios are computed
421// on-demand. Also elements are sorted based on degree once and for all and
422// moved past when the elements become already covered.
423
425 const SubsetBoolVector& in_focus) {
426 StopWatch stop_watch(&run_time_);
427 inv()->CompressTrace();
428 DVLOG(1) << "Entering LazyElementDegreeSolutionGenerator::NextSolution";
429 DCHECK(inv()->CheckConsistency(CL::kCostAndCoverage));
430 // Create the list of all the indices in the problem.
431 std::vector<ElementIndex> degree_sorted_elements =
432 GetUncoveredElementsSortedByDegree(inv());
433 const SparseRowView& rows = model()->rows();
434 const SparseColumnView& columns = model()->columns();
435 ComputationUsefulnessStats stats(inv(), false);
436 const SubsetCostVector& costs = model()->subset_costs();
437 for (const ElementIndex element : degree_sorted_elements) {
438 // No need to cover an element that is already covered.
439 if (inv()->coverage()[element] != 0) continue;
440 SubsetIndex best_subset(-1);
441 Cost best_subset_cost = 0.0; // Cost of the best subset.
442 BaseInt best_subset_num_free_elts = 0;
443 for (const SubsetIndex subset : rows[element]) {
444 if (!in_focus[subset]) continue;
445 const Cost filtering_det =
446 Determinant(costs[subset], columns[subset].size(), best_subset_cost,
447 best_subset_num_free_elts);
448 // If the ratio with the initial number elements is greater, we skip this
449 // subset.
450 if (filtering_det > 0) continue;
451 const BaseInt num_free_elements = inv()->ComputeNumFreeElements(subset);
452 stats.Update(subset, num_free_elements);
453 const Cost det = Determinant(costs[subset], num_free_elements,
454 best_subset_cost, best_subset_num_free_elts);
455 // Same as ElementDegreeSolutionGenerator.
456 if (det < 0 ||
457 (det == 0 && num_free_elements > best_subset_num_free_elts)) {
458 best_subset = subset;
459 best_subset_cost = costs[subset];
460 best_subset_num_free_elts = num_free_elements;
461 }
462 }
463 DCHECK_NE(best_subset, SubsetIndex(-1));
464 inv()->Select(best_subset, CL::kCostAndCoverage);
465 DVLOG(1) << "Cost = " << inv()->cost()
466 << " num_uncovered_elements = " << inv()->num_uncovered_elements();
467 }
468 DCHECK(inv()->CheckConsistency(CL::kCostAndCoverage));
469 stats.PrintStats();
470 return true;
471}
472
473// SteepestSearch.
474
476 StopWatch stop_watch(&run_time_);
477 const int64_t num_iterations = max_iterations();
478 DCHECK(inv()->CheckConsistency(CL::kCostAndCoverage));
479 inv()->Recompute(CL::kFreeAndUncovered);
480 DVLOG(1) << "Entering SteepestSearch::NextSolution, num_iterations = "
481 << num_iterations;
482 // Return false if inv() contains no solution.
483 // TODO(user): This should be relaxed for partial solutions.
484 if (inv()->num_uncovered_elements() != 0) {
485 return false;
486 }
487
488 // Create priority queue with cost of using a subset, by decreasing order.
489 // Do it only for selected AND removable subsets.
490 const SubsetCostVector& costs = model()->subset_costs();
491 std::vector<std::pair<float, SubsetIndex::ValueType>> subset_priorities;
492 subset_priorities.reserve(in_focus.size());
493 for (const SetCoverDecision& decision : inv()->trace()) {
494 const SubsetIndex subset = decision.subset();
495 if (in_focus[subset] && inv()->is_selected()[subset] &&
496 inv()->ComputeIsRedundant(subset)) {
497 const float delta_per_element = costs[subset];
498 subset_priorities.push_back({delta_per_element, subset.value()});
499 }
500 }
501 DVLOG(1) << "subset_priorities.size(): " << subset_priorities.size();
503 subset_priorities, model()->num_subsets());
504 for (int iteration = 0; iteration < num_iterations && !pq.IsEmpty();
505 ++iteration) {
506 const SubsetIndex best_subset(pq.TopIndex());
507 pq.Pop();
508 DCHECK(inv()->is_selected()[best_subset]);
509 DCHECK(inv()->ComputeIsRedundant(best_subset));
510 DCHECK_GT(costs[best_subset], 0.0);
511 inv()->Deselect(best_subset, CL::kFreeAndUncovered);
512 for (const SubsetIndex subset :
513 IntersectingSubsetsRange(*model(), best_subset)) {
514 if (!inv()->ComputeIsRedundant(subset)) {
515 pq.Remove(subset.value());
516 }
517 }
518 DVLOG(1) << "Cost = " << inv()->cost();
519 }
520 inv()->CompressTrace();
521 // TODO(user): change this to enable working on partial solutions.
522 DCHECK_EQ(inv()->num_uncovered_elements(), 0);
523 DCHECK(inv()->CheckConsistency(CL::kFreeAndUncovered));
524 return true;
525}
526
527// LazySteepestSearch.
528
530 StopWatch stop_watch(&run_time_);
531 const int64_t num_iterations = max_iterations();
532 DCHECK(inv()->CheckConsistency(CL::kCostAndCoverage));
533 DVLOG(1) << "Entering LazySteepestSearch::NextSolution, num_iterations = "
534 << num_iterations;
535 const BaseInt num_selected_subsets = inv()->trace().size();
536 std::vector<Cost> selected_costs;
537 selected_costs.reserve(num_selected_subsets);
538 // First part of the trick:
539 // Since the heuristic is greedy, it only considers subsets that are selected
540 // and in_focus.
541 const SubsetCostVector& costs = model()->subset_costs();
542 for (const SetCoverDecision& decision : inv()->trace()) {
543 if (in_focus[decision.subset()]) {
544 selected_costs.push_back(costs[decision.subset()]);
545 }
546 }
547 std::vector<BaseInt> cost_permutation(selected_costs.size());
548 std::iota(cost_permutation.begin(), cost_permutation.end(), 0);
549 // TODO(user): use RadixSort with doubles and payloads.
550 std::sort(cost_permutation.begin(), cost_permutation.end(),
551 [&selected_costs](const BaseInt i, const BaseInt j) {
552 if (selected_costs[i] == selected_costs[j]) {
553 return i < j;
554 }
555 return selected_costs[i] > selected_costs[j];
556 });
557 std::vector<SubsetIndex> cost_sorted_subsets;
558 cost_sorted_subsets.reserve(num_selected_subsets);
559 for (const BaseInt i : cost_permutation) {
560 cost_sorted_subsets.push_back(inv()->trace()[i].subset());
561 }
562 for (const SubsetIndex subset : cost_sorted_subsets) {
563 // Second part of the trick:
564 // ComputeIsRedundant is expensive, but it is going to be called only once
565 // per subset in the solution. Once this has been done, there is no need to
566 // update any priority queue, nor to use a stronger level of consistency
567 // than kCostAndCoverage.
568 // In the non-lazy version, the redundancy of a subset may be updated many
569 // times and the priority queue must be updated accordingly, including just
570 // for removing the subset that was just considered,
571 // A possible optimization would be to sort the elements by coverage and
572 // run ComputeIsRedundant with the new element order. This would make the
573 // subsets which cover only one element easier to prove non-redundant.
574 if (inv()->is_selected()[subset] && inv()->ComputeIsRedundant(subset)) {
575 inv()->Deselect(subset, CL::kCostAndCoverage);
576 }
577 }
578 inv()->CompressTrace();
579 DCHECK(inv()->CheckConsistency(CL::kCostAndCoverage));
580 return true;
581}
582
583// Guided Tabu Search
584
586 const SubsetIndex num_subsets(model()->num_subsets());
587 const SubsetCostVector& subset_costs = model()->subset_costs();
588 times_penalized_.assign(num_subsets.value(), 0);
589 augmented_costs_ = subset_costs;
590 utilities_ = subset_costs;
591}
592
593namespace {
594bool FlipCoin() {
595 // TODO(user): use STL for repeatable testing.
596 return absl::Bernoulli(absl::BitGen(), 0.5);
597}
598} // namespace
599
600void GuidedTabuSearch::UpdatePenalties(absl::Span<const SubsetIndex> focus) {
601 const SubsetCostVector& subset_costs = model()->subset_costs();
602 Cost max_utility = -1.0;
603 for (const SubsetIndex subset : focus) {
604 if (inv()->is_selected()[subset]) {
605 max_utility = std::max(max_utility, utilities_[subset]);
606 }
607 }
608 const double epsilon_utility = epsilon_ * max_utility;
609 for (const SubsetIndex subset : focus) {
610 if (inv()->is_selected()[subset]) {
611 const double utility = utilities_[subset];
612 if ((max_utility - utility <= epsilon_utility) && FlipCoin()) {
613 ++times_penalized_[subset];
614 const int times_penalized = times_penalized_[subset];
615 const Cost cost =
616 subset_costs[subset]; // / columns[subset].size().value();
617 utilities_[subset] = cost / (1 + times_penalized);
618 augmented_costs_[subset] =
619 cost * (1 + penalty_factor_ * times_penalized);
620 }
621 }
622 }
623}
624
625bool GuidedTabuSearch::NextSolution(absl::Span<const SubsetIndex> focus) {
626 StopWatch stop_watch(&run_time_);
627 const int64_t num_iterations = max_iterations();
628 DCHECK(inv()->CheckConsistency(CL::kFreeAndUncovered));
629 DVLOG(1) << "Entering GuidedTabuSearch::NextSolution, num_iterations = "
630 << num_iterations;
631 const SubsetCostVector& subset_costs = model()->subset_costs();
632 Cost best_cost = inv()->cost();
633 SubsetBoolVector best_choices = inv()->is_selected();
634 Cost augmented_cost =
635 std::accumulate(augmented_costs_.begin(), augmented_costs_.end(), 0.0);
636 BaseInt trace_size = inv()->trace().size();
637 for (int iteration = 0; iteration < num_iterations; ++iteration) {
638 if (inv()->trace().size() > 2 * trace_size) {
639 inv()->CompressTrace();
640 trace_size = inv()->trace().size();
641 }
642 Cost best_delta = kMaxPossibleCost;
643 SubsetIndex best_subset = kNotFound;
644 for (const SubsetIndex subset : focus) {
645 const Cost delta = augmented_costs_[subset];
646 DVLOG(1) << "Subset, " << subset.value() << ", at ,"
647 << inv()->is_selected()[subset] << ", delta =, " << delta
648 << ", best_delta =, " << best_delta;
649 if (inv()->is_selected()[subset]) {
650 // Try to remove subset from solution, if the gain from removing is
651 // worth it:
652 if (-delta < best_delta &&
653 // and it can be removed, and
654 inv()->ComputeIsRedundant(subset) &&
655 // it is not Tabu OR decreases the actual cost (aspiration):
656 (!tabu_list_.Contains(subset) ||
657 inv()->cost() - subset_costs[subset] < best_cost)) {
658 best_delta = -delta;
659 best_subset = subset;
660 }
661 } else {
662 // Try to use subset in solution, if its penalized delta is good.
663 if (delta < best_delta) {
664 // The limit kMaxPossibleCost is ill-defined,
665 // there is always a best_subset. Is it intended?
666 if (!tabu_list_.Contains(subset)) {
667 best_delta = delta;
668 best_subset = subset;
669 }
670 }
671 }
672 }
673 if (best_subset == kNotFound) { // Local minimum reached.
674 inv()->LoadSolution(best_choices);
675 return true;
676 }
677 DVLOG(1) << "Best subset, " << best_subset.value() << ", at ,"
678 << inv()->is_selected()[best_subset] << ", best_delta = ,"
679 << best_delta;
680
681 UpdatePenalties(focus);
682 tabu_list_.Add(best_subset);
683 if (inv()->is_selected()[best_subset]) {
684 inv()->Deselect(best_subset, CL::kFreeAndUncovered);
685 } else {
686 inv()->Select(best_subset, CL::kFreeAndUncovered);
687 }
688 // TODO(user): make the cost computation incremental.
689 augmented_cost =
690 std::accumulate(augmented_costs_.begin(), augmented_costs_.end(), 0.0);
691
692 DVLOG(1) << "Iteration, " << iteration << ", current cost = ,"
693 << inv()->cost() << ", best cost = ," << best_cost
694 << ", penalized cost = ," << augmented_cost;
695 if (inv()->cost() < best_cost) {
696 VLOG(1) << "Updated best cost, " << "Iteration, " << iteration
697 << ", current cost = ," << inv()->cost() << ", best cost = ,"
698 << best_cost << ", penalized cost = ," << augmented_cost;
699 best_cost = inv()->cost();
700 best_choices = inv()->is_selected();
701 }
702 }
703 inv()->LoadSolution(best_choices);
704 inv()->CompressTrace();
705 DCHECK(inv()->CheckConsistency(CL::kFreeAndUncovered));
706 return true;
707}
708
709// Guided Local Search
710void GuidedLocalSearch::Initialize() {
711 const SparseColumnView& columns = model()->columns();
712 penalties_.assign(columns.size(), 0);
713 penalization_factor_ = alpha_ * inv()->cost() * 1.0 / (columns.size());
714 for (const SetCoverDecision& decision : inv()->trace()) {
715 const SubsetIndex subset = decision.subset();
716 if (inv()->is_selected()[subset]) {
717 utility_heap_.Insert({static_cast<float>(model()->subset_costs()[subset] /
718 (1 + penalties_[subset])),
719 subset.value()});
720 }
721 }
722}
723
724Cost GuidedLocalSearch::ComputeDelta(SubsetIndex subset) const {
725 const float delta = (penalization_factor_ * penalties_[subset] +
726 model()->subset_costs()[subset]);
727 if (inv()->is_selected()[subset] && inv()->ComputeIsRedundant(subset)) {
728 return delta;
729 } else if (!inv()->is_selected()[subset]) {
730 return -delta;
731 }
732 return kInfinity;
733}
734
735bool GuidedLocalSearch::NextSolution(absl::Span<const SubsetIndex> focus) {
736 StopWatch stop_watch(&run_time_);
737 const int64_t num_iterations = max_iterations();
738 inv()->Recompute(CL::kRedundancy);
739 Cost best_cost = inv()->cost();
740 SubsetBoolVector best_choices = inv()->is_selected();
741
742 for (const SubsetIndex& subset : focus) {
743 const float delta = ComputeDelta(subset);
744 if (delta < kInfinity) {
745 priority_heap_.Insert({delta, subset.value()});
746 }
747 }
748
749 for (int iteration = 0;
750 !priority_heap_.IsEmpty() && iteration < num_iterations; ++iteration) {
751 // Improve current solution respective to the current penalties by flipping
752 // the best subset.
753 const SubsetIndex best_subset(priority_heap_.TopIndex());
754 if (inv()->is_selected()[best_subset]) {
755 utility_heap_.Insert({0, best_subset.value()});
756 inv()->Deselect(best_subset, CL::kRedundancy);
757 } else {
758 utility_heap_.Insert(
759 {static_cast<float>(model()->subset_costs()[best_subset] /
760 (1 + penalties_[best_subset])),
761 best_subset.value()});
762 inv()->Select(best_subset, CL::kRedundancy);
763 }
764 DCHECK(!utility_heap_.IsEmpty());
765
766 // Getting the subset with highest utility. utility_heap_ is not empty,
767 // because we just inserted a pair.
768 const SubsetIndex penalized_subset(utility_heap_.TopIndex());
769 utility_heap_.Pop();
770 ++penalties_[penalized_subset];
771 utility_heap_.Insert(
772 {static_cast<float>(model()->subset_costs()[penalized_subset] /
773 (1 + penalties_[penalized_subset])),
774 penalized_subset.value()});
775 DCHECK(!utility_heap_.IsEmpty());
776
777 // Get removable subsets (Add them to the heap).
778 for (const SubsetIndex subset : inv()->newly_removable_subsets()) {
779 const float delta_selected = (penalization_factor_ * penalties_[subset] +
780 model()->subset_costs()[subset]);
781 priority_heap_.Insert({delta_selected, subset.value()});
782 }
783 DCHECK(!priority_heap_.IsEmpty());
784
785 for (const SubsetIndex subset : {penalized_subset, best_subset}) {
786 const float delta = ComputeDelta(subset);
787 if (delta < kInfinity) {
788 priority_heap_.Insert({delta, subset.value()});
789 }
790 }
791 DCHECK(!priority_heap_.IsEmpty());
792
793 // Get new non removable subsets and remove them from the heap.
794 // This is when the priority_heap_ can become empty and end the outer loop
795 // early.
796 for (const SubsetIndex subset : inv()->newly_non_removable_subsets()) {
797 priority_heap_.Remove(subset.value());
798 }
799
800 if (inv()->cost() < best_cost) {
801 best_cost = inv()->cost();
802 best_choices = inv()->is_selected();
803 }
804 }
805 inv()->LoadSolution(best_choices);
806
807 // Improve the solution by removing redundant subsets.
808 for (const SubsetIndex& subset : focus) {
809 if (inv()->is_selected()[subset] && inv()->ComputeIsRedundant(subset))
810 inv()->Deselect(subset, CL::kRedundancy);
811 }
812 DCHECK_EQ(inv()->num_uncovered_elements(), 0);
813 return true;
814}
815
816namespace {
817void SampleSubsets(std::vector<SubsetIndex>* list, BaseInt num_subsets) {
818 num_subsets = std::min<BaseInt>(num_subsets, list->size());
819 CHECK_GE(num_subsets, 0);
820 std::shuffle(list->begin(), list->end(), absl::BitGen());
821 list->resize(num_subsets);
822}
823} // namespace
824
825std::vector<SubsetIndex> ClearRandomSubsets(BaseInt num_subsets,
826 SetCoverInvariant* inv) {
827 return ClearRandomSubsets(inv->model()->all_subsets(), num_subsets, inv);
828}
829
830std::vector<SubsetIndex> ClearRandomSubsets(absl::Span<const SubsetIndex> focus,
831 BaseInt num_subsets_to_choose,
832 SetCoverInvariant* inv) {
833 num_subsets_to_choose =
834 std::min<BaseInt>(num_subsets_to_choose, focus.size());
835 CHECK_GE(num_subsets_to_choose, 0);
836 std::vector<SubsetIndex> chosen_indices;
837 for (const SubsetIndex subset : focus) {
838 if (inv->is_selected()[subset]) {
839 chosen_indices.push_back(subset);
840 }
841 }
842 SampleSubsets(&chosen_indices, num_subsets_to_choose);
843 BaseInt num_deselected = 0;
844 for (const SubsetIndex subset : chosen_indices) {
845 if (inv->is_selected()[subset]) { // subset may have been deselected in a
846 // previous iteration.
847 inv->Deselect(subset, CL::kCostAndCoverage);
848 ++num_deselected;
849 }
850 for (const SubsetIndex connected_subset :
851 IntersectingSubsetsRange(*inv->model(), subset)) {
852 // connected_subset may have been deselected in a previous iteration.
853 if (inv->is_selected()[connected_subset]) {
854 inv->Deselect(connected_subset, CL::kCostAndCoverage);
855 ++num_deselected;
856 }
857 }
858 // Note that num_deselected may exceed num_subsets_to_choose by more than 1.
859 if (num_deselected > num_subsets_to_choose) break;
860 }
861 return chosen_indices;
862}
863
864std::vector<SubsetIndex> ClearMostCoveredElements(BaseInt max_num_subsets,
865 SetCoverInvariant* inv) {
866 return ClearMostCoveredElements(inv->model()->all_subsets(), max_num_subsets,
867 inv);
868}
869
870std::vector<SubsetIndex> ClearMostCoveredElements(
871 absl::Span<const SubsetIndex> focus, BaseInt max_num_subsets,
872 SetCoverInvariant* inv) {
873 // This is the vector we will return.
874 std::vector<SubsetIndex> sampled_subsets;
875
876 const ElementToIntVector& coverage = inv->coverage();
877 const BaseInt num_subsets = inv->model()->num_subsets();
878 const SparseRowView& rows = inv->model()->rows();
879
880 // Collect the sets which have at least one element whose coverage > 1,
881 // even if those sets are not removable.
882 SubsetBoolVector subset_is_collected(num_subsets, false);
883 for (const ElementIndex element : inv->model()->ElementRange()) {
884 if (coverage[element] <= 1) continue;
885 for (const SubsetIndex subset : rows[element]) {
886 if (inv->is_selected()[subset] && !subset_is_collected[subset]) {
887 subset_is_collected[subset] = true;
888 }
889 }
890 }
891
892 // Now intersect with focus: sampled_subsets = focus ⋂ impacted_subsets.
893 // NOTE(user): this might take too long. TODO(user):find another algorithm if
894 // necessary.
895 for (const SubsetIndex subset : focus) {
896 if (subset_is_collected[subset]) {
897 sampled_subsets.push_back(subset);
898 }
899 }
900
901 // Actually *sample* sampled_subset.
902 // TODO(user): find another algorithm if necessary.
903 std::shuffle(sampled_subsets.begin(), sampled_subsets.end(), absl::BitGen());
904 sampled_subsets.resize(
905 std::min<BaseInt>(sampled_subsets.size(), max_num_subsets));
906
907 // Testing has shown that sorting sampled_subsets is not necessary.
908 // Now, un-select the subset in sampled_subsets.
909 for (const SubsetIndex subset : sampled_subsets) {
910 inv->Deselect(subset, CL::kCostAndCoverage);
911 }
912 return sampled_subsets;
913}
914
915} // namespace operations_research
void Update(Aggregate element)
Change the value of an element.
bool IsEmpty() const
True iff the heap is empty.
HeapIndex heap_size() const
Returns the number of elements in the heap.
bool NextSolution() final
Virtual methods that must be implemented by derived classes.
bool NextSolution() final
Virtual methods that must be implemented by derived classes.
void Initialize()
Initializes the Guided Tabu Search algorithm.
bool NextSolution(absl::Span< const SubsetIndex > focus) final
bool NextSolution() final
Virtual methods that must be implemented by derived classes.
bool NextSolution() final
Virtual methods that must be implemented by derived classes.
bool NextSolution() final
Virtual methods that must be implemented by derived classes.
A helper class used to store the decisions made during a search.
void Recompute(ConsistencyLevel target_consistency)
Recomputes the invariant to the given consistency level.
const SubsetToIntVector & num_free_elements() const
const std::vector< SetCoverDecision > & trace() const
Returns the vector of the decisions which have led to the current solution.
void LoadSolution(const SubsetBoolVector &solution)
Loads the solution and recomputes the data in the invariant.
bool Deselect(SubsetIndex subset, ConsistencyLevel consistency)
const SubsetBoolVector & is_selected() const
Returns the subset assignment vector.
SetCoverModel * model() const
Returns the weighted set covering model to which the state applies.
bool Select(SubsetIndex subset, ConsistencyLevel consistency)
const ElementToIntVector & coverage() const
Returns vector containing number of subsets covering each element.
BaseInt ComputeNumFreeElements(SubsetIndex subset) const
Computes the number of free (uncovered) elements in the given subset.
Cost cost() const
Returns the cost of current solution.
BaseInt num_uncovered_elements() const
Returns the number of uncovered elements.
util_intops::StrongIntRange< ElementIndex > ElementRange() const
const SparseRowView & rows() const
Row view of the set covering problem.
const SubsetCostVector & subset_costs() const
Vector of costs for each subset.
const SparseColumnView & columns() const
Column view of the set covering problem.
std::vector< SubsetIndex > all_subsets() const
Returns the list of indices for all the subsets in the model.
SetCoverInvariant::ConsistencyLevel consistency_level_
The consistency needed by the solution generator.
int64_t max_iterations() const
Returns the maximum number of iterations.
absl::Duration run_time_
run_time_ is an abstract duration for the time spent in NextSolution().
Cost cost() const
Returns the current cost of the solution in the invariant.
bool NextSolution() final
Virtual methods that must be implemented by derived classes.
bool NextSolution() final
Virtual methods that must be implemented by derived classes.
constexpr Fractional kInfinity
Infinity for type Fractional.
Definition lp_types.h:87
End of the interface. Below is the implementation.
dual_gradient T(y - `dual_solution`) class DiagonalTrustRegionProblemFromQp
In SWIG mode, we don't want anything besides these top-level includes.
constexpr SubsetIndex kNotFound(-1)
static constexpr double kInfinity
util_intops::StrongVector< SubsetIndex, Cost > SubsetCostVector
Definition base_types.h:58
SetCoverInvariant::ConsistencyLevel CL
std::vector< SubsetIndex > ClearRandomSubsets(BaseInt num_subsets, SetCoverInvariant *inv)
The consistency level is maintained up to kCostAndCoverage.
util_intops::StrongVector< SubsetIndex, BaseInt > SubsetToIntVector
Definition base_types.h:65
static constexpr Cost kMaxPossibleCost
std::vector< SubsetIndex > ClearMostCoveredElements(BaseInt max_num_subsets, SetCoverInvariant *inv)
The consistency level is maintained up to kCostAndCoverage.
util_intops::StrongVector< ElementIndex, BaseInt > ElementToIntVector
Definition base_types.h:64
util_intops::StrongVector< SubsetIndex, bool > SubsetBoolVector
Definition base_types.h:71
void RadixSort(absl::Span< T > values, int num_bits=sizeof(T) *8)
Definition radix_sort.h:270
util_intops::StrongVector< SubsetIndex, SparseColumn > SparseColumnView
Views of the sparse vectors.
Definition base_types.h:68
double Cost
Basic non-strict type for cost. The speed penalty for using double is ~2%.
Definition base_types.h:32
util_intops::StrongVector< ElementIndex, SparseRow > SparseRowView
Definition base_types.h:69
trees with all degrees equal w the current value of int max_iterations