Google OR-Tools v9.12
a fast and portable software suite for combinatorial optimization
Loading...
Searching...
No Matches
set_cover_heuristics.cc
Go to the documentation of this file.
1// Copyright 2010-2025 Google LLC
2// Licensed under the Apache License, Version 2.0 (the "License");
3// you may not use this file except in compliance with the License.
4// You may obtain a copy of the License at
5//
6// http://www.apache.org/licenses/LICENSE-2.0
7//
8// Unless required by applicable law or agreed to in writing, software
9// distributed under the License is distributed on an "AS IS" BASIS,
10// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11// See the License for the specific language governing permissions and
12// limitations under the License.
13
15
16#include <algorithm>
17#include <climits>
18#include <cstdint>
19#include <limits>
20#include <numeric>
21#include <utility>
22#include <vector>
23
24#include "absl/base/casts.h"
25#include "absl/log/check.h"
26#include "absl/numeric/bits.h"
27#include "absl/random/distributions.h"
28#include "absl/random/random.h"
29#include "absl/types/span.h"
34
35namespace operations_research {
36
37constexpr SubsetIndex kNotFound(-1);
38static constexpr Cost kMaxPossibleCost = std::numeric_limits<Cost>::max();
39static constexpr double kInfinity = std::numeric_limits<float>::infinity();
40
41namespace {
42SubsetBoolVector MakeBoolVector(absl::Span<const SubsetIndex> focus,
43 SubsetIndex size) {
44 SubsetBoolVector result(SubsetIndex(size), false);
45 for (const SubsetIndex subset : focus) {
46 result[subset] = true;
47 }
48 return result;
49}
50} // anonymous namespace
51
53
54// TrivialSolutionGenerator.
55
57 return NextSolution(inv_->model()->all_subsets());
58}
59
61 absl::Span<const SubsetIndex> focus) {
62 const SubsetIndex num_subsets(inv_->model()->num_subsets());
63 SubsetBoolVector choices(num_subsets, false);
64 for (const SubsetIndex subset : focus) {
65 choices[subset] = true;
66 }
67 inv_->LoadSolution(choices);
68 inv_->Recompute(CL::kCostAndCoverage);
69 return true;
70}
71
72// RandomSolutionGenerator.
73
75 return NextSolution(inv_->model()->all_subsets());
76}
77
79 const std::vector<SubsetIndex>& focus) {
80 inv_->ClearTrace();
81 std::vector<SubsetIndex> shuffled = focus;
82 std::shuffle(shuffled.begin(), shuffled.end(), absl::BitGen());
83 for (const SubsetIndex subset : shuffled) {
84 if (inv_->is_selected()[subset]) continue;
85 if (inv_->num_free_elements()[subset] != 0) {
86 inv_->Select(subset, CL::kFreeAndUncovered);
87 }
88 }
89 inv_->CompressTrace();
90 DCHECK(inv_->CheckConsistency(CL::kFreeAndUncovered));
91 return true;
92}
93
94// GreedySolutionGenerator.
95
97 return NextSolution(inv_->model()->all_subsets(),
98 inv_->model()->subset_costs());
99}
100
102 absl::Span<const SubsetIndex> focus) {
103 return NextSolution(focus, inv_->model()->subset_costs());
104}
105
106bool GreedySolutionGenerator::NextSolution(absl::Span<const SubsetIndex> focus,
107 const SubsetCostVector& costs) {
108 DCHECK(inv_->CheckConsistency(CL::kCostAndCoverage));
109 inv_->Recompute(CL::kFreeAndUncovered);
110 inv_->ClearTrace();
111 std::vector<std::pair<float, SubsetIndex::ValueType>> subset_priorities;
112 DVLOG(1) << "focus.size(): " << focus.size();
113 subset_priorities.reserve(focus.size());
114 for (const SubsetIndex subset : focus) {
115 if (!inv_->is_selected()[subset] &&
116 inv_->num_free_elements()[subset] != 0) {
117 // NOMUTANTS -- reason, for C++
118 const float priority = inv_->num_free_elements()[subset] / costs[subset];
119 subset_priorities.push_back({priority, subset.value()});
120 }
121 }
122 // The priority queue maintains the maximum number of elements covered by unit
123 // of cost. We chose 16 as the arity of the heap after some testing.
124 // TODO(user): research more about the best value for Arity.
126 subset_priorities, inv_->model()->num_subsets());
127 while (!pq.IsEmpty()) {
128 const SubsetIndex best_subset(pq.TopIndex());
129 pq.Pop();
130 inv_->Select(best_subset, CL::kFreeAndUncovered);
131 // NOMUTANTS -- reason, for C++
132 if (inv_->num_uncovered_elements() == 0) break;
133 for (IntersectingSubsetsIterator it(*inv_->model(), best_subset);
134 !it.at_end(); ++it) {
135 const SubsetIndex subset = *it;
136 const BaseInt marginal_impact(inv_->num_free_elements()[subset]);
137 if (marginal_impact > 0) {
138 const float priority = marginal_impact / costs[subset];
139 pq.Update({priority, subset.value()});
140 } else {
141 pq.Remove(subset.value());
142 }
143 }
144 DVLOG(1) << "Cost = " << inv_->cost()
145 << " num_uncovered_elements = " << inv_->num_uncovered_elements();
146 }
147 inv_->CompressTrace();
148 // Don't expect pq to be empty, because of the break in the while loop.
149 DCHECK(inv_->CheckConsistency(CL::kFreeAndUncovered));
150 return true;
151}
152
153namespace {
154// This class gathers statistics about the usefulness of the ratio computation.
155class ComputationUsefulnessStats {
156 public:
157 // If is_active is true, the stats are gathered, otherwise there is no
158 // overhead, in particular no memory allocation.
159 explicit ComputationUsefulnessStats(const SetCoverInvariant* inv,
160 bool is_active)
161 : inv_(inv),
162 is_active_(is_active),
163 num_ratio_computations_(),
164 num_useless_computations_(),
166 if (is_active) {
167 BaseInt num_subsets = inv_->model()->num_subsets();
168 num_ratio_computations_.assign(num_subsets, 0);
169 num_useless_computations_.assign(num_subsets, 0);
170 num_free_elements_.assign(num_subsets, -1); // -1 means not computed yet.
171 }
172 }
173
174 // To be called each time a num_free_elements is computed.
175 void Update(SubsetIndex subset, BaseInt new_num_free_elements) {
176 if (is_active_) {
177 if (new_num_free_elements == num_free_elements_[subset]) {
178 ++num_useless_computations_[subset];
179 }
180 ++num_ratio_computations_[subset];
181 num_free_elements_[subset] = new_num_free_elements;
182 }
183 }
184
185 // To be called at the end of the algorithm.
186 void PrintStats() {
187 if (is_active_) {
188 BaseInt num_subsets_considered = 0;
189 BaseInt num_ratio_updates = 0;
190 BaseInt num_wasted_ratio_updates = 0;
191 for (const SubsetIndex subset : inv_->model()->SubsetRange()) {
192 if (num_ratio_computations_[subset] > 0) {
193 ++num_subsets_considered;
194 if (num_ratio_computations_[subset] > 1) {
195 num_ratio_updates += num_ratio_computations_[subset] - 1;
196 }
197 }
198 num_wasted_ratio_updates += num_useless_computations_[subset];
199 }
200 LOG(INFO) << "num_subsets_considered = " << num_subsets_considered;
201 LOG(INFO) << "num_ratio_updates = " << num_ratio_updates;
202 LOG(INFO) << "num_wasted_ratio_updates = " << num_wasted_ratio_updates;
203 }
204 }
205
206 private:
207 // The invariant on which the stats are performed.
208 const SetCoverInvariant* inv_;
209
210 // Whether the stats are active or not.
211 bool is_active_;
212
213 // Number of times the ratio was computed for a subset.
214 SubsetToIntVector num_ratio_computations_;
215
216 // Number of times the ratio was computed for a subset and was the same as the
217 // previous one.
218 SubsetToIntVector num_useless_computations_;
219
220 // The value num_free_elements_ for the subset the last time it was computed.
221 // Used to detect useless computations.
222 SubsetToIntVector num_free_elements_;
223};
224
225namespace {
226// Clearly not the fastest radix sort, but its complexity is the right one.
227// Furthermore:
228// - it is as memory-safe as std::vectors can be (no pointers),
229// - no multiplication is performed,
230// - it is stable
231// - it handles the cases of signed and unsigned integers automatically,
232// - bounds on the keys are optional, or they can be computed automatically,
233// - based on those bounds, the number of passes is automatically computed,
234// - a payload is associated to each key, and it is sorted in the same way
235// as the keys. This payload can be a vector of integers or a vector of
236// pointers to larger objects.
237// TODO(user): Make it an independent library.
238// - add support for decreasing counting sort,
239// - make payloads optional,
240// - support floats and doubles,
241// - improve performance.
242// - use vectorized code.
243namespace internal {
244uint32_t RawBits(uint32_t x) { return x; } // NOLINT
245uint32_t RawBits(int x) { return absl::bit_cast<uint32_t>(x); } // NOLINT
246uint32_t RawBits(float x) { return absl::bit_cast<uint32_t>(x); } // NOLINT
247uint64_t RawBits(uint64_t x) { return x; } // NOLINT
248uint64_t RawBits(int64_t x) { return absl::bit_cast<uint64_t>(x); } // NOLINT
249uint64_t RawBits(double x) { return absl::bit_cast<uint64_t>(x); } // NOLINT
250
251inline uint32_t Bucket(uint32_t x, uint32_t shift, uint32_t radix) {
252 DCHECK_EQ(0, radix & (radix - 1)); // Must be a power of two.
253 // NOMUTANTS -- a way to compute the remainder of a division when radix is a
254 // power of two.
255 return (RawBits(x) >> shift) & (radix - 1);
256}
257
258template <typename T>
259int NumBitsToRepresent(T value) {
260 DCHECK_LE(absl::countl_zero(RawBits(value)), sizeof(T) * CHAR_BIT);
261 return sizeof(T) * CHAR_BIT - absl::countl_zero(RawBits(value));
262}
263
264template <typename Key, typename Counter>
265void UpdateCounters(uint32_t radix, int shift, std::vector<Key>& keys,
266 std::vector<Counter>& counts) {
267 std::fill(counts.begin(), counts.end(), 0);
268 DCHECK_EQ(counts[0], 0);
269 DCHECK_EQ(0, radix & (radix - 1)); // Must be a power of two.
270 const auto num_keys = keys.size();
271 for (int64_t i = 0; i < num_keys; ++i) {
272 ++counts[Bucket(keys[i], shift, radix)];
273 }
274 // Now the counts will contain the sum of the sizes below and including each
275 // bucket.
276 for (uint64_t i = 1; i < radix; ++i) {
277 counts[i] += counts[i - 1];
278 }
279}
280
281template <typename Key, typename Payload, typename Counter>
282void IncreasingCountingSort(uint32_t radix, int shift, std::vector<Key>& keys,
283 std::vector<Payload>& payloads,
284 std::vector<Key>& scratch_keys,
285 std::vector<Payload>& scratch_payloads,
286 std::vector<Counter>& counts) {
287 DCHECK_EQ(0, radix & (radix - 1)); // Must be a power of two.
288 UpdateCounters(radix, shift, keys, counts);
289 const auto num_keys = keys.size();
290 // In this order for stability.
291 for (int64_t i = num_keys - 1; i >= 0; --i) {
292 Counter c = --counts[Bucket(keys[i], shift, radix)];
293 scratch_keys[c] = keys[i];
294 scratch_payloads[c] = payloads[i];
295 }
296 std::swap(keys, scratch_keys);
297 std::swap(payloads, scratch_payloads);
298}
299} // namespace internal
300
301template <typename Key, typename Payload>
302void RadixSort(int radix_log, std::vector<Key>& keys,
303 std::vector<Payload>& payloads, Key /*min_key*/, Key max_key) {
304 // range_log is the number of bits necessary to represent the max_key
305 // We could as well use max_key - min_key, but it is more expensive to
306 // compute.
307 const int range_log = internal::NumBitsToRepresent(max_key);
308 DCHECK_EQ(internal::NumBitsToRepresent(0), 0);
309 DCHECK_LE(internal::NumBitsToRepresent(std::numeric_limits<Key>::max()),
310 std::numeric_limits<Key>::digits);
311 const int radix = 1 << radix_log; // By definition.
312 std::vector<uint32_t> counters(radix, 0);
313 std::vector<Key> scratch_keys(keys.size());
314 std::vector<Payload> scratch_payloads(payloads.size());
315 for (int shift = 0; shift < range_log; shift += radix_log) {
316 DCHECK_LE(1 << shift, max_key);
317 internal::IncreasingCountingSort(radix, shift, keys, payloads, scratch_keys,
318 scratch_payloads, counters);
319 }
320}
321} // namespace
322
323std::vector<ElementIndex> GetUncoveredElementsSortedByDegree(
324 const SetCoverInvariant* const inv) {
325 const BaseInt num_elements = inv->model()->num_elements();
326 std::vector<ElementIndex> degree_sorted_elements; // payloads
327 degree_sorted_elements.reserve(num_elements);
328 std::vector<BaseInt> keys;
329 keys.reserve(num_elements);
330 const SparseRowView& rows = inv->model()->rows();
331 BaseInt max_degree = 0;
332 for (const ElementIndex element : inv->model()->ElementRange()) {
333 // Already covered elements should not be considered.
334 if (inv->coverage()[element] != 0) continue;
335 degree_sorted_elements.push_back(element);
336 const BaseInt size = rows[element].size();
337 max_degree = std::max(max_degree, size);
338 keys.push_back(size);
339 }
340 RadixSort(11, keys, degree_sorted_elements, 1, max_degree);
341#ifndef NDEBUG
342 BaseInt prev_key = -1;
343 for (const auto key : keys) {
344 DCHECK_LE(prev_key, key);
345 prev_key = key;
346 }
347#endif
348 return degree_sorted_elements;
349}
350
351// Computes: d = c1 * n2 - c2 * n1. This is an easy way to compare two ratios
352// without having to use a full division.
353// If d < 0 then c1 / n1 < c2 / n2,
354// If d == 0 then c1 / n1 == c2 / n2, etc...
355// NOTE(user): This can be implemented using SSE2 with a gain of 5-10%.
356double Determinant(Cost c1, BaseInt n1, Cost c2, BaseInt n2) {
357 return c1 * n2 - n1 * c2;
358}
359} // namespace
360
361// ElementDegreeSolutionGenerator.
362// There is no need to use a priority queue here, as the ratios are computed
363// on-demand. Also elements are sorted based on degree once and for all and
364// moved past when the elements become already covered.
366 const SubsetIndex num_subsets(inv_->model()->num_subsets());
367 const SubsetBoolVector in_focus(num_subsets, true);
368 return NextSolution(in_focus, inv_->model()->subset_costs());
369}
370
372 absl::Span<const SubsetIndex> focus) {
373 const SubsetIndex num_subsets(inv_->model()->num_subsets());
374 const SubsetBoolVector in_focus = MakeBoolVector(focus, num_subsets);
375 return NextSolution(in_focus, inv_->model()->subset_costs());
376}
377
379 absl::Span<const SubsetIndex> focus, const SubsetCostVector& costs) {
380 const SubsetIndex num_subsets(inv_->model()->num_subsets());
381 const SubsetBoolVector in_focus = MakeBoolVector(focus, num_subsets);
382 return NextSolution(in_focus, costs);
383}
384
386 const SubsetBoolVector& in_focus, const SubsetCostVector& costs) {
387 DVLOG(1) << "Entering ElementDegreeSolutionGenerator::NextSolution";
388 inv_->Recompute(CL::kFreeAndUncovered);
389 // Create the list of all the indices in the problem.
390 std::vector<ElementIndex> degree_sorted_elements =
391 GetUncoveredElementsSortedByDegree(inv_);
392 ComputationUsefulnessStats stats(inv_, false);
393 const SparseRowView& rows = inv_->model()->rows();
394 for (const ElementIndex element : degree_sorted_elements) {
395 // No need to cover an element that is already covered.
396 if (inv_->coverage()[element] != 0) continue;
397 SubsetIndex best_subset(-1);
398 Cost best_subset_cost = 0.0;
399 BaseInt best_subset_num_free_elts = 0;
400 for (const SubsetIndex subset : rows[element]) {
401 if (!in_focus[subset]) continue;
402 const BaseInt num_free_elements = inv_->num_free_elements()[subset];
403 stats.Update(subset, num_free_elements);
404 const Cost det = Determinant(costs[subset], num_free_elements,
405 best_subset_cost, best_subset_num_free_elts);
406 // Compare R = costs[subset] / num_free_elements with
407 // B = best_subset_cost / best_subset_num_free_elts.
408 // If R < B, we choose subset.
409 // If the ratios are the same, we choose the subset with the most free
410 // elements.
411 // TODO(user): What about adding a tolerance for equality, which could
412 // further favor larger columns?
413 if (det < 0 ||
414 (det == 0 && num_free_elements > best_subset_num_free_elts)) {
415 best_subset = subset;
416 best_subset_cost = costs[subset];
417 best_subset_num_free_elts = num_free_elements;
418 }
419 }
420 if (best_subset.value() == -1) {
421 LOG(WARNING) << "Best subset not found. Algorithmic error or invalid "
422 "input.";
423 continue;
424 }
425 DCHECK_NE(best_subset.value(), -1);
426 inv_->Select(best_subset, CL::kFreeAndUncovered);
427 DVLOG(1) << "Cost = " << inv_->cost()
428 << " num_uncovered_elements = " << inv_->num_uncovered_elements();
429 }
430 inv_->CompressTrace();
431 stats.PrintStats();
432 DCHECK(inv_->CheckConsistency(CL::kFreeAndUncovered));
433 return true;
434}
435
436// LazyElementDegreeSolutionGenerator.
437// There is no need to use a priority queue here, as the ratios are computed
438// on-demand. Also elements are sorted based on degree once and for all and
439// moved past when the elements become already covered.
441 const SubsetIndex num_subsets(inv_->model()->num_subsets());
442 const SubsetBoolVector in_focus(num_subsets, true);
443 return NextSolution(in_focus, inv_->model()->subset_costs());
444}
445
447 absl::Span<const SubsetIndex> focus) {
448 const SubsetIndex num_subsets(inv_->model()->num_subsets());
449 const SubsetBoolVector in_focus = MakeBoolVector(focus, num_subsets);
450 return NextSolution(in_focus, inv_->model()->subset_costs());
451}
452
454 absl::Span<const SubsetIndex> focus, const SubsetCostVector& costs) {
455 const SubsetIndex num_subsets(inv_->model()->num_subsets());
456 const SubsetBoolVector in_focus = MakeBoolVector(focus, num_subsets);
457 return NextSolution(in_focus, costs);
458}
459
461 const SubsetBoolVector& in_focus, const SubsetCostVector& costs) {
462 DVLOG(1) << "Entering LazyElementDegreeSolutionGenerator::NextSolution";
463 DCHECK(inv_->CheckConsistency(CL::kCostAndCoverage));
464 // Create the list of all the indices in the problem.
465 std::vector<ElementIndex> degree_sorted_elements =
466 GetUncoveredElementsSortedByDegree(inv_);
467 const SparseRowView& rows = inv_->model()->rows();
468 const SparseColumnView& columns = inv_->model()->columns();
469 ComputationUsefulnessStats stats(inv_, false);
470 for (const ElementIndex element : degree_sorted_elements) {
471 // No need to cover an element that is already covered.
472 if (inv_->coverage()[element] != 0) continue;
473 SubsetIndex best_subset(-1);
474 Cost best_subset_cost = 0.0; // Cost of the best subset.
475 BaseInt best_subset_num_free_elts = 0;
476 for (const SubsetIndex subset : rows[element]) {
477 if (!in_focus[subset]) continue;
478 const Cost filtering_det =
479 Determinant(costs[subset], columns[subset].size(), best_subset_cost,
480 best_subset_num_free_elts);
481 // If the ratio with the initial number elements is greater, we skip this
482 // subset.
483 if (filtering_det > 0) continue;
484 const BaseInt num_free_elements = inv_->ComputeNumFreeElements(subset);
485 stats.Update(subset, num_free_elements);
486 const Cost det = Determinant(costs[subset], num_free_elements,
487 best_subset_cost, best_subset_num_free_elts);
488 // Same as ElementDegreeSolutionGenerator.
489 if (det < 0 ||
490 (det == 0 && num_free_elements > best_subset_num_free_elts)) {
491 best_subset = subset;
492 best_subset_cost = costs[subset];
493 best_subset_num_free_elts = num_free_elements;
494 }
495 }
496 DCHECK_NE(best_subset, SubsetIndex(-1));
497 inv_->Select(best_subset, CL::kCostAndCoverage);
498 DVLOG(1) << "Cost = " << inv_->cost()
499 << " num_uncovered_elements = " << inv_->num_uncovered_elements();
500 }
501 inv_->CompressTrace();
502 DCHECK(inv_->CheckConsistency(CL::kCostAndCoverage));
503 stats.PrintStats();
504 return true;
505}
506
507// SteepestSearch.
508
509void SteepestSearch::UpdatePriorities(absl::Span<const SubsetIndex>) {}
510
511bool SteepestSearch::NextSolution(int num_iterations) {
512 const SubsetIndex num_subsets(inv_->model()->num_subsets());
513 const SubsetBoolVector in_focus(num_subsets, true);
514 return NextSolution(in_focus, inv_->model()->subset_costs(), num_iterations);
515}
516
517bool SteepestSearch::NextSolution(absl::Span<const SubsetIndex> focus,
518 int num_iterations) {
519 const SubsetIndex num_subsets(inv_->model()->num_subsets());
520 const SubsetBoolVector in_focus = MakeBoolVector(focus, num_subsets);
521 return NextSolution(focus, inv_->model()->subset_costs(), num_iterations);
522}
523
524bool SteepestSearch::NextSolution(absl::Span<const SubsetIndex> focus,
525 const SubsetCostVector& costs,
526 int num_iterations) {
527 const SubsetIndex num_subsets(inv_->model()->num_subsets());
528 const SubsetBoolVector in_focus = MakeBoolVector(focus, num_subsets);
529 return NextSolution(in_focus, costs, num_iterations);
530}
531
533 const SubsetCostVector& costs,
534 int num_iterations) {
535 DCHECK(inv_->CheckConsistency(CL::kCostAndCoverage));
536 inv_->Recompute(CL::kFreeAndUncovered);
537 DVLOG(1) << "Entering SteepestSearch::NextSolution, num_iterations = "
538 << num_iterations;
539 // Return false if inv_ contains no solution.
540 // TODO(user): This should be relaxed for partial solutions.
541 if (inv_->num_uncovered_elements() != 0) {
542 return false;
543 }
544
545 // Create priority queue with cost of using a subset, by decreasing order.
546 // Do it only for selected AND removable subsets.
547 std::vector<std::pair<float, SubsetIndex::ValueType>> subset_priorities;
548 subset_priorities.reserve(in_focus.size());
549 for (const SetCoverDecision& decision : inv_->trace()) {
550 const SubsetIndex subset = decision.subset();
551 if (in_focus[subset] && inv_->is_selected()[subset] &&
552 inv_->ComputeIsRedundant(subset)) {
553 const float delta_per_element = costs[subset];
554 subset_priorities.push_back({delta_per_element, subset.value()});
555 }
556 }
557 DVLOG(1) << "subset_priorities.size(): " << subset_priorities.size();
558 AdjustableKAryHeap<float, SubsetIndex::ValueType, 16, true> pq(
559 subset_priorities, inv_->model()->num_subsets());
560 for (int iteration = 0; iteration < num_iterations && !pq.IsEmpty();
561 ++iteration) {
562 const SubsetIndex best_subset(pq.TopIndex());
563 pq.Pop();
564 DCHECK(inv_->is_selected()[best_subset]);
565 DCHECK(inv_->ComputeIsRedundant(best_subset));
566 DCHECK_GT(costs[best_subset], 0.0);
567 inv_->Deselect(best_subset, CL::kFreeAndUncovered);
568
569 for (IntersectingSubsetsIterator it(*inv_->model(), best_subset);
570 !it.at_end(); ++it) {
571 const SubsetIndex subset = *it;
572 if (!inv_->ComputeIsRedundant(subset)) {
573 pq.Remove(subset.value());
574 }
575 }
576 DVLOG(1) << "Cost = " << inv_->cost();
577 }
578 inv_->CompressTrace();
579 // TODO(user): change this to enable working on partial solutions.
580 DCHECK_EQ(inv_->num_uncovered_elements(), 0);
581 DCHECK(inv_->CheckConsistency(CL::kFreeAndUncovered));
582 return true;
583}
584
585// Guided Tabu Search
586
588 const SubsetIndex num_subsets(inv_->model()->num_subsets());
589 const SubsetCostVector& subset_costs = inv_->model()->subset_costs();
590 times_penalized_.assign(num_subsets.value(), 0);
591 augmented_costs_ = subset_costs;
592 utilities_ = subset_costs;
593}
594
595namespace {
596bool FlipCoin() {
597 // TODO(user): use STL for repeatable testing.
598 return absl::Bernoulli(absl::BitGen(), 0.5);
599}
600} // namespace
601
602void GuidedTabuSearch::UpdatePenalties(absl::Span<const SubsetIndex> focus) {
603 const SubsetCostVector& subset_costs = inv_->model()->subset_costs();
604 Cost max_utility = -1.0;
605 for (const SubsetIndex subset : focus) {
606 if (inv_->is_selected()[subset]) {
607 max_utility = std::max(max_utility, utilities_[subset]);
608 }
609 }
610 const double epsilon_utility = epsilon_ * max_utility;
611 for (const SubsetIndex subset : focus) {
612 if (inv_->is_selected()[subset]) {
613 const double utility = utilities_[subset];
614 if ((max_utility - utility <= epsilon_utility) && FlipCoin()) {
615 ++times_penalized_[subset];
616 const int times_penalized = times_penalized_[subset];
617 const Cost cost =
618 subset_costs[subset]; // / columns[subset].size().value();
619 utilities_[subset] = cost / (1 + times_penalized);
620 augmented_costs_[subset] =
621 cost * (1 + penalty_factor_ * times_penalized);
622 }
623 }
624 }
625}
626
627bool GuidedTabuSearch::NextSolution(int num_iterations) {
628 return NextSolution(inv_->model()->all_subsets(), num_iterations);
629}
630
631bool GuidedTabuSearch::NextSolution(absl::Span<const SubsetIndex> focus,
632 int num_iterations) {
633 DCHECK(inv_->CheckConsistency(CL::kFreeAndUncovered));
634 DVLOG(1) << "Entering GuidedTabuSearch::NextSolution, num_iterations = "
635 << num_iterations;
636 const SubsetCostVector& subset_costs = inv_->model()->subset_costs();
637 Cost best_cost = inv_->cost();
638 SubsetBoolVector best_choices = inv_->is_selected();
639 Cost augmented_cost =
640 std::accumulate(augmented_costs_.begin(), augmented_costs_.end(), 0.0);
641 BaseInt trace_size = inv_->trace().size();
642 for (int iteration = 0; iteration < num_iterations; ++iteration) {
643 if (inv_->trace().size() > 2 * trace_size) {
644 inv_->CompressTrace();
645 trace_size = inv_->trace().size();
646 }
647 Cost best_delta = kMaxPossibleCost;
648 SubsetIndex best_subset = kNotFound;
649 for (const SubsetIndex subset : focus) {
650 const Cost delta = augmented_costs_[subset];
651 DVLOG(1) << "Subset, " << subset.value() << ", at ,"
652 << inv_->is_selected()[subset] << ", delta =, " << delta
653 << ", best_delta =, " << best_delta;
654 if (inv_->is_selected()[subset]) {
655 // Try to remove subset from solution, if the gain from removing is
656 // worth it:
657 if (-delta < best_delta &&
658 // and it can be removed, and
659 inv_->ComputeIsRedundant(subset) &&
660 // it is not Tabu OR decreases the actual cost (aspiration):
661 (!tabu_list_.Contains(subset) ||
662 inv_->cost() - subset_costs[subset] < best_cost)) {
663 best_delta = -delta;
664 best_subset = subset;
665 }
666 } else {
667 // Try to use subset in solution, if its penalized delta is good.
668 if (delta < best_delta) {
669 // The limit kMaxPossibleCost is ill-defined,
670 // there is always a best_subset. Is it intended?
671 if (!tabu_list_.Contains(subset)) {
672 best_delta = delta;
673 best_subset = subset;
674 }
675 }
676 }
677 }
678 if (best_subset == kNotFound) { // Local minimum reached.
679 inv_->LoadSolution(best_choices);
680 return true;
681 }
682 DVLOG(1) << "Best subset, " << best_subset.value() << ", at ,"
683 << inv_->is_selected()[best_subset] << ", best_delta = ,"
684 << best_delta;
685
686 UpdatePenalties(focus);
687 tabu_list_.Add(best_subset);
688 inv_->Flip(best_subset, CL::kFreeAndUncovered);
689 // TODO(user): make the cost computation incremental.
690 augmented_cost =
691 std::accumulate(augmented_costs_.begin(), augmented_costs_.end(), 0.0);
692
693 DVLOG(1) << "Iteration, " << iteration << ", current cost = ,"
694 << inv_->cost() << ", best cost = ," << best_cost
695 << ", penalized cost = ," << augmented_cost;
696 if (inv_->cost() < best_cost) {
697 LOG(INFO) << "Updated best cost, " << "Iteration, " << iteration
698 << ", current cost = ," << inv_->cost() << ", best cost = ,"
699 << best_cost << ", penalized cost = ," << augmented_cost;
700 best_cost = inv_->cost();
701 best_choices = inv_->is_selected();
702 }
703 }
704 inv_->LoadSolution(best_choices);
705 inv_->CompressTrace();
706 DCHECK(inv_->CheckConsistency(CL::kFreeAndUncovered));
707 return true;
708}
709
710// Guided Local Search
712 const SparseColumnView& columns = inv_->model()->columns();
713 penalties_.assign(columns.size(), 0);
714 penalization_factor_ = alpha_ * inv_->cost() * 1.0 / (columns.size());
715 for (const SetCoverDecision& decision : inv_->trace()) {
716 const SubsetIndex subset = decision.subset();
717 if (inv_->is_selected()[subset]) {
718 utility_heap_.Insert(
719 {static_cast<float>(inv_->model()->subset_costs()[subset] /
720 (1 + penalties_[subset])),
721 subset.value()});
722 }
723 }
724}
725
726bool GuidedLocalSearch::NextSolution(int num_iterations) {
727 return NextSolution(inv_->model()->all_subsets(), num_iterations);
728}
729
730Cost GuidedLocalSearch::ComputeDelta(SubsetIndex subset) const {
731 const float delta = (penalization_factor_ * penalties_[subset] +
732 inv_->model()->subset_costs()[subset]);
733 if (inv_->is_selected()[subset] && inv_->ComputeIsRedundant(subset)) {
734 return delta;
735 } else if (!inv_->is_selected()[subset]) {
736 return -delta;
737 }
738 return kInfinity;
739}
740
741bool GuidedLocalSearch::NextSolution(absl::Span<const SubsetIndex> focus,
742 int num_iterations) {
743 inv_->Recompute(CL::kRedundancy);
744 Cost best_cost = inv_->cost();
745 SubsetBoolVector best_choices = inv_->is_selected();
746
747 for (const SubsetIndex& subset : focus) {
748 const float delta = ComputeDelta(subset);
749 if (delta < kInfinity) {
750 priority_heap_.Insert({delta, subset.value()});
751 }
752 }
753
754 for (int iteration = 0;
755 !priority_heap_.IsEmpty() && iteration < num_iterations; ++iteration) {
756 // Improve current solution respective to the current penalties.
757 const SubsetIndex best_subset(priority_heap_.TopIndex());
758 if (inv_->is_selected()[best_subset]) {
759 utility_heap_.Insert({0, best_subset.value()});
760 } else {
761 utility_heap_.Insert(
762 {static_cast<float>(inv_->model()->subset_costs()[best_subset] /
763 (1 + penalties_[best_subset])),
764 best_subset.value()});
765 }
766 inv_->Flip(best_subset, CL::kRedundancy); // Flip the best subset.
767 DCHECK(!utility_heap_.IsEmpty());
768
769 // Getting the subset with highest utility. utility_heap_ is not empty,
770 // because we just inserted a pair.
771 const SubsetIndex penalized_subset(utility_heap_.TopIndex());
772 utility_heap_.Pop();
773 ++penalties_[penalized_subset];
774 utility_heap_.Insert(
775 {static_cast<float>(inv_->model()->subset_costs()[penalized_subset] /
776 (1 + penalties_[penalized_subset])),
777 penalized_subset.value()});
778 DCHECK(!utility_heap_.IsEmpty());
779
780 // Get removable subsets (Add them to the heap).
781 for (const SubsetIndex subset : inv_->newly_removable_subsets()) {
782 const float delta_selected = (penalization_factor_ * penalties_[subset] +
783 inv_->model()->subset_costs()[subset]);
784 priority_heap_.Insert({delta_selected, subset.value()});
785 }
786 DCHECK(!priority_heap_.IsEmpty());
787
788 for (const SubsetIndex subset : {penalized_subset, best_subset}) {
789 const float delta = ComputeDelta(subset);
790 if (delta < kInfinity) {
791 priority_heap_.Insert({delta, subset.value()});
792 }
793 }
794 DCHECK(!priority_heap_.IsEmpty());
795
796 // Get new non removable subsets and remove them from the heap.
797 // This is when the priority_heap_ can become empty and end the outer loop
798 // early.
799 for (const SubsetIndex subset : inv_->newly_non_removable_subsets()) {
800 priority_heap_.Remove(subset.value());
801 }
802
803 if (inv_->cost() < best_cost) {
804 best_cost = inv_->cost();
805 best_choices = inv_->is_selected();
806 }
807 }
808 inv_->LoadSolution(best_choices);
809
810 // Improve the solution by removing redundant subsets.
811 for (const SubsetIndex& subset : focus) {
812 if (inv_->is_selected()[subset] && inv_->ComputeIsRedundant(subset))
813 inv_->Deselect(subset, CL::kRedundancy);
814 }
815 DCHECK_EQ(inv_->num_uncovered_elements(), 0);
816 return true;
817}
818
819namespace {
820void SampleSubsets(std::vector<SubsetIndex>* list, BaseInt num_subsets) {
821 num_subsets = std::min<BaseInt>(num_subsets, list->size());
822 CHECK_GE(num_subsets, 0);
823 std::shuffle(list->begin(), list->end(), absl::BitGen());
824 list->resize(num_subsets);
825}
826} // namespace
827
828std::vector<SubsetIndex> ClearRandomSubsets(BaseInt num_subsets,
829 SetCoverInvariant* inv) {
830 return ClearRandomSubsets(inv->model()->all_subsets(), num_subsets, inv);
831}
832
833std::vector<SubsetIndex> ClearRandomSubsets(absl::Span<const SubsetIndex> focus,
834 BaseInt num_subsets,
835 SetCoverInvariant* inv) {
836 num_subsets = std::min<BaseInt>(num_subsets, focus.size());
837 CHECK_GE(num_subsets, 0);
838 std::vector<SubsetIndex> chosen_indices;
839 for (const SubsetIndex subset : focus) {
840 if (inv->is_selected()[subset]) {
841 chosen_indices.push_back(subset);
842 }
843 }
844 SampleSubsets(&chosen_indices, num_subsets);
845 BaseInt num_deselected = 0;
846 for (const SubsetIndex subset : chosen_indices) {
847 inv->Deselect(subset, CL::kCostAndCoverage);
848 ++num_deselected;
849 for (IntersectingSubsetsIterator it(*inv->model(), subset); !it.at_end();
850 ++it) {
851 if (!inv->is_selected()[subset]) continue;
852 inv->Deselect(subset, CL::kCostAndCoverage);
853 ++num_deselected;
854 }
855 // Note that num_deselected may exceed num_subsets by more than 1.
856 if (num_deselected > num_subsets) break;
857 }
858 return chosen_indices;
859}
860
861std::vector<SubsetIndex> ClearMostCoveredElements(BaseInt max_num_subsets,
862 SetCoverInvariant* inv) {
863 return ClearMostCoveredElements(inv->model()->all_subsets(), max_num_subsets,
864 inv);
865}
866
867std::vector<SubsetIndex> ClearMostCoveredElements(
868 absl::Span<const SubsetIndex> focus, BaseInt max_num_subsets,
869 SetCoverInvariant* inv) {
870 // This is the vector we will return.
871 std::vector<SubsetIndex> sampled_subsets;
872
873 const ElementToIntVector& coverage = inv->coverage();
874 const BaseInt num_subsets = inv->model()->num_subsets();
875 const SparseRowView& rows = inv->model()->rows();
876
877 // Collect the sets which have at least one element whose coverage > 1,
878 // even if those sets are not removable.
879 SubsetBoolVector subset_is_collected(num_subsets, false);
880 for (const ElementIndex element : inv->model()->ElementRange()) {
881 if (coverage[element] <= 1) continue;
882 for (const SubsetIndex subset : rows[element]) {
883 if (inv->is_selected()[subset] && !subset_is_collected[subset]) {
884 subset_is_collected[subset] = true;
885 }
886 }
887 }
888
889 // Now intersect with focus: sampled_subsets = focus ⋂ impacted_subsets.
890 // NOTE(user): this might take too long. TODO(user):find another algorithm if
891 // necessary.
892 for (const SubsetIndex subset : focus) {
893 if (subset_is_collected[subset]) {
894 sampled_subsets.push_back(subset);
895 }
896 }
897
898 // Actually *sample* sampled_subset.
899 // TODO(user): find another algorithm if necessary.
900 std::shuffle(sampled_subsets.begin(), sampled_subsets.end(), absl::BitGen());
901 sampled_subsets.resize(
902 std::min<BaseInt>(sampled_subsets.size(), max_num_subsets));
903
904 // Testing has shown that sorting sampled_subsets is not necessary.
905 // Now, un-select the subset in sampled_subsets.
906 for (const SubsetIndex subset : sampled_subsets) {
907 inv->Deselect(subset, CL::kCostAndCoverage);
908 }
909 return sampled_subsets;
910}
911
912} // namespace operations_research
void Update(Aggregate element)
Change the value of an element.
bool IsEmpty() const
True iff the heap is empty.
void Initialize()
Initializes the Guided Local Search algorithm.
void Initialize()
Initializes the Guided Tabu Search algorithm.
bool at_end() const
Returns (true) whether the iterator is at the end.
bool NextSolution()
Returns true if a solution was found.
A helper class used to store the decisions made during a search.
bool ComputeIsRedundant(SubsetIndex subset) const
void Recompute(ConsistencyLevel target_consistency)
Recomputes the invariant to the given consistency level.
const SubsetToIntVector & num_free_elements() const
const std::vector< SetCoverDecision > & trace() const
Returns the vector of the decisions which have led to the current solution.
bool CheckConsistency(ConsistencyLevel consistency) const
Checks the consistency of the invariant at the given consistency level.
const SubsetBoolVector & is_selected() const
Returns the subset assignment vector.
SetCoverModel * model() const
Returns the weighted set covering model to which the state applies.
const ElementToIntVector & coverage() const
Returns vector containing number of subsets covering each element.
BaseInt ComputeNumFreeElements(SubsetIndex subset) const
Computes the number of free (uncovered) elements in the given subset.
void Deselect(SubsetIndex subset, ConsistencyLevel consistency)
BaseInt num_uncovered_elements() const
Returns the number of uncovered elements.
util_intops::StrongIntRange< ElementIndex > ElementRange() const
const SparseRowView & rows() const
Row view of the set covering problem.
const SparseColumnView & columns() const
Column view of the set covering problem.
std::vector< SubsetIndex > all_subsets() const
Returns the list of indices for all the subsets in the model.
void assign(size_type n, const value_type &val)
In SWIG mode, we don't want anything besides these top-level includes.
constexpr SubsetIndex kNotFound(-1)
static constexpr double kInfinity
util_intops::StrongVector< SubsetIndex, Cost > SubsetCostVector
SetCoverInvariant::ConsistencyLevel CL
std::vector< SubsetIndex > ClearRandomSubsets(BaseInt num_subsets, SetCoverInvariant *inv)
The consistency level is maintained up to kCostAndCoverage.
util_intops::StrongVector< SubsetIndex, BaseInt > SubsetToIntVector
static constexpr Cost kMaxPossibleCost
std::vector< SubsetIndex > ClearMostCoveredElements(BaseInt max_num_subsets, SetCoverInvariant *inv)
The consistency level is maintained up to kCostAndCoverage.
util_intops::StrongVector< ElementIndex, BaseInt > ElementToIntVector
util_intops::StrongVector< SubsetIndex, bool > SubsetBoolVector
util_intops::StrongVector< SubsetIndex, SparseColumn > SparseColumnView
double Cost
Basic non-strict type for cost. The speed penalty for using double is ~2%.
void RadixSort(absl::Span< T > values)
Definition radix_sort.h:247
util_intops::StrongVector< ElementIndex, SparseRow > SparseRowView