Google OR-Tools v9.15
a fast and portable software suite for combinatorial optimization
Loading...
Searching...
No Matches
set_cover_heuristics.cc
Go to the documentation of this file.
1// Copyright 2010-2025 Google LLC
2// Licensed under the Apache License, Version 2.0 (the "License");
3// you may not use this file except in compliance with the License.
4// You may obtain a copy of the License at
5//
6// http://www.apache.org/licenses/LICENSE-2.0
7//
8// Unless required by applicable law or agreed to in writing, software
9// distributed under the License is distributed on an "AS IS" BASIS,
10// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11// See the License for the specific language governing permissions and
12// limitations under the License.
13
15
16#include <algorithm>
17#include <climits>
18#include <cstdint>
19#include <limits>
20#include <numeric>
21#include <utility>
22#include <vector>
23
24#include "absl/base/casts.h"
25#include "absl/log/check.h"
26#include "absl/log/log.h"
27#include "absl/numeric/bits.h"
28#include "absl/random/distributions.h"
29#include "absl/random/random.h"
30#include "absl/types/span.h"
36
37namespace operations_research {
38
39constexpr SubsetIndex kNotFound(-1);
40static constexpr Cost kMaxPossibleCost = std::numeric_limits<Cost>::max();
41static constexpr double kInfinity = std::numeric_limits<float>::infinity();
42
44
46 return inv_->CheckConsistency(consistency_level_);
47}
48
49// TrivialSolutionGenerator.
50
52 absl::Span<const SubsetIndex> focus) {
53 StopWatch stop_watch(&run_time_);
54 const SubsetIndex num_subsets(model()->num_subsets());
55 SubsetBoolVector choices(num_subsets, false);
56 for (const SubsetIndex subset : focus) {
57 choices[subset] = true;
58 }
59 inv()->LoadSolution(choices);
60 inv()->Recompute(CL::kCostAndCoverage);
61 return true;
62}
63
64// RandomSolutionGenerator.
65
67 absl::Span<const SubsetIndex> focus) {
68 StopWatch stop_watch(&run_time_);
69 inv()->ClearTrace();
70 std::vector<SubsetIndex> shuffled(focus.begin(), focus.end());
71 std::shuffle(shuffled.begin(), shuffled.end(), absl::BitGen());
72 for (const SubsetIndex subset : shuffled) {
73 if (inv()->is_selected()[subset]) continue;
74 if (inv()->num_free_elements()[subset] != 0) {
75 inv()->Select(subset, CL::kFreeAndUncovered);
76 }
77 }
78 inv()->CompressTrace();
79 DCHECK(inv()->CheckConsistency(CL::kFreeAndUncovered));
80 return true;
81}
82
83// GreedySolutionGenerator.
84
86 absl::Span<const SubsetIndex> focus) {
87 StopWatch stop_watch(&run_time_);
88 DCHECK(inv()->CheckConsistency(CL::kCostAndCoverage));
89 inv()->Recompute(CL::kFreeAndUncovered);
90 inv()->ClearTrace();
91 std::vector<std::pair<float, SubsetIndex::ValueType>> subset_priorities;
92 DVLOG(1) << "focus.size(): " << focus.size();
93 subset_priorities.reserve(focus.size());
94 const SubsetCostVector& costs = model()->subset_costs();
95 for (const SubsetIndex subset : focus) {
96 if (!inv()->is_selected()[subset] &&
97 inv()->num_free_elements()[subset] != 0) {
98 // NOMUTANTS -- reason, for C++
99 const float priority = inv()->num_free_elements()[subset] / costs[subset];
100 subset_priorities.push_back({priority, subset.value()});
101 }
102 }
104 const SparseColumnView& colunms = model()->columns();
105 const SparseRowView& rows = model()->rows();
106 // The priority queue maintains the maximum number of elements covered by unit
107 // of cost. We chose 16 as the arity of the heap after some testing.
108 // TODO(user): research more about the best value for Arity.
110 subset_priorities, num_subsets);
111 SubsetBoolVector subset_seen(num_subsets, false);
112 std::vector<SubsetIndex> subsets_to_remove;
113 subsets_to_remove.reserve(focus.size());
114 while (!pq.IsEmpty() || inv()->num_uncovered_elements() > 0) {
115 VLOG_EVERY_N_SEC(1, 5) << "Queue size: " << pq.heap_size()
116 << ", #uncovered elements: "
118 const SubsetIndex best_subset(pq.TopIndex());
119 pq.Pop();
120 inv()->Select(best_subset, CL::kFreeAndUncovered);
121 // NOMUTANTS -- reason, for C++
122 subset_seen[best_subset] = true;
123 subsets_to_remove.push_back(best_subset);
124 for (const ElementIndex element : colunms[best_subset]) {
125 for (const SubsetIndex subset : rows[element]) {
126 if (subset_seen[subset]) continue;
127 subset_seen[subset] = true;
128 const BaseInt marginal_impact(inv()->num_free_elements()[subset]);
129 if (marginal_impact > 0) {
130 const float priority = marginal_impact / costs[subset];
131 pq.Update({priority, subset.value()});
132 } else {
133 pq.Remove(subset.value());
134 }
135 subsets_to_remove.push_back(subset);
136 }
137 }
138 for (const SubsetIndex subset : subsets_to_remove) {
139 subset_seen[subset] = false;
140 }
141 subsets_to_remove.clear();
142 DVLOG(1) << "Cost = " << inv()->cost()
143 << " num_uncovered_elements = " << inv()->num_uncovered_elements();
144 }
145 inv()->CompressTrace();
146 // Don't expect pq to be empty, because of the break in the while loop.
147 DCHECK(inv()->CheckConsistency(CL::kFreeAndUncovered));
148 return true;
149}
150
151namespace {
152// This class gathers statistics about the usefulness of the ratio computation.
153class ComputationUsefulnessStats {
154 public:
155 // If is_active is true, the stats are gathered, otherwise there is no
156 // overhead, in particular no memory allocation.
157 explicit ComputationUsefulnessStats(const SetCoverInvariant* inv,
158 bool is_active)
159 : inv_(inv),
160 is_active_(is_active),
161 num_ratio_computations_(),
162 num_useless_computations_(),
164 if (is_active) {
165 BaseInt num_subsets = inv_->model()->num_subsets();
166 num_ratio_computations_.assign(num_subsets, 0);
167 num_useless_computations_.assign(num_subsets, 0);
168 num_free_elements_.assign(num_subsets, -1); // -1 means not computed yet.
169 }
170 }
171
172 // To be called each time a num_free_elements is computed.
173 void Update(SubsetIndex subset, BaseInt new_num_free_elements) {
174 if (is_active_) {
175 if (new_num_free_elements == num_free_elements_[subset]) {
176 ++num_useless_computations_[subset];
177 }
178 ++num_ratio_computations_[subset];
179 num_free_elements_[subset] = new_num_free_elements;
180 }
181 }
182
183 // To be called at the end of the algorithm.
184 void PrintStats() {
185 if (is_active_) {
186 BaseInt num_subsets_considered = 0;
187 BaseInt num_ratio_updates = 0;
188 BaseInt num_wasted_ratio_updates = 0;
189 for (const SubsetIndex subset : inv_->model()->SubsetRange()) {
190 if (num_ratio_computations_[subset] > 0) {
191 ++num_subsets_considered;
192 if (num_ratio_computations_[subset] > 1) {
193 num_ratio_updates += num_ratio_computations_[subset] - 1;
194 }
195 }
196 num_wasted_ratio_updates += num_useless_computations_[subset];
197 }
198 LOG(INFO) << "num_subsets_considered = " << num_subsets_considered;
199 LOG(INFO) << "num_ratio_updates = " << num_ratio_updates;
200 LOG(INFO) << "num_wasted_ratio_updates = " << num_wasted_ratio_updates;
201 }
202 }
203
204 private:
205 // The invariant on which the stats are performed.
206 const SetCoverInvariant* inv_;
207
208 // Whether the stats are active or not.
209 bool is_active_;
210
211 // Number of times the ratio was computed for a subset.
212 SubsetToIntVector num_ratio_computations_;
213
214 // Number of times the ratio was computed for a subset and was the same as the
215 // previous one.
216 SubsetToIntVector num_useless_computations_;
217
218 // The value num_free_elements_ for the subset the last time it was computed.
219 // Used to detect useless computations.
220 SubsetToIntVector num_free_elements_;
221};
222} // namespace
223
224namespace {
225// Clearly not the fastest radix sort, but its complexity is the right one.
226// Furthermore:
227// - it is as memory-safe as std::vectors can be (no pointers),
228// - no multiplication is performed,
229// - it is stable
230// - it handles the cases of signed and unsigned integers automatically,
231// - bounds on the keys are optional, or they can be computed automatically,
232// - based on those bounds, the number of passes is automatically computed,
233// - a payload is associated to each key, and it is sorted in the same way
234// as the keys. This payload can be a vector of integers or a vector of
235// pointers to larger objects.
236// TODO(user): Make it an independent library.
237// - add support for decreasing counting sort,
238// - make payloads optional,
239// - support floats and doubles,
240// - improve performance.
241// - use vectorized code.
242namespace internal {
243template <typename T>
244auto RawBits(T x) {
245 if constexpr (sizeof(T) == sizeof(uint32_t)) {
246 return absl::bit_cast<uint32_t>(x);
247 } else {
248 static_assert(sizeof(T) == sizeof(uint64_t));
249 return absl::bit_cast<uint64_t>(x);
250 }
251}
252
253inline uint32_t Bucket(uint32_t x, uint32_t shift, uint32_t radix) {
254 DCHECK_EQ(0, radix & (radix - 1)); // Must be a power of two.
255 // NOMUTANTS -- a way to compute the remainder of a division when radix is a
256 // power of two.
257 return (x >> shift) & (radix - 1);
258}
259
260template <typename T>
261int NumBitsToRepresent(T value) {
262 DCHECK_LE(absl::countl_zero(RawBits(value)), sizeof(T) * CHAR_BIT);
263 return sizeof(T) * CHAR_BIT - absl::countl_zero(RawBits(value));
264}
265
266template <typename Key, typename Counter>
267void UpdateCounters(uint32_t radix, int shift, std::vector<Key>& keys,
268 std::vector<Counter>& counts) {
269 std::fill(counts.begin(), counts.end(), 0);
270 DCHECK_EQ(counts[0], 0);
271 DCHECK_EQ(0, radix & (radix - 1)); // Must be a power of two.
272 const auto num_keys = keys.size();
273 for (int64_t i = 0; i < num_keys; ++i) {
274 ++counts[Bucket(keys[i], shift, radix)];
275 }
276 // Now the counts will contain the sum of the sizes below and including each
277 // bucket.
278 for (uint64_t i = 1; i < radix; ++i) {
279 counts[i] += counts[i - 1];
280 }
281}
282
283template <typename Key, typename Payload, typename Counter>
284void IncreasingCountingSort(uint32_t radix, int shift, std::vector<Key>& keys,
285 std::vector<Payload>& payloads,
286 std::vector<Key>& scratch_keys,
287 std::vector<Payload>& scratch_payloads,
288 std::vector<Counter>& counts) {
289 DCHECK_EQ(0, radix & (radix - 1)); // Must be a power of two.
290 UpdateCounters(radix, shift, keys, counts);
291 const auto num_keys = keys.size();
292 // In this order for stability.
293 for (int64_t i = num_keys - 1; i >= 0; --i) {
294 Counter c = --counts[Bucket(keys[i], shift, radix)];
295 scratch_keys[c] = keys[i];
296 scratch_payloads[c] = payloads[i];
297 }
298 std::swap(keys, scratch_keys);
299 std::swap(payloads, scratch_payloads);
300}
301} // namespace internal
302
303template <typename Key, typename Payload>
304void RadixSort(int radix_log, std::vector<Key>& keys,
305 std::vector<Payload>& payloads, Key /*min_key*/, Key max_key) {
306 // range_log is the number of bits necessary to represent the max_key
307 // We could as well use max_key - min_key, but it is more expensive to
308 // compute.
309 const int range_log = internal::NumBitsToRepresent(max_key);
310 DCHECK_EQ(internal::NumBitsToRepresent(0), 0);
311 DCHECK_LE(internal::NumBitsToRepresent(std::numeric_limits<Key>::max()),
312 std::numeric_limits<Key>::digits);
313 const int radix = 1 << radix_log; // By definition.
314 std::vector<uint32_t> counters(radix, 0);
315 std::vector<Key> scratch_keys(keys.size());
316 std::vector<Payload> scratch_payloads(payloads.size());
317 for (int shift = 0; shift < range_log; shift += radix_log) {
318 DCHECK_LE(1 << shift, max_key);
319 internal::IncreasingCountingSort(radix, shift, keys, payloads, scratch_keys,
320 scratch_payloads, counters);
321 }
322}
323
324// TODO(user): Move this to SetCoverInvariant.
325
326std::vector<ElementIndex> GetUncoveredElementsSortedByDegree(
327 const SetCoverInvariant* const inv) {
328 const BaseInt num_elements = inv->model()->num_elements();
329 std::vector<ElementIndex> degree_sorted_elements; // payloads
330 degree_sorted_elements.reserve(num_elements);
331 std::vector<BaseInt> keys;
332 keys.reserve(num_elements);
333 const SparseRowView& rows = inv->model()->rows();
334 BaseInt max_degree = 0;
335 for (const ElementIndex element : inv->model()->ElementRange()) {
336 // Already covered elements should not be considered.
337 if (inv->coverage()[element] != 0) continue;
338 degree_sorted_elements.push_back(element);
339 const BaseInt size = rows[element].size();
340 max_degree = std::max(max_degree, size);
341 keys.push_back(size);
342 }
343 RadixSort(11, keys, degree_sorted_elements, 1, max_degree);
344#ifndef NDEBUG
345 BaseInt prev_key = -1;
346 for (const auto key : keys) {
347 DCHECK_LE(prev_key, key);
348 prev_key = key;
349 }
350#endif
351 return degree_sorted_elements;
352}
353
354// Computes: d = c1 * n2 - c2 * n1. This is an easy way to compare two ratios
355// without having to use a full division.
356// If d < 0 then c1 / n1 < c2 / n2,
357// If d == 0 then c1 / n1 == c2 / n2, etc...
358// NOTE(user): This can be implemented using SSE2 with a gain of 5-10%.
359double Determinant(Cost c1, BaseInt n1, Cost c2, BaseInt n2) {
360 return c1 * n2 - n1 * c2;
361}
362} // namespace
363
364// ElementDegreeSolutionGenerator.
365// There is no need to use a priority queue here, as the ratios are computed
366// on-demand. Also elements are sorted based on degree once and for all and
367// moved past when the elements become already covered.
368
370 const SubsetBoolVector& in_focus) {
371 StopWatch stop_watch(&run_time_);
372 DVLOG(1) << "Entering ElementDegreeSolutionGenerator::NextSolution";
373 inv()->Recompute(CL::kFreeAndUncovered);
374 // Create the list of all the indices in the problem.
375 std::vector<ElementIndex> degree_sorted_elements =
376 GetUncoveredElementsSortedByDegree(inv());
377 ComputationUsefulnessStats stats(inv(), false);
378 const SparseRowView& rows = model()->rows();
379 const SubsetCostVector& costs = model()->subset_costs();
380 for (const ElementIndex element : degree_sorted_elements) {
381 // No need to cover an element that is already covered.
382 if (inv()->coverage()[element] != 0) continue;
383 SubsetIndex best_subset(-1);
384 Cost best_subset_cost = 0.0;
385 BaseInt best_subset_num_free_elts = 0;
386 for (const SubsetIndex subset : rows[element]) {
387 if (!in_focus[subset]) continue;
388 const BaseInt num_free_elements = inv()->num_free_elements()[subset];
389 stats.Update(subset, num_free_elements);
390 const Cost det = Determinant(costs[subset], num_free_elements,
391 best_subset_cost, best_subset_num_free_elts);
392 // Compare R = costs[subset] / num_free_elements with
393 // B = best_subset_cost / best_subset_num_free_elts.
394 // If R < B, we choose subset.
395 // If the ratios are the same, we choose the subset with the most free
396 // elements.
397 // TODO(user): What about adding a tolerance for equality, which could
398 // further favor larger columns?
399 if (det < 0 ||
400 (det == 0 && num_free_elements > best_subset_num_free_elts)) {
401 best_subset = subset;
402 best_subset_cost = costs[subset];
403 best_subset_num_free_elts = num_free_elements;
404 }
405 }
406 if (best_subset.value() == -1) {
407 LOG(WARNING) << "Best subset not found. Algorithmic error or invalid "
408 "input.";
409 continue;
410 }
411 DCHECK_NE(best_subset.value(), -1);
412 inv()->Select(best_subset, CL::kFreeAndUncovered);
413 DVLOG(1) << "Cost = " << inv()->cost()
414 << " num_uncovered_elements = " << inv()->num_uncovered_elements();
415 }
416 inv()->CompressTrace();
417 stats.PrintStats();
418 DCHECK(inv()->CheckConsistency(CL::kFreeAndUncovered));
419 return true;
420}
421
422// LazyElementDegreeSolutionGenerator.
423// There is no need to use a priority queue here, as the ratios are computed
424// on-demand. Also elements are sorted based on degree once and for all and
425// moved past when the elements become already covered.
426
428 const SubsetBoolVector& in_focus) {
429 StopWatch stop_watch(&run_time_);
430 inv()->CompressTrace();
431 DVLOG(1) << "Entering LazyElementDegreeSolutionGenerator::NextSolution";
432 DCHECK(inv()->CheckConsistency(CL::kCostAndCoverage));
433 // Create the list of all the indices in the problem.
434 std::vector<ElementIndex> degree_sorted_elements =
435 GetUncoveredElementsSortedByDegree(inv());
436 const SparseRowView& rows = model()->rows();
437 const SparseColumnView& columns = model()->columns();
438 ComputationUsefulnessStats stats(inv(), false);
439 const SubsetCostVector& costs = model()->subset_costs();
440 for (const ElementIndex element : degree_sorted_elements) {
441 // No need to cover an element that is already covered.
442 if (inv()->coverage()[element] != 0) continue;
443 SubsetIndex best_subset(-1);
444 Cost best_subset_cost = 0.0; // Cost of the best subset.
445 BaseInt best_subset_num_free_elts = 0;
446 for (const SubsetIndex subset : rows[element]) {
447 if (!in_focus[subset]) continue;
448 const Cost filtering_det =
449 Determinant(costs[subset], columns[subset].size(), best_subset_cost,
450 best_subset_num_free_elts);
451 // If the ratio with the initial number elements is greater, we skip this
452 // subset.
453 if (filtering_det > 0) continue;
454 const BaseInt num_free_elements = inv()->ComputeNumFreeElements(subset);
455 stats.Update(subset, num_free_elements);
456 const Cost det = Determinant(costs[subset], num_free_elements,
457 best_subset_cost, best_subset_num_free_elts);
458 // Same as ElementDegreeSolutionGenerator.
459 if (det < 0 ||
460 (det == 0 && num_free_elements > best_subset_num_free_elts)) {
461 best_subset = subset;
462 best_subset_cost = costs[subset];
463 best_subset_num_free_elts = num_free_elements;
464 }
465 }
466 DCHECK_NE(best_subset, SubsetIndex(-1));
467 inv()->Select(best_subset, CL::kCostAndCoverage);
468 DVLOG(1) << "Cost = " << inv()->cost()
469 << " num_uncovered_elements = " << inv()->num_uncovered_elements();
470 }
471 DCHECK(inv()->CheckConsistency(CL::kCostAndCoverage));
472 stats.PrintStats();
473 return true;
474}
475
476// SteepestSearch.
477
479 StopWatch stop_watch(&run_time_);
480 const int64_t num_iterations = max_iterations();
481 DCHECK(inv()->CheckConsistency(CL::kCostAndCoverage));
482 inv()->Recompute(CL::kFreeAndUncovered);
483 DVLOG(1) << "Entering SteepestSearch::NextSolution, num_iterations = "
484 << num_iterations;
485 // Return false if inv() contains no solution.
486 // TODO(user): This should be relaxed for partial solutions.
487 if (inv()->num_uncovered_elements() != 0) {
488 return false;
489 }
490
491 // Create priority queue with cost of using a subset, by decreasing order.
492 // Do it only for selected AND removable subsets.
493 const SubsetCostVector& costs = model()->subset_costs();
494 std::vector<std::pair<float, SubsetIndex::ValueType>> subset_priorities;
495 subset_priorities.reserve(in_focus.size());
496 for (const SetCoverDecision& decision : inv()->trace()) {
497 const SubsetIndex subset = decision.subset();
498 if (in_focus[subset] && inv()->is_selected()[subset] &&
499 inv()->ComputeIsRedundant(subset)) {
500 const float delta_per_element = costs[subset];
501 subset_priorities.push_back({delta_per_element, subset.value()});
502 }
503 }
504 DVLOG(1) << "subset_priorities.size(): " << subset_priorities.size();
506 subset_priorities, model()->num_subsets());
507 for (int iteration = 0; iteration < num_iterations && !pq.IsEmpty();
508 ++iteration) {
509 const SubsetIndex best_subset(pq.TopIndex());
510 pq.Pop();
511 DCHECK(inv()->is_selected()[best_subset]);
512 DCHECK(inv()->ComputeIsRedundant(best_subset));
513 DCHECK_GT(costs[best_subset], 0.0);
514 inv()->Deselect(best_subset, CL::kFreeAndUncovered);
515 for (const SubsetIndex subset :
516 IntersectingSubsetsRange(*model(), best_subset)) {
517 if (!inv()->ComputeIsRedundant(subset)) {
518 pq.Remove(subset.value());
519 }
520 }
521 DVLOG(1) << "Cost = " << inv()->cost();
522 }
523 inv()->CompressTrace();
524 // TODO(user): change this to enable working on partial solutions.
525 DCHECK_EQ(inv()->num_uncovered_elements(), 0);
526 DCHECK(inv()->CheckConsistency(CL::kFreeAndUncovered));
527 return true;
528}
529
530// LazySteepestSearch.
531
533 StopWatch stop_watch(&run_time_);
534 const int64_t num_iterations = max_iterations();
535 DCHECK(inv()->CheckConsistency(CL::kCostAndCoverage));
536 DVLOG(1) << "Entering LazySteepestSearch::NextSolution, num_iterations = "
537 << num_iterations;
538 const BaseInt num_selected_subsets = inv()->trace().size();
539 std::vector<Cost> selected_costs;
540 selected_costs.reserve(num_selected_subsets);
541 // First part of the trick:
542 // Since the heuristic is greedy, it only considers subsets that are selected
543 // and in_focus.
544 const SubsetCostVector& costs = model()->subset_costs();
545 for (const SetCoverDecision& decision : inv()->trace()) {
546 if (in_focus[decision.subset()]) {
547 selected_costs.push_back(costs[decision.subset()]);
548 }
549 }
550 std::vector<BaseInt> cost_permutation(selected_costs.size());
551 std::iota(cost_permutation.begin(), cost_permutation.end(), 0);
552 // TODO(user): use RadixSort with doubles and payloads.
553 std::sort(cost_permutation.begin(), cost_permutation.end(),
554 [&selected_costs](const BaseInt i, const BaseInt j) {
555 if (selected_costs[i] == selected_costs[j]) {
556 return i < j;
557 }
558 return selected_costs[i] > selected_costs[j];
559 });
560 std::vector<SubsetIndex> cost_sorted_subsets;
561 cost_sorted_subsets.reserve(num_selected_subsets);
562 for (const BaseInt i : cost_permutation) {
563 cost_sorted_subsets.push_back(inv()->trace()[i].subset());
564 }
565 for (const SubsetIndex subset : cost_sorted_subsets) {
566 // Second part of the trick:
567 // ComputeIsRedundant is expensive, but it is going to be called only once
568 // per subset in the solution. Once this has been done, there is no need to
569 // update any priority queue, nor to use a stronger level of consistency
570 // than kCostAndCoverage.
571 // In the non-lazy version, the redundancy of a subset may be updated many
572 // times and the priority queue must be updated accordingly, including just
573 // for removing the subset that was just considered,
574 // A possible optimization would be to sort the elements by coverage and
575 // run ComputeIsRedundant with the new element order. This would make the
576 // subsets which cover only one element easier to prove non-redundant.
577 if (inv()->is_selected()[subset] && inv()->ComputeIsRedundant(subset)) {
578 inv()->Deselect(subset, CL::kCostAndCoverage);
579 }
580 }
581 inv()->CompressTrace();
582 DCHECK(inv()->CheckConsistency(CL::kCostAndCoverage));
583 return true;
584}
585
586// Guided Tabu Search
587
589 const SubsetIndex num_subsets(model()->num_subsets());
590 const SubsetCostVector& subset_costs = model()->subset_costs();
591 times_penalized_.assign(num_subsets.value(), 0);
592 augmented_costs_ = subset_costs;
593 utilities_ = subset_costs;
594}
595
596namespace {
597bool FlipCoin() {
598 // TODO(user): use STL for repeatable testing.
599 return absl::Bernoulli(absl::BitGen(), 0.5);
600}
601} // namespace
602
603void GuidedTabuSearch::UpdatePenalties(absl::Span<const SubsetIndex> focus) {
604 const SubsetCostVector& subset_costs = model()->subset_costs();
605 Cost max_utility = -1.0;
606 for (const SubsetIndex subset : focus) {
607 if (inv()->is_selected()[subset]) {
608 max_utility = std::max(max_utility, utilities_[subset]);
609 }
610 }
611 const double epsilon_utility = epsilon_ * max_utility;
612 for (const SubsetIndex subset : focus) {
613 if (inv()->is_selected()[subset]) {
614 const double utility = utilities_[subset];
615 if ((max_utility - utility <= epsilon_utility) && FlipCoin()) {
616 ++times_penalized_[subset];
617 const int times_penalized = times_penalized_[subset];
618 const Cost cost =
619 subset_costs[subset]; // / columns[subset].size().value();
620 utilities_[subset] = cost / (1 + times_penalized);
621 augmented_costs_[subset] =
622 cost * (1 + penalty_factor_ * times_penalized);
623 }
624 }
625 }
626}
627
628bool GuidedTabuSearch::NextSolution(absl::Span<const SubsetIndex> focus) {
629 StopWatch stop_watch(&run_time_);
630 const int64_t num_iterations = max_iterations();
631 DCHECK(inv()->CheckConsistency(CL::kFreeAndUncovered));
632 DVLOG(1) << "Entering GuidedTabuSearch::NextSolution, num_iterations = "
633 << num_iterations;
634 const SubsetCostVector& subset_costs = model()->subset_costs();
635 Cost best_cost = inv()->cost();
636 SubsetBoolVector best_choices = inv()->is_selected();
637 Cost augmented_cost =
638 std::accumulate(augmented_costs_.begin(), augmented_costs_.end(), 0.0);
639 BaseInt trace_size = inv()->trace().size();
640 for (int iteration = 0; iteration < num_iterations; ++iteration) {
641 if (inv()->trace().size() > 2 * trace_size) {
642 inv()->CompressTrace();
643 trace_size = inv()->trace().size();
644 }
645 Cost best_delta = kMaxPossibleCost;
646 SubsetIndex best_subset = kNotFound;
647 for (const SubsetIndex subset : focus) {
648 const Cost delta = augmented_costs_[subset];
649 DVLOG(1) << "Subset, " << subset.value() << ", at ,"
650 << inv()->is_selected()[subset] << ", delta =, " << delta
651 << ", best_delta =, " << best_delta;
652 if (inv()->is_selected()[subset]) {
653 // Try to remove subset from solution, if the gain from removing is
654 // worth it:
655 if (-delta < best_delta &&
656 // and it can be removed, and
657 inv()->ComputeIsRedundant(subset) &&
658 // it is not Tabu OR decreases the actual cost (aspiration):
659 (!tabu_list_.Contains(subset) ||
660 inv()->cost() - subset_costs[subset] < best_cost)) {
661 best_delta = -delta;
662 best_subset = subset;
663 }
664 } else {
665 // Try to use subset in solution, if its penalized delta is good.
666 if (delta < best_delta) {
667 // The limit kMaxPossibleCost is ill-defined,
668 // there is always a best_subset. Is it intended?
669 if (!tabu_list_.Contains(subset)) {
670 best_delta = delta;
671 best_subset = subset;
672 }
673 }
674 }
675 }
676 if (best_subset == kNotFound) { // Local minimum reached.
677 inv()->LoadSolution(best_choices);
678 return true;
679 }
680 DVLOG(1) << "Best subset, " << best_subset.value() << ", at ,"
681 << inv()->is_selected()[best_subset] << ", best_delta = ,"
682 << best_delta;
683
684 UpdatePenalties(focus);
685 tabu_list_.Add(best_subset);
686 if (inv()->is_selected()[best_subset]) {
687 inv()->Deselect(best_subset, CL::kFreeAndUncovered);
688 } else {
689 inv()->Select(best_subset, CL::kFreeAndUncovered);
690 }
691 // TODO(user): make the cost computation incremental.
692 augmented_cost =
693 std::accumulate(augmented_costs_.begin(), augmented_costs_.end(), 0.0);
694
695 DVLOG(1) << "Iteration, " << iteration << ", current cost = ,"
696 << inv()->cost() << ", best cost = ," << best_cost
697 << ", penalized cost = ," << augmented_cost;
698 if (inv()->cost() < best_cost) {
699 VLOG(1) << "Updated best cost, " << "Iteration, " << iteration
700 << ", current cost = ," << inv()->cost() << ", best cost = ,"
701 << best_cost << ", penalized cost = ," << augmented_cost;
702 best_cost = inv()->cost();
703 best_choices = inv()->is_selected();
704 }
705 }
706 inv()->LoadSolution(best_choices);
707 inv()->CompressTrace();
708 DCHECK(inv()->CheckConsistency(CL::kFreeAndUncovered));
709 return true;
710}
711
712// Guided Local Search
713void GuidedLocalSearch::Initialize() {
714 const SparseColumnView& columns = model()->columns();
715 penalties_.assign(columns.size(), 0);
716 penalization_factor_ = alpha_ * inv()->cost() * 1.0 / (columns.size());
717 for (const SetCoverDecision& decision : inv()->trace()) {
718 const SubsetIndex subset = decision.subset();
719 if (inv()->is_selected()[subset]) {
720 utility_heap_.Insert({static_cast<float>(model()->subset_costs()[subset] /
721 (1 + penalties_[subset])),
722 subset.value()});
723 }
724 }
725}
726
727Cost GuidedLocalSearch::ComputeDelta(SubsetIndex subset) const {
728 const float delta = (penalization_factor_ * penalties_[subset] +
729 model()->subset_costs()[subset]);
730 if (inv()->is_selected()[subset] && inv()->ComputeIsRedundant(subset)) {
731 return delta;
732 } else if (!inv()->is_selected()[subset]) {
733 return -delta;
734 }
735 return kInfinity;
736}
737
738bool GuidedLocalSearch::NextSolution(absl::Span<const SubsetIndex> focus) {
739 StopWatch stop_watch(&run_time_);
740 const int64_t num_iterations = max_iterations();
741 inv()->Recompute(CL::kRedundancy);
742 Cost best_cost = inv()->cost();
743 SubsetBoolVector best_choices = inv()->is_selected();
744
745 for (const SubsetIndex& subset : focus) {
746 const float delta = ComputeDelta(subset);
747 if (delta < kInfinity) {
748 priority_heap_.Insert({delta, subset.value()});
749 }
750 }
751
752 for (int iteration = 0;
753 !priority_heap_.IsEmpty() && iteration < num_iterations; ++iteration) {
754 // Improve current solution respective to the current penalties by flipping
755 // the best subset.
756 const SubsetIndex best_subset(priority_heap_.TopIndex());
757 if (inv()->is_selected()[best_subset]) {
758 utility_heap_.Insert({0, best_subset.value()});
759 inv()->Deselect(best_subset, CL::kRedundancy);
760 } else {
761 utility_heap_.Insert(
762 {static_cast<float>(model()->subset_costs()[best_subset] /
763 (1 + penalties_[best_subset])),
764 best_subset.value()});
765 inv()->Select(best_subset, CL::kRedundancy);
766 }
767 DCHECK(!utility_heap_.IsEmpty());
768
769 // Getting the subset with highest utility. utility_heap_ is not empty,
770 // because we just inserted a pair.
771 const SubsetIndex penalized_subset(utility_heap_.TopIndex());
772 utility_heap_.Pop();
773 ++penalties_[penalized_subset];
774 utility_heap_.Insert(
775 {static_cast<float>(model()->subset_costs()[penalized_subset] /
776 (1 + penalties_[penalized_subset])),
777 penalized_subset.value()});
778 DCHECK(!utility_heap_.IsEmpty());
779
780 // Get removable subsets (Add them to the heap).
781 for (const SubsetIndex subset : inv()->newly_removable_subsets()) {
782 const float delta_selected = (penalization_factor_ * penalties_[subset] +
783 model()->subset_costs()[subset]);
784 priority_heap_.Insert({delta_selected, subset.value()});
785 }
786 DCHECK(!priority_heap_.IsEmpty());
787
788 for (const SubsetIndex subset : {penalized_subset, best_subset}) {
789 const float delta = ComputeDelta(subset);
790 if (delta < kInfinity) {
791 priority_heap_.Insert({delta, subset.value()});
792 }
793 }
794 DCHECK(!priority_heap_.IsEmpty());
795
796 // Get new non removable subsets and remove them from the heap.
797 // This is when the priority_heap_ can become empty and end the outer loop
798 // early.
799 for (const SubsetIndex subset : inv()->newly_non_removable_subsets()) {
800 priority_heap_.Remove(subset.value());
801 }
802
803 if (inv()->cost() < best_cost) {
804 best_cost = inv()->cost();
805 best_choices = inv()->is_selected();
806 }
807 }
808 inv()->LoadSolution(best_choices);
809
810 // Improve the solution by removing redundant subsets.
811 for (const SubsetIndex& subset : focus) {
812 if (inv()->is_selected()[subset] && inv()->ComputeIsRedundant(subset))
813 inv()->Deselect(subset, CL::kRedundancy);
814 }
815 DCHECK_EQ(inv()->num_uncovered_elements(), 0);
816 return true;
817}
818
819namespace {
820void SampleSubsets(std::vector<SubsetIndex>* list, BaseInt num_subsets) {
821 num_subsets = std::min<BaseInt>(num_subsets, list->size());
822 CHECK_GE(num_subsets, 0);
823 std::shuffle(list->begin(), list->end(), absl::BitGen());
824 list->resize(num_subsets);
825}
826} // namespace
827
828std::vector<SubsetIndex> ClearRandomSubsets(BaseInt num_subsets,
829 SetCoverInvariant* inv) {
830 return ClearRandomSubsets(inv->model()->all_subsets(), num_subsets, inv);
831}
832
833std::vector<SubsetIndex> ClearRandomSubsets(absl::Span<const SubsetIndex> focus,
834 BaseInt num_subsets_to_choose,
835 SetCoverInvariant* inv) {
836 num_subsets_to_choose =
837 std::min<BaseInt>(num_subsets_to_choose, focus.size());
838 CHECK_GE(num_subsets_to_choose, 0);
839 std::vector<SubsetIndex> chosen_indices;
840 for (const SubsetIndex subset : focus) {
841 if (inv->is_selected()[subset]) {
842 chosen_indices.push_back(subset);
843 }
844 }
845 SampleSubsets(&chosen_indices, num_subsets_to_choose);
846 BaseInt num_deselected = 0;
847 for (const SubsetIndex subset : chosen_indices) {
848 if (inv->is_selected()[subset]) { // subset may have been deselected in a
849 // previous iteration.
850 inv->Deselect(subset, CL::kCostAndCoverage);
851 ++num_deselected;
852 }
853 for (const SubsetIndex connected_subset :
854 IntersectingSubsetsRange(*inv->model(), subset)) {
855 // connected_subset may have been deselected in a previous iteration.
856 if (inv->is_selected()[connected_subset]) {
857 inv->Deselect(connected_subset, CL::kCostAndCoverage);
858 ++num_deselected;
859 }
860 }
861 // Note that num_deselected may exceed num_subsets_to_choose by more than 1.
862 if (num_deselected > num_subsets_to_choose) break;
863 }
864 return chosen_indices;
865}
866
867std::vector<SubsetIndex> ClearMostCoveredElements(BaseInt max_num_subsets,
868 SetCoverInvariant* inv) {
869 return ClearMostCoveredElements(inv->model()->all_subsets(), max_num_subsets,
870 inv);
871}
872
873std::vector<SubsetIndex> ClearMostCoveredElements(
874 absl::Span<const SubsetIndex> focus, BaseInt max_num_subsets,
875 SetCoverInvariant* inv) {
876 // This is the vector we will return.
877 std::vector<SubsetIndex> sampled_subsets;
878
879 const ElementToIntVector& coverage = inv->coverage();
880 const BaseInt num_subsets = inv->model()->num_subsets();
881 const SparseRowView& rows = inv->model()->rows();
882
883 // Collect the sets which have at least one element whose coverage > 1,
884 // even if those sets are not removable.
885 SubsetBoolVector subset_is_collected(num_subsets, false);
886 for (const ElementIndex element : inv->model()->ElementRange()) {
887 if (coverage[element] <= 1) continue;
888 for (const SubsetIndex subset : rows[element]) {
889 if (inv->is_selected()[subset] && !subset_is_collected[subset]) {
890 subset_is_collected[subset] = true;
891 }
892 }
893 }
894
895 // Now intersect with focus: sampled_subsets = focus ⋂ impacted_subsets.
896 // NOTE(user): this might take too long. TODO(user):find another algorithm if
897 // necessary.
898 for (const SubsetIndex subset : focus) {
899 if (subset_is_collected[subset]) {
900 sampled_subsets.push_back(subset);
901 }
902 }
903
904 // Actually *sample* sampled_subset.
905 // TODO(user): find another algorithm if necessary.
906 std::shuffle(sampled_subsets.begin(), sampled_subsets.end(), absl::BitGen());
907 sampled_subsets.resize(
908 std::min<BaseInt>(sampled_subsets.size(), max_num_subsets));
909
910 // Testing has shown that sorting sampled_subsets is not necessary.
911 // Now, un-select the subset in sampled_subsets.
912 for (const SubsetIndex subset : sampled_subsets) {
913 inv->Deselect(subset, CL::kCostAndCoverage);
914 }
915 return sampled_subsets;
916}
917
918} // namespace operations_research
void Update(Aggregate element)
HeapIndex heap_size() const
bool NextSolution(absl::Span< const SubsetIndex > focus) final
void Recompute(ConsistencyLevel target_consistency)
const SubsetToIntVector & num_free_elements() const
const std::vector< SetCoverDecision > & trace() const
void LoadSolution(const SubsetBoolVector &solution)
bool Deselect(SubsetIndex subset, ConsistencyLevel consistency)
const SubsetBoolVector & is_selected() const
bool Select(SubsetIndex subset, ConsistencyLevel consistency)
const ElementToIntVector & coverage() const
BaseInt ComputeNumFreeElements(SubsetIndex subset) const
util_intops::StrongIntRange< ElementIndex > ElementRange() const
const SparseRowView & rows() const
const SubsetCostVector & subset_costs() const
const SparseColumnView & columns() const
std::vector< SubsetIndex > all_subsets() const
SetCoverInvariant::ConsistencyLevel consistency_level_
constexpr Fractional kInfinity
Definition lp_types.h:87
dual_gradient T(y - `dual_solution`) class DiagonalTrustRegionProblemFromQp
OR-Tools root namespace.
constexpr SubsetIndex kNotFound(-1)
static constexpr double kInfinity
util_intops::StrongVector< SubsetIndex, Cost > SubsetCostVector
Definition base_types.h:58
SetCoverInvariant::ConsistencyLevel CL
std::vector< SubsetIndex > ClearRandomSubsets(BaseInt num_subsets, SetCoverInvariant *inv)
util_intops::StrongVector< SubsetIndex, BaseInt > SubsetToIntVector
Definition base_types.h:65
static constexpr Cost kMaxPossibleCost
util_intops::StrongVector< ElementIndex, BaseInt > ElementToIntVector
Definition base_types.h:64
util_intops::StrongVector< SubsetIndex, bool > SubsetBoolVector
Definition base_types.h:71
void RadixSort(absl::Span< T > values, int num_bits=sizeof(T) *8)
Definition radix_sort.h:270
util_intops::StrongVector< SubsetIndex, SparseColumn > SparseColumnView
Definition base_types.h:68
std::vector< SubsetIndex > ClearMostCoveredElements(BaseInt max_num_subsets, SetCoverInvariant *inv)
util_intops::StrongVector< ElementIndex, SparseRow > SparseRowView
Definition base_types.h:69
trees with all degrees equal w the current value of int max_iterations