Google OR-Tools v9.14
a fast and portable software suite for combinatorial optimization
Loading...
Searching...
No Matches
set_cover_model.h
Go to the documentation of this file.
1// Copyright 2010-2025 Google LLC
2// Licensed under the Apache License, Version 2.0 (the "License");
3// you may not use this file except in compliance with the License.
4// You may obtain a copy of the License at
5//
6// http://www.apache.org/licenses/LICENSE-2.0
7//
8// Unless required by applicable law or agreed to in writing, software
9// distributed under the License is distributed on an "AS IS" BASIS,
10// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11// See the License for the specific language governing permissions and
12// limitations under the License.
13
14#ifndef OR_TOOLS_SET_COVER_SET_COVER_MODEL_H_
15#define OR_TOOLS_SET_COVER_SET_COVER_MODEL_H_
16
17#include <cstdint>
18#include <string>
19#include <vector>
20
21#include "absl/log/check.h"
22#include "absl/strings/string_view.h"
23#include "absl/time/time.h"
24#include "absl/types/span.h"
28#include "ortools/set_cover/set_cover.pb.h"
29
30// Representation class for the weighted set-covering problem.
31//
32// Let E be a "universe" set, let (S_j) be a family (j in J) of subsets of E,
33// and c_j costs associated to each S_j. Note that J = {j in 1..|S|}.
34//
35// The minimum-cost set-covering problem consists in finding K (for covering),
36// a subset of J such that the union of all the S_j for k in K is equal to E
37// (the subsets indexed by K "cover" E), while minimizing total cost sum c_k (k
38// in K).
39//
40// In Mixed-Integer Programming and matrix terms, the goal is to find values
41// of binary variables x_j, where x_j is 1 when subset S_j is in K, 0
42// otherwise, that minimize the sum of c_j * x_j subject to M.x >= 1. Each row
43// corresponds to an element in E.
44//
45// The matrix M for linear constraints is defined as follows:
46// - it has as many rows as there are elements in E.
47// - its columns are such that M(i, j) = 1 iff the i-th element of E is present
48// in S_j.
49//
50// We also use m to denote |E|, the number of elements, and n to denote |S|, the
51// number of subsets.
52// Finally, NNZ denotes the numbers of non-zeros, i.e. the sum of the
53// cardinalities of all the subsets.
54
55namespace operations_research {
56
57// Main class for describing a weighted set-covering problem.
59 public:
60 // Constructs an empty weighted set-covering problem.
61 explicit SetCoverModel(const absl::string_view name = "SetCoverModel")
62 : name_(name),
63 num_elements_(0),
64 num_subsets_(0),
65 num_nonzeros_(0),
66 row_view_is_valid_(false),
67 elements_in_subsets_are_sorted_(false),
68 subset_costs_(),
69 is_unicost_(true),
70 is_unicost_valid_(false),
71 columns_(),
72 rows_(),
73 all_subsets_() {}
74
75 std::string name() const { return name_; }
76
77 void SetName(const absl::string_view name) { name_ = name; }
78
79 // Constructs a weighted set-covering problem from a seed model, with
80 // num_elements elements and num_subsets subsets.
81 // - The distributions of the degrees of the elements and the cardinalities of
82 // the subsets are based on those of the seed model. They are scaled
83 // affininely by row_scale and column_scale respectively.
84 // - By affine scaling, we mean that the minimum value of the distribution is
85 // not scaled, but the variation above this minimum value is.
86 // - For a given subset with a given cardinality in the generated model, its
87 // elements are sampled from the distribution of the degrees as computed
88 // above.
89 // - The costs of the subsets in the new model are sampled from the
90 // distribution of the costs of the subsets in the seed model, scaled by
91 // cost_scale.
92 // IMPORTANT NOTICE: The algorithm may not succeed in generating a model
93 // where all the elements can be covered. In that case, the model will be
94 // empty.
95
99 double row_scale,
100 double column_scale,
101 double cost_scale);
102
103 // Returns true if the model is empty, i.e. has no elements, no subsets, and
104 // no nonzeros.
105 bool IsEmpty() const { return rows_.empty() || columns_.empty(); }
106
107 // Current number of elements to be covered in the model, i.e. the number of
108 // elements in S. In matrix terms, this is the number of rows.
109 BaseInt num_elements() const { return num_elements_; }
110
111 // Current number of subsets in the model. In matrix terms, this is the
112 // number of columns.
114 CHECK_NE(this, nullptr);
115 return num_subsets_;
116 }
117
118 // Current number of nonzeros in the matrix.
119 int64_t num_nonzeros() const { return num_nonzeros_; }
120
121 // Returns the fill rate of the matrix.
122 double FillRate() const {
123 return 1.0 * num_nonzeros() / (1.0 * num_elements() * num_subsets());
124 }
125
126 // Computes the number of singleton columns in the model, i.e. subsets
127 // covering only one element.
129 BaseInt num_singleton_columns = 0;
130 for (const SparseColumn& column : columns_) {
131 if (column.size() == 1) {
132 ++num_singleton_columns;
133 }
134 }
135 return num_singleton_columns;
136 }
137
138 // Computes the number of singleton rows in the model, i.e. elements in the
139 // model that can be covered by one subset only.
141 BaseInt num_singleton_rows = 0;
142 for (const SparseRow& row : rows_) {
143 if (row.size() == 1) {
144 ++num_singleton_rows;
145 }
146 }
147 return num_singleton_rows;
148 }
149
150 // Vector of costs for each subset.
151 const SubsetCostVector& subset_costs() const { return subset_costs_; }
152
153 // Returns true if all subset costs are equal to 1.0. This is a fast check
154 // that is only valid if the subset costs are not modified.
155 bool is_unicost() {
156 if (is_unicost_valid_) {
157 return is_unicost_;
158 }
159 is_unicost_ = true;
160 for (const Cost cost : subset_costs_) {
161 if (cost != 1.0) {
162 is_unicost_ = false;
163 break;
164 }
165 }
166 is_unicost_valid_ = true;
167 return is_unicost_;
168 }
169
170 // Column view of the set covering problem.
171 const SparseColumnView& columns() const { return columns_; }
172
173 // Row view of the set covering problem.
174 const SparseRowView& rows() const {
175 DCHECK(row_view_is_valid_);
176 return rows_;
177 }
178
179 // Returns true if rows_ and columns_ represent the same problem.
180 bool row_view_is_valid() const { return row_view_is_valid_; }
181
182 // Access to the ranges of subsets and elements.
183 util_intops::StrongIntRange<SubsetIndex> SubsetRange() const {
184 return util_intops::StrongIntRange<SubsetIndex>(SubsetIndex(num_subsets_));
185 }
186
187 util_intops::StrongIntRange<ElementIndex> ElementRange() const {
188 return util_intops::StrongIntRange<ElementIndex>(
189 ElementIndex(num_elements_));
190 }
191
192 // Returns the list of indices for all the subsets in the model.
193 std::vector<SubsetIndex> all_subsets() const { return all_subsets_; }
194
195 // Accessors to the durations of the different stages of the model creation.
196 absl::Duration generation_duration() const { return generation_duration_; }
197
198 absl::Duration create_sparse_row_view_duration() const {
199 return create_sparse_row_view_duration_;
200 }
201
203 return compute_sparse_row_view_using_slices_duration_;
204 }
205
206 // Adds an empty subset with a cost to the problem. In matrix terms, this
207 // adds a column to the matrix.
208 void AddEmptySubset(Cost cost);
209
210 // Adds an element to the last subset created. In matrix terms, this adds a
211 // 1 on row 'element' of the current last column of the matrix.
212 void AddElementToLastSubset(BaseInt element);
213 void AddElementToLastSubset(ElementIndex element);
214
215 // Sets 'cost' to an already existing 'subset'.
216 // This will CHECK-fail if cost is infinite or a NaN.
217 void SetSubsetCost(BaseInt subset, Cost cost);
218 void SetSubsetCost(SubsetIndex subset, Cost cost);
219
220 // Adds 'element' to an already existing 'subset'.
221 // No check is done if element is already in the subset.
222 void AddElementToSubset(BaseInt element, BaseInt subset);
223 void AddElementToSubset(ElementIndex element, SubsetIndex subset);
224
225 // Sorts the elements in each subset. Should be called before exporting the
226 // model to a proto.
228
229 // Creates the sparse ("dual") representation of the problem. This also sorts
230 // the elements in each subset.
231 void CreateSparseRowView();
232
233 // Same as CreateSparseRowView, but uses a slicing algorithm, more prone to
234 // parallelism.
236
237 // The following functions are used by CreateSparseRowViewUsingSlices.
238 // They are exposed for testing purposes.
239
240 // Returns a vector of subset indices that partition columns into
241 // `num_partitions` partitions of roughly equal size in number of non-zeros.
242 // The resulting vector contains the index of the first subset in the next
243 // partition. Therefore the last element is the total number of subsets.
244 // Also the vector is sorted.
245 std::vector<SubsetIndex> ComputeSliceIndices(int num_partitions);
246
247 // Returns a view of the rows of the problem with subset in the range
248 // [begin_subset, end_subset).
249 SparseRowView ComputeSparseRowViewSlice(SubsetIndex begin_subset,
250 SubsetIndex end_subset);
251
252 // Returns a vector of row views, each corresponding to a partition of the
253 // problem. The partitions are defined by the given partition points.
254 std::vector<SparseRowView> CutSparseRowViewInSlices(
255 absl::Span<const SubsetIndex> partition_points);
256
257 // Returns the union of the rows of the given row views.
258 // The returned view is valid only as long as the given row views are valid.
259 // The indices in the rows are sorted.
261 absl::Span<const SparseRowView> row_slices);
262
263 // Returns true if the problem is feasible, i.e. if the subsets cover all
264 // the elements. Could be const, but it updates the feasibility_duration_
265 // field.
266 bool ComputeFeasibility();
267
268 // Resizes the model to have at least num_subsets columns. The new columns
269 // correspond to empty subsets.
270 // This function will never remove a column, i.e. if num_subsets is smaller
271 // than the current instance size the function does nothing.
273 void ResizeNumSubsets(SubsetIndex num_subsets);
274
275 // Reserves num_elements rows in the column indexed by subset.
277
278 // Returns the model as a SetCoverProto. Note that the elements of each subset
279 // are sorted locally before being exported to the proto. This is done to
280 // ensure that the proto is deterministic. The function is const because it
281 // does not modify the model. Therefore, the model as exported by this
282 // function may be different from the initial model.
283 SetCoverProto ExportModelAsProto() const;
284
285 // Imports the model from a SetCoverProto.
286 void ImportModelFromProto(const SetCoverProto& message);
287
288 // Returns a verbose string representation of the model, using the given
289 // separator.
290 std::string ToVerboseString(absl::string_view sep) const;
291
292 // Returns a string representation of the model, using the given separator.
293 std::string ToString(absl::string_view sep) const;
294
295 // A struct enabling to show basic statistics on rows and columns.
296 // The meaning of the fields is obvious.
297 struct Stats {
298 double min;
299 double max;
300 double median;
301 double mean;
302 double stddev;
303 double iqr; // Interquartile range.
304
305 // Returns a string representation of the stats. TODO(user): remove this as
306 // it is obsolete.
307 std::string DebugString() const { return ToVerboseString(", "); }
308
309 // Returns a string representation of the stats, using the given separator.
310 std::string ToString(absl::string_view sep) const;
311
312 // Returns a verbose string representation of the stats, using the given
313 // separator.
314 std::string ToVerboseString(absl::string_view sep) const;
315 };
316
317 // Computes basic statistics on costs and returns a Stats structure.
318 Stats ComputeCostStats() const;
319
320 // Computes basic statistics on rows and returns a Stats structure.
321 Stats ComputeRowStats() const;
322
323 // Computes basic statistics on columns and returns a Stats structure.
324 Stats ComputeColumnStats() const;
325
326 // Computes deciles on rows and returns a vector of deciles.
327 std::vector<int64_t> ComputeRowDeciles() const;
328
329 // Computes deciles on columns and returns a vector of deciles.
330 std::vector<int64_t> ComputeColumnDeciles() const;
331
332 // Computes basic statistics on the deltas of the row and column elements and
333 // returns a Stats structure. The deltas are computed as the difference
334 // between two consecutive indices in rows or columns. The number of bytes
335 // computed is meant using a variable-length base-128 encoding.
336 // TODO(user): actually use this to compress the rows and columns.
337 Stats ComputeRowDeltaSizeStats() const;
338 Stats ComputeColumnDeltaSizeStats() const;
339
340 private:
341 // Updates the all_subsets_ vector so that it always contains 0 to
342 // columns.size() - 1
343 void UpdateAllSubsetsList();
344
345 // The name of the model, "SetCoverModel" as default.
346 std::string name_;
347
348 // Number of elements.
349 BaseInt num_elements_;
350
351 // Number of subsets. Maintained for ease of access.
352 BaseInt num_subsets_;
353
354 // Number of nonzeros in the matrix. The value is an int64_t because there can
355 // be more than 1 << 31 nonzeros even with BaseInt = int32_t.
356 int64_t num_nonzeros_;
357
358 // True when the SparseRowView is up-to-date.
359 bool row_view_is_valid_;
360
361 // True when the elements in each subset are sorted.
362 bool elements_in_subsets_are_sorted_;
363
364 // Costs for each subset.
365 SubsetCostVector subset_costs_;
366
367 // True when all subset costs are equal to 1.0.
368 bool is_unicost_;
369
370 // True when is_unicost_ is up-to-date.
371 bool is_unicost_valid_;
372
373 // Stores the run time for CreateSparseRowView. Interesting to compare with
374 // the time spent to actually generate a solution to the model.
375 absl::Duration create_sparse_row_view_duration_;
376
377 // Stores the run time for ComputeSparseRowViewUsingSlices.
378 absl::Duration compute_sparse_row_view_using_slices_duration_;
379
380 // Stores the run time for GenerateRandomModelFrom.
381 absl::Duration generation_duration_;
382
383 // Stores the run time for ComputeFeasibility. A good estimate of the time
384 // needed to traverse the complete model.
385 absl::Duration feasibility_duration_;
386
387 // Vector of columns. Each column corresponds to a subset and contains the
388 // elements of the given subset.
389 // This takes NNZ (number of non-zeros) BaseInts, or |E| * |S| * fill_rate.
390 // On classical benchmarks, the fill rate is in the 2 to 5% range.
391 // Some synthetic benchmarks have fill rates of 20%, while benchmarks for
392 // rail rotations have a fill rate of 0.2 to 0.4%.
393 // TODO(user): try using a compressed representation like VarInt or LEB128,
394 // since the data is only iterated upon.
395 SparseColumnView columns_;
396
397 // Vector of rows. Each row corresponds to an element and contains the
398 // subsets containing the element.
399 // The size is exactly the same as for columns_ (or there would be a bug.)
400 SparseRowView rows_;
401
402 // Vector of indices from 0 to columns.size() - 1. (Like std::iota, but built
403 // incrementally.) Used to (un)focus optimization algorithms on the complete
404 // problem.
405 // This takes |S| BaseInts.
406 // TODO(user): use this to enable deletion and recycling of columns/subsets.
407 // TODO(user): replace this with an iterator?
408 std::vector<SubsetIndex> all_subsets_;
409};
410
411// IntersectingSubsetsIterator is a forward iterator that returns the next
412// intersecting subset for a fixed seed_subset.
413// The iterator is initialized with a model and a seed_subset and
414// allows a speedup in getting the intersecting subsets
415// by not storing them in memory.
416// The iterator is at the end when the last intersecting subset has been
417// returned.
419 public:
421 SubsetIndex seed_subset, bool at_end = false)
422 : model_(model),
423 seed_subset_(seed_subset),
424 seed_column_(model_.columns()[seed_subset]),
425 seed_column_size_(ColumnEntryIndex(seed_column_.size())),
426 intersecting_subset_(0), // meaningless, will be overwritten.
427 element_entry_(0),
428 rows_(model_.rows()),
429 subset_seen_() {
430 // For iterator to be as light as possible when created, we do not reserve
431 // space for the subset_seen_ vector, and we do not initialize it. This
432 // is done to avoid the overhead of creating the vector and initializing it
433 // when the iterator is created. The vector is created on the first call to
434 // the ++ operator.
435 DCHECK(model_.row_view_is_valid());
436 if (at_end) {
437 element_entry_ = seed_column_size_;
438 return;
439 }
440 for (; element_entry_ < seed_column_size_; ++element_entry_) {
441 const ElementIndex current_element = seed_column_[element_entry_];
442 const SparseRow& current_row = rows_[current_element];
443 const RowEntryIndex current_row_size = RowEntryIndex(current_row.size());
444 for (; subset_entry_ < current_row_size; ++subset_entry_) {
445 if (intersecting_subset_ == seed_subset_) continue;
446 intersecting_subset_ = current_row[subset_entry_];
447 return;
448 }
449 subset_entry_ = RowEntryIndex(0); // 'carriage-return'
450 }
451 }
452
453 // Returns (true) whether the iterator is at the end.
454 bool at_end() const { return element_entry_ == seed_column_size_; }
455
456 // Returns the intersecting subset.
457 SubsetIndex operator*() const { return intersecting_subset_; }
458
459 // Disequality operator.
460 bool operator!=(const IntersectingSubsetsIterator& other) const {
461 return element_entry_ != other.element_entry_ ||
462 subset_entry_ != other.subset_entry_ ||
463 seed_subset_ != other.seed_subset_;
464 }
465
466 // Advances the iterator to the next intersecting subset.
468 DCHECK(!at_end()) << "element_entry_ = " << element_entry_
469 << " subset_entry_ = " << subset_entry_
470 << " seed_column_size_ = " << seed_column_size_;
471 if (subset_seen_.empty()) {
472 subset_seen_.resize(model_.num_subsets(), false);
473 subset_seen_[seed_subset_] = true;
474 }
475 subset_seen_[intersecting_subset_] = true;
476 for (; element_entry_ < seed_column_size_; ++element_entry_) {
477 const ElementIndex current_element = seed_column_[element_entry_];
478 const SparseRow& current_row = rows_[current_element];
479 const RowEntryIndex current_row_size = RowEntryIndex(current_row.size());
480 for (; subset_entry_ < current_row_size; ++subset_entry_) {
481 intersecting_subset_ = current_row[subset_entry_];
482 if (!subset_seen_[intersecting_subset_]) {
483 return *this;
484 }
485 }
486 subset_entry_ = RowEntryIndex(0); // 'carriage-return'
487 }
488 return *this;
489 }
490
491 private:
492 // The model to which the iterator is applying.
493 const SetCoverModel& model_;
494
495 // The seed subset.
496 SubsetIndex seed_subset_;
497
498 // A reference to the column of the seed subset, kept here for ease of access.
499 const SparseColumn& seed_column_;
500
501 // The size of the column of the seed subset.
502 ColumnEntryIndex seed_column_size_;
503
504 // The intersecting subset.
505 SubsetIndex intersecting_subset_;
506
507 // The position of the entry in the column corresponding to `seed_subset_`.
508 ColumnEntryIndex element_entry_;
509
510 // The position of the entry in the row corresponding to `element_entry`.
511 RowEntryIndex subset_entry_;
512
513 // A reference to the rows of the model, kept here for ease of access.
514 const SparseRowView& rows_;
515
516 // A vector of Booleans indicating whether the current subset has been
517 // already seen by the iterator.
518 SubsetBoolVector subset_seen_;
519};
520
521// IntersectingSubsetsRange is a range of intersecting subsets for a fixed seed
522// subset. Can be used with range-based for-loops.
524 public:
527
528 IntersectingSubsetsRange(const SetCoverModel& model, SubsetIndex seed_subset)
529 : model_(model), seed_subset_(seed_subset) {}
530
531 iterator begin() { return IntersectingSubsetsIterator(model_, seed_subset_); }
532
534 return IntersectingSubsetsIterator(model_, seed_subset_, true);
535 }
536
538 return IntersectingSubsetsIterator(model_, seed_subset_);
539 }
540
542 return IntersectingSubsetsIterator(model_, seed_subset_, true);
543 }
544
545 private:
546 // The model to which the range is applying.
547 const SetCoverModel& model_;
548
549 // The seed subset for which the intersecting subsets are computed.
550 SubsetIndex seed_subset_;
551};
552
553} // namespace operations_research
554
555#endif // OR_TOOLS_SET_COVER_SET_COVER_MODEL_H_
bool at_end() const
Returns (true) whether the iterator is at the end.
SubsetIndex operator*() const
Returns the intersecting subset.
IntersectingSubsetsIterator & operator++()
Advances the iterator to the next intersecting subset.
bool operator!=(const IntersectingSubsetsIterator &other) const
Disequality operator.
IntersectingSubsetsIterator(const SetCoverModel &model, SubsetIndex seed_subset, bool at_end=false)
IntersectingSubsetsRange(const SetCoverModel &model, SubsetIndex seed_subset)
Main class for describing a weighted set-covering problem.
void ImportModelFromProto(const SetCoverProto &message)
Imports the model from a SetCoverProto.
Stats ComputeCostStats() const
Computes basic statistics on costs and returns a Stats structure.
std::string ToString(absl::string_view sep) const
Returns a string representation of the model, using the given separator.
double FillRate() const
Returns the fill rate of the matrix.
static SetCoverModel GenerateRandomModelFrom(const SetCoverModel &seed_model, BaseInt num_elements, BaseInt num_subsets, double row_scale, double column_scale, double cost_scale)
void ResizeNumSubsets(BaseInt num_subsets)
void ReserveNumElementsInSubset(BaseInt num_elements, BaseInt subset)
Reserves num_elements rows in the column indexed by subset.
absl::Duration create_sparse_row_view_duration() const
std::vector< int64_t > ComputeRowDeciles() const
Computes deciles on rows and returns a vector of deciles.
SetCoverModel(const absl::string_view name="SetCoverModel")
Constructs an empty weighted set-covering problem.
std::vector< SubsetIndex > ComputeSliceIndices(int num_partitions)
util_intops::StrongIntRange< ElementIndex > ElementRange() const
void AddElementToLastSubset(BaseInt element)
const SparseRowView & rows() const
Row view of the set covering problem.
std::vector< SparseRowView > CutSparseRowViewInSlices(absl::Span< const SubsetIndex > partition_points)
Stats ComputeRowStats() const
Computes basic statistics on rows and returns a Stats structure.
int64_t num_nonzeros() const
Current number of nonzeros in the matrix.
void SetName(const absl::string_view name)
util_intops::StrongIntRange< SubsetIndex > SubsetRange() const
Access to the ranges of subsets and elements.
std::string ToVerboseString(absl::string_view sep) const
absl::Duration compute_sparse_row_view_using_slices_duration() const
SparseRowView ComputeSparseRowViewSlice(SubsetIndex begin_subset, SubsetIndex end_subset)
void AddElementToSubset(BaseInt element, BaseInt subset)
void SetSubsetCost(BaseInt subset, Cost cost)
absl::Duration generation_duration() const
Accessors to the durations of the different stages of the model creation.
bool row_view_is_valid() const
Returns true if rows_ and columns_ represent the same problem.
const SubsetCostVector & subset_costs() const
Vector of costs for each subset.
SetCoverProto ExportModelAsProto() const
Stats ComputeColumnStats() const
Computes basic statistics on columns and returns a Stats structure.
std::vector< int64_t > ComputeColumnDeciles() const
Computes deciles on columns and returns a vector of deciles.
const SparseColumnView & columns() const
Column view of the set covering problem.
SparseRowView ReduceSparseRowViewSlices(absl::Span< const SparseRowView > row_slices)
std::vector< SubsetIndex > all_subsets() const
Returns the list of indices for all the subsets in the model.
In SWIG mode, we don't want anything besides these top-level includes.
util_intops::StrongVector< RowEntryIndex, SubsetIndex > SparseRow
Definition base_types.h:62
util_intops::StrongVector< SubsetIndex, Cost > SubsetCostVector
Definition base_types.h:58
util_intops::StrongVector< SubsetIndex, bool > SubsetBoolVector
Definition base_types.h:71
util_intops::StrongVector< SubsetIndex, SparseColumn > SparseColumnView
Views of the sparse vectors.
Definition base_types.h:68
double Cost
Basic non-strict type for cost. The speed penalty for using double is ~2%.
Definition base_types.h:32
util_intops::StrongVector< ColumnEntryIndex, ElementIndex > SparseColumn
Definition base_types.h:61
util_intops::StrongVector< ElementIndex, SparseRow > SparseRowView
Definition base_types.h:69
std::string ToVerboseString(absl::string_view sep) const
std::string ToString(absl::string_view sep) const
Returns a string representation of the stats, using the given separator.