Google OR-Tools v9.11
a fast and portable software suite for combinatorial optimization
Loading...
Searching...
No Matches
set_cover_model.h
Go to the documentation of this file.
1// Copyright 2010-2024 Google LLC
2// Licensed under the Apache License, Version 2.0 (the "License");
3// you may not use this file except in compliance with the License.
4// You may obtain a copy of the License at
5//
6// http://www.apache.org/licenses/LICENSE-2.0
7//
8// Unless required by applicable law or agreed to in writing, software
9// distributed under the License is distributed on an "AS IS" BASIS,
10// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11// See the License for the specific language governing permissions and
12// limitations under the License.
13
14#ifndef OR_TOOLS_ALGORITHMS_SET_COVER_MODEL_H_
15#define OR_TOOLS_ALGORITHMS_SET_COVER_MODEL_H_
16
17#if defined(_MSC_VER)
18#include <BaseTsd.h>
19typedef SSIZE_T ssize_t;
20#else
21#include <sys/types.h>
22#endif // defined(_MSC_VER)
23
24#include <string>
25#include <vector>
26
27#include "absl/log/check.h"
28#include "absl/strings/str_cat.h"
29#include "ortools/algorithms/set_cover.pb.h"
33
34// Representation class for the weighted set-covering problem.
35//
36// Let E be a "universe" set, let (S_j) be a family (j in J) of subsets of E,
37// and c_j costs associated to each S_j. Note that J = {j in 1..|S|}.
38//
39// The minimum-cost set-covering problem consists in finding K (for covering),
40// a subset of J such that the union of all the S_j for k in K is equal to E
41// (the subsets indexed by K "cover" E), while minimizing total cost sum c_k (k
42// in K).
43//
44// In Mixed-Integer Programming and matrix terms, the goal is to find values
45// of binary variables x_j, where x_j is 1 when subset S_j is in K, 0
46// otherwise, that minimize the sum of c_j * x_j subject to M.x >= 1. Each row
47// corresponds to an element in E.
48//
49// The matrix M for linear constraints is defined as follows:
50// - it has as many rows as there are elements in E.
51// - its columns are such that M(i, j) = 1 iff the i-th element of E is present
52// in S_j.
53//
54// We alse use m to denote |E|, the number of elements, and n to denote |S|, the
55// number of subsets.
56// Finally, nnz or #nz denotes the numbers of non-zeros, i.e. the sum of the
57// cardinalities of all the subsets.
58
59namespace operations_research {
60// Basic non-strict type for cost. The speed penalty for using double is ~2%.
61using Cost = double;
62
63// Base non-strict integer type for counting elements and subsets.
64// Using ints makes it possible to represent problems with more than 2 billion
65// (2e9) elements and subsets. If need arises one day, BaseInt can be split
66// into SubsetBaseInt and ElementBaseInt.
67// Quick testing has shown a slowdown of about 20-25% when using int64_t.
68using BaseInt = int;
69
70// We make heavy use of strong typing to avoid obvious mistakes.
71// Subset index.
73
74// Element index.
76
77// Position in a vector. The vector may either represent a column, i.e. a
78// subset with all its elements, or a row, i,e. the list of subsets which
79// contain a given element.
82
86
87// SIMD operations require vectors to be aligned at 64-bytes on x86-64
88// processors as of 2024-05-03.
89// TODO(user): improve the code to make it possible to use unaligned memory.
90constexpr int kSetCoverAlignmentInBytes = 64;
91
97
102
107
113
114// Views of the sparse vectors. These need not be aligned as it's their contents
115// that need to be aligned.
118
120
121// Main class for describing a weighted set-covering problem.
123 public:
124 // Constructs an empty weighted set-covering problem.
126 : num_elements_(0),
127 num_subsets_(0),
128 num_nonzeros_(0),
129 row_view_is_valid_(false),
130 subset_costs_(),
131 columns_(),
132 rows_(),
133 all_subsets_() {}
134
135 // Current number of elements to be covered in the model, i.e. the number of
136 // elements in S. In matrix terms, this is the number of rows.
137 BaseInt num_elements() const { return num_elements_; }
138
139 // Current number of subsets in the model. In matrix terms, this is the
140 // number of columns.
141 BaseInt num_subsets() const { return num_subsets_; }
142
143 // Current number of nonzeros in the matrix.
144 ssize_t num_nonzeros() const { return num_nonzeros_; }
145
146 double FillRate() const {
147 return 1.0 * num_nonzeros() / (1.0 * num_elements() * num_subsets());
148 }
149
150 // Vector of costs for each subset.
151 const SubsetCostVector& subset_costs() const { return subset_costs_; }
152
153 // Column view of the set covering problem.
154 const SparseColumnView& columns() const { return columns_; }
155
156 // Row view of the set covering problem.
157 const SparseRowView& rows() const {
158 DCHECK(row_view_is_valid_);
159 return rows_;
160 }
161
162 // Returns true if rows_ and columns_ represent the same problem.
163 bool row_view_is_valid() const { return row_view_is_valid_; }
164
165 // Access to the ranges of subsets and elements.
169
172 ElementIndex(num_elements_));
173 }
174
175 // Returns the list of indices for all the subsets in the model.
176 std::vector<SubsetIndex> all_subsets() const { return all_subsets_; }
177
178 // Adds an empty subset with a cost to the problem. In matrix terms, this
179 // adds a column to the matrix.
180 void AddEmptySubset(Cost cost);
181
182 // Adds an element to the last subset created. In matrix terms, this adds a
183 // 1 on row 'element' of the current last column of the matrix.
184 void AddElementToLastSubset(BaseInt element);
185 void AddElementToLastSubset(ElementIndex element);
186
187 // Sets 'cost' to an already existing 'subset'.
188 // This will CHECK-fail if cost is infinite or a NaN.
189 void SetSubsetCost(BaseInt subset, Cost cost);
190 void SetSubsetCost(SubsetIndex subset, Cost cost);
191
192 // Adds 'element' to an already existing 'subset'.
193 // No check is done if element is already in the subset.
194 void AddElementToSubset(BaseInt element, BaseInt subset);
195 void AddElementToSubset(ElementIndex element, SubsetIndex subset);
196
197 // Creates the sparse ("dual") representation of the problem.
198 void CreateSparseRowView();
199
200 // Returns true if the problem is feasible, i.e. if the subsets cover all
201 // the elements.
202 bool ComputeFeasibility() const;
203
204 // Reserves num_subsets columns in the model.
206 void ReserveNumSubsets(SubsetIndex num_subsets);
207
208 // Reserves num_elements rows in the column indexed by subset.
211 SubsetIndex subset);
212
213 // Returns the model as a SetCoverProto. The function is not const because
214 // the element indices in the columns need to be sorted for the representation
215 // as a protobuf to be canonical.
216 SetCoverProto ExportModelAsProto();
217
218 // Imports the model from a SetCoverProto.
219 void ImportModelFromProto(const SetCoverProto& message);
220
221 // A struct enabling to show basic statistics on rows and columns.
222 // The meaning of the fields is obvious.
223 struct Stats {
224 double min;
225 double max;
226 double median;
227 double mean;
228 double stddev;
229
230 std::string DebugString() const {
231 return absl::StrCat("min = ", min, ", max = ", max, ", mean = ", mean,
232 ", median = ", median, ", stddev = ", stddev, ", ");
233 }
234 };
235
236 // Computes basic statistics on costs and returns a Stats structure.
237 Stats ComputeCostStats();
238
239 // Computes basic statistics on rows and returns a Stats structure.
240 Stats ComputeRowStats();
241
242 // Computes basic statistics on columns and returns a Stats structure.
243 Stats ComputeColumnStats();
244
245 // Computes deciles on rows and returns a vector of deciles.
246 std::vector<ssize_t> ComputeRowDeciles() const;
247
248 // Computes deciles on columns and returns a vector of deciles.
249 std::vector<ssize_t> ComputeColumnDeciles() const;
250
251 private:
252 // Updates the all_subsets_ vector so that it always contains 0 to
253 // columns.size() - 1
254 void UpdateAllSubsetsList();
255
256 // Number of elements.
257 BaseInt num_elements_;
258
259 // Number of subsets. Maintained for ease of access.
260 BaseInt num_subsets_;
261
262 // Number of nonzeros in the matrix.
263 ssize_t num_nonzeros_;
264
265 // True when the SparseRowView is up-to-date.
266 bool row_view_is_valid_;
267
268 // Costs for each subset.
269
270 SubsetCostVector subset_costs_;
271
272 // Vector of columns. Each column corresponds to a subset and contains the
273 // elements of the given subset.
274 // This takes nnz (number of non-zeros) BaseInts, or |E| * |S| * fill_rate.
275 // On classical benchmarks, the fill rate is in the 2 to 5% range.
276 // Some synthetic benchmarks have fill rates of 20%, while benchmarks for
277 // rail rotations have a fill rate of 0.2 to 0.4%.
278 // TODO(user): try using a compressed representation like Protocol Buffers,
279 // since the data is only iterated upon.
280 SparseColumnView columns_;
281
282 // Vector of rows. Each row corresponds to an element and contains the
283 // subsets containing the element.
284 // The size is exactly the same as for columns_.
285 SparseRowView rows_;
286
287 // Vector of indices from 0 to columns.size() - 1. (Like std::iota, but built
288 // incrementally.) Used to (un)focus optimization algorithms on the complete
289 // problem.
290 // This takes |S| BaseInts.
291 // TODO(user): use this to enable deletion and recycling of columns/subsets.
292 // TODO(user): replace this with an iterator?
293 std::vector<SubsetIndex> all_subsets_;
294};
295
296// The IntersectingSubsetsIterator is a forward iterator that returns the next
297// intersecting subset for a fixed seed_subset.
298// The iterator is initialized with a model and a seed_subset and
299// allows a speedup in getting the intersecting subsets
300// by not storing them in memory.
301// The iterator is at the end when the last intersecting subset has been
302// returned.
303// TODO(user): Add the possibility for range-for loops.
305 public:
307 SubsetIndex seed_subset)
308 : intersecting_subset_(-1),
309 element_entry_(0),
310 subset_entry_(0),
311 seed_subset_(seed_subset),
312 model_(model),
313 subset_seen_(model_.columns().size(), false) {
314 CHECK(model_.row_view_is_valid());
315 subset_seen_[seed_subset] = true; // Avoid iterating on `seed_subset`.
316 ++(*this); // Move to the first intersecting subset.
317 }
318
319 // Returns (true) whether the iterator is at the end.
320 bool at_end() const {
321 return element_entry_.value() == model_.columns()[seed_subset_].size();
322 }
323
324 // Returns the intersecting subset.
325 SubsetIndex operator*() const { return intersecting_subset_; }
326
327 // Move the iterator to the next intersecting subset.
329 DCHECK(model_.row_view_is_valid());
330 DCHECK(!at_end());
331 const SparseRowView& rows = model_.rows();
332 const SparseColumn& column = model_.columns()[seed_subset_];
333 for (; element_entry_ < ColumnEntryIndex(column.size()); ++element_entry_) {
334 const ElementIndex current_element = column[element_entry_];
335 const SparseRow& current_row = rows[current_element];
336 for (; subset_entry_ < RowEntryIndex(current_row.size());
337 ++subset_entry_) {
338 intersecting_subset_ = current_row[subset_entry_];
339 if (!subset_seen_[intersecting_subset_]) {
340 subset_seen_[intersecting_subset_] = true;
341 return *this;
342 }
343 }
344 subset_entry_ = RowEntryIndex(0); // 'carriage-return'
345 }
346 return *this;
347 }
348
349 private:
350 // The intersecting subset.
351 SubsetIndex intersecting_subset_;
352
353 // The position of the entry in the column corresponding to `seed_subset_`.
354 ColumnEntryIndex element_entry_;
355
356 // The position of the entry in the row corresponding to `element_entry`.
357 RowEntryIndex subset_entry_;
358
359 // The seed subset.
360 SubsetIndex seed_subset_;
361
362 // The model to which the iterator is applying.
363 const SetCoverModel& model_;
364
365 // A vector of Booleans indicating whether the current subset has been
366 // already seen by the iterator.
367 SubsetBoolVector subset_seen_;
368};
369
370} // namespace operations_research
371
372#endif // OR_TOOLS_ALGORITHMS_SET_COVER_MODEL_H_
IntegerValue size
bool at_end() const
Returns (true) whether the iterator is at the end.
SubsetIndex operator*() const
Returns the intersecting subset.
IntersectingSubsetsIterator & operator++()
Move the iterator to the next intersecting subset.
IntersectingSubsetsIterator(const SetCoverModel &model, SubsetIndex seed_subset)
Main class for describing a weighted set-covering problem.
void ImportModelFromProto(const SetCoverProto &message)
Imports the model from a SetCoverProto.
void ReserveNumElementsInSubset(BaseInt num_elements, BaseInt subset)
Reserves num_elements rows in the column indexed by subset.
Stats ComputeColumnStats()
Computes basic statistics on columns and returns a Stats structure.
util_intops::StrongIntRange< ElementIndex > ElementRange() const
void AddElementToLastSubset(BaseInt element)
Stats ComputeCostStats()
Computes basic statistics on costs and returns a Stats structure.
Stats ComputeRowStats()
Computes basic statistics on rows and returns a Stats structure.
std::vector< ssize_t > ComputeColumnDeciles() const
Computes deciles on columns and returns a vector of deciles.
const SparseRowView & rows() const
Row view of the set covering problem.
void ReserveNumSubsets(BaseInt num_subsets)
Reserves num_subsets columns in the model.
ssize_t num_nonzeros() const
Current number of nonzeros in the matrix.
util_intops::StrongIntRange< SubsetIndex > SubsetRange() const
Access to the ranges of subsets and elements.
void AddElementToSubset(BaseInt element, BaseInt subset)
std::vector< ssize_t > ComputeRowDeciles() const
Computes deciles on rows and returns a vector of deciles.
void SetSubsetCost(BaseInt subset, Cost cost)
bool row_view_is_valid() const
Returns true if rows_ and columns_ represent the same problem.
const SubsetCostVector & subset_costs() const
Vector of costs for each subset.
void CreateSparseRowView()
Creates the sparse ("dual") representation of the problem.
SetCoverModel()
Constructs an empty weighted set-covering problem.
const SparseColumnView & columns() const
Column view of the set covering problem.
std::vector< SubsetIndex > all_subsets() const
Returns the list of indices for all the subsets in the model.
GRBmodel * model
In SWIG mode, we don't want anything besides these top-level includes.
constexpr int kSetCoverAlignmentInBytes
double Cost
Basic non-strict type for cost. The speed penalty for using double is ~2%.
int column
#define DEFINE_STRONG_INT_TYPE(int_type_name, value_type)
Definition strong_int.h:168
std::string message
Definition trace.cc:397