26#include "absl/log/check.h"
27#include "absl/numeric/bits.h"
28#include "absl/random/discrete_distribution.h"
29#include "absl/random/distributions.h"
30#include "absl/random/random.h"
31#include "absl/strings/str_format.h"
32#include "absl/types/span.h"
34#include "ortools/algorithms/set_cover.pb.h"
44BaseInt DiscreteAffine(absl::BitGen& bitgen,
45 absl::discrete_distribution<BaseInt>& dist,
BaseInt min,
46 double scaling_factor) {
47 const BaseInt raw_value = dist(bitgen);
48 const double random_term = absl::Uniform<double>(bitgen, 0, 1.0);
51 std::floor((raw_value - min + random_term) * scaling_factor)) +
59template <
typename View>
60std::tuple<BaseInt, std::vector<BaseInt>> ComputeSizeHistogram(
63 BaseInt min_size = std::numeric_limits<BaseInt>::max();
64 for (
const auto& vec : view) {
65 const BaseInt size = vec.size();
66 min_size = std::min(min_size, size);
67 max_size = std::max(max_size, size);
69 std::vector<BaseInt> weights(max_size + 1, 0);
70 for (
const auto& vec : view) {
71 const BaseInt size = vec.size();
74 return {min_size, weights};
77template <
typename View>
78std::tuple<BaseInt, absl::discrete_distribution<BaseInt>>
79ComputeSizeDistribution(
const View& view) {
80 const auto [min_size, weights] = ComputeSizeHistogram(view);
81 absl::discrete_distribution<BaseInt> dist(weights.begin(), weights.end());
82 return {min_size, dist};
88 double row_scale,
double column_scale,
double cost_scale) {
90 DCHECK_GT(row_scale, 0.0);
91 DCHECK_GT(column_scale, 0.0);
92 DCHECK_GT(cost_scale, 0.0);
94 model.num_nonzeros_ = 0;
96 model.UpdateAllSubsetsList();
101 auto [min_column_size, column_dist] =
102 ComputeSizeDistribution(seed_model.
columns());
106 auto [min_row_size, row_dist] = ComputeSizeDistribution(seed_model.
rows());
112 for (ElementIndex element(0); element.value() <
num_elements; ++element) {
114 DiscreteAffine(bitgen, row_dist, min_row_size, row_scale);
116 absl::discrete_distribution<BaseInt> degree_dist(
degrees.begin(),
123 BaseInt num_elements_covered(0);
127 const int kMaxTries = 10;
132 for (SubsetIndex subset(0); subset.value() <
num_subsets; ++subset) {
133 LOG_EVERY_N_SEC(INFO, 5)
134 << absl::StrFormat(
"Generating subset %d (%.1f%%)", subset.value(),
137 DiscreteAffine(bitgen, column_dist, min_column_size, column_scale);
138 model.columns_[subset].
reserve(cardinality);
139 for (
BaseInt iter = 0; iter < cardinality; ++iter) {
141 ElementIndex element;
145 element = ElementIndex(degree_dist(bitgen));
148 }
while (num_tries < kMaxTries &&
149 subset_already_contains_element[element]);
150 ++model.num_nonzeros_;
151 model.columns_[subset].
push_back(element);
152 subset_already_contains_element[element] =
true;
153 if (!contains_element[element]) {
154 contains_element[element] =
true;
155 ++num_elements_covered;
158 for (
const ElementIndex element : model.columns_[subset]) {
159 subset_already_contains_element[element] =
false;
162 LOG(INFO) <<
"Finished genreating the model with " << num_elements_covered
163 <<
" elements covered.";
168 LOG(INFO) <<
"Generated model with " <<
num_elements - num_elements_covered
169 <<
" elements that cannot be covered. Adding them to random "
172 for (ElementIndex element(0); element.value() <
num_elements; ++element) {
173 LOG_EVERY_N_SEC(INFO, 5) << absl::StrFormat(
174 "Generating subsets for element %d (%.1f%%)", element.value(),
176 if (!contains_element[element]) {
178 DiscreteAffine(bitgen, row_dist, min_row_size, row_scale);
179 std::vector<SubsetIndex> generated_subsets;
180 generated_subsets.reserve(degree);
181 for (
BaseInt i = 0; i < degree; ++i) {
183 SubsetIndex subset_index;
186 subset_index = SubsetIndex(DiscreteAffine(
187 bitgen, column_dist, min_column_size, column_scale));
189 }
while (num_tries < kMaxTries &&
190 element_already_in_subset[subset_index]);
191 ++model.num_nonzeros_;
192 model.columns_[subset_index].
push_back(element);
193 element_already_in_subset[subset_index] =
true;
194 generated_subsets.push_back(subset_index);
196 for (
const SubsetIndex subset_index : generated_subsets) {
197 element_already_in_subset[subset_index] =
false;
199 contains_element[element] =
true;
200 ++num_elements_covered;
203 LOG(INFO) <<
"Finished generating subsets for elements that were not "
204 "covered in the original model.";
206 LOG(INFO) <<
"Finished generating the model. There are "
207 <<
num_elements - num_elements_covered <<
" uncovered elements.";
214 Cost min_cost = std::numeric_limits<Cost>::infinity();
215 Cost max_cost = -min_cost;
217 min_cost = std::min(min_cost, cost);
218 max_cost = std::max(max_cost, cost);
222 const Cost cost_range = cost_scale * (max_cost - min_cost);
223 for (
Cost& cost : model.subset_costs_) {
224 cost = min_cost + absl::Uniform<double>(bitgen, 0, cost_range);
230void SetCoverModel::UpdateAllSubsetsList() {
231 const BaseInt old_size = all_subsets_.size();
235 all_subsets_[subset] = SubsetIndex(subset);
240 elements_in_subsets_are_sorted_ =
false;
241 subset_costs_.push_back(cost);
243 all_subsets_.push_back(SubsetIndex(num_subsets_));
248 row_view_is_valid_ =
false;
252 elements_in_subsets_are_sorted_ =
false;
253 columns_.back().push_back(ElementIndex(element));
254 num_elements_ = std::max(num_elements_, element + 1);
257 row_view_is_valid_ =
false;
265 elements_in_subsets_are_sorted_ =
false;
266 CHECK(std::isfinite(cost));
267 DCHECK_GE(subset, 0);
269 num_subsets_ = std::max(num_subsets_, subset + 1);
271 subset_costs_.resize(num_subsets_, 0.0);
272 row_view_is_valid_ =
false;
273 UpdateAllSubsetsList();
275 subset_costs_[SubsetIndex(subset)] = cost;
283 elements_in_subsets_are_sorted_ =
false;
285 num_subsets_ = subset + 1;
286 subset_costs_.resize(num_subsets_, 0.0);
288 UpdateAllSubsetsList();
290 columns_[SubsetIndex(subset)].push_back(ElementIndex(element));
291 num_elements_ = std::max(num_elements_, element + 1);
293 row_view_is_valid_ =
false;
297 SubsetIndex subset) {
303 num_subsets_ = std::max(num_subsets_,
num_subsets);
305 subset_costs_.resize(num_subsets_, 0.0);
306 UpdateAllSubsetsList();
317 columns_[SubsetIndex(subset)].reserve(ColumnEntryIndex(
num_elements));
321 SubsetIndex subset) {
328 BaseInt* data =
reinterpret_cast<BaseInt*
>(columns_[subset].data());
329 RadixSort(absl::MakeSpan(data, columns_[subset].size()));
331 elements_in_subsets_are_sorted_ =
true;
335 if (row_view_is_valid_) {
338 rows_.resize(num_elements_,
SparseRow());
343 BaseInt* data =
reinterpret_cast<BaseInt*
>(columns_[subset].data());
344 RadixSort(absl::MakeSpan(data, columns_[subset].size()));
346 ElementIndex preceding_element(-1);
347 for (
const ElementIndex element : columns_[subset]) {
348 DCHECK_GT(element, preceding_element);
349 ++row_sizes[element];
350 preceding_element = element;
354 rows_[element].reserve(RowEntryIndex(row_sizes[element]));
357 for (
const ElementIndex element : columns_[subset]) {
358 rows_[element].emplace_back(subset);
361 row_view_is_valid_ =
true;
362 elements_in_subsets_are_sorted_ =
true;
372 for (
const Cost cost : subset_costs_) {
376 CHECK_GT(column.size(), 0);
377 for (
const ElementIndex element : column) {
382 CHECK_GE(coverage[element], 0);
383 if (coverage[element] == 0) {
387 VLOG(1) <<
"Max possible coverage = "
388 << *std::max_element(coverage.begin(), coverage.end());
390 CHECK_EQ(all_subsets_[subset.value()], subset) <<
"subset = " << subset;
396 CHECK(elements_in_subsets_are_sorted_);
397 SetCoverProto message;
399 LOG_EVERY_N_SEC(INFO, 5)
400 << absl::StrFormat(
"Exporting subset %d (%.1f%%)", subset.value(),
402 SetCoverProto::Subset* subset_proto = message.add_subset();
403 subset_proto->set_cost(subset_costs_[subset]);
407 RadixSort(absl::MakeSpan(data, column.size()));
408 for (
const ElementIndex element : column) {
409 subset_proto->add_element(element.value());
412 LOG(INFO) <<
"Finished exporting the model.";
418 subset_costs_.clear();
420 SubsetIndex subset_index(0);
421 for (
const SetCoverProto::Subset& subset_proto : message.subset()) {
422 subset_costs_[subset_index] = subset_proto.cost();
423 if (subset_proto.element_size() > 0) {
424 columns_[subset_index].reserve(
425 ColumnEntryIndex(subset_proto.element_size()));
426 for (
const BaseInt element : subset_proto.element()) {
427 columns_[subset_index].push_back(ElementIndex(element));
428 num_elements_ = std::max(num_elements_, element + 1);
433 UpdateAllSubsetsList();
441double StandardDeviation(
const std::vector<T>& values) {
442 const size_t size = values.size();
444 double sum_of_squares = 0.0;
446 for (
size_t i = 0; i < size; ++i) {
447 double sample =
static_cast<double>(values[i]);
448 if (sample == 0.0)
continue;
449 sum_of_squares += sample * sample;
455 return n == 0.0 ? 0.0 : sqrt((sum_of_squares - sum * sum / n) / n);
463class StatsAccumulator {
470 sum_of_squares_(0.0) {}
472 void Register(
double value) {
474 min_ = std::min(min_, value);
475 max_ = std::max(max_, value);
477 sum_of_squares_ += value * value;
481 const int64_t n = count_;
484 const double stddev =
485 n == 0 ? 0.0 : sqrt((sum_of_squares_ - sum_ * sum_ / n) / n);
486 return SetCoverModel::Stats{min_, max_, 0.0, sum_ / n, stddev};
490 static constexpr double kInfinity = std::numeric_limits<double>::infinity();
495 double sum_of_squares_;
502 stats.
min = *std::min_element(sizes.begin(), sizes.end());
503 stats.
max = *std::max_element(sizes.begin(), sizes.end());
504 stats.
mean = std::accumulate(sizes.begin(), sizes.end(), 0.0) / sizes.size();
505 std::nth_element(sizes.begin(), sizes.begin() + sizes.size() / 2,
507 stats.
median = sizes[sizes.size() / 2];
508 stats.
stddev = StandardDeviation(sizes);
514 const int kNumDeciles = 10;
515 std::vector<T> deciles(kNumDeciles, T{0});
516 const double step = values.size() / kNumDeciles;
517 for (
int i = 1; i < kNumDeciles; ++i) {
518 const size_t point = std::clamp<double>(i * step, 0, values.size() - 1);
519 std::nth_element(values.begin(), values.begin() + point, values.end());
520 deciles[i] = values[point];
527 std::copy(subset_costs_.begin(), subset_costs_.end(),
subset_costs.begin());
534 for (
const ElementIndex element : column) {
535 ++row_sizes[element.value()];
542 std::vector<int64_t> column_sizes(columns_.size());
544 column_sizes[subset.value()] = columns_[subset].size();
552 for (
const ElementIndex element : column) {
553 ++row_sizes[element.value()];
560 std::vector<int64_t> column_sizes(columns_.size());
562 column_sizes[subset.value()] = columns_[subset].size();
570 const uint64_t u = x == 0 ? 1 :
static_cast<uint64_t
>(x);
571 return (std::numeric_limits<uint64_t>::digits - absl::countl_zero(u) + 6) / 7;
576 StatsAccumulator acc;
578 int64_t previous = 0;
579 for (
const ElementIndex element : column) {
580 const int64_t delta = element.value() - previous;
581 previous = element.value();
582 acc.Register(Base128SizeInBytes(delta));
585 return acc.ComputeStats();
589 StatsAccumulator acc;
591 int64_t previous = 0;
592 for (
const SubsetIndex subset : row) {
593 const int64_t delta = subset.value() - previous;
594 previous = subset.value();
595 acc.Register(Base128SizeInBytes(delta));
598 return acc.ComputeStats();
void ImportModelFromProto(const SetCoverProto &message)
Imports the model from a SetCoverProto.
static SetCoverModel GenerateRandomModelFrom(const SetCoverModel &seed_model, BaseInt num_elements, BaseInt num_subsets, double row_scale, double column_scale, double cost_scale)
void ReserveNumElementsInSubset(BaseInt num_elements, BaseInt subset)
Reserves num_elements rows in the column indexed by subset.
Stats ComputeColumnStats()
Computes basic statistics on columns and returns a Stats structure.
std::vector< int64_t > ComputeRowDeciles() const
Computes deciles on rows and returns a vector of deciles.
util_intops::StrongIntRange< ElementIndex > ElementRange() const
void AddElementToLastSubset(BaseInt element)
Stats ComputeCostStats()
Computes basic statistics on costs and returns a Stats structure.
void AddEmptySubset(Cost cost)
Stats ComputeRowStats()
Computes basic statistics on rows and returns a Stats structure.
const SparseRowView & rows() const
Row view of the set covering problem.
void ReserveNumSubsets(BaseInt num_subsets)
Reserves num_subsets columns in the model.
BaseInt num_elements() const
void SortElementsInSubsets()
util_intops::StrongIntRange< SubsetIndex > SubsetRange() const
Access to the ranges of subsets and elements.
Stats ComputeColumnDeltaSizeStats() const
void AddElementToSubset(BaseInt element, BaseInt subset)
Stats ComputeRowDeltaSizeStats() const
void SetSubsetCost(BaseInt subset, Cost cost)
bool ComputeFeasibility() const
const SubsetCostVector & subset_costs() const
Vector of costs for each subset.
void CreateSparseRowView()
SetCoverProto ExportModelAsProto() const
SetCoverModel()
Constructs an empty weighted set-covering problem.
std::vector< int64_t > ComputeColumnDeciles() const
Computes deciles on columns and returns a vector of deciles.
const SparseColumnView & columns() const
Column view of the set covering problem.
BaseInt num_subsets() const
value_type * data()
-- Pass-through methods to STL vector --------------------------------------
void push_back(const value_type &val)
void reserve(size_type n)
In SWIG mode, we don't want anything besides these top-level includes.
util_intops::StrongVector< RowEntryIndex, SubsetIndex > SparseRow
util_intops::StrongVector< ElementIndex, BaseInt > ElementToIntVector
util_intops::StrongVector< SubsetIndex, bool > SubsetBoolVector
util_intops::StrongVector< ElementIndex, bool > ElementBoolVector
double Cost
Basic non-strict type for cost. The speed penalty for using double is ~2%.
std::vector< T > ComputeDeciles(std::vector< T > values)
void RadixSort(absl::Span< T > values)
util_intops::StrongVector< ColumnEntryIndex, ElementIndex > SparseColumn
SetCoverModel::Stats ComputeStats(std::vector< T > sizes)
trees with all degrees equal to the current value of degrees