27#include "absl/log/check.h"
28#include "absl/log/log.h"
29#include "absl/numeric/bits.h"
30#include "absl/random/discrete_distribution.h"
31#include "absl/random/distributions.h"
32#include "absl/random/random.h"
33#include "absl/strings/str_format.h"
34#include "absl/strings/str_join.h"
35#include "absl/strings/string_view.h"
36#include "absl/types/span.h"
39#include "ortools/set_cover/set_cover.pb.h"
48BaseInt DiscreteAffine(absl::BitGen& bitgen,
49 absl::discrete_distribution<BaseInt>& dist,
BaseInt min,
50 double scaling_factor) {
51 const BaseInt raw_value = dist(bitgen);
52 const double random_term = absl::Uniform<double>(bitgen, 0, 1.0);
55 std::floor((raw_value - min + random_term) * scaling_factor)) +
63template <
typename View>
64std::tuple<BaseInt, std::vector<BaseInt>> ComputeSizeHistogram(
67 BaseInt min_size = std::numeric_limits<BaseInt>::max();
68 for (
const auto& vec : view) {
69 const BaseInt size = vec.size();
70 min_size = std::min(min_size, size);
71 max_size = std::max(max_size, size);
73 std::vector<BaseInt> weights(max_size + 1, 0);
74 for (
const auto& vec : view) {
75 const BaseInt size = vec.size();
78 return {min_size, weights};
81template <
typename View>
82std::tuple<BaseInt, absl::discrete_distribution<BaseInt>>
83ComputeSizeDistribution(
const View& view) {
84 const auto [min_size, weights] = ComputeSizeHistogram(view);
85 absl::discrete_distribution<BaseInt> dist(weights.begin(), weights.end());
86 return {min_size, dist};
92 double row_scale,
double column_scale,
double cost_scale) {
94 StopWatch stop_watch(&(model.generation_duration_));
95 DCHECK_GT(row_scale, 0.0);
96 DCHECK_GT(column_scale, 0.0);
97 DCHECK_GT(cost_scale, 0.0);
99 model.num_nonzeros_ = 0;
101 model.UpdateAllSubsetsList();
106 auto [min_column_size, column_dist] =
107 ComputeSizeDistribution(seed_model.
columns());
111 auto [min_row_size, row_dist] = ComputeSizeDistribution(seed_model.
rows());
117 for (ElementIndex element(0); element.value() <
num_elements; ++element) {
119 DiscreteAffine(bitgen, row_dist, min_row_size, row_scale);
121 absl::discrete_distribution<BaseInt> degree_dist(
degrees.begin(),
128 BaseInt num_elements_covered(0);
132 const int kMaxTries = 10;
137 for (SubsetIndex subset(0); subset.value() <
num_subsets; ++subset) {
138 LOG_EVERY_N_SEC(INFO, 5)
139 << absl::StrFormat(
"Generating subset %d (%.1f%%)", subset.value(),
142 DiscreteAffine(bitgen, column_dist, min_column_size, column_scale);
143 model.columns_[subset].
reserve(cardinality);
144 for (
BaseInt iter = 0; iter < cardinality; ++iter) {
146 ElementIndex element;
150 element = ElementIndex(degree_dist(bitgen));
153 }
while (num_tries < kMaxTries &&
154 subset_already_contains_element[element]);
155 ++model.num_nonzeros_;
156 model.columns_[subset].
push_back(element);
157 subset_already_contains_element[element] =
true;
158 if (!contains_element[element]) {
159 contains_element[element] =
true;
160 ++num_elements_covered;
163 for (
const ElementIndex element : model.columns_[subset]) {
164 subset_already_contains_element[element] =
false;
167 VLOG(1) <<
"Finished generating the model with " << num_elements_covered
168 <<
" elements covered.";
173 VLOG(1) <<
"Generated model with " <<
num_elements - num_elements_covered
174 <<
" elements that cannot be covered. Adding them to random "
177 for (ElementIndex element(0); element.value() <
num_elements; ++element) {
178 VLOG_EVERY_N_SEC(1, 5) << absl::StrFormat(
179 "Generating subsets for element %d (%.1f%%)", element.value(),
181 if (!contains_element[element]) {
183 DiscreteAffine(bitgen, row_dist, min_row_size, row_scale);
184 std::vector<SubsetIndex> generated_subsets;
185 generated_subsets.reserve(degree);
186 for (
BaseInt i = 0; i < degree; ++i) {
188 SubsetIndex subset_index;
191 subset_index = SubsetIndex(DiscreteAffine(
192 bitgen, column_dist, min_column_size, column_scale));
194 }
while (num_tries < kMaxTries &&
195 element_already_in_subset[subset_index]);
196 ++model.num_nonzeros_;
197 model.columns_[subset_index].
push_back(element);
198 element_already_in_subset[subset_index] =
true;
199 generated_subsets.push_back(subset_index);
201 for (
const SubsetIndex subset_index : generated_subsets) {
202 element_already_in_subset[subset_index] =
false;
204 contains_element[element] =
true;
205 ++num_elements_covered;
208 VLOG(1) <<
"Finished generating subsets for elements that were not "
209 "covered in the original model.";
211 VLOG(1) <<
"Finished generating the model. There are "
212 <<
num_elements - num_elements_covered <<
" uncovered elements.";
219 Cost min_cost = std::numeric_limits<Cost>::infinity();
220 Cost max_cost = -min_cost;
222 min_cost = std::min(min_cost, cost);
223 max_cost = std::max(max_cost, cost);
227 const Cost cost_range = cost_scale * (max_cost - min_cost);
228 for (
Cost& cost : model.subset_costs_) {
229 cost = min_cost + absl::Uniform<double>(bitgen, 0, cost_range);
235void SetCoverModel::UpdateAllSubsetsList() {
236 const BaseInt old_size = all_subsets_.size();
240 all_subsets_[subset] = SubsetIndex(subset);
246 is_unicost_valid_ =
false;
247 elements_in_subsets_are_sorted_ =
false;
248 subset_costs_.push_back(cost);
250 all_subsets_.push_back(SubsetIndex(num_subsets_));
255 row_view_is_valid_ =
false;
259 elements_in_subsets_are_sorted_ =
false;
260 columns_.back().push_back(ElementIndex(element));
261 num_elements_ = std::max(num_elements_, element + 1);
264 row_view_is_valid_ =
false;
274 is_unicost_valid_ =
false;
275 elements_in_subsets_are_sorted_ =
false;
276 CHECK(std::isfinite(cost));
277 DCHECK_GE(subset, 0);
279 num_subsets_ = std::max(num_subsets_, subset + 1);
281 subset_costs_.resize(num_subsets_, 0.0);
282 row_view_is_valid_ =
false;
283 UpdateAllSubsetsList();
285 subset_costs_[SubsetIndex(subset)] = cost;
293 elements_in_subsets_are_sorted_ =
false;
295 num_subsets_ = subset + 1;
296 subset_costs_.resize(num_subsets_, 0.0);
298 UpdateAllSubsetsList();
300 columns_[SubsetIndex(subset)].push_back(ElementIndex(element));
301 num_elements_ = std::max(num_elements_, element + 1);
303 row_view_is_valid_ =
false;
307 SubsetIndex subset) {
312 num_subsets_ = std::max(num_subsets_,
num_subsets);
314 subset_costs_.resize(num_subsets_, 0.0);
315 UpdateAllSubsetsList();
325 columns_[SubsetIndex(subset)].reserve(ColumnEntryIndex(
num_elements));
331 BaseInt* data =
reinterpret_cast<BaseInt*
>(columns_[subset].data());
332 RadixSort(absl::MakeSpan(data, columns_[subset].size()));
334 elements_in_subsets_are_sorted_ =
true;
338 StopWatch stop_watch(&create_sparse_row_view_duration_);
339 if (row_view_is_valid_) {
340 VLOG(1) <<
"CreateSparseRowView: already valid";
343 VLOG(1) <<
"CreateSparseRowView started";
344 rows_.resize(num_elements_,
SparseRow());
347 BaseInt* data =
reinterpret_cast<BaseInt*
>(columns_[subset].data());
348 RadixSort(absl::MakeSpan(data, columns_[subset].size()));
350 ElementIndex preceding_element(-1);
351 for (
const ElementIndex element : columns_[subset]) {
352 CHECK_GT(element, preceding_element)
353 <<
"Repetition in column "
355 ++row_sizes[element];
356 preceding_element = element;
360 rows_[element].reserve(RowEntryIndex(row_sizes[element]));
363 for (
const ElementIndex element : columns_[subset]) {
364 rows_[element].emplace_back(subset);
367 row_view_is_valid_ =
true;
368 elements_in_subsets_are_sorted_ =
true;
369 VLOG(1) <<
"CreateSparseRowView finished";
373 int num_partitions) {
374 if (num_partitions <= 1 || columns_.empty()) {
375 return {SubsetIndex(columns_.size())};
378 std::vector<BaseInt> partial_sum_nnz(columns_.size());
379 partial_sum_nnz[0] = columns_[SubsetIndex(0)].size();
380 for (
BaseInt col = 1; col < columns_.size(); ++col) {
381 partial_sum_nnz[col] =
382 partial_sum_nnz[col - 1] + columns_[SubsetIndex(col)].size();
384 const BaseInt total_nnz = partial_sum_nnz.back();
385 const BaseInt target_nnz = (total_nnz + num_partitions - 1) / num_partitions;
387 std::vector<SubsetIndex> partition_points(num_partitions);
388 BaseInt threshold = target_nnz;
391 if (partial_sum_nnz[col.value()] >= threshold) {
392 partition_points[pos] = col;
394 threshold += target_nnz;
397 partition_points[num_partitions - 1] = SubsetIndex(columns_.size());
398 return partition_points;
402 SubsetIndex end_subset) {
404 rows.reserve(num_elements_);
406 for (SubsetIndex subset = begin_subset; subset < end_subset; ++subset) {
407 BaseInt* data =
reinterpret_cast<BaseInt*
>(columns_[subset].data());
408 RadixSort(absl::MakeSpan(data, columns_[subset].size()));
410 ElementIndex preceding_element(-1);
411 for (
const ElementIndex element : columns_[subset]) {
412 CHECK_GT(element, preceding_element)
413 <<
"Repetition in column "
415 ++row_sizes[element];
416 preceding_element = element;
420 rows[element].reserve(RowEntryIndex(row_sizes[element]));
422 for (SubsetIndex subset = begin_subset; subset < end_subset; ++subset) {
423 for (
const ElementIndex element : columns_[subset]) {
424 rows[element].emplace_back(subset);
431 absl::Span<const SubsetIndex> partition_points) {
432 std::vector<SparseRowView> row_views;
433 row_views.reserve(partition_points.size());
434 SubsetIndex begin_subset(0);
436 for (SubsetIndex end_subset : partition_points) {
438 begin_subset = end_subset;
444 absl::Span<const SparseRowView> slices) {
447 result_rows.
reserve(num_elements_);
452 result_rows[element].
insert(result_rows[element].
end(),
453 slice[element].
begin(), slice[element].
end());
460 StopWatch stop_watch(&compute_sparse_row_view_using_slices_duration_);
462 VLOG(1) <<
"CreateSparseRowViewUsingSlices started";
463 const std::vector<SubsetIndex> partition_points =
465 const std::vector<SparseRowView> slices =
468 VLOG(1) <<
"CreateSparseRowViewUsingSlices finished";
473 StopWatch stop_watch(&feasibility_duration_);
479 for (
const Cost cost : subset_costs_) {
483 SubsetIndex column_index(0);
485 DLOG_IF(INFO, column.empty()) <<
"Empty column " << column_index.value();
486 for (
const ElementIndex element : column) {
487 ++possible_coverage[element];
492 int64_t num_uncoverable_elements = 0;
495 if (possible_coverage[element] == 0) {
496 ++num_uncoverable_elements;
499 VLOG(1) <<
"num_uncoverable_elements = " << num_uncoverable_elements;
501 if (possible_coverage[element] > 0)
continue;
502 LOG(ERROR) <<
"Element " << element <<
" is not covered.";
505 VLOG(1) <<
"Max possible coverage = "
506 << *std::max_element(possible_coverage.begin(),
507 possible_coverage.end());
509 if (all_subsets_[subset.value()] == subset)
continue;
510 LOG(ERROR) <<
"subset = " << subset <<
" all_subsets_[subset.value()] = "
511 << all_subsets_[subset.value()];
518 CHECK(elements_in_subsets_are_sorted_);
519 SetCoverProto message;
521 VLOG_EVERY_N_SEC(1, 5) << absl::StrFormat(
522 "Exporting subset %d (%.1f%%)", subset.value(),
524 SetCoverProto::Subset* subset_proto = message.add_subset();
525 subset_proto->set_cost(subset_costs_[subset]);
528 RadixSort(absl::MakeSpan(data, column.size()));
529 for (
const ElementIndex element : column) {
530 subset_proto->add_element(element.value());
533 VLOG(1) <<
"Finished exporting the model.";
539 subset_costs_.clear();
541 SubsetIndex subset_index(0);
542 for (
const SetCoverProto::Subset& subset_proto : message.subset()) {
543 subset_costs_[subset_index] = subset_proto.cost();
544 if (subset_proto.element_size() > 0) {
545 columns_[subset_index].reserve(
546 ColumnEntryIndex(subset_proto.element_size()));
547 for (
const BaseInt element : subset_proto.element()) {
548 columns_[subset_index].push_back(ElementIndex(element));
549 num_elements_ = std::max(num_elements_, element + 1);
554 UpdateAllSubsetsList();
559 return absl::StrJoin(
560 std::make_tuple(
"num_elements",
num_elements(),
"num_subsets",
576double StandardDeviation(
const std::vector<T>& values) {
577 const size_t size = values.size();
579 double sum_of_squares = 0.0;
581 for (
size_t i = 0; i < size; ++i) {
582 double sample =
static_cast<double>(values[i]);
583 if (sample == 0.0)
continue;
584 sum_of_squares += sample * sample;
590 return n == 0.0 ? 0.0 : sqrt((sum_of_squares - sum * sum / n) / n);
598class StatsAccumulator {
605 sum_of_squares_(0.0) {}
607 void Register(
double value) {
609 min_ = std::min(min_, value);
610 max_ = std::max(max_, value);
612 sum_of_squares_ += value * value;
616 const int64_t n = count_;
619 const double stddev =
620 n == 0 ? 0.0 : sqrt((sum_of_squares_ - sum_ * sum_ / n) / n);
621 return SetCoverModel::Stats{min_, max_, 0.0, sum_ / n, stddev};
625 static constexpr double kInfinity = std::numeric_limits<double>::infinity();
630 double sum_of_squares_;
637 stats.
min = *std::min_element(samples.begin(), samples.end());
638 stats.
max = *std::max_element(samples.begin(), samples.end());
640 std::accumulate(samples.begin(), samples.end(), 0.0) / samples.size();
641 auto const q1 = samples.size() / 4;
642 auto const q2 = samples.size() / 2;
643 auto const q3 = q1 + q2;
650 std::nth_element(samples.begin(), samples.begin() + q2, samples.end());
651 std::nth_element(samples.begin(), samples.begin() + q1, samples.begin() + q2);
652 std::nth_element(samples.begin() + q2, samples.begin() + q3, samples.end());
653 stats.
median = samples[samples.size() / 2];
654 stats.
iqr = samples[q3] - samples[q1];
655 stats.
stddev = StandardDeviation(samples);
661 const int kNumDeciles = 10;
662 std::vector<T> deciles(kNumDeciles, T{0});
663 const double step = values.size() / kNumDeciles;
664 for (
int i = 1; i < kNumDeciles; ++i) {
665 const size_t point = std::clamp<double>(i * step, 0, values.size() - 1);
666 std::nth_element(values.begin(), values.begin() + point, values.end());
667 deciles[i] = values[point];
674 std::copy(subset_costs_.begin(), subset_costs_.end(),
subset_costs.begin());
681 for (
const ElementIndex element : column) {
682 ++row_sizes[element.value()];
689 std::vector<int64_t> column_sizes(columns_.size());
691 column_sizes[subset.value()] = columns_[subset].size();
702 return absl::StrJoin(
711 for (
const ElementIndex element : column) {
712 ++row_sizes[element.value()];
719 std::vector<int64_t> column_sizes(columns_.size());
721 column_sizes[subset.value()] = columns_[subset].size();
729 const uint64_t u = x == 0 ? 1 :
static_cast<uint64_t
>(x);
730 return (std::numeric_limits<uint64_t>::digits - absl::countl_zero(u) + 6) / 7;
735 StatsAccumulator acc;
737 int64_t previous = 0;
738 for (
const ElementIndex element : column) {
739 const int64_t delta = element.value() - previous;
740 previous = element.value();
741 acc.Register(Base128SizeInBytes(delta));
744 return acc.ComputeStats();
748 StatsAccumulator acc;
750 int64_t previous = 0;
751 for (
const SubsetIndex subset : row) {
752 const int64_t delta = subset.value() - previous;
753 previous = subset.value();
754 acc.Register(Base128SizeInBytes(delta));
757 return acc.ComputeStats();
void ImportModelFromProto(const SetCoverProto &message)
Imports the model from a SetCoverProto.
SparseRowView ComputeSparseRowViewUsingSlices()
Stats ComputeCostStats() const
Computes basic statistics on costs and returns a Stats structure.
std::string ToString(absl::string_view sep) const
Returns a string representation of the model, using the given separator.
double FillRate() const
Returns the fill rate of the matrix.
static SetCoverModel GenerateRandomModelFrom(const SetCoverModel &seed_model, BaseInt num_elements, BaseInt num_subsets, double row_scale, double column_scale, double cost_scale)
void ResizeNumSubsets(BaseInt num_subsets)
void ReserveNumElementsInSubset(BaseInt num_elements, BaseInt subset)
Reserves num_elements rows in the column indexed by subset.
std::vector< int64_t > ComputeRowDeciles() const
Computes deciles on rows and returns a vector of deciles.
SetCoverModel(const absl::string_view name="SetCoverModel")
Constructs an empty weighted set-covering problem.
std::vector< SubsetIndex > ComputeSliceIndices(int num_partitions)
util_intops::StrongIntRange< ElementIndex > ElementRange() const
void AddElementToLastSubset(BaseInt element)
void AddEmptySubset(Cost cost)
const SparseRowView & rows() const
Row view of the set covering problem.
std::vector< SparseRowView > CutSparseRowViewInSlices(absl::Span< const SubsetIndex > partition_points)
Stats ComputeRowStats() const
Computes basic statistics on rows and returns a Stats structure.
int64_t num_nonzeros() const
Current number of nonzeros in the matrix.
BaseInt num_elements() const
void SortElementsInSubsets()
util_intops::StrongIntRange< SubsetIndex > SubsetRange() const
Access to the ranges of subsets and elements.
std::string ToVerboseString(absl::string_view sep) const
Stats ComputeColumnDeltaSizeStats() const
SparseRowView ComputeSparseRowViewSlice(SubsetIndex begin_subset, SubsetIndex end_subset)
bool ComputeFeasibility()
void AddElementToSubset(BaseInt element, BaseInt subset)
Stats ComputeRowDeltaSizeStats() const
void SetSubsetCost(BaseInt subset, Cost cost)
const SubsetCostVector & subset_costs() const
Vector of costs for each subset.
void CreateSparseRowView()
SetCoverProto ExportModelAsProto() const
Stats ComputeColumnStats() const
Computes basic statistics on columns and returns a Stats structure.
std::vector< int64_t > ComputeColumnDeciles() const
Computes deciles on columns and returns a vector of deciles.
const SparseColumnView & columns() const
Column view of the set covering problem.
BaseInt num_subsets() const
SparseRowView ReduceSparseRowViewSlices(absl::Span< const SparseRowView > row_slices)
value_type * data()
– Pass-through methods to STL vector ----------------------------------—
void push_back(const value_type &val)
iterator insert(const_iterator pos, const value_type &x)
void reserve(size_type n)
In SWIG mode, we don't want anything besides these top-level includes.
SetCoverModel::Stats ComputeStats(std::vector< T > samples)
util_intops::StrongVector< RowEntryIndex, SubsetIndex > SparseRow
ClosedInterval::Iterator end(ClosedInterval interval)
util_intops::StrongVector< ElementIndex, BaseInt > ElementToIntVector
util_intops::StrongVector< SubsetIndex, bool > SubsetBoolVector
void RadixSort(absl::Span< T > values, int num_bits=sizeof(T) *8)
util_intops::StrongVector< ElementIndex, bool > ElementBoolVector
double Cost
Basic non-strict type for cost. The speed penalty for using double is ~2%.
std::vector< T > ComputeDeciles(std::vector< T > values)
util_intops::StrongVector< ColumnEntryIndex, ElementIndex > SparseColumn
ClosedInterval::Iterator begin(ClosedInterval interval)
util_intops::StrongVector< ElementIndex, SparseRow > SparseRowView
trees with all degrees equal w the current value of degrees
std::string ToVerboseString(absl::string_view sep) const
std::string ToString(absl::string_view sep) const
Returns a string representation of the stats, using the given separator.