25#include "absl/log/check.h"
26#include "ortools/algorithms/set_cover.pb.h"
31void SetCoverModel::UpdateAllSubsetsList() {
32 const BaseInt old_size = all_subsets_.size();
36 all_subsets_[subset] = SubsetIndex(subset);
43 all_subsets_.push_back(SubsetIndex(num_subsets_));
48 row_view_is_valid_ =
false;
52 columns_.back().
push_back(ElementIndex(element));
53 num_elements_ = std::max(num_elements_, element + 1);
56 row_view_is_valid_ =
false;
64 CHECK(std::isfinite(cost));
66 num_subsets_ = std::max(num_subsets_, subset + 1);
68 subset_costs_.
resize(num_subsets_, 0.0);
69 subset_costs_[SubsetIndex(subset)] = cost;
70 UpdateAllSubsetsList();
71 row_view_is_valid_ =
false;
79 num_subsets_ = std::max(num_subsets_, subset + 1);
80 subset_costs_.
resize(num_subsets_, 0.0);
82 UpdateAllSubsetsList();
83 columns_[SubsetIndex(subset)].
push_back(ElementIndex(element));
84 num_elements_ = std::max(num_elements_, element + 1);
86 row_view_is_valid_ =
false;
96 num_subsets_ = std::max(num_subsets_, number_of_subsets);
98 subset_costs_.
resize(num_subsets_, 0.0);
113 SubsetIndex subset) {
118 if (row_view_is_valid_) {
126 std::sort(columns_[subset].begin(), columns_[subset].
end());
127 for (
const ElementIndex element : columns_[subset]) {
128 ++row_sizes[element];
132 rows_[element].
reserve(RowEntryIndex(row_sizes[element]));
135 for (
const ElementIndex element : columns_[subset]) {
139 row_view_is_valid_ =
true;
149 for (
const Cost cost : subset_costs_) {
153 CHECK_GT(
column.size(), 0);
154 for (
const ElementIndex element :
column) {
159 CHECK_GE(coverage[element], 0);
160 if (coverage[element] == 0) {
164 VLOG(1) <<
"Max possible coverage = "
165 << *std::max_element(coverage.begin(), coverage.end());
167 CHECK_EQ(all_subsets_[subset.value()], subset) <<
"subset = " << subset;
175 SetCoverProto::Subset* subset_proto =
message.add_subset();
176 subset_proto->set_cost(subset_costs_[subset]);
177 std::sort(columns_[subset].begin(), columns_[subset].
end());
178 for (
const ElementIndex element : columns_[subset]) {
179 subset_proto->add_element(element.value());
187 subset_costs_.clear();
189 SubsetIndex subset_index(0);
190 for (
const SetCoverProto::Subset& subset_proto :
message.subset()) {
191 subset_costs_[subset_index] = subset_proto.cost();
192 if (subset_proto.element_size() > 0) {
193 columns_[subset_index].
reserve(
194 ColumnEntryIndex(subset_proto.element_size()));
195 for (
const BaseInt element : subset_proto.element()) {
196 columns_[subset_index].
push_back(ElementIndex(element));
197 num_elements_ = std::max(num_elements_, element + 1);
202 UpdateAllSubsetsList();
210double StandardDeviation(
const std::vector<T>& values) {
211 const size_t size = values.size();
213 double sum_of_squares = 0.0;
215 for (
size_t i = 0; i <
size; ++i) {
216 double sample =
static_cast<double>(values[i]);
217 if (sample == 0.0)
continue;
218 sum_of_squares += sample * sample;
222 return n == 0.0 ? 0.0 : sqrt((sum_of_squares - sum * sum / n) / n);
226SetCoverModel::Stats
ComputeStats(std::vector<T> sizes) {
227 SetCoverModel::Stats stats;
228 stats.min = *std::min_element(sizes.begin(), sizes.end());
229 stats.max = *std::max_element(sizes.begin(), sizes.end());
230 stats.mean = std::accumulate(sizes.begin(), sizes.end(), 0.0) / sizes.size();
231 std::nth_element(sizes.begin(), sizes.begin() + sizes.size() / 2,
233 stats.median = sizes[sizes.size() / 2];
234 stats.stddev = StandardDeviation(sizes);
239std::vector<T> ComputeDeciles(std::vector<T> values) {
240 const int kNumDeciles = 10;
241 std::vector<T> deciles;
242 deciles.reserve(kNumDeciles);
243 for (
int i = 1;
i <= kNumDeciles; ++
i) {
244 const size_t point = values.size() *
i / kNumDeciles - 1;
245 std::nth_element(values.begin(), values.begin() + point, values.end());
246 deciles.push_back(values[point]);
254 std::copy(subset_costs_.begin(), subset_costs_.end(),
subset_costs.begin());
261 for (
const ElementIndex element :
column) {
262 ++row_sizes[element.value()];
265 return ComputeStats(std::move(row_sizes));
269 std::vector<ssize_t> column_sizes(columns_.size());
271 column_sizes[subset.value()] = columns_[subset].size();
273 return ComputeStats(std::move(column_sizes));
279 for (
const ElementIndex element :
column) {
280 ++row_sizes[element.value()];
283 return ComputeDeciles(std::move(row_sizes));
287 std::vector<ssize_t> column_sizes(columns_.size());
289 column_sizes[subset.value()] = columns_[subset].size();
291 return ComputeDeciles(std::move(column_sizes));
void ImportModelFromProto(const SetCoverProto &message)
Imports the model from a SetCoverProto.
void ReserveNumElementsInSubset(BaseInt num_elements, BaseInt subset)
Reserves num_elements rows in the column indexed by subset.
Stats ComputeColumnStats()
Computes basic statistics on columns and returns a Stats structure.
util_intops::StrongIntRange< ElementIndex > ElementRange() const
void AddElementToLastSubset(BaseInt element)
Stats ComputeCostStats()
Computes basic statistics on costs and returns a Stats structure.
void AddEmptySubset(Cost cost)
Stats ComputeRowStats()
Computes basic statistics on rows and returns a Stats structure.
std::vector< ssize_t > ComputeColumnDeciles() const
Computes deciles on columns and returns a vector of deciles.
void ReserveNumSubsets(BaseInt num_subsets)
Reserves num_subsets columns in the model.
SetCoverProto ExportModelAsProto()
BaseInt num_elements() const
util_intops::StrongIntRange< SubsetIndex > SubsetRange() const
Access to the ranges of subsets and elements.
void AddElementToSubset(BaseInt element, BaseInt subset)
std::vector< ssize_t > ComputeRowDeciles() const
Computes deciles on rows and returns a vector of deciles.
void SetSubsetCost(BaseInt subset, Cost cost)
bool ComputeFeasibility() const
const SubsetCostVector & subset_costs() const
Vector of costs for each subset.
void CreateSparseRowView()
Creates the sparse ("dual") representation of the problem.
BaseInt num_subsets() const
void push_back(const value_type &val)
void reserve(size_type n)
void resize(size_type new_size)
QuadraticProgramStats ComputeStats(const ShardedQuadraticProgram &qp)
Returns a QuadraticProgramStats for a ShardedQuadraticProgram.
In SWIG mode, we don't want anything besides these top-level includes.
util_intops::StrongVector< ColumnEntryIndex, ElementIndex, ElementAllocator > SparseColumn
double Cost
Basic non-strict type for cost. The speed penalty for using double is ~2%.
util_intops::StrongVector< RowEntryIndex, SubsetIndex, SubsetAllocator > SparseRow
std::optional< int64_t > end