Google OR-Tools v9.14
A fast and portable software suite for combinatorial optimization
set_cover_model.cc
// Copyright 2010-2025 Google LLC
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "ortools/set_cover/set_cover_model.h"
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <limits>
#include <numeric>
#include <string>
#include <tuple>
#include <utility>
#include <vector>

#include "absl/log/check.h"
#include "absl/log/log.h"
#include "absl/numeric/bits.h"
#include "absl/random/discrete_distribution.h"
#include "absl/random/distributions.h"
#include "absl/random/random.h"
#include "absl/strings/str_format.h"
#include "absl/strings/str_join.h"
#include "absl/strings/string_view.h"
#include "absl/types/span.h"
#include "ortools/algorithms/radix_sort.h"
#include "ortools/set_cover/base_types.h"
#include "ortools/set_cover/set_cover.pb.h"

namespace operations_research {

namespace {

// Returns min + floor(scaling_factor * (raw_value - min + random_term)),
// where raw_value is drawn from the discrete distribution dist and
// random_term is a double drawn uniformly in [0, 1).
BaseInt DiscreteAffine(absl::BitGen& bitgen,
                       absl::discrete_distribution<BaseInt>& dist, BaseInt min,
                       double scaling_factor) {
  const BaseInt raw_value = dist(bitgen);
  const double random_term = absl::Uniform<double>(bitgen, 0, 1.0);
  const BaseInt affine_value =
      static_cast<BaseInt>(
          std::floor((raw_value - min + random_term) * scaling_factor)) +
      min;
  return affine_value;
}
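
// A quick worked illustration of the mapping above (values made up): with
// min = 2, scaling_factor = 1.5, raw_value = 3 and random_term = 0.4, the
// result is floor((3 - 2 + 0.4) * 1.5) + 2 = floor(2.1) + 2 = 4. The uniform
// random_term fills the gaps that a scaling factor greater than 1 would
// otherwise leave between reachable values.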

// For a given view (SparseColumnView or SparseRowView), returns the
// distribution of the sizes of the vectors in the view, which can be used in
// an absl::discrete_distribution.
template <typename View>
std::tuple<BaseInt, std::vector<BaseInt>> ComputeSizeHistogram(
    const View& view) {
  BaseInt max_size = 0;
  BaseInt min_size = std::numeric_limits<BaseInt>::max();
  for (const auto& vec : view) {
    const BaseInt size = vec.size();
    min_size = std::min(min_size, size);
    max_size = std::max(max_size, size);
  }
  std::vector<BaseInt> weights(max_size + 1, 0);
  for (const auto& vec : view) {
    const BaseInt size = vec.size();
    ++weights[size];
  }
  return {min_size, weights};
}

template <typename View>
std::tuple<BaseInt, absl::discrete_distribution<BaseInt>>
ComputeSizeDistribution(const View& view) {
  const auto [min_size, weights] = ComputeSizeHistogram(view);
  absl::discrete_distribution<BaseInt> dist(weights.begin(), weights.end());
  return {min_size, dist};
}
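
// A minimal sketch of how the two helpers combine (sizes made up): a view
// whose vectors have sizes {2, 4, 4} gives min_size = 2 and weights
// {0, 0, 1, 0, 2}, so the resulting distribution draws 2 with probability
// 1/3 and 4 with probability 2/3.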
}  // namespace

SetCoverModel SetCoverModel::GenerateRandomModelFrom(
    const SetCoverModel& seed_model, BaseInt num_elements, BaseInt num_subsets,
    double row_scale, double column_scale, double cost_scale) {
  SetCoverModel model;
  StopWatch stop_watch(&(model.generation_duration_));
  DCHECK_GT(row_scale, 0.0);
  DCHECK_GT(column_scale, 0.0);
  DCHECK_GT(cost_scale, 0.0);
  model.num_elements_ = num_elements;
  model.num_nonzeros_ = 0;
  model.ResizeNumSubsets(num_subsets);
  model.UpdateAllSubsetsList();
  absl::BitGen bitgen;

  // Create the distribution of the cardinalities of the subsets based on the
  // histogram of column sizes in the seed model.
  auto [min_column_size, column_dist] =
      ComputeSizeDistribution(seed_model.columns());

  // Create the distribution of the degrees of the elements based on the
  // histogram of row sizes in the seed model.
  auto [min_row_size, row_dist] = ComputeSizeDistribution(seed_model.rows());

  // Prepare the degrees of the elements in the generated model, and use them
  // in a distribution to generate the columns. This biases the columns
  // towards elements with higher degrees.
  ElementToIntVector degrees(num_elements);
  for (ElementIndex element(0); element.value() < num_elements; ++element) {
    degrees[element] =
        DiscreteAffine(bitgen, row_dist, min_row_size, row_scale);
  }
  absl::discrete_distribution<BaseInt> degree_dist(degrees.begin(),
                                                   degrees.end());

  // Vector indicating whether the generated model covers an element.
  ElementBoolVector contains_element(num_elements, false);

  // Number of elements in the generated model, using the above vector.
  BaseInt num_elements_covered(0);

  // Maximum number of tries to generate a random element that is not yet in
  // the subset, or a random subset that does not contain the element.
  const int kMaxTries = 10;

  // Loop-local vector indicating whether the currently generated subset
  // contains an element.
  ElementBoolVector subset_already_contains_element(num_elements, false);
  for (SubsetIndex subset(0); subset.value() < num_subsets; ++subset) {
    LOG_EVERY_N_SEC(INFO, 5)
        << absl::StrFormat("Generating subset %d (%.1f%%)", subset.value(),
                           100.0 * subset.value() / num_subsets);
    const BaseInt cardinality =
        DiscreteAffine(bitgen, column_dist, min_column_size, column_scale);
    model.columns_[subset].reserve(cardinality);
    for (BaseInt iter = 0; iter < cardinality; ++iter) {
      int num_tries = 0;
      ElementIndex element;
      // Choose an element that is not yet in the subset at random with a
      // distribution that is proportional to the degree of the element.
      do {
        element = ElementIndex(degree_dist(bitgen));
        CHECK_LT(element.value(), num_elements);
        ++num_tries;
      } while (num_tries < kMaxTries &&
               subset_already_contains_element[element]);
      ++model.num_nonzeros_;
      model.columns_[subset].push_back(element);
      subset_already_contains_element[element] = true;
      if (!contains_element[element]) {
        contains_element[element] = true;
        ++num_elements_covered;
      }
    }
    for (const ElementIndex element : model.columns_[subset]) {
      subset_already_contains_element[element] = false;
    }
  }
  VLOG(1) << "Finished generating the model with " << num_elements_covered
          << " elements covered.";

  // It can happen -- rarely in practice -- that some of the elements cannot be
  // covered. Let's add them to randomly chosen subsets.
  if (num_elements_covered != num_elements) {
    VLOG(1) << "Generated model with " << num_elements - num_elements_covered
            << " elements that cannot be covered. Adding them to random "
               "subsets.";
    SubsetBoolVector element_already_in_subset(num_subsets, false);
    for (ElementIndex element(0); element.value() < num_elements; ++element) {
      VLOG_EVERY_N_SEC(1, 5) << absl::StrFormat(
          "Generating subsets for element %d (%.1f%%)", element.value(),
          100.0 * element.value() / num_elements);
      if (!contains_element[element]) {
        const BaseInt degree =
            DiscreteAffine(bitgen, row_dist, min_row_size, row_scale);
        std::vector<SubsetIndex> generated_subsets;
        generated_subsets.reserve(degree);
        for (BaseInt i = 0; i < degree; ++i) {
          int num_tries = 0;
          SubsetIndex subset_index;
          // The method is the same as above.
          do {
            subset_index = SubsetIndex(DiscreteAffine(
                bitgen, column_dist, min_column_size, column_scale));
            ++num_tries;
          } while (num_tries < kMaxTries &&
                   element_already_in_subset[subset_index]);
          ++model.num_nonzeros_;
          model.columns_[subset_index].push_back(element);
          element_already_in_subset[subset_index] = true;
          generated_subsets.push_back(subset_index);
        }
        for (const SubsetIndex subset_index : generated_subsets) {
          element_already_in_subset[subset_index] = false;
        }
        contains_element[element] = true;
        ++num_elements_covered;
      }
    }
    VLOG(1) << "Finished generating subsets for elements that were not "
               "covered in the original model.";
  }
  VLOG(1) << "Finished generating the model. There are "
          << num_elements - num_elements_covered << " uncovered elements.";

  CHECK_EQ(num_elements_covered, num_elements);

  // TODO(user): if necessary, use a better distribution for the costs.
  // The generation of the costs is done in two steps. First, compute the
  // minimum and maximum costs.
  Cost min_cost = std::numeric_limits<Cost>::infinity();
  Cost max_cost = -min_cost;
  for (const Cost cost : seed_model.subset_costs()) {
    min_cost = std::min(min_cost, cost);
    max_cost = std::max(max_cost, cost);
  }
  // Then, generate random numbers in [min_cost, min_cost + cost_range], where
  // cost_range is defined as:
  const Cost cost_range = cost_scale * (max_cost - min_cost);
  for (Cost& cost : model.subset_costs_) {
    cost = min_cost + absl::Uniform<double>(bitgen, 0, cost_range);
  }
  model.CreateSparseRowView();
  return model;
}
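
// A hypothetical usage sketch (`seed` stands for a model loaded elsewhere):
// generate an instance with twice as many elements and subsets while keeping
// the seed's size and cost distributions.
//
//   SetCoverModel generated = SetCoverModel::GenerateRandomModelFrom(
//       seed, /*num_elements=*/2 * seed.num_elements(),
//       /*num_subsets=*/2 * seed.num_subsets(),
//       /*row_scale=*/2.0, /*column_scale=*/2.0, /*cost_scale=*/1.0);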

void SetCoverModel::UpdateAllSubsetsList() {
  const BaseInt old_size = all_subsets_.size();
  DCHECK_LE(old_size, num_subsets());
  all_subsets_.resize(num_subsets());
  for (BaseInt subset(old_size); subset < num_subsets(); ++subset) {
    all_subsets_[subset] = SubsetIndex(subset);
  }
}

void SetCoverModel::AddEmptySubset(Cost cost) {
  // TODO(user): refine the logic for is_unicost_ and is_unicost_valid_.
  is_unicost_valid_ = false;
  elements_in_subsets_are_sorted_ = false;
  subset_costs_.push_back(cost);
  columns_.push_back(SparseColumn());
  all_subsets_.push_back(SubsetIndex(num_subsets_));
  ++num_subsets_;
  CHECK_EQ(columns_.size(), num_subsets());
  CHECK_EQ(subset_costs_.size(), num_subsets());
  CHECK_EQ(all_subsets_.size(), num_subsets());
  row_view_is_valid_ = false;
}

void SetCoverModel::AddElementToLastSubset(BaseInt element) {
  elements_in_subsets_are_sorted_ = false;
  columns_.back().push_back(ElementIndex(element));
  num_elements_ = std::max(num_elements_, element + 1);
  // No need to update the list all_subsets_.
  ++num_nonzeros_;
  row_view_is_valid_ = false;
}

void SetCoverModel::AddElementToLastSubset(ElementIndex element) {
  AddElementToLastSubset(element.value());
}

void SetCoverModel::SetSubsetCost(BaseInt subset, Cost cost) {
  // TODO(user): refine the logic for is_unicost_ and is_unicost_valid_.
  // NOMUTANTS -- this is a performance optimization.
  is_unicost_valid_ = false;
  elements_in_subsets_are_sorted_ = false;
  CHECK(std::isfinite(cost));
  DCHECK_GE(subset, 0);
  if (subset >= num_subsets()) {
    num_subsets_ = std::max(num_subsets_, subset + 1);
    columns_.resize(num_subsets_, SparseColumn());
    subset_costs_.resize(num_subsets_, 0.0);
    row_view_is_valid_ = false;
    UpdateAllSubsetsList();
  }
  subset_costs_[SubsetIndex(subset)] = cost;
}

void SetCoverModel::SetSubsetCost(SubsetIndex subset, Cost cost) {
  SetSubsetCost(subset.value(), cost);
}

void SetCoverModel::AddElementToSubset(BaseInt element, BaseInt subset) {
  elements_in_subsets_are_sorted_ = false;
  if (subset >= num_subsets()) {
    num_subsets_ = subset + 1;
    subset_costs_.resize(num_subsets_, 0.0);
    columns_.resize(num_subsets_, SparseColumn());
    UpdateAllSubsetsList();
  }
  columns_[SubsetIndex(subset)].push_back(ElementIndex(element));
  num_elements_ = std::max(num_elements_, element + 1);
  ++num_nonzeros_;
  row_view_is_valid_ = false;
}

void SetCoverModel::AddElementToSubset(ElementIndex element,
                                       SubsetIndex subset) {
  AddElementToSubset(element.value(), subset.value());
}

void SetCoverModel::ResizeNumSubsets(BaseInt num_subsets) {
  num_subsets_ = std::max(num_subsets_, num_subsets);
  columns_.resize(num_subsets_, SparseColumn());
  subset_costs_.resize(num_subsets_, 0.0);
  UpdateAllSubsetsList();
}

void SetCoverModel::ReserveNumElementsInSubset(BaseInt num_elements,
                                               BaseInt subset) {
  ResizeNumSubsets(subset);
  columns_[SubsetIndex(subset)].reserve(ColumnEntryIndex(num_elements));
}

void SetCoverModel::SortElementsInSubsets() {
  for (const SubsetIndex subset : SubsetRange()) {
    // std::sort(columns_[subset].begin(), columns_[subset].end());
    BaseInt* data = reinterpret_cast<BaseInt*>(columns_[subset].data());
    RadixSort(absl::MakeSpan(data, columns_[subset].size()));
  }
  elements_in_subsets_are_sorted_ = true;
}

void SetCoverModel::CreateSparseRowView() {
  StopWatch stop_watch(&create_sparse_row_view_duration_);
  if (row_view_is_valid_) {
    VLOG(1) << "CreateSparseRowView: already valid";
    return;
  }
  VLOG(1) << "CreateSparseRowView started";
  rows_.resize(num_elements_, SparseRow());
  ElementToIntVector row_sizes(num_elements_, 0);
  for (const SubsetIndex subset : SubsetRange()) {
    BaseInt* data = reinterpret_cast<BaseInt*>(columns_[subset].data());
    RadixSort(absl::MakeSpan(data, columns_[subset].size()));

    ElementIndex preceding_element(-1);
    for (const ElementIndex element : columns_[subset]) {
      CHECK_GT(element, preceding_element)
          << "Repetition in column "
          << subset;  // Fail if there is a repetition.
      ++row_sizes[element];
      preceding_element = element;
    }
  }
  for (const ElementIndex element : ElementRange()) {
    rows_[element].reserve(RowEntryIndex(row_sizes[element]));
  }
  for (const SubsetIndex subset : SubsetRange()) {
    for (const ElementIndex element : columns_[subset]) {
      rows_[element].emplace_back(subset);
    }
  }
  row_view_is_valid_ = true;
  elements_in_subsets_are_sorted_ = true;
  VLOG(1) << "CreateSparseRowView finished";
}

std::vector<SubsetIndex> SetCoverModel::ComputeSliceIndices(
    int num_partitions) {
  if (num_partitions <= 1 || columns_.empty()) {
    return {SubsetIndex(columns_.size())};
  }

  std::vector<BaseInt> partial_sum_nnz(columns_.size());
  partial_sum_nnz[0] = columns_[SubsetIndex(0)].size();
  for (BaseInt col = 1; col < columns_.size(); ++col) {
    partial_sum_nnz[col] =
        partial_sum_nnz[col - 1] + columns_[SubsetIndex(col)].size();
  }
  const BaseInt total_nnz = partial_sum_nnz.back();
  const BaseInt target_nnz = (total_nnz + num_partitions - 1) / num_partitions;

  std::vector<SubsetIndex> partition_points(num_partitions);
  BaseInt threshold = target_nnz;
  BaseInt pos = 0;
  for (const SubsetIndex col : SubsetRange()) {
    if (partial_sum_nnz[col.value()] >= threshold) {
      partition_points[pos] = col;
      ++pos;
      threshold += target_nnz;
    }
  }
  partition_points[num_partitions - 1] = SubsetIndex(columns_.size());
  return partition_points;
}
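
// Worked example (sizes made up): with column sizes {3, 1, 4, 2} and
// num_partitions = 2, partial_sum_nnz = {3, 4, 8, 10}, total_nnz = 10 and
// target_nnz = 5. The partial sums first reach 5 at column 2 and 10 at
// column 3, and the last point is then forced to columns_.size(), giving
// partition points {SubsetIndex(2), SubsetIndex(4)}: slice 0 covers columns
// [0, 2) and slice 1 covers columns [2, 4).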

SparseRowView SetCoverModel::ComputeSparseRowViewSlice(
    SubsetIndex begin_subset, SubsetIndex end_subset) {
  // Sized up front so that rows[element] below is valid.
  SparseRowView rows(num_elements_);
  ElementToIntVector row_sizes(num_elements_, 0);
  for (SubsetIndex subset = begin_subset; subset < end_subset; ++subset) {
    BaseInt* data = reinterpret_cast<BaseInt*>(columns_[subset].data());
    RadixSort(absl::MakeSpan(data, columns_[subset].size()));

    ElementIndex preceding_element(-1);
    for (const ElementIndex element : columns_[subset]) {
      CHECK_GT(element, preceding_element)
          << "Repetition in column "
          << subset;  // Fail if there is a repetition.
      ++row_sizes[element];
      preceding_element = element;
    }
  }
  for (const ElementIndex element : ElementRange()) {
    rows[element].reserve(RowEntryIndex(row_sizes[element]));
  }
  for (SubsetIndex subset = begin_subset; subset < end_subset; ++subset) {
    for (const ElementIndex element : columns_[subset]) {
      rows[element].emplace_back(subset);
    }
  }
  return rows;
}

std::vector<SparseRowView> SetCoverModel::CutSparseRowViewInSlices(
    absl::Span<const SubsetIndex> partition_points) {
  std::vector<SparseRowView> row_views;
  row_views.reserve(partition_points.size());
  SubsetIndex begin_subset(0);
  // This should be done in parallel. It is a bottleneck.
  for (SubsetIndex end_subset : partition_points) {
    row_views.push_back(ComputeSparseRowViewSlice(begin_subset, end_subset));
    begin_subset = end_subset;
  }
  return row_views;
}

SparseRowView SetCoverModel::ReduceSparseRowViewSlices(
    absl::Span<const SparseRowView> slices) {
  SparseRowView result_rows;
  // This is not a ReduceTree. This will be done later through parallelism.
  // resize() (not reserve()) so that result_rows[element] below is valid.
  result_rows.resize(num_elements_);
  for (const SparseRowView& slice : slices) {
    // This should be done as a reduce tree, in parallel.
    for (const ElementIndex element : ElementRange()) {
      result_rows[element].insert(result_rows[element].end(),
                                  slice[element].begin(), slice[element].end());
    }
  }
  return result_rows;
}

SparseRowView SetCoverModel::CreateSparseRowViewUsingSlices() {
  StopWatch stop_watch(&compute_sparse_row_view_using_slices_duration_);
  // The number of slices is an assumption here; one slice per worker thread
  // is a natural choice.
  const int num_partitions = 16;
  VLOG(1) << "CreateSparseRowViewUsingSlices started";
  const std::vector<SubsetIndex> partition_points =
      ComputeSliceIndices(num_partitions);
  const std::vector<SparseRowView> slices =
      CutSparseRowViewInSlices(partition_points);
  const SparseRowView rows = ReduceSparseRowViewSlices(slices);
  VLOG(1) << "CreateSparseRowViewUsingSlices finished";
  return rows;
}

bool SetCoverModel::ComputeFeasibility() const {
  StopWatch stop_watch(&feasibility_duration_);
  CHECK_GT(num_elements(), 0);
  CHECK_GT(num_subsets(), 0);
  CHECK_EQ(columns_.size(), num_subsets());
  CHECK_EQ(subset_costs_.size(), num_subsets());
  CHECK_EQ(all_subsets_.size(), num_subsets());
  for (const Cost cost : subset_costs_) {
    CHECK_GT(cost, 0.0);
  }
  ElementToIntVector possible_coverage(num_elements_, 0);
  SubsetIndex column_index(0);
  for (const SparseColumn& column : columns_) {
    DLOG_IF(INFO, column.empty()) << "Empty column " << column_index.value();
    for (const ElementIndex element : column) {
      ++possible_coverage[element];
    }
    // NOMUTANTS -- column_index is only used for logging in debug mode.
    ++column_index;
  }
  int64_t num_uncoverable_elements = 0;
  for (const ElementIndex element : ElementRange()) {
    // NOMUTANTS -- num_uncoverable_elements is only used for logging.
    if (possible_coverage[element] == 0) {
      ++num_uncoverable_elements;
    }
  }
  VLOG(1) << "num_uncoverable_elements = " << num_uncoverable_elements;
  for (const ElementIndex element : ElementRange()) {
    if (possible_coverage[element] > 0) continue;
    LOG(ERROR) << "Element " << element << " is not covered.";
    return false;
  }
  VLOG(1) << "Max possible coverage = "
          << *std::max_element(possible_coverage.begin(),
                               possible_coverage.end());
  for (const SubsetIndex subset : SubsetRange()) {
    if (all_subsets_[subset.value()] == subset) continue;
    LOG(ERROR) << "subset = " << subset << " all_subsets_[subset.value()] = "
               << all_subsets_[subset.value()];
    return false;
  }
  return true;
}

SetCoverProto SetCoverModel::ExportModelAsProto() const {
  CHECK(elements_in_subsets_are_sorted_);
  SetCoverProto message;
  for (const SubsetIndex subset : SubsetRange()) {
    VLOG_EVERY_N_SEC(1, 5) << absl::StrFormat(
        "Exporting subset %d (%.1f%%)", subset.value(),
        100.0 * subset.value() / num_subsets());
    SetCoverProto::Subset* subset_proto = message.add_subset();
    subset_proto->set_cost(subset_costs_[subset]);
    SparseColumn column = columns_[subset];  // Copy is intentional.
    BaseInt* data = reinterpret_cast<BaseInt*>(column.data());
    RadixSort(absl::MakeSpan(data, column.size()));
    for (const ElementIndex element : column) {
      subset_proto->add_element(element.value());
    }
  }
  VLOG(1) << "Finished exporting the model.";
  return message;
}

void SetCoverModel::ImportModelFromProto(const SetCoverProto& message) {
  columns_.clear();
  subset_costs_.clear();
  ResizeNumSubsets(message.subset_size());
  SubsetIndex subset_index(0);
  for (const SetCoverProto::Subset& subset_proto : message.subset()) {
    subset_costs_[subset_index] = subset_proto.cost();
    if (subset_proto.element_size() > 0) {
      columns_[subset_index].reserve(
          ColumnEntryIndex(subset_proto.element_size()));
      for (const BaseInt element : subset_proto.element()) {
        columns_[subset_index].push_back(ElementIndex(element));
        num_elements_ = std::max(num_elements_, element + 1);
      }
    }
    // Advance even for empty subsets so costs stay aligned with columns.
    ++subset_index;
  }
  UpdateAllSubsetsList();
}
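
// A round-trip sketch (`model` is hypothetical). ExportModelAsProto() CHECKs
// that columns are sorted, which CreateSparseRowView() guarantees:
//
//   model.CreateSparseRowView();
//   const SetCoverProto proto = model.ExportModelAsProto();
//   SetCoverModel copy;
//   copy.ImportModelFromProto(proto);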

std::string SetCoverModel::ToVerboseString(absl::string_view sep) const {
  return absl::StrJoin(
      std::make_tuple("num_elements", num_elements(), "num_subsets",
                      num_subsets(), "num_nonzeros", num_nonzeros(),
                      "fill_rate", FillRate()),
      sep);
}

std::string SetCoverModel::ToString(absl::string_view sep) const {
  return absl::StrJoin(std::make_tuple(num_elements(), num_subsets(),
                                       num_nonzeros(), FillRate()),
                       sep);
}

namespace {
// Returns the standard deviation of the values, excluding those that are
// zero.
template <typename T>
double StandardDeviation(const std::vector<T>& values) {
  const size_t size = values.size();
  double n = 0.0;  // n is used in a calculation involving doubles.
  double sum_of_squares = 0.0;
  double sum = 0.0;
  for (size_t i = 0; i < size; ++i) {
    double sample = static_cast<double>(values[i]);
    if (sample == 0.0) continue;
    sum_of_squares += sample * sample;
    sum += sample;
    ++n;
  }
  // Since we know all the values, we can compute the standard deviation
  // exactly.
  return n == 0.0 ? 0.0 : sqrt((sum_of_squares - sum * sum / n) / n);
}
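
// Sanity check with made-up values {1, 2, 3} (no zeros to exclude): n = 3,
// sum = 6, sum_of_squares = 14, so the variance is (14 - 6 * 6 / 3) / 3 =
// 2 / 3 and the standard deviation is sqrt(2 / 3). This is the population
// formula E[x^2] - E[x]^2, not the sample (n - 1) one.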

// Statistics accumulation class used to compute statistics on the deltas of
// the row and column elements and their sizes in bytes.
// Since the values are not all stored, it's not possible to compute the median
// exactly. It is returned as 0.0. NaN would be a better choice, but it's just
// not a good idea as NaNs can propagate and cause problems.
class StatsAccumulator {
 public:
  StatsAccumulator()
      : count_(0),
        min_(kInfinity),
        max_(-kInfinity),
        sum_(0.0),
        sum_of_squares_(0.0) {}

  void Register(double value) {
    ++count_;
    min_ = std::min(min_, value);
    max_ = std::max(max_, value);
    sum_ += value;
    sum_of_squares_ += value * value;
  }

  SetCoverModel::Stats ComputeStats() const {
    const int64_t n = count_;
    // Since the code is used on a known number of values, we can compute the
    // standard deviation exactly, even if the values are not all stored.
    const double stddev =
        n == 0 ? 0.0 : sqrt((sum_of_squares_ - sum_ * sum_ / n) / n);
    return SetCoverModel::Stats{min_, max_, 0.0, sum_ / n, stddev};
  }

 private:
  static constexpr double kInfinity = std::numeric_limits<double>::infinity();
  int64_t count_;
  double min_;
  double max_;
  double sum_;
  double sum_of_squares_;
};
}  // namespace

template <typename T>
SetCoverModel::Stats ComputeStats(std::vector<T> samples) {
  SetCoverModel::Stats stats;
  stats.min = *std::min_element(samples.begin(), samples.end());
  stats.max = *std::max_element(samples.begin(), samples.end());
  stats.mean =
      std::accumulate(samples.begin(), samples.end(), 0.0) / samples.size();
  auto const q1 = samples.size() / 4;
  auto const q2 = samples.size() / 2;
  auto const q3 = q1 + q2;
  // The first call to nth_element is O(n). The 2nd and 3rd calls are O(n / 2).
  // Basically it's equivalent to running nth_element twice.
  // One might be tempted to use a faster sorting algorithm like radix sort,
  // but it is not clear that we would gain much. There would be no gain in
  // complexity whatsoever anyway. On top of that, this code is called at most
  // once per problem, so it's not worth the effort.
  std::nth_element(samples.begin(), samples.begin() + q2, samples.end());
  std::nth_element(samples.begin(), samples.begin() + q1, samples.begin() + q2);
  std::nth_element(samples.begin() + q2, samples.begin() + q3, samples.end());
  stats.median = samples[q2];
  stats.iqr = samples[q3] - samples[q1];
  stats.stddev = StandardDeviation(samples);
  return stats;
}
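
// Illustration (sample count made up): for 8 samples, q1 = 2, q2 = 4 and
// q3 = 6. The first nth_element call partitions the whole vector around
// index 4; the second and third then only touch one half each, so the total
// work is roughly that of two full nth_element passes, as noted above.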

template <typename T>
std::vector<T> ComputeDeciles(std::vector<T> values) {
  const int kNumDeciles = 10;
  std::vector<T> deciles(kNumDeciles, T{0});
  const double step = static_cast<double>(values.size()) / kNumDeciles;
  for (int i = 1; i < kNumDeciles; ++i) {
    const size_t point = std::clamp<double>(i * step, 0, values.size() - 1);
    std::nth_element(values.begin(), values.begin() + point, values.end());
    deciles[i] = values[point];
  }
  return deciles;
}
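
// For example (input size made up): with 50 values, step = 5.0 and the
// deciles are read at ranks 5, 10, ..., 45; deciles[0] is left at zero, so
// the minimum is not reported. Each nth_element call places the right order
// statistic at `point` without fully sorting the vector.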

SetCoverModel::Stats SetCoverModel::ComputeCostStats() const {
  std::vector<Cost> subset_costs(num_subsets());
  std::copy(subset_costs_.begin(), subset_costs_.end(), subset_costs.begin());
  return ComputeStats(std::move(subset_costs));
}

SetCoverModel::Stats SetCoverModel::ComputeRowStats() const {
  std::vector<int64_t> row_sizes(num_elements(), 0);
  for (const SparseColumn& column : columns_) {
    for (const ElementIndex element : column) {
      ++row_sizes[element.value()];
    }
  }
  return ComputeStats(std::move(row_sizes));
}

SetCoverModel::Stats SetCoverModel::ComputeColumnStats() const {
  std::vector<int64_t> column_sizes(columns_.size());
  for (const SubsetIndex subset : SubsetRange()) {
    column_sizes[subset.value()] = columns_[subset].size();
  }
  return ComputeStats(std::move(column_sizes));
}

std::string SetCoverModel::Stats::ToString(absl::string_view sep) const {
  return absl::StrJoin(std::make_tuple(min, max, median, mean, stddev, iqr),
                       sep);
}

std::string SetCoverModel::Stats::ToVerboseString(absl::string_view sep) const {
  return absl::StrJoin(
      std::make_tuple("min", min, "max", max, "median", median, "mean", mean,
                      "stddev", stddev, "iqr", iqr),
      sep);
}

std::vector<int64_t> SetCoverModel::ComputeRowDeciles() const {
  std::vector<int64_t> row_sizes(num_elements(), 0);
  for (const SparseColumn& column : columns_) {
    for (const ElementIndex element : column) {
      ++row_sizes[element.value()];
    }
  }
  return ComputeDeciles(std::move(row_sizes));
}

std::vector<int64_t> SetCoverModel::ComputeColumnDeciles() const {
  std::vector<int64_t> column_sizes(columns_.size());
  for (const SubsetIndex subset : SubsetRange()) {
    column_sizes[subset.value()] = columns_[subset].size();
  }
  return ComputeDeciles(std::move(column_sizes));
}

namespace {
// Returns the number of bytes needed to store x with a base-128 encoding.
BaseInt Base128SizeInBytes(BaseInt x) {
  const uint64_t u = x == 0 ? 1 : static_cast<uint64_t>(x);
  return (std::numeric_limits<uint64_t>::digits - absl::countl_zero(u) + 6) / 7;
}
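
// A few reference points (by direct computation): x = 0 and x = 127 fit in
// one byte, x = 128 needs two, and x = 16384 needs three, matching the 7
// payload bits per byte of a base-128 (varint) encoding.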
}  // namespace

SetCoverModel::Stats SetCoverModel::ComputeColumnDeltaSizeStats() const {
  StatsAccumulator acc;
  for (const SparseColumn& column : columns_) {
    int64_t previous = 0;
    for (const ElementIndex element : column) {
      const int64_t delta = element.value() - previous;
      previous = element.value();
      acc.Register(Base128SizeInBytes(delta));
    }
  }
  return acc.ComputeStats();
}

SetCoverModel::Stats SetCoverModel::ComputeRowDeltaSizeStats() const {
  StatsAccumulator acc;
  for (const SparseRow& row : rows_) {
    int64_t previous = 0;
    for (const SubsetIndex subset : row) {
      const int64_t delta = subset.value() - previous;
      previous = subset.value();
      acc.Register(Base128SizeInBytes(delta));
    }
  }
  return acc.ComputeStats();
}
}  // namespace operations_research