Google OR-Tools v9.11
a fast and portable software suite for combinatorial optimization
Loading...
Searching...
No Matches
sharded_optimization_utils.cc
Go to the documentation of this file.
1// Copyright 2010-2024 Google LLC
2// Licensed under the Apache License, Version 2.0 (the "License");
3// you may not use this file except in compliance with the License.
4// You may obtain a copy of the License at
5//
6// http://www.apache.org/licenses/LICENSE-2.0
7//
8// Unless required by applicable law or agreed to in writing, software
9// distributed under the License is distributed on an "AS IS" BASIS,
10// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11// See the License for the specific language governing permissions and
12// limitations under the License.
13
15
16#include <algorithm>
17#include <cmath>
18#include <cstdint>
19#include <limits>
20#include <optional>
21#include <random>
22#include <utility>
23#include <vector>
24
25#include "Eigen/Core"
26#include "Eigen/SparseCore"
27#include "absl/log/check.h"
28#include "absl/random/distributions.h"
34
36
37constexpr double kInfinity = std::numeric_limits<double>::infinity();
38using ::Eigen::ColMajor;
39using ::Eigen::SparseMatrix;
40using ::Eigen::VectorXd;
41using ::Eigen::VectorXi;
42
44 : sharder_(sharder) {
45 average_ = ZeroVector(*sharder_);
46}
47
48// We considered the five averaging algorithms M_* listed on the first page of
49// https://www.jstor.org/stable/2286154 and the Kahan summation algorithm
50// (https://en.wikipedia.org/wiki/Kahan_summation_algorithm). Of these only M_14
51// satisfies our desired property that a constant sequence is averaged without
52// roundoff while requiring only a single vector be stored. We therefore use
53// M_14 (actually a natural weighted generalization, see below).
54void ShardedWeightedAverage::Add(const VectorXd& datapoint, double weight) {
55 CHECK_GE(weight, 0.0);
56 CHECK_EQ(datapoint.size(), average_.size());
57 // This `if` protects against NaN if `sum_weights_` also == 0.0.
58 if (weight > 0.0) {
59 const double weight_ratio = weight / (sum_weights_ + weight);
60 sharder_->ParallelForEachShard([&](const Sharder::Shard& shard) {
61 shard(average_) += weight_ratio * (shard(datapoint) - shard(average_));
62 });
63 sum_weights_ += weight;
64 }
65 ++num_terms_;
66}
67
69 SetZero(*sharder_, average_);
70 sum_weights_ = 0.0;
71 num_terms_ = 0;
72}
73
75 VectorXd result;
76 // TODO(user): consider returning a reference to avoid this copy.
77 AssignVector(average_, *sharder_, result);
78 return result;
79}
80
81namespace {
82
83double CombineBounds(const double v1, const double v2) {
84 double max = 0.0;
85 if (std::abs(v1) < kInfinity) {
86 max = std::abs(v1);
87 }
88 if (std::abs(v2) < kInfinity) {
89 max = std::max(max, std::abs(v2));
90 }
91 return max;
92}
93
94struct VectorInfo {
95 int64_t num_finite_nonzero = 0;
96 int64_t num_infinite = 0;
97 int64_t num_zero = 0;
98 // The largest absolute value of the finite non-zero values.
99 double largest = 0.0;
100 // The smallest absolute value of the finite non-zero values.
101 double smallest = 0.0;
102 // The average absolute value of the finite values.
103 double average = 0.0;
104 // The L2 norm of the finite values.
105 double l2_norm = 0.0;
106};
107
108struct InfNormInfo {
109 VectorInfo row_norms;
110 VectorInfo col_norms;
111};
112
113// `VectorInfoAccumulator` accumulates values for a `VectorInfo`.
114// NOTE: In `VectorInfo`, the max and min of an empty set is 0.0 by convention.
115// In `VectorInfoAccumulator`, it is -`kInfinity` and `kInfinity` to simplify
116// adding additional values.
117class VectorInfoAccumulator {
118 public:
119 VectorInfoAccumulator() {}
120 // Move-only even though move and copy are the same cost, to help catch
121 // unintentional moves/copies (which are probably performance bugs).
122 VectorInfoAccumulator(const VectorInfoAccumulator&) = delete;
123 VectorInfoAccumulator& operator=(const VectorInfoAccumulator&) = delete;
124 VectorInfoAccumulator(VectorInfoAccumulator&&) = default;
125 VectorInfoAccumulator& operator=(VectorInfoAccumulator&&) = default;
126 void Add(double value);
127 void Add(const VectorInfoAccumulator& other);
128 explicit operator VectorInfo() const;
129
130 private:
131 int64_t num_infinite_ = 0;
132 int64_t num_zero_ = 0;
133 int64_t num_finite_nonzero_ = 0;
134 double max_ = -kInfinity;
135 double min_ = kInfinity;
136 double sum_ = 0.0;
137 double sum_squared_ = 0.0;
138};
139
140void VectorInfoAccumulator::Add(const double value) {
141 if (std::isinf(value)) {
142 ++num_infinite_;
143 } else if (value == 0) {
144 ++num_zero_;
145 } else {
146 ++num_finite_nonzero_;
147 const double abs_value = std::abs(value);
148 max_ = std::max(max_, abs_value);
149 min_ = std::min(min_, abs_value);
150 sum_ += abs_value;
151 sum_squared_ += abs_value * abs_value;
152 }
153}
154
155void VectorInfoAccumulator::Add(const VectorInfoAccumulator& other) {
156 num_infinite_ += other.num_infinite_;
157 num_zero_ += other.num_zero_;
158 num_finite_nonzero_ += other.num_finite_nonzero_;
159 max_ = std::max(max_, other.max_);
160 min_ = std::min(min_, other.min_);
161 sum_ += other.sum_;
162 sum_squared_ += other.sum_squared_;
163}
164
165VectorInfoAccumulator::operator VectorInfo() const {
166 return VectorInfo{
167 .num_finite_nonzero = num_finite_nonzero_,
168 .num_infinite = num_infinite_,
169 .num_zero = num_zero_,
170 .largest = num_finite_nonzero_ > 0 ? max_ : 0.0,
171 .smallest = num_finite_nonzero_ > 0 ? min_ : 0.0,
172 .average = num_finite_nonzero_ + num_zero_ > 0
173 ? sum_ / (num_finite_nonzero_ + num_zero_)
174 : std::numeric_limits<double>::quiet_NaN(),
175 .l2_norm = std::sqrt(sum_squared_),
176 };
177}
178
179VectorInfo CombineAccumulators(
180 const std::vector<VectorInfoAccumulator>& accumulators) {
181 VectorInfoAccumulator result;
182 for (const VectorInfoAccumulator& accumulator : accumulators) {
183 result.Add(accumulator);
184 }
185 return VectorInfo(result);
186}
187
188// TODO(b/223148482): Switch `vec` to `const Eigen::Ref<const VectorXd>` if/when
189// `Sharder` supports `Eigen::Ref`, to avoid a copy when called on
190// `qp.Qp().objective_matrix->diagonal()`.
191VectorInfo ComputeVectorInfo(const VectorXd& vec, const Sharder& sharder) {
192 std::vector<VectorInfoAccumulator> local_accumulator(sharder.NumShards());
193 sharder.ParallelForEachShard([&](const Sharder::Shard& shard) {
194 VectorInfoAccumulator shard_accumulator;
195 for (double element : shard(vec)) {
196 shard_accumulator.Add(element);
197 }
198 local_accumulator[shard.Index()] = std::move(shard_accumulator);
199 });
200 return CombineAccumulators(local_accumulator);
201}
202
203VectorInfo VariableBoundGapInfo(const VectorXd& lower_bounds,
204 const VectorXd& upper_bounds,
205 const Sharder& sharder) {
206 std::vector<VectorInfoAccumulator> local_accumulator(sharder.NumShards());
207 sharder.ParallelForEachShard([&](const Sharder::Shard& shard) {
208 VectorInfoAccumulator shard_accumulator;
209 for (double element : shard(upper_bounds) - shard(lower_bounds)) {
210 shard_accumulator.Add(element);
211 }
212 local_accumulator[shard.Index()] = std::move(shard_accumulator);
213 });
214 return CombineAccumulators(local_accumulator);
215}
216
217VectorInfo MatrixAbsElementInfo(
218 const SparseMatrix<double, ColMajor, int64_t>& matrix,
219 const Sharder& sharder) {
220 std::vector<VectorInfoAccumulator> local_accumulator(sharder.NumShards());
221 sharder.ParallelForEachShard([&](const Sharder::Shard& shard) {
222 VectorInfoAccumulator shard_accumulator;
223 const auto matrix_shard = shard(matrix);
224 for (int64_t col_idx = 0; col_idx < matrix_shard.outerSize(); ++col_idx) {
225 for (decltype(matrix_shard)::InnerIterator it(matrix_shard, col_idx); it;
226 ++it) {
227 shard_accumulator.Add(it.value());
228 }
229 }
230 local_accumulator[shard.Index()] = std::move(shard_accumulator);
231 });
232 return CombineAccumulators(local_accumulator);
233}
234
235VectorInfo CombinedBoundsInfo(const VectorXd& rhs_upper_bounds,
236 const VectorXd& rhs_lower_bounds,
237 const Sharder& sharder) {
238 std::vector<VectorInfoAccumulator> local_accumulator(sharder.NumShards());
239 sharder.ParallelForEachShard([&](const Sharder::Shard& shard) {
240 VectorInfoAccumulator shard_accumulator;
241 const auto lb_shard = shard(rhs_lower_bounds);
242 const auto ub_shard = shard(rhs_upper_bounds);
243 for (int64_t i = 0; i < lb_shard.size(); ++i) {
244 shard_accumulator.Add(CombineBounds(ub_shard[i], lb_shard[i]));
245 }
246 local_accumulator[shard.Index()] = std::move(shard_accumulator);
247 });
248 return CombineAccumulators(local_accumulator);
249}
250
251InfNormInfo ConstraintMatrixRowColInfo(
252 const SparseMatrix<double, ColMajor, int64_t>& constraint_matrix,
253 const SparseMatrix<double, ColMajor, int64_t>& constraint_matrix_transpose,
254 const Sharder& matrix_sharder, const Sharder& matrix_transpose_sharder,
255 const Sharder& primal_sharder, const Sharder& dual_sharder) {
256 VectorXd row_norms = ScaledColLInfNorm(
257 constraint_matrix_transpose,
258 /*col_scaling_vec=*/OnesVector(primal_sharder),
259 /*row_scaling_vec=*/OnesVector(dual_sharder), matrix_transpose_sharder);
260 VectorXd col_norms = ScaledColLInfNorm(
261 constraint_matrix,
262 /*row_scaling_vec=*/OnesVector(dual_sharder),
263 /*col_scaling_vec=*/OnesVector(primal_sharder), matrix_sharder);
264 return InfNormInfo{.row_norms = ComputeVectorInfo(row_norms, dual_sharder),
265 .col_norms = ComputeVectorInfo(col_norms, primal_sharder)};
266}
267
268} // namespace
269
270QuadraticProgramStats ComputeStats(const ShardedQuadraticProgram& qp) {
271 // Caution: if the constraint matrix is empty, elementwise operations
272 // (like `.coeffs().maxCoeff()` or `.minCoeff()`) will fail.
273 InfNormInfo cons_matrix_norm_info = ConstraintMatrixRowColInfo(
276 qp.PrimalSharder(), qp.DualSharder());
277 VectorInfo cons_matrix_info = MatrixAbsElementInfo(
279 VectorInfo combined_bounds_info =
280 CombinedBoundsInfo(qp.Qp().constraint_lower_bounds,
282 VectorInfo combined_variable_bounds_info =
283 CombinedBoundsInfo(qp.Qp().variable_lower_bounds,
285 VectorInfo obj_vec_info =
286 ComputeVectorInfo(qp.Qp().objective_vector, qp.PrimalSharder());
287 VectorInfo gaps_info =
288 VariableBoundGapInfo(qp.Qp().variable_lower_bounds,
290 QuadraticProgramStats program_stats;
291 program_stats.set_num_variables(qp.PrimalSize());
292 program_stats.set_num_constraints(qp.DualSize());
293 program_stats.set_constraint_matrix_col_min_l_inf_norm(
294 cons_matrix_norm_info.col_norms.smallest);
295 program_stats.set_constraint_matrix_row_min_l_inf_norm(
296 cons_matrix_norm_info.row_norms.smallest);
297 program_stats.set_constraint_matrix_num_nonzeros(
298 cons_matrix_info.num_finite_nonzero);
299 program_stats.set_constraint_matrix_abs_max(cons_matrix_info.largest);
300 program_stats.set_constraint_matrix_abs_min(cons_matrix_info.smallest);
301 program_stats.set_constraint_matrix_abs_avg(cons_matrix_info.average);
302 program_stats.set_constraint_matrix_l2_norm(cons_matrix_info.l2_norm);
303 program_stats.set_combined_bounds_max(combined_bounds_info.largest);
304 program_stats.set_combined_bounds_min(combined_bounds_info.smallest);
305 program_stats.set_combined_bounds_avg(combined_bounds_info.average);
306 program_stats.set_combined_bounds_l2_norm(combined_bounds_info.l2_norm);
307 program_stats.set_combined_variable_bounds_max(
308 combined_variable_bounds_info.largest);
309 program_stats.set_combined_variable_bounds_min(
310 combined_variable_bounds_info.smallest);
311 program_stats.set_combined_variable_bounds_avg(
312 combined_variable_bounds_info.average);
313 program_stats.set_combined_variable_bounds_l2_norm(
314 combined_variable_bounds_info.l2_norm);
315 program_stats.set_variable_bound_gaps_num_finite(
316 gaps_info.num_finite_nonzero + gaps_info.num_zero);
317 program_stats.set_variable_bound_gaps_max(gaps_info.largest);
318 program_stats.set_variable_bound_gaps_min(gaps_info.smallest);
319 program_stats.set_variable_bound_gaps_avg(gaps_info.average);
320 program_stats.set_variable_bound_gaps_l2_norm(gaps_info.l2_norm);
321 program_stats.set_objective_vector_abs_max(obj_vec_info.largest);
322 program_stats.set_objective_vector_abs_min(obj_vec_info.smallest);
323 program_stats.set_objective_vector_abs_avg(obj_vec_info.average);
324 program_stats.set_objective_vector_l2_norm(obj_vec_info.l2_norm);
325 if (IsLinearProgram(qp.Qp())) {
326 program_stats.set_objective_matrix_num_nonzeros(0);
327 program_stats.set_objective_matrix_abs_max(0);
328 program_stats.set_objective_matrix_abs_min(0);
329 program_stats.set_objective_matrix_abs_avg(
330 std::numeric_limits<double>::quiet_NaN());
331 program_stats.set_objective_matrix_l2_norm(0);
332 } else {
333 VectorInfo obj_matrix_info = ComputeVectorInfo(
334 qp.Qp().objective_matrix->diagonal(), qp.PrimalSharder());
335 program_stats.set_objective_matrix_num_nonzeros(
336 obj_matrix_info.num_finite_nonzero);
337 program_stats.set_objective_matrix_abs_max(obj_matrix_info.largest);
338 program_stats.set_objective_matrix_abs_min(obj_matrix_info.smallest);
339 program_stats.set_objective_matrix_abs_avg(obj_matrix_info.average);
340 program_stats.set_objective_matrix_l2_norm(obj_matrix_info.l2_norm);
341 }
342 return program_stats;
343}
344
345namespace {
346
347enum class ScalingNorm { kL2, kLInf };
348
349// Divides `vector` (componentwise) by the square root of `divisor`, updating
350// `vector` in-place. If a component of `divisor` is equal to zero, leaves the
351// component of `vector` unchanged. `sharder` should have the same size as
352// `vector`. For best performance `sharder` should have been created with the
353// `Sharder(int64_t, int, ThreadPool*)` constructor.
354void DivideBySquareRootOfDivisor(const VectorXd& divisor,
355 const Sharder& sharder, VectorXd& vector) {
356 sharder.ParallelForEachShard([&](const Sharder::Shard& shard) {
357 auto vec_shard = shard(vector);
358 const auto divisor_shard = shard(divisor);
359 for (int64_t index = 0; index < vec_shard.size(); ++index) {
360 if (divisor_shard[index] != 0) {
361 vec_shard[index] /= std::sqrt(divisor_shard[index]);
362 }
363 }
364 });
365}
366
367void ApplyScalingIterationsForNorm(const ShardedQuadraticProgram& sharded_qp,
368 const int num_iterations,
369 const ScalingNorm norm,
370 VectorXd& row_scaling_vec,
371 VectorXd& col_scaling_vec) {
372 const QuadraticProgram& qp = sharded_qp.Qp();
373 const int64_t num_col = qp.constraint_matrix.cols();
374 const int64_t num_row = qp.constraint_matrix.rows();
375 CHECK_EQ(num_col, col_scaling_vec.size());
376 CHECK_EQ(num_row, row_scaling_vec.size());
377 for (int i = 0; i < num_iterations; ++i) {
378 VectorXd col_norm;
379 VectorXd row_norm;
380 switch (norm) {
381 case ScalingNorm::kL2: {
382 col_norm = ScaledColL2Norm(qp.constraint_matrix, row_scaling_vec,
383 col_scaling_vec,
384 sharded_qp.ConstraintMatrixSharder());
385 row_norm = ScaledColL2Norm(
386 sharded_qp.TransposedConstraintMatrix(), col_scaling_vec,
387 row_scaling_vec, sharded_qp.TransposedConstraintMatrixSharder());
388 break;
389 }
390 case ScalingNorm::kLInf: {
391 col_norm = ScaledColLInfNorm(qp.constraint_matrix, row_scaling_vec,
392 col_scaling_vec,
393 sharded_qp.ConstraintMatrixSharder());
394 row_norm = ScaledColLInfNorm(
395 sharded_qp.TransposedConstraintMatrix(), col_scaling_vec,
396 row_scaling_vec, sharded_qp.TransposedConstraintMatrixSharder());
397 break;
398 }
399 }
400 DivideBySquareRootOfDivisor(col_norm, sharded_qp.PrimalSharder(),
401 col_scaling_vec);
402 DivideBySquareRootOfDivisor(row_norm, sharded_qp.DualSharder(),
403 row_scaling_vec);
404 }
405}
406
407} // namespace
408
410 const int num_iterations, VectorXd& row_scaling_vec,
411 VectorXd& col_scaling_vec) {
412 ApplyScalingIterationsForNorm(sharded_qp, num_iterations, ScalingNorm::kLInf,
413 row_scaling_vec, col_scaling_vec);
414}
415
417 VectorXd& row_scaling_vec, VectorXd& col_scaling_vec) {
418 ApplyScalingIterationsForNorm(sharded_qp, /*num_iterations=*/1,
419 ScalingNorm::kL2, row_scaling_vec,
420 col_scaling_vec);
421}
422
424 ShardedQuadraticProgram& sharded_qp) {
425 ScalingVectors scaling{
426 .row_scaling_vec = OnesVector(sharded_qp.DualSharder()),
427 .col_scaling_vec = OnesVector(sharded_qp.PrimalSharder())};
428 bool do_rescale = false;
429 if (rescaling_options.l_inf_ruiz_iterations > 0) {
430 do_rescale = true;
431 LInfRuizRescaling(sharded_qp, rescaling_options.l_inf_ruiz_iterations,
432 scaling.row_scaling_vec, scaling.col_scaling_vec);
433 }
434 if (rescaling_options.l2_norm_rescaling) {
435 do_rescale = true;
436 L2NormRescaling(sharded_qp, scaling.row_scaling_vec,
437 scaling.col_scaling_vec);
438 }
439 if (do_rescale) {
440 sharded_qp.RescaleQuadraticProgram(scaling.col_scaling_vec,
441 scaling.row_scaling_vec);
442 }
443 return scaling;
444}
445
447 const VectorXd& primal_solution,
448 const VectorXd& dual_product) {
449 LagrangianPart result{.gradient = VectorXd(sharded_qp.PrimalSize())};
450 const QuadraticProgram& qp = sharded_qp.Qp();
451 VectorXd value_parts(sharded_qp.PrimalSharder().NumShards());
453 [&](const Sharder::Shard& shard) {
454 if (IsLinearProgram(qp)) {
455 shard(result.gradient) =
456 shard(qp.objective_vector) - shard(dual_product);
457 value_parts[shard.Index()] =
458 shard(primal_solution).dot(shard(result.gradient));
459 } else {
460 // Note: using `auto` instead of `VectorXd` for the type of
461 // `objective_product` causes eigen to defer the matrix product until
462 // it is used (twice).
463 const VectorXd objective_product =
464 shard(*qp.objective_matrix) * shard(primal_solution);
465 shard(result.gradient) = shard(qp.objective_vector) +
466 objective_product - shard(dual_product);
467 value_parts[shard.Index()] =
468 shard(primal_solution)
469 .dot(shard(result.gradient) - 0.5 * objective_product);
470 }
471 });
472 result.value = value_parts.sum();
473 return result;
474}
475
476double DualSubgradientCoefficient(const double constraint_lower_bound,
477 const double constraint_upper_bound,
478 const double dual,
479 const double primal_product) {
480 if (dual < 0.0) {
481 return constraint_upper_bound;
482 } else if (dual > 0.0) {
483 return constraint_lower_bound;
484 } else if (std::isfinite(constraint_lower_bound) &&
485 std::isfinite(constraint_upper_bound)) {
486 if (primal_product < constraint_lower_bound) {
487 return constraint_lower_bound;
488 } else if (primal_product > constraint_upper_bound) {
489 return constraint_upper_bound;
490 } else {
491 return primal_product;
492 }
493 } else if (std::isfinite(constraint_lower_bound)) {
494 return constraint_lower_bound;
495 } else if (std::isfinite(constraint_upper_bound)) {
496 return constraint_upper_bound;
497 } else {
498 return 0.0;
499 }
500}
501
503 const VectorXd& dual_solution,
504 const VectorXd& primal_product) {
505 LagrangianPart result{.gradient = VectorXd(sharded_qp.DualSize())};
506 const QuadraticProgram& qp = sharded_qp.Qp();
507 VectorXd value_parts(sharded_qp.DualSharder().NumShards());
509 [&](const Sharder::Shard& shard) {
510 const auto constraint_lower_bounds = shard(qp.constraint_lower_bounds);
511 const auto constraint_upper_bounds = shard(qp.constraint_upper_bounds);
512 const auto dual_solution_shard = shard(dual_solution);
513 auto dual_gradient_shard = shard(result.gradient);
514 const auto primal_product_shard = shard(primal_product);
515 double value_sum = 0.0;
516 for (int64_t i = 0; i < dual_gradient_shard.size(); ++i) {
517 dual_gradient_shard[i] = DualSubgradientCoefficient(
518 constraint_lower_bounds[i], constraint_upper_bounds[i],
519 dual_solution_shard[i], primal_product_shard[i]);
520 value_sum += dual_gradient_shard[i] * dual_solution_shard[i];
521 }
522 value_parts[shard.Index()] = value_sum;
523 dual_gradient_shard -= primal_product_shard;
524 });
525 result.value = value_parts.sum();
526 return result;
527}
528
529namespace {
530
531using ::Eigen::ColMajor;
532using ::Eigen::SparseMatrix;
533
534// Scales `vector` (in-place) to have norm 1, unless it has norm 0 (in which
535// case it is left unscaled). Returns the original norm of `vector`.
536double NormalizeVector(const Sharder& sharder, VectorXd& vector) {
537 const double norm = Norm(vector, sharder);
538 if (norm != 0.0) {
539 sharder.ParallelForEachShard(
540 [&](const Sharder::Shard& shard) { shard(vector) /= norm; });
541 }
542 return norm;
543}
544
545// Estimates the probability that the power method, after k iterations, has
546// relative error > `epsilon`. This is based on Theorem 4.1(a) (on page 13) from
547// "Estimating the Largest Eigenvalue by the Power and Lanczos Algorithms with a
548// Random Start"
549// https://pdfs.semanticscholar.org/2b2e/a941e55e5fa2ee9d8f4ff393c14482051143.pdf
550double PowerMethodFailureProbability(int64_t dimension, double epsilon, int k) {
551 if (k < 2 || epsilon <= 0.0) {
552 // The theorem requires `epsilon > 0` and `k >= 2`.
553 return 1.0;
554 }
555 return std::min(0.824, 0.354 / std::sqrt(epsilon * (k - 1))) *
556 std::sqrt(dimension) * std::pow(1.0 - epsilon, k - 0.5);
557}
558
559SingularValueAndIterations EstimateMaximumSingularValue(
560 const SparseMatrix<double, ColMajor, int64_t>& matrix,
561 const SparseMatrix<double, ColMajor, int64_t>& matrix_transpose,
562 const std::optional<VectorXd>& active_set_indicator,
563 const std::optional<VectorXd>& transpose_active_set_indicator,
564 const Sharder& matrix_sharder, const Sharder& matrix_transpose_sharder,
565 const Sharder& primal_vector_sharder, const Sharder& dual_vector_sharder,
566 const double desired_relative_error, const double failure_probability,
567 std::mt19937& mt_generator) {
568 const int64_t dimension = matrix.cols();
569 VectorXd eigenvector(dimension);
570 // Even though it will be slower, we initialize `eigenvector` sequentially so
571 // that the result doesn't depend on the number of threads.
572 for (double& entry : eigenvector) {
573 entry = absl::Gaussian<double>(mt_generator);
574 }
575 if (active_set_indicator.has_value()) {
576 CoefficientWiseProductInPlace(*active_set_indicator, primal_vector_sharder,
577 eigenvector);
578 }
579 NormalizeVector(primal_vector_sharder, eigenvector);
580 double eigenvalue_estimate = 0.0;
581
582 int num_iterations = 0;
583 // The maximum singular value of A is the square root of the maximum
584 // eigenvalue of A^T A. `epsilon` is the relative error needed for the maximum
585 // eigenvalue of A^T A that gives `desired_relative_error` for the maximum
586 // singular value of A.
587 const double epsilon = 1.0 - MathUtil::Square(1.0 - desired_relative_error);
588 while (PowerMethodFailureProbability(dimension, epsilon, num_iterations) >
589 failure_probability) {
590 VectorXd dual_eigenvector = TransposedMatrixVectorProduct(
591 matrix_transpose, eigenvector, matrix_transpose_sharder);
592 if (transpose_active_set_indicator.has_value()) {
593 CoefficientWiseProductInPlace(*transpose_active_set_indicator,
594 dual_vector_sharder, dual_eigenvector);
595 }
596 VectorXd next_eigenvector =
597 TransposedMatrixVectorProduct(matrix, dual_eigenvector, matrix_sharder);
598 if (active_set_indicator.has_value()) {
599 CoefficientWiseProductInPlace(*active_set_indicator,
600 primal_vector_sharder, next_eigenvector);
601 }
602 eigenvalue_estimate =
603 Dot(eigenvector, next_eigenvector, primal_vector_sharder);
604 eigenvector = std::move(next_eigenvector);
605 ++num_iterations;
606 const double primal_norm =
607 NormalizeVector(primal_vector_sharder, eigenvector);
608
609 VLOG(1) << "Iteration " << num_iterations << " singular value estimate "
610 << std::sqrt(eigenvalue_estimate) << " primal norm " << primal_norm;
611 }
612 return SingularValueAndIterations{
613 .singular_value = std::sqrt(eigenvalue_estimate),
614 .num_iterations = num_iterations,
615 .estimated_relative_error = desired_relative_error};
616}
617
618// Given `primal_solution`, compute a {0, 1}-valued vector that is nonzero in
619// all the coordinates that are not saturating the primal variable bounds.
620VectorXd ComputePrimalActiveSetIndicator(
621 const ShardedQuadraticProgram& sharded_qp,
622 const VectorXd& primal_solution) {
623 VectorXd indicator(sharded_qp.PrimalSize());
624 sharded_qp.PrimalSharder().ParallelForEachShard(
625 [&](const Sharder::Shard& shard) {
626 const auto lower_bound_shard =
627 shard(sharded_qp.Qp().variable_lower_bounds);
628 const auto upper_bound_shard =
629 shard(sharded_qp.Qp().variable_upper_bounds);
630 const auto primal_solution_shard = shard(primal_solution);
631 auto indicator_shard = shard(indicator);
632 const int64_t shard_size =
633 sharded_qp.PrimalSharder().ShardSize(shard.Index());
634 for (int64_t i = 0; i < shard_size; ++i) {
635 if ((primal_solution_shard[i] == lower_bound_shard[i]) ||
636 (primal_solution_shard[i] == upper_bound_shard[i])) {
637 indicator_shard[i] = 0.0;
638 } else {
639 indicator_shard[i] = 1.0;
640 }
641 }
642 });
643 return indicator;
644}
645
646// Like `ComputePrimalActiveSetIndicator(sharded_qp, primal_solution)`, but this
647// time using the implicit bounds on the dual variables.
648VectorXd ComputeDualActiveSetIndicator(
649 const ShardedQuadraticProgram& sharded_qp, const VectorXd& dual_solution) {
650 VectorXd indicator(sharded_qp.DualSize());
651 sharded_qp.DualSharder().ParallelForEachShard(
652 [&](const Sharder::Shard& shard) {
653 const auto lower_bound_shard =
654 shard(sharded_qp.Qp().constraint_lower_bounds);
655 const auto upper_bound_shard =
656 shard(sharded_qp.Qp().constraint_upper_bounds);
657 const auto dual_solution_shard = shard(dual_solution);
658 auto indicator_shard = shard(indicator);
659 const int64_t shard_size =
660 sharded_qp.DualSharder().ShardSize(shard.Index());
661 for (int64_t i = 0; i < shard_size; ++i) {
662 if (dual_solution_shard[i] == 0.0 &&
663 (std::isinf(lower_bound_shard[i]) ||
664 std::isinf(upper_bound_shard[i]))) {
665 indicator_shard[i] = 0.0;
666 } else {
667 indicator_shard[i] = 1.0;
668 }
669 }
670 });
671 return indicator;
672}
673
674} // namespace
675
677 const ShardedQuadraticProgram& sharded_qp,
678 const std::optional<VectorXd>& primal_solution,
679 const std::optional<VectorXd>& dual_solution,
680 const double desired_relative_error, const double failure_probability,
681 std::mt19937& mt_generator) {
682 std::optional<VectorXd> primal_active_set_indicator;
683 std::optional<VectorXd> dual_active_set_indicator;
684 if (primal_solution.has_value()) {
685 primal_active_set_indicator =
686 ComputePrimalActiveSetIndicator(sharded_qp, *primal_solution);
687 }
688 if (dual_solution.has_value()) {
689 dual_active_set_indicator =
690 ComputeDualActiveSetIndicator(sharded_qp, *dual_solution);
691 }
692 return EstimateMaximumSingularValue(
693 sharded_qp.Qp().constraint_matrix,
694 sharded_qp.TransposedConstraintMatrix(), primal_active_set_indicator,
695 dual_active_set_indicator, sharded_qp.ConstraintMatrixSharder(),
697 sharded_qp.PrimalSharder(), sharded_qp.DualSharder(),
698 desired_relative_error, failure_probability, mt_generator);
699}
700
702 const QuadraticProgram& qp = sharded_qp.Qp();
703 const bool constraint_bounds_valid =
705 [&](const Sharder::Shard& shard) {
706 return (shard(qp.constraint_lower_bounds).array() <=
707 shard(qp.constraint_upper_bounds).array() &&
708 shard(qp.constraint_lower_bounds).array() < kInfinity &&
709 shard(qp.constraint_upper_bounds).array() > -kInfinity)
710 .all();
711 });
712 const bool variable_bounds_valid =
714 [&](const Sharder::Shard& shard) {
715 return (shard(qp.variable_lower_bounds).array() <=
716 shard(qp.variable_upper_bounds).array() &&
717 shard(qp.variable_lower_bounds).array() < kInfinity &&
718 shard(qp.variable_upper_bounds).array() > -kInfinity)
719 .all();
720 });
721
722 return constraint_bounds_valid && variable_bounds_valid;
723}
724
726 VectorXd& primal,
727 const bool use_feasibility_bounds) {
728 const auto make_finite_values_zero = [](const double x) {
729 return std::isfinite(x) ? 0.0 : x;
730 };
732 [&](const Sharder::Shard& shard) {
733 const QuadraticProgram& qp = sharded_qp.Qp();
734 if (use_feasibility_bounds) {
735 shard(primal) =
736 shard(primal)
737 .cwiseMin(shard(qp.variable_upper_bounds)
738 .unaryExpr(make_finite_values_zero))
739 .cwiseMax(shard(qp.variable_lower_bounds)
740 .unaryExpr(make_finite_values_zero));
741 } else {
742 shard(primal) = shard(primal)
743 .cwiseMin(shard(qp.variable_upper_bounds))
744 .cwiseMax(shard(qp.variable_lower_bounds));
745 }
746 });
747}
748
750 VectorXd& dual) {
751 const QuadraticProgram& qp = sharded_qp.Qp();
753 [&](const Sharder::Shard& shard) {
754 const auto lower_bound_shard = shard(qp.constraint_lower_bounds);
755 const auto upper_bound_shard = shard(qp.constraint_upper_bounds);
756 auto dual_shard = shard(dual);
757
758 // TODO(user): Investigate whether it is more efficient to
759 // use .cwiseMax() + .cwiseMin() with unaryExpr(s) that map
760 // upper_bound_shard and lower_bound_shard to appropriate values.
761 for (int64_t i = 0; i < dual_shard.size(); ++i) {
762 if (!std::isfinite(upper_bound_shard[i])) {
763 dual_shard[i] = std::max(dual_shard[i], 0.0);
764 }
765 if (!std::isfinite(lower_bound_shard[i])) {
766 dual_shard[i] = std::min(dual_shard[i], 0.0);
767 }
768 }
769 });
770}
771
772} // namespace operations_research::pdlp
int64_t max
static T Square(const T x)
Returns the square of x.
Definition mathutil.h:102
const Sharder & ConstraintMatrixSharder() const
Returns a Sharder intended for the columns of the QP's constraint matrix.
const Sharder & TransposedConstraintMatrixSharder() const
Returns a Sharder intended for the rows of the QP's constraint matrix.
const Eigen::SparseMatrix< double, Eigen::ColMajor, int64_t > & TransposedConstraintMatrix() const
Returns a reference to the transpose of the QP's constraint matrix.
const Sharder & DualSharder() const
Returns a Sharder intended for dual vectors.
const Sharder & PrimalSharder() const
Returns a Sharder intended for primal vectors.
void RescaleQuadraticProgram(const Eigen::VectorXd &col_scaling_vec, const Eigen::VectorXd &row_scaling_vec)
void Add(const Eigen::VectorXd &datapoint, double weight)
void Clear()
Clears the sum to zero, i.e., just constructed.
bool ParallelTrueForAllShards(const std::function< bool(const Shard &)> &func) const
Definition sharder.cc:152
void ParallelForEachShard(const std::function< void(const Shard &)> &func) const
Runs func on each of the shards.
Definition sharder.cc:109
int64_t value
int index
Validation utilities for solvers.proto.
void SetZero(const Sharder &sharder, VectorXd &dest)
Definition sharder.cc:178
LagrangianPart ComputeDualGradient(const ShardedQuadraticProgram &sharded_qp, const VectorXd &dual_solution, const VectorXd &primal_product)
QuadraticProgramStats ComputeStats(const ShardedQuadraticProgram &qp)
Returns a QuadraticProgramStats for a ShardedQuadraticProgram.
double Dot(const VectorXd &v1, const VectorXd &v2, const Sharder &sharder)
Definition sharder.cc:230
LagrangianPart ComputePrimalGradient(const ShardedQuadraticProgram &sharded_qp, const VectorXd &primal_solution, const VectorXd &dual_product)
VectorXd TransposedMatrixVectorProduct(const Eigen::SparseMatrix< double, Eigen::ColMajor, int64_t > &matrix, const VectorXd &vector, const Sharder &sharder)
Definition sharder.cc:163
void LInfRuizRescaling(const ShardedQuadraticProgram &sharded_qp, const int num_iterations, VectorXd &row_scaling_vec, VectorXd &col_scaling_vec)
SingularValueAndIterations EstimateMaximumSingularValueOfConstraintMatrix(const ShardedQuadraticProgram &sharded_qp, const std::optional< VectorXd > &primal_solution, const std::optional< VectorXd > &dual_solution, const double desired_relative_error, const double failure_probability, std::mt19937 &mt_generator)
VectorXd ScaledColLInfNorm(const Eigen::SparseMatrix< double, Eigen::ColMajor, int64_t > &matrix, const VectorXd &row_scaling_vec, const VectorXd &col_scaling_vec, const Sharder &sharder)
Definition sharder.cc:291
void ProjectToPrimalVariableBounds(const ShardedQuadraticProgram &sharded_qp, VectorXd &primal, const bool use_feasibility_bounds)
bool IsLinearProgram(const QuadraticProgram &qp)
void ProjectToDualVariableBounds(const ShardedQuadraticProgram &sharded_qp, VectorXd &dual)
double DualSubgradientCoefficient(const double constraint_lower_bound, const double constraint_upper_bound, const double dual, const double primal_product)
void CoefficientWiseProductInPlace(const VectorXd &scale, const Sharder &sharder, VectorXd &dest)
Definition sharder.cc:216
VectorXd ZeroVector(const Sharder &sharder)
Like VectorXd::Zero(sharder.NumElements()).
Definition sharder.cc:184
void L2NormRescaling(const ShardedQuadraticProgram &sharded_qp, VectorXd &row_scaling_vec, VectorXd &col_scaling_vec)
VectorXd ScaledColL2Norm(const Eigen::SparseMatrix< double, Eigen::ColMajor, int64_t > &matrix, const VectorXd &row_scaling_vec, const VectorXd &col_scaling_vec, const Sharder &sharder)
Definition sharder.cc:313
VectorXd OnesVector(const Sharder &sharder)
Like VectorXd::Ones(sharder.NumElements()).
Definition sharder.cc:190
bool HasValidBounds(const ShardedQuadraticProgram &sharded_qp)
ScalingVectors ApplyRescaling(const RescalingOptions &rescaling_options, ShardedQuadraticProgram &sharded_qp)
double Norm(const VectorXd &vector, const Sharder &sharder)
Definition sharder.cc:253
void AssignVector(const VectorXd &vec, const Sharder &sharder, VectorXd &dest)
Definition sharder.cc:204
STL namespace.
int64_t weight
Definition pack.cc:510
const Variable x
Definition qp_tests.cc:127
std::vector< double > lower_bounds
Definition lp_utils.cc:746
std::vector< double > upper_bounds
Definition lp_utils.cc:747
double smallest
The smallest absolute value of the finite non-zero values.
int64_t num_zero
double l2_norm
The L2 norm of the finite values.
double average
The average absolute value of the finite values.
int64_t num_finite_nonzero
double largest
The largest absolute value of the finite non-zero values.
VectorInfo row_norms
int64_t num_infinite
VectorInfo col_norms
std::optional< Eigen::DiagonalMatrix< double, Eigen::Dynamic > > objective_matrix
Eigen::SparseMatrix< double, Eigen::ColMajor, int64_t > constraint_matrix