Google OR-Tools v9.11
a fast and portable software suite for combinatorial optimization
Loading...
Searching...
No Matches
trust_region.cc
Go to the documentation of this file.
1// Copyright 2010-2024 Google LLC
2// Licensed under the Apache License, Version 2.0 (the "License");
3// you may not use this file except in compliance with the License.
4// You may obtain a copy of the License at
5//
6// http://www.apache.org/licenses/LICENSE-2.0
7//
8// Unless required by applicable law or agreed to in writing, software
9// distributed under the License is distributed on an "AS IS" BASIS,
10// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11// See the License for the specific language governing permissions and
12// limitations under the License.
13
15
16#include <algorithm>
17#include <cmath>
18#include <cstdint>
19#include <limits>
20#include <optional>
21#include <utility>
22#include <vector>
23
24#include "Eigen/Core"
25#include "absl/log/check.h"
32
34
35using ::Eigen::VectorXd;
36
37namespace {
38
39// The functions in this file that are templated on a `TrustRegionProblem` use
40// the templated class to specify the following trust-region problem with bound
41// constraints:
42// min_x Objective^T * (x - CenterPoint)
43// s.t. LowerBound <= x <= UpperBound
44// || x - Centerpoint ||_W <= target_radius
45// where ||y||_W = sqrt(sum_i NormWeight[i] * y[i]^2)
46// The templated `TrustRegionProblem` type should provide methods:
47// `double Objective(int64_t index) const;`
48// `double LowerBound(int64_t index) const;`
49// `double UpperBound(int64_t index) const;`
50// `double CenterPoint(int64_t index) const;`
51// `double NormWeight(int64_t index) const;`
52// which give the values of the corresponding terms in the problem
53// specification. See `VectorTrustRegionProblem` for an example. The
54// *TrustRegionProblem classes below implement several instances of
55// `TrustRegionProblem`.
56// On the other hand, the functions that are templated on a
57// `DiagonalTrustRegionProblem` use the templated class to specify the following
58// trust-region problem with bound constraints:
59// min_x (1 / 2) * (x - CenterPoint)^T * ObjectiveMatrix * (x - CenterPoint)
60// + Objective^T * (x - CenterPoint)
61// s.t. LowerBound <= x <= UpperBound
62// || x - CenterPoint ||_W <= target_radius,
63// where ||y||_W = sqrt(sum_i NormWeight[i] * y[i]^2) and ObjectiveMatrix is
64// assumed to be a diagonal matrix with nonnegative entries. Templated
65// `DiagonalTrustRegionProblem` types should provide all the methods provided by
66// templated `TrustRegionProblem` types, as well as:
67// `double ObjectiveMatrixDiagonalAt(int64_t index) const;`
68// which gives the value of the objective matrix diagonal at a specified index.
69// See `DiagonalTrustRegionProblemFromQp` for an example that sets up the
70// diagonal trust region problem from an existing `ShardedQuadraticProgram`.
71
72// `VectorTrustRegionProblem` uses explicit vectors to define the trust region
73// problem. It captures const references to the vectors used in the constructor,
74// which should outlive the class instance.
75class VectorTrustRegionProblem {
76 public:
77 VectorTrustRegionProblem(const VectorXd* objective,
78 const VectorXd* lower_bound,
79 const VectorXd* upper_bound,
80 const VectorXd* center_point,
81 const VectorXd* norm_weight)
82 : objective_(*objective),
83 lower_bound_(*lower_bound),
84 upper_bound_(*upper_bound),
85 center_point_(*center_point),
86 norm_weight_(*norm_weight) {}
87 double Objective(int64_t index) const { return objective_(index); }
88 double LowerBound(int64_t index) const { return lower_bound_(index); }
89 double UpperBound(int64_t index) const { return upper_bound_(index); }
90 double CenterPoint(int64_t index) const { return center_point_(index); }
91 double NormWeight(int64_t index) const { return norm_weight_(index); }
92
93 private:
94 const VectorXd& objective_;
95 const VectorXd& lower_bound_;
96 const VectorXd& upper_bound_;
97 const VectorXd& center_point_;
98 const VectorXd& norm_weight_;
99};
100
101// `JointTrustRegionProblem` defines the joint primal/dual trust region problem
102// given a `QuadraticProgram`, primal and dual solutions, primal and dual
103// gradients, and the primal weight. The joint problem (implicitly) concatenates
104// the primal and dual vectors. The class captures const references to the
105// constructor arguments (except `primal_weight`), which should outlive the
106// class instance.
107// The corresponding trust region problem is
108// min `primal_gradient`^T * (x - `primal_solution`)
109// - `dual_gradient`^T * (y - `dual_solution`)
110// s.t. `qp.variable_lower_bounds` <= x <= `qp.variable_upper_bounds`
111// `qp`.implicit_dual_lower_bounds <= y <= `qp`.implicit_dual_upper_bounds
112// || (x, y) - (`primal_solution`, `dual_solution`) ||_2 <= `target_radius`
113// where the implicit dual bounds are those given in
114// https://developers.google.com/optimization/lp/pdlp_math#dual_variable_bounds
115class JointTrustRegionProblem {
116 public:
117 JointTrustRegionProblem(const QuadraticProgram* qp,
118 const VectorXd* primal_solution,
119 const VectorXd* dual_solution,
120 const VectorXd* primal_gradient,
121 const VectorXd* dual_gradient,
122 const double primal_weight)
123 : qp_(*qp),
124 primal_size_(qp_.variable_lower_bounds.size()),
125 primal_solution_(*primal_solution),
126 dual_solution_(*dual_solution),
127 primal_gradient_(*primal_gradient),
128 dual_gradient_(*dual_gradient),
129 primal_weight_(primal_weight) {}
130 double Objective(int64_t index) const {
131 return index < primal_size_ ? primal_gradient_[index]
132 : -dual_gradient_[index - primal_size_];
133 }
134 double LowerBound(int64_t index) const {
135 return index < primal_size_ ? qp_.variable_lower_bounds[index]
136 : std::isfinite(qp_.constraint_upper_bounds[index - primal_size_])
137 ? -std::numeric_limits<double>::infinity()
138 : 0.0;
139 }
140 double UpperBound(int64_t index) const {
141 return index < primal_size_ ? qp_.variable_upper_bounds[index]
142 : std::isfinite(qp_.constraint_lower_bounds[index - primal_size_])
143 ? std::numeric_limits<double>::infinity()
144 : 0.0;
145 }
146 double CenterPoint(int64_t index) const {
147 return index < primal_size_ ? primal_solution_[index]
148 : dual_solution_[index - primal_size_];
149 }
150 double NormWeight(int64_t index) const {
151 return index < primal_size_ ? 0.5 * primal_weight_ : 0.5 / primal_weight_;
152 }
153
154 private:
155 const QuadraticProgram& qp_;
156 const int64_t primal_size_;
157 const VectorXd& primal_solution_;
158 const VectorXd& dual_solution_;
159 const VectorXd& primal_gradient_;
160 const VectorXd& dual_gradient_;
161 const double primal_weight_;
162};
163
164struct TrustRegionResultStepSize {
165 // The step_size of the solution.
167 // The value objective_vector^T * (solution - center_point).
169};
170
171// `problem` is sharded according to `sharder`. Within each shard, this function
172// selects the subset of elements corresponding to
173// `indexed_components_by_shard`, and takes the median of the critical step
174// sizes of these elements, producing an array A of shard medians. Then returns
175// the median of the array A. CHECK-fails if `indexed_components_by_shard` is
176// empty for all shards.
177template <typename TrustRegionProblem>
178double MedianOfShardMedians(
179 const TrustRegionProblem& problem,
180 const std::vector<std::vector<int64_t>>& indexed_components_by_shard,
181 const Sharder& sharder) {
182 std::vector<std::optional<double>> shard_medians(sharder.NumShards(),
183 std::nullopt);
184 sharder.ParallelForEachShard([&](const Sharder::Shard& shard) {
185 const auto& indexed_shard_components =
186 indexed_components_by_shard[shard.Index()];
187 if (!indexed_shard_components.empty()) {
188 shard_medians[shard.Index()] = internal::EasyMedian(
189 indexed_shard_components, [&](const int64_t index) {
190 return internal::CriticalStepSize(problem, index);
191 });
192 }
193 });
194 std::vector<double> non_empty_medians;
195 for (const auto& median : shard_medians) {
196 if (median.has_value()) {
197 non_empty_medians.push_back(*median);
198 }
199 }
200 CHECK(!non_empty_medians.empty());
201 return internal::EasyMedian(non_empty_medians,
202 [](const double x) { return x; });
203}
204
205struct InitialState {
206 std::vector<std::vector<int64_t>> undecided_components_by_shard;
208};
209
210template <typename TrustRegionProblem>
211InitialState ComputeInitialState(const TrustRegionProblem& problem,
212 const Sharder& sharder) {
213 InitialState result;
214 result.undecided_components_by_shard.resize(sharder.NumShards());
215 result.radius_coefficient_of_decided_components =
216 sharder.ParallelSumOverShards([&](const Sharder::Shard& shard) {
217 const int64_t shard_start = sharder.ShardStart(shard.Index());
218 const int64_t shard_size = sharder.ShardSize(shard.Index());
220 problem, shard_start, shard_start + shard_size,
221 result.undecided_components_by_shard[shard.Index()]);
222 });
223 return result;
224}
225
226template <typename TrustRegionProblem>
228 const TrustRegionProblem& problem, const double step_size,
229 const Sharder& sharder,
230 const std::vector<std::vector<int64_t>>& undecided_components_by_shard) {
231 return sharder.ParallelSumOverShards([&](const Sharder::Shard& shard) {
233 problem, step_size, undecided_components_by_shard[shard.Index()]);
234 });
235}
236
237template <typename TrustRegionProblem>
239 const TrustRegionProblem& problem, const double step_size_threshold,
240 const Sharder& sharder,
241 std::vector<std::vector<int64_t>>& undecided_components_by_shard) {
242 return sharder.ParallelSumOverShards([&](const Sharder::Shard& shard) {
244 problem, step_size_threshold,
245 undecided_components_by_shard[shard.Index()]);
246 });
247}
248
249template <typename TrustRegionProblem>
251 const TrustRegionProblem& problem, const double step_size_threshold,
252 const Sharder& sharder,
253 std::vector<std::vector<int64_t>>& undecided_components_by_shard) {
254 return sharder.ParallelSumOverShards([&](const Sharder::Shard& shard) {
256 problem, step_size_threshold,
257 undecided_components_by_shard[shard.Index()]);
258 });
259}
260
261int64_t NumUndecidedComponents(
262 const std::vector<std::vector<int64_t>>& undecided_components_by_shard) {
263 int64_t num_undecided_components = 0;
264 for (const auto& undecided_components : undecided_components_by_shard) {
265 num_undecided_components += undecided_components.size();
266 }
267 return num_undecided_components;
268}
269
270int64_t MaxUndecidedComponentsInAnyShard(
271 const std::vector<std::vector<int64_t>>& undecided_components_by_shard) {
272 int64_t max = 0;
273 for (const auto& undecided_components : undecided_components_by_shard) {
274 max = std::max<int64_t>(max, undecided_components.size());
275 }
276 return max;
277}
278
279template <typename TrustRegionProblem>
280VectorXd ComputeSolution(const TrustRegionProblem& problem,
281 const double step_size, const Sharder& sharder) {
282 VectorXd solution(sharder.NumElements());
283 sharder.ParallelForEachShard([&](const Sharder::Shard& shard) {
284 const int64_t shard_start = sharder.ShardStart(shard.Index());
285 const int64_t shard_size = sharder.ShardSize(shard.Index());
286 for (int64_t index = shard_start; index < shard_start + shard_size;
287 ++index) {
288 solution[index] = internal::ProjectedValue(problem, index, step_size);
289 }
290 });
291 return solution;
292}
293
294template <typename TrustRegionProblem>
295double ComputeObjectiveValue(const TrustRegionProblem& problem,
296 const double step_size, const Sharder& sharder) {
297 return sharder.ParallelSumOverShards([&](const Sharder::Shard& shard) {
298 const int64_t shard_start = sharder.ShardStart(shard.Index());
299 const int64_t shard_size = sharder.ShardSize(shard.Index());
300 double shard_value = 0.0;
301 for (int64_t index = shard_start; index < shard_start + shard_size;
302 ++index) {
303 shard_value += problem.Objective(index) *
304 (internal::ProjectedValue(problem, index, step_size) -
305 problem.CenterPoint(index));
306 }
307 return shard_value;
308 });
309}
310
311// Solves the following trust-region problem with bound constraints:
312// min_x Objective^T * (x - CenterPoint)
313// s.t. LowerBound <= x <= UpperBound
314// || x - Centerpoint ||_W <= target_radius
315// where ||y||_W = sqrt(sum_i NormWeight[i] * y[i]^2)
316// given by a `TrustRegionProblem` (see description at the top of this file),
317// using an exact linear-time method. The size of `sharder` is used to determine
318// the size of the problem. Assumes that there is always a feasible solution,
319// that is, that `problem.LowerBound(i)` <= `problem.CenterPoint(i)` <=
320// `problem.UpperBound(i)`, and that `problem.NormWeight(i) > 0`, for
321// 0 <= i < `sharder.NumElements()`.
322//
323// The linear-time method is based on the observation that the optimal x will be
324// of the form x(delta) =
325// proj(center_point - delta * objective_vector / norm_weights, bounds)
326// for some delta such that || x(delta) - center_point ||_W = target_radius
327// (except for corner cases where the radius constraint is inactive) and the
328// vector division is element-wise. Therefore we find the critical threshold for
329// each coordinate, and repeatedly: (1) take the median delta, (2) check the
330// corresponding radius, and (3) eliminate half of the data points from
331// consideration.
332template <typename TrustRegionProblem>
333TrustRegionResultStepSize SolveTrustRegionStepSize(
334 const TrustRegionProblem& problem, const double target_radius,
335 const Sharder& sharder) {
336 CHECK_GE(target_radius, 0.0);
337
338 const bool norm_weights_are_positive =
339 sharder.ParallelTrueForAllShards([&](const Sharder::Shard& shard) {
340 const int64_t shard_start = sharder.ShardStart(shard.Index());
341 const int64_t shard_size = sharder.ShardSize(shard.Index());
342 for (int64_t index = shard_start; index < shard_start + shard_size;
343 ++index) {
344 if (problem.NormWeight(index) <= 0.0) return false;
345 }
346 return true;
347 });
348 CHECK(norm_weights_are_positive);
349
350 if (target_radius == 0.0) {
351 return {.solution_step_size = 0.0, .objective_value = 0.0};
352 }
353
354 const bool objective_is_all_zeros =
355 sharder.ParallelTrueForAllShards([&](const Sharder::Shard& shard) {
356 const int64_t shard_start = sharder.ShardStart(shard.Index());
357 const int64_t shard_size = sharder.ShardSize(shard.Index());
358 for (int64_t index = shard_start; index < shard_start + shard_size;
359 ++index) {
360 if (problem.Objective(index) != 0.0) return false;
361 }
362 return true;
363 });
364 if (objective_is_all_zeros) {
365 return {.solution_step_size = 0.0, .objective_value = 0.0};
366 }
367
368 InitialState initial_state = ComputeInitialState(problem, sharder);
369
370 // The contribution to the weighted radius squared from the variables that we
371 // know are at their bounds in the solution.
372 double fixed_radius_squared = 0.0;
373
374 // This value times step_size^2 gives the contribution to the weighted radius
375 // squared from the variables determined in the solution by the formula
376 // center_point - step_size * objective / norm_weights. These variables are
377 // not at their bounds in the solution, except in degenerate cases.
378 double variable_radius_coefficient =
379 initial_state.radius_coefficient_of_decided_components;
380
381 // For each shard, the components of the variables that aren't accounted for
382 // in `fixed_radius_squared` or `variable_radius_coefficient`, i.e., we don't
383 // know if they're at their bounds in the solution.
384 std::vector<std::vector<int64_t>> undecided_components_by_shard(
385 std::move(initial_state.undecided_components_by_shard));
386
387 // These are counters for the number of variables we inspect overall during
388 // the solve, including in the initialization. The "worst case" accounts for
389 // imbalance across the shards by charging each round for the maximum number
390 // of elements in a shard, because shards with fewer elements may correspond
391 // to idle threads.
392 int64_t actual_elements_seen = sharder.NumElements();
393 int64_t worst_case_elements_seen = sharder.NumElements();
394
395 while (NumUndecidedComponents(undecided_components_by_shard) > 0) {
396 worst_case_elements_seen +=
397 MaxUndecidedComponentsInAnyShard(undecided_components_by_shard) *
398 sharder.NumShards();
399 actual_elements_seen +=
400 NumUndecidedComponents(undecided_components_by_shard);
401
402 const double step_size_threshold =
403 MedianOfShardMedians(problem, undecided_components_by_shard, sharder);
404 const double radius_squared_of_undecided_components =
406 problem, /*step_size=*/step_size_threshold, sharder,
408
409 const double radius_squared_at_threshold =
410 radius_squared_of_undecided_components + fixed_radius_squared +
411 variable_radius_coefficient * MathUtil::Square(step_size_threshold);
412
413 if (radius_squared_at_threshold > MathUtil::Square(target_radius)) {
414 variable_radius_coefficient += RemoveCriticalStepsAboveThreshold(
415 problem, step_size_threshold, sharder, undecided_components_by_shard);
416 } else {
417 fixed_radius_squared += RemoveCriticalStepsBelowThreshold(
418 problem, step_size_threshold, sharder, undecided_components_by_shard);
419 }
420 }
421 VLOG(1) << "Total passes through variables: "
422 << actual_elements_seen / static_cast<double>(sharder.NumElements());
423 VLOG(1) << "Theoretical slowdown because of shard imbalance: "
424 << static_cast<double>(worst_case_elements_seen) /
425 actual_elements_seen -
426 1.0;
427
428 // Now that we know exactly which variables are fixed at their bounds,
429 // compute the step size that will give us the exact target radius.
430 // This is the solution to: `fixed_radius_squared` +
431 // `variable_radius_coefficient` * `step_size`^2 == `target_radius`^2.
432 double step_size = 0.0;
433 if (variable_radius_coefficient > 0.0) {
434 step_size =
435 std::sqrt((MathUtil::Square(target_radius) - fixed_radius_squared) /
436 variable_radius_coefficient);
437 } else {
438 // All variables are fixed at their bounds. So we can take a very large
439 // finite step. We don't use infinity as the step in order to avoid 0 *
440 // infinity = NaN when zeros are present in the objective vector. It's ok if
441 // the result of step_size * objective_vector has infinity components
442 // because these are projected correctly to bounds.
443 step_size = std::numeric_limits<double>::max();
444 }
445
446 return {
447 .solution_step_size = step_size,
448 .objective_value = ComputeObjectiveValue(problem, step_size, sharder)};
449}
450
451} // namespace
452
453TrustRegionResult SolveTrustRegion(const VectorXd& objective_vector,
454 const VectorXd& variable_lower_bounds,
455 const VectorXd& variable_upper_bounds,
456 const VectorXd& center_point,
457 const VectorXd& norm_weights,
458 const double target_radius,
459 const Sharder& sharder) {
460 VectorTrustRegionProblem problem(&objective_vector, &variable_lower_bounds,
461 &variable_upper_bounds, &center_point,
462 &norm_weights);
463 TrustRegionResultStepSize solution =
464 SolveTrustRegionStepSize(problem, target_radius, sharder);
465 return TrustRegionResult{
466 .solution_step_size = solution.solution_step_size,
467 .objective_value = solution.objective_value,
468 .solution =
469 ComputeSolution(problem, solution.solution_step_size, sharder),
470 };
471}
472
473// A generic trust region problem of the form:
474// min_{x} (1 / 2) * (x - `center_point`)^T Q (x - `center_point`)
475// + c^T (x - `center_point`)
476// s.t. `lower_bounds` <= (x - `center_point`) <= `upper_bounds`
477// ||x - `center_point`||_W <= radius
478// where ||z||_W = sqrt(sum_i w_i z_i^2) is a weighted Euclidean norm.
479// It is assumed that the objective matrix Q is a nonnegative diagonal matrix.
481 public:
482 // A reference to the objects passed in the constructor is kept, so they must
483 // outlive the `DiagonalTrustRegionProblem` instance.
484 DiagonalTrustRegionProblem(const VectorXd* objective_vector,
485 const VectorXd* objective_matrix_diagonal,
486 const VectorXd* lower_bounds,
487 const VectorXd* upper_bounds,
488 const VectorXd* center_point,
489 const VectorXd* norm_weights)
490 : objective_vector_(*objective_vector),
491 objective_matrix_diagonal_(*objective_matrix_diagonal),
492 variable_lower_bounds_(*lower_bounds),
493 variable_upper_bounds_(*upper_bounds),
494 center_point_(*center_point),
495 norm_weight_(*norm_weights) {}
496
497 double CenterPoint(int64_t index) const { return center_point_[index]; }
498
499 double NormWeight(int64_t index) const { return norm_weight_[index]; }
500
501 double LowerBound(int64_t index) const {
502 return variable_lower_bounds_[index];
503 }
504
505 double UpperBound(int64_t index) const {
506 return variable_upper_bounds_[index];
507 }
508
509 double Objective(int64_t index) const { return objective_vector_[index]; }
510
511 double ObjectiveMatrixDiagonalAt(int64_t index) const {
512 return objective_matrix_diagonal_[index];
513 }
514
515 private:
516 const VectorXd& objective_vector_;
517 const VectorXd& objective_matrix_diagonal_;
518 const VectorXd& variable_lower_bounds_;
519 const VectorXd& variable_upper_bounds_;
520 const VectorXd& center_point_;
521 const VectorXd& norm_weight_;
522};
523
524// `DiagonalTrustRegionProblemFromQp` accepts a diagonal quadratic program and
525// information about the current solution and gradient and sets up the following
526// trust-region subproblem:
527// min_{x, y} (x - `primal_solution`)^T Q (x - `primal_solution`)
528// + `primal_gradient`^T (x - `primal_solution`)
529// - `dual_gradient`^T (y - `dual_solution`)
530// s.t. l <= x - `primal_solution` <= u
531// l_implicit <= y - `dual_solution` <= u_implicit
532// ||(x, y) - (`primal_solution`, `dual_solution`)||_W <= r,
533// where
534// ||(x, y)||_W = sqrt(0.5 * `primal_weight` ||x||^2 +
535// (0.5 / `primal_weight`) ||y||^2).
536// This class implements the same methods as `DiagonalTrustRegionProblem`, but
537// without the need to explicitly copy vectors.
538class DiagonalTrustRegionProblemFromQp {
539 public:
540 // A reference to the objects passed in the constructor is kept, so they must
541 // outlive the `DiagonalTrustRegionProblemFromQp` instance.
542 DiagonalTrustRegionProblemFromQp(const QuadraticProgram* qp,
543 const VectorXd* primal_solution,
544 const VectorXd* dual_solution,
545 const VectorXd* primal_gradient,
546 const VectorXd* dual_gradient,
547 const double primal_weight)
548 : qp_(*qp),
549 primal_solution_(*primal_solution),
550 dual_solution_(*dual_solution),
551 primal_gradient_(*primal_gradient),
552 dual_gradient_(*dual_gradient),
553 primal_size_(primal_solution->size()),
554 primal_weight_(primal_weight) {}
555
556 double CenterPoint(int64_t index) const {
557 return (index < primal_size_) ? primal_solution_[index]
558 : dual_solution_[index - primal_size_];
559 }
560
561 double NormWeight(int64_t index) const {
562 return (index < primal_size_) ? 0.5 * primal_weight_ : 0.5 / primal_weight_;
563 }
564
565 double LowerBound(int64_t index) const {
566 if (index < primal_size_) {
567 return qp_.variable_lower_bounds[index];
568 } else {
569 return std::isfinite(qp_.constraint_upper_bounds[index - primal_size_])
570 ? -std::numeric_limits<double>::infinity()
571 : 0.0;
572 }
573 }
574
575 double UpperBound(int64_t index) const {
576 if (index < primal_size_) {
577 return qp_.variable_upper_bounds[index];
578 } else {
579 return std::isfinite(qp_.constraint_lower_bounds[index - primal_size_])
580 ? std::numeric_limits<double>::infinity()
581 : 0.0;
582 }
583 }
584
585 double Objective(int64_t index) const {
586 return (index < primal_size_) ? primal_gradient_[index]
587 : -dual_gradient_[index - primal_size_];
588 }
589
590 double ObjectiveMatrixDiagonalAt(int64_t index) const {
591 if (qp_.objective_matrix.has_value()) {
592 return (index < primal_size_) ? qp_.objective_matrix->diagonal()[index]
593 : 0.0;
594 } else {
595 return 0.0;
596 }
597 }
598
599 private:
600 const QuadraticProgram& qp_;
601 const VectorXd& primal_solution_;
602 const VectorXd& dual_solution_;
603 const VectorXd& primal_gradient_;
604 const VectorXd& dual_gradient_;
605 const int64_t primal_size_;
606 const double primal_weight_;
607};
608
609// Computes a single coordinate projection of the scaled difference,
610// sqrt(NormWeight(i)) * (x[i] - CenterPoint(i)), to the corresponding box
611// constraints. As a function of scaling_factor, the difference is equal to
612// (Q[i, i] / NormWeight(i)) + `scaling_factor`)^{-1} *
613// (-c[i] / sqrt(NormWeight(i))),
614// where Q, c are the objective matrix and vector, respectively.
615template <typename DiagonalTrustRegionProblem>
617 const DiagonalTrustRegionProblem& problem, const int64_t index,
618 const double scaling_factor) {
619 const double weight = problem.NormWeight(index);
620 return std::min(
621 std::max((-problem.Objective(index) / std::sqrt(weight)) /
623 scaling_factor),
624 std::sqrt(weight) *
625 (problem.LowerBound(index) - problem.CenterPoint(index))),
626 std::sqrt(weight) *
627 (problem.UpperBound(index) - problem.CenterPoint(index)));
628}
629
630// Computes the norm of the projection of the difference vector,
631// x - center_point, to the corresponding box constraints. We are using the
632// standard Euclidean norm (instead of the weighted norm) because the solver
633// implicitly reformulates the problem to one with a Euclidean ball constraint
634// first.
635template <typename DiagonalTrustRegionProblem>
637 const Sharder& sharder,
638 const double scaling_factor) {
639 const double squared_norm =
640 sharder.ParallelSumOverShards([&](const Sharder::Shard& shard) {
641 const int64_t shard_start = sharder.ShardStart(shard.Index());
642 const int64_t shard_end =
643 shard_start + sharder.ShardSize(shard.Index());
644 double sum = 0.0;
645 for (int64_t i = shard_start; i < shard_end; ++i) {
646 const double projected_coordinate =
647 ProjectedValueOfScaledDifference(problem, i, scaling_factor);
648 sum += MathUtil::Square(projected_coordinate);
649 }
650 return sum;
651 });
652 return std::sqrt(squared_norm);
653}
654
655// Finds an approximately optimal scaling factor for the solution of the trust
656// region subproblem, which can be passed on to `ProjectedCoordinate()` to find
657// an approximately optimal solution to the trust region subproblem. The value
658// returned is guaranteed to be within `solve_tol` * max(1, s*) of the optimal
659// scaling s*.
660// TODO(user): figure out what accuracy is useful to callers and redo the
661// stopping criterion accordingly.
662template <typename DiagonalTrustRegionProblem>
664 const Sharder& sharder, const double target_radius,
665 const double solve_tol) {
666 // Determine a search interval using monotonicity of the squared norm of the
667 // candidate solution with respect to the scaling factor.
668 double scaling_factor_lower_bound = 0.0;
669 double scaling_factor_upper_bound = 1.0;
670 while (NormOfDeltaProjection(problem, sharder, scaling_factor_upper_bound) >=
671 target_radius) {
672 scaling_factor_lower_bound = scaling_factor_upper_bound;
673 scaling_factor_upper_bound *= 2;
674 }
675 // Invariant: `scaling_factor_upper_bound >= scaling_factor_lower_bound`.
676 while ((scaling_factor_upper_bound - scaling_factor_lower_bound) >=
677 solve_tol * std::max(1.0, scaling_factor_lower_bound)) {
678 const double middle =
679 (scaling_factor_lower_bound + scaling_factor_upper_bound) / 2.0;
680 // Norm is monotonically non-increasing as a function of scaling_factor.
681 if (NormOfDeltaProjection(problem, sharder, middle) <= target_radius) {
682 scaling_factor_upper_bound = middle;
683 } else {
684 scaling_factor_lower_bound = middle;
685 }
686 }
687 return (scaling_factor_upper_bound + scaling_factor_lower_bound) / 2.0;
688}
689
690// Solves the diagonal trust region problem using a binary search algorithm.
691// See comment above `SolveDiagonalTrustRegion()` in trust_region.h for the
692// meaning of `solve_tol`.
693template <typename DiagonalTrustRegionProblem>
695 const DiagonalTrustRegionProblem& problem, const Sharder& sharder,
696 const double target_radius, const double solve_tol) {
697 CHECK_GE(target_radius, 0.0);
698 const bool norm_weights_are_positive =
699 sharder.ParallelTrueForAllShards([&](const Sharder::Shard& shard) {
700 const int64_t shard_start = sharder.ShardStart(shard.Index());
701 for (int64_t i = shard_start;
702 i < shard_start + sharder.ShardSize(shard.Index()); ++i) {
703 if (problem.NormWeight(i) <= 0) {
704 return false;
705 }
706 }
707 return true;
708 });
709 CHECK(norm_weights_are_positive);
710 if (target_radius == 0.0) {
711 VectorXd solution(sharder.NumElements());
712 sharder.ParallelForEachShard([&](const Sharder::Shard& shard) {
713 const int64_t shard_start = sharder.ShardStart(shard.Index());
714 const int64_t shard_size = sharder.ShardSize(shard.Index());
715 for (int64_t i = shard_start; i < shard_start + shard_size; ++i) {
716 solution[i] = problem.CenterPoint(i);
717 }
718 });
719 return {.solution_step_size = 0.0,
720 .objective_value = 0.0,
721 .solution = std::move(solution)};
722 }
723 const double optimal_scaling =
724 FindScalingFactor(problem, sharder, target_radius, solve_tol);
725 VectorXd solution(sharder.NumElements());
726 sharder.ParallelForEachShard([&](const Sharder::Shard& shard) {
727 const int64_t shard_start = sharder.ShardStart(shard.Index());
728 const int64_t shard_size = sharder.ShardSize(shard.Index());
729 for (int64_t i = shard_start; i < shard_start + shard_size; ++i) {
730 const double weight = problem.NormWeight(i);
731 const double projected_value =
732 ProjectedValueOfScaledDifference(problem, i, optimal_scaling);
733 solution[i] =
734 problem.CenterPoint(i) + std::sqrt(1 / weight) * projected_value;
735 }
736 });
737 const double final_objective_value =
738 sharder.ParallelSumOverShards([&](const Sharder::Shard& shard) {
739 double local_sum = 0.0;
740 const int64_t shard_start = sharder.ShardStart(shard.Index());
741 for (int64_t i = shard_start;
742 i < shard_start + sharder.ShardSize(shard.Index()); ++i) {
743 const double diff = solution[i] - problem.CenterPoint(i);
744 local_sum +=
745 0.5 * diff * problem.ObjectiveMatrixDiagonalAt(i) * diff +
746 diff * problem.Objective(i);
747 }
748 return local_sum;
749 });
750 return {.solution_step_size = optimal_scaling,
751 .objective_value = final_objective_value,
752 .solution = solution};
753}
754
756 const VectorXd& objective_vector, const VectorXd& objective_matrix_diagonal,
757 const VectorXd& variable_lower_bounds,
758 const VectorXd& variable_upper_bounds, const VectorXd& center_point,
759 const VectorXd& norm_weights, const double target_radius,
760 const Sharder& sharder, const double solve_tolerance) {
762 &objective_vector, &objective_matrix_diagonal, &variable_lower_bounds,
763 &variable_upper_bounds, &center_point, &norm_weights);
764 return SolveDiagonalTrustRegionProblem(problem, sharder, target_radius,
765 solve_tolerance);
766}
767
769 const ShardedQuadraticProgram& sharded_qp, const VectorXd& primal_solution,
770 const VectorXd& dual_solution, const VectorXd& primal_gradient,
771 const VectorXd& dual_gradient, const double primal_weight,
772 double target_radius, const double solve_tolerance) {
773 const int64_t problem_size = sharded_qp.PrimalSize() + sharded_qp.DualSize();
774 DiagonalTrustRegionProblemFromQp problem(&sharded_qp.Qp(), &primal_solution,
775 &dual_solution, &primal_gradient,
776 &dual_gradient, primal_weight);
777
778 const Sharder joint_sharder(sharded_qp.PrimalSharder(), problem_size);
779 const bool norm_weights_are_positive =
780 joint_sharder.ParallelTrueForAllShards([&](const Sharder::Shard& shard) {
781 const int64_t shard_start = joint_sharder.ShardStart(shard.Index());
782 for (int64_t i = shard_start;
783 i < shard_start + joint_sharder.ShardSize(shard.Index()); ++i) {
784 if (problem.NormWeight(i) <= 0) {
785 return false;
786 }
787 }
788 return true;
789 });
790 CHECK(norm_weights_are_positive);
791 return SolveDiagonalTrustRegionProblem(problem, joint_sharder, target_radius,
792 solve_tolerance);
793}
794
795namespace {
796
797struct MaxNormBoundResult {
798 // `LagrangianPart::value` from `ComputePrimalGradient` and
799 // `ComputeDualGradient`, respectively.
801 // For the primal, the value
802 // ∇_x L(primal_solution, dual_solution)^T (x^* - primal_solution) where
803 // x^* is the solution of the primal trust region subproblem.
804 // For the dual, the value
805 // ∇_y L(primal_solution, dual_solution)^T (y^* - dual_solution) where
806 // y^* is the solution of the dual trust region subproblem.
807 // This will be a non-positive value for the primal and a non-negative
808 // value for the dual.
810};
811
812MaxNormBoundResult ComputeMaxNormPrimalTrustRegionBound(
813 const ShardedQuadraticProgram& sharded_qp, const VectorXd& primal_solution,
814 const double primal_radius, const VectorXd& dual_product) {
815 LagrangianPart primal_part =
816 ComputePrimalGradient(sharded_qp, primal_solution, dual_product);
817 internal::PrimalTrustRegionProblem primal_problem(
818 &sharded_qp.Qp(), &primal_solution, &primal_part.gradient);
819 TrustRegionResultStepSize trust_region_result = SolveTrustRegionStepSize(
820 primal_problem, primal_radius, sharded_qp.PrimalSharder());
821 return {.part_of_lagrangian_value = primal_part.value,
822 .trust_region_objective_delta = trust_region_result.objective_value};
823}
824
825MaxNormBoundResult ComputeMaxNormDualTrustRegionBound(
826 const ShardedQuadraticProgram& sharded_qp, const VectorXd& dual_solution,
827 const double dual_radius, const VectorXd& primal_product) {
828 LagrangianPart dual_part =
829 ComputeDualGradient(sharded_qp, dual_solution, primal_product);
830 internal::DualTrustRegionProblem dual_problem(
831 &sharded_qp.Qp(), &dual_solution, &dual_part.gradient);
832 TrustRegionResultStepSize trust_region_result = SolveTrustRegionStepSize(
833 dual_problem, dual_radius, sharded_qp.DualSharder());
834 return {.part_of_lagrangian_value = dual_part.value,
835 .trust_region_objective_delta = -trust_region_result.objective_value};
836}
837
838// Returns the largest radius that the primal could move (in Euclidean distance)
839// to match `weighted_distance`. This is the largest value of ||x||_2 such
840// that there exists a y such that max{||x||_P, ||y||_D} <= `weighted_distance`.
841double MaximumPrimalDistanceGivenWeightedDistance(
842 const double weighted_distance, const double primal_weight) {
843 return std::sqrt(2) * weighted_distance / std::sqrt(primal_weight);
844}
845
846// Returns the largest radius that the dual could move (in Euclidean distance)
847// to match `weighted_distance`. This is the largest value of ||y||_2 such
848// that there exists an x such that max{||x||_P, ||y||_D} <=
849// `weighted_distance`.
850double MaximumDualDistanceGivenWeightedDistance(const double weighted_distance,
851 const double primal_weight) {
852 return std::sqrt(2) * weighted_distance * std::sqrt(primal_weight);
853}
854
855LocalizedLagrangianBounds ComputeMaxNormLocalizedLagrangianBounds(
856 const ShardedQuadraticProgram& sharded_qp, const VectorXd& primal_solution,
857 const VectorXd& dual_solution, const double primal_weight,
858 const double radius, const Eigen::VectorXd& primal_product,
859 const Eigen::VectorXd& dual_product) {
860 const double primal_radius =
861 MaximumPrimalDistanceGivenWeightedDistance(radius, primal_weight);
862 const double dual_radius =
863 MaximumDualDistanceGivenWeightedDistance(radius, primal_weight);
864
865 // The max norm means that the optimization over the primal and the dual can
866 // be done independently.
867
868 MaxNormBoundResult primal_result = ComputeMaxNormPrimalTrustRegionBound(
869 sharded_qp, primal_solution, primal_radius, dual_product);
870
871 MaxNormBoundResult dual_result = ComputeMaxNormDualTrustRegionBound(
872 sharded_qp, dual_solution, dual_radius, primal_product);
873
874 const double lagrangian_value = primal_result.part_of_lagrangian_value +
875 dual_result.part_of_lagrangian_value;
876
877 return LocalizedLagrangianBounds{
878 .lagrangian_value = lagrangian_value,
879 .lower_bound =
880 lagrangian_value + primal_result.trust_region_objective_delta,
881 .upper_bound =
882 lagrangian_value + dual_result.trust_region_objective_delta,
883 .radius = radius};
884}
885
886LocalizedLagrangianBounds ComputeEuclideanNormLocalizedLagrangianBounds(
887 const ShardedQuadraticProgram& sharded_qp, const VectorXd& primal_solution,
888 const VectorXd& dual_solution, const double primal_weight,
889 const double radius, const Eigen::VectorXd& primal_product,
890 const Eigen::VectorXd& dual_product,
891 const bool use_diagonal_qp_trust_region_solver,
892 const double diagonal_qp_trust_region_solver_tolerance) {
893 const QuadraticProgram& qp = sharded_qp.Qp();
894 const LagrangianPart primal_part =
895 ComputePrimalGradient(sharded_qp, primal_solution, dual_product);
896 const LagrangianPart dual_part =
897 ComputeDualGradient(sharded_qp, dual_solution, primal_product);
898
899 VectorXd trust_region_solution;
900 const double lagrangian_value = primal_part.value + dual_part.value;
901
902 Sharder joint_sharder(
903 sharded_qp.PrimalSharder(),
904 /*num_elements=*/sharded_qp.PrimalSize() + sharded_qp.DualSize());
905
906 if (use_diagonal_qp_trust_region_solver) {
907 DiagonalTrustRegionProblemFromQp problem(
908 &qp, &primal_solution, &dual_solution, &primal_part.gradient,
909 &dual_part.gradient, primal_weight);
910
911 trust_region_solution = SolveDiagonalTrustRegionProblem(
912 problem, joint_sharder, radius,
913 diagonal_qp_trust_region_solver_tolerance)
914 .solution;
915 } else {
916 JointTrustRegionProblem joint_problem(&qp, &primal_solution, &dual_solution,
917 &primal_part.gradient,
918 &dual_part.gradient, primal_weight);
919
920 TrustRegionResultStepSize trust_region_result =
921 SolveTrustRegionStepSize(joint_problem, radius, joint_sharder);
922
923 trust_region_solution = ComputeSolution(
924 joint_problem, trust_region_result.solution_step_size, joint_sharder);
925 }
926
927 auto primal_trust_region_solution =
928 trust_region_solution.segment(0, sharded_qp.PrimalSize());
929 auto dual_trust_region_solution = trust_region_solution.segment(
930 sharded_qp.PrimalSize(), sharded_qp.DualSize());
931
932 // ∇_x L(`primal_solution`, `dual_solution`)^T (x - `primal_solution`)
933 double primal_objective_delta =
934 sharded_qp.PrimalSharder().ParallelSumOverShards(
935 [&](const Sharder::Shard& shard) {
936 return shard(primal_part.gradient)
937 .dot(shard(primal_trust_region_solution) -
938 shard(primal_solution));
939 });
940
941 // Take into account the quadratic's contribution if the diagonal QP solver
942 // is enabled.
943 if (use_diagonal_qp_trust_region_solver &&
944 sharded_qp.Qp().objective_matrix.has_value()) {
945 primal_objective_delta += sharded_qp.PrimalSharder().ParallelSumOverShards(
946 [&](const Sharder::Shard& shard) {
947 const int shard_start =
948 sharded_qp.PrimalSharder().ShardStart(shard.Index());
949 const int shard_size =
950 sharded_qp.PrimalSharder().ShardSize(shard.Index());
951 double sum = 0.0;
952 for (int i = shard_start; i < shard_start + shard_size; ++i) {
953 sum += 0.5 * sharded_qp.Qp().objective_matrix->diagonal()[i] *
954 MathUtil::Square(primal_trust_region_solution[i] -
955 primal_solution[i]);
956 }
957 return sum;
958 });
959 }
960
961 // ∇_y L(`primal_solution`, `dual_solution`)^T (y - `dual_solution`)
962 const double dual_objective_delta =
963 sharded_qp.DualSharder().ParallelSumOverShards(
964 [&](const Sharder::Shard& shard) {
965 return shard(dual_part.gradient)
966 .dot(shard(dual_trust_region_solution) - shard(dual_solution));
967 });
968
969 return LocalizedLagrangianBounds{
970 .lagrangian_value = lagrangian_value,
971 .lower_bound = lagrangian_value + primal_objective_delta,
972 .upper_bound = lagrangian_value + dual_objective_delta,
973 .radius = radius};
974}
975
976} // namespace
977
979 const ShardedQuadraticProgram& sharded_qp, const VectorXd& primal_solution,
980 const VectorXd& dual_solution, const PrimalDualNorm primal_dual_norm,
981 const double primal_weight, const double radius,
982 const VectorXd* primal_product, const VectorXd* dual_product,
983 const bool use_diagonal_qp_trust_region_solver,
984 const double diagonal_qp_trust_region_solver_tolerance) {
985 const QuadraticProgram& qp = sharded_qp.Qp();
986 VectorXd primal_product_storage;
987 VectorXd dual_product_storage;
988
989 if (primal_product == nullptr) {
990 primal_product_storage = TransposedMatrixVectorProduct(
993 primal_product = &primal_product_storage;
994 }
995 if (dual_product == nullptr) {
996 dual_product_storage =
998 sharded_qp.ConstraintMatrixSharder());
999 dual_product = &dual_product_storage;
1000 }
1001
1002 switch (primal_dual_norm) {
1004 return ComputeMaxNormLocalizedLagrangianBounds(
1005 sharded_qp, primal_solution, dual_solution, primal_weight, radius,
1006 *primal_product, *dual_product);
1008 return ComputeEuclideanNormLocalizedLagrangianBounds(
1009 sharded_qp, primal_solution, dual_solution, primal_weight, radius,
1010 *primal_product, *dual_product, use_diagonal_qp_trust_region_solver,
1011 diagonal_qp_trust_region_solver_tolerance);
1012 }
1013 LOG(FATAL) << "Unrecognized primal dual norm";
1014
1016}
1017
1018} // namespace operations_research::pdlp
IntegerValue size
int64_t max
static T Square(const T x)
Returns the square of x.
Definition mathutil.h:102
DiagonalTrustRegionProblem(const VectorXd *objective_vector, const VectorXd *objective_matrix_diagonal, const VectorXd *lower_bounds, const VectorXd *upper_bounds, const VectorXd *center_point, const VectorXd *norm_weights)
const Sharder & ConstraintMatrixSharder() const
Returns a Sharder intended for the columns of the QP's constraint matrix.
const Sharder & TransposedConstraintMatrixSharder() const
Returns a Sharder intended for the rows of the QP's constraint matrix.
const Eigen::SparseMatrix< double, Eigen::ColMajor, int64_t > & TransposedConstraintMatrix() const
Returns a reference to the transpose of the QP's constraint matrix.
const Sharder & PrimalSharder() const
Returns a Sharder intended for primal vectors.
bool ParallelTrueForAllShards(const std::function< bool(const Shard &)> &func) const
Definition sharder.cc:152
double ParallelSumOverShards(const std::function< double(const Shard &)> &func) const
Runs func on each of the shards and sums the results.
Definition sharder.cc:143
int64_t ShardSize(int shard) const
Definition sharder.h:186
void ParallelForEachShard(const std::function< void(const Shard &)> &func) const
Runs func on each of the shards.
Definition sharder.cc:109
int64_t NumElements() const
The number of elements that are split into shards.
Definition sharder.h:184
int64_t ShardStart(int shard) const
Definition sharder.h:192
double upper_bound
double lower_bound
int index
double solution
double LowerBound(const LinearExpression &linear_expression)
double UpperBound(const LinearExpression &linear_expression)
double ComputeInitialUndecidedComponents(const TrustRegionProblem &problem, int64_t start_index, int64_t end_index, std::vector< int64_t > &undecided_components)
double ProjectedValue(const TrustRegionProblem &problem, const int64_t index, const double step_size)
double RemoveCriticalStepsAboveThreshold(const TrustRegionProblem &problem, const double step_size_threshold, std::vector< int64_t > &undecided_components)
double CriticalStepSize(const TrustRegionProblem &problem, const int64_t index)
double RemoveCriticalStepsBelowThreshold(const TrustRegionProblem &problem, const double step_size_threshold, std::vector< int64_t > &undecided_components)
double EasyMedian(ArrayType array, ValueFunction value_function)
double RadiusSquaredOfUndecidedComponents(const TrustRegionProblem &problem, const double step_size, const std::vector< int64_t > &undecided_components)
Validation utilities for solvers.proto.
LagrangianPart ComputeDualGradient(const ShardedQuadraticProgram &sharded_qp, const VectorXd &dual_solution, const VectorXd &primal_product)
LagrangianPart ComputePrimalGradient(const ShardedQuadraticProgram &sharded_qp, const VectorXd &primal_solution, const VectorXd &dual_product)
TrustRegionResult SolveDiagonalTrustRegionProblem(const DiagonalTrustRegionProblem &problem, const Sharder &sharder, const double target_radius, const double solve_tol)
double NormOfDeltaProjection(const DiagonalTrustRegionProblem &problem, const Sharder &sharder, const double scaling_factor)
VectorXd TransposedMatrixVectorProduct(const Eigen::SparseMatrix< double, Eigen::ColMajor, int64_t > &matrix, const VectorXd &vector, const Sharder &sharder)
Definition sharder.cc:163
TrustRegionResult SolveDiagonalTrustRegion(const VectorXd &objective_vector, const VectorXd &objective_matrix_diagonal, const VectorXd &variable_lower_bounds, const VectorXd &variable_upper_bounds, const VectorXd &center_point, const VectorXd &norm_weights, const double target_radius, const Sharder &sharder, const double solve_tolerance)
TrustRegionResult SolveDiagonalQpTrustRegion(const ShardedQuadraticProgram &sharded_qp, const VectorXd &primal_solution, const VectorXd &dual_solution, const VectorXd &primal_gradient, const VectorXd &dual_gradient, const double primal_weight, double target_radius, const double solve_tolerance)
double FindScalingFactor(const DiagonalTrustRegionProblem &problem, const Sharder &sharder, const double target_radius, const double solve_tol)
@ kMaxNorm
The joint norm ||(x,y)||_PD = max{|x|_P, |y|_D}.
@ kEuclideanNorm
The joint norm (||(x,y)||_PD)^2 = (|x|_P)^2 + (|y|_D)^2.
TrustRegionResult SolveTrustRegion(const VectorXd &objective_vector, const VectorXd &variable_lower_bounds, const VectorXd &variable_upper_bounds, const VectorXd &center_point, const VectorXd &norm_weights, const double target_radius, const Sharder &sharder)
double ProjectedValueOfScaledDifference(const DiagonalTrustRegionProblem &problem, const int64_t index, const double scaling_factor)
LocalizedLagrangianBounds ComputeLocalizedLagrangianBounds(const ShardedQuadraticProgram &sharded_qp, const VectorXd &primal_solution, const VectorXd &dual_solution, const PrimalDualNorm primal_dual_norm, const double primal_weight, const double radius, const VectorXd *primal_product, const VectorXd *dual_product, const bool use_diagonal_qp_trust_region_solver, const double diagonal_qp_trust_region_solver_tolerance)
Coefficient ComputeObjectiveValue(const LinearBooleanProblem &problem, const std::vector< bool > &assignment)
Returns the objective value under the current assignment.
int64_t weight
Definition pack.cc:510
const Variable x
Definition qp_tests.cc:127
std::vector< double > lower_bounds
Definition lp_utils.cc:746
std::vector< double > upper_bounds
Definition lp_utils.cc:747
Eigen::SparseMatrix< double, Eigen::ColMajor, int64_t > constraint_matrix
Eigen::VectorXd solution
The solution.
double trust_region_objective_delta
double part_of_lagrangian_value
double objective_value
The value objective_vector^T * (solution - center_point).
std::vector< std::vector< int64_t > > undecided_components_by_shard
double solution_step_size
The step_size of the solution.
double radius_coefficient_of_decided_components