Google OR-Tools v9.11
a fast and portable software suite for combinatorial optimization
Loading...
Searching...
No Matches
trust_region.h
Go to the documentation of this file.
1// Copyright 2010-2024 Google LLC
2// Licensed under the Apache License, Version 2.0 (the "License");
3// you may not use this file except in compliance with the License.
4// You may obtain a copy of the License at
5//
6// http://www.apache.org/licenses/LICENSE-2.0
7//
8// Unless required by applicable law or agreed to in writing, software
9// distributed under the License is distributed on an "AS IS" BASIS,
10// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11// See the License for the specific language governing permissions and
12// limitations under the License.
13
14#ifndef PDLP_TRUST_REGION_H_
15#define PDLP_TRUST_REGION_H_
16
17#include <algorithm>
18#include <cmath>
19#include <cstdint>
20#include <limits>
21#include <vector>
22
23#include "Eigen/Core"
24#include "absl/algorithm/container.h"
25#include "absl/log/check.h"
29
31
33 // The step_size of the solution.
35 // The value objective_vector^T * (solution - center_point)
36 // when using the linear-time solver for LPs and QPs with objective matrix not
37 // treated in the prox term. When using the approximate solver for QPs, this
38 // field contains the value
39 // 0.5 * (solution - center_point)^T * objective_matrix * (
40 // solution - center_point)
41 // + objective_vector^T * (solution - center_point)
42 // instead.
44 // The solution.
45 Eigen::VectorXd solution;
46};
47
48// Solves the following trust-region problem with bound constraints:
49// min_x `objective_vector`^T * (x - `center_point`)
50// s.t. `variable_lower_bounds` <= x <= `variable_upper_bounds`
51// || x - `center_point` ||_W <= `target_radius`
52// where ||y||_W = sqrt(sum_i `norm_weights`[i] * y[i]^2)
53// using an exact linear-time method.
54// `sharder` should have the same size as the number of variables in the
55// problem.
56// Assumes that there is always a feasible solution, that is, that
57// `variable_lower_bounds` <= `center_point` <= `variable_upper_bounds`, and
58// that `norm_weights` > 0, for 0 <= i < `sharder.NumElements()`.
59TrustRegionResult SolveTrustRegion(const Eigen::VectorXd& objective_vector,
60 const Eigen::VectorXd& variable_lower_bounds,
61 const Eigen::VectorXd& variable_upper_bounds,
62 const Eigen::VectorXd& center_point,
63 const Eigen::VectorXd& norm_weights,
64 double target_radius,
65 const Sharder& sharder);
66
67// Solves the following trust-region problem with bound constraints:
68// min_x (1/2) * (x - `center_point`)^T * Q * (x - `center_point`)
69// + `objective_vector`^T * (x - `center_point`)
70// s.t. `variable_lower_bounds` <= x <= `variable_upper_bounds`
71// || x - `center_point` ||_W <= `target_radius`
72// where ||y||_W = sqrt(sum_i `norm_weights`[i] * y[i]^2).
73// It replaces the ball constraint || x - `center_point` ||_W <= `target_radius`
74// with the equivalent constraint 0.5 * || x - `center_point` ||_W^2 <= 0.5 *
75// `target_radius`^2 and does a binary search for a Lagrange multiplier for the
76// latter constraint that is at most (`solve_tolerance` * max(1, lambda*)) away
77// from the optimum Lagrange multiplier lambda*.
78// `sharder` should have the same size as the number of variables in the
79// problem.
80// Assumes that there is always a feasible solution, that is, that
81// `variable_lower_bounds` <= `center_point` <= `variable_upper_bounds`, and
82// that `norm_weights` > 0, for 0 <= i < `sharder.NumElements()`.
84 const Eigen::VectorXd& objective_vector,
85 const Eigen::VectorXd& objective_matrix_diagonal,
86 const Eigen::VectorXd& variable_lower_bounds,
87 const Eigen::VectorXd& variable_upper_bounds,
88 const Eigen::VectorXd& center_point, const Eigen::VectorXd& norm_weights,
89 double target_radius, const Sharder& sharder, double solve_tolerance);
90
91// Like `SolveDiagonalTrustRegion`, but extracts the problem data from a
92// `ShardedQuadraticProgram` and implicitly concatenates the primal and dual
93// parts before solving the trust-region subproblem.
95 const ShardedQuadraticProgram& sharded_qp,
96 const Eigen::VectorXd& primal_solution,
97 const Eigen::VectorXd& dual_solution,
98 const Eigen::VectorXd& primal_gradient,
99 const Eigen::VectorXd& dual_gradient, const double primal_weight,
100 double target_radius, double solve_tolerance);
101
103 // The value of the Lagrangian function L(x, y) at the given solution.
105 // A lower bound on the Lagrangian value, valid for the given radius.
107 // An upper bound on the Lagrangian value, valid for the given radius.
109 // The radius used when computing the bounds.
110 double radius;
111};
112
113inline double BoundGap(const LocalizedLagrangianBounds& bounds) {
114 return bounds.upper_bound - bounds.lower_bound;
115}
116
117// Defines a norm on a vector partitioned as (x, y) where x is the primal and y
118// is the dual. The enum values define a joint norm as a function of ||x||_P and
119// ||y||_D, whose definition depends on the context.
120enum class PrimalDualNorm {
121 // The joint norm ||(x,y)||_PD = max{||x||_P, ||y||_D}.
122 kMaxNorm,
123 // The joint norm (||(x,y)||_PD)^2 = (||x||_P)^2 + (||y||_D)^2.
125};
126
127// Recall the saddle-point formulation OPT = min_x max_y L(x, y) defined at
128// https://developers.google.com/optimization/lp/pdlp_math#saddle-point_formulation.
129// This function computes lower and upper bounds on OPT with an additional ball
130// or "trust-region" constraint on the domains of x and y.
131//
132// The bounds are derived from the solution of the following problem:
133// min_{x,y}
134// ∇_x L(`primal_solution`, `dual_solution`)^T (x - `primal_solution`)
135// - ∇_y L(`primal_solution`, `dual_solution`)^T (y - `dual_solution`)
136// subject to
137// ||(x - `primal_solution`, y - `dual_solution`)||_PD <= `radius`,
138// where x and y are constrained to their respective bounds and ||(x,y)||_PD is
139// defined by `primal_dual_norm`.
140// When `use_diagonal_qp_trust_region_solver` is true, the solver instead solves
141// the following problem:
142// min_{x,y}
143// ∇_x L(`primal_solution`, `dual_solution`)^T (x - `primal_solution`)
144// - ∇_y L(`primal_solution`, `dual_solution`)^T (y - `dual_solution`)
145// + (1 / 2) * (x - `primal_solution`)^T * `objective_matrix`
146// * (x - `primal_solution`),
147// subject to
148// ||(x - `primal_solution`, y - `dual_solution`)||_PD <= `radius`.
149// `use_diagonal_qp_trust_region_solver` == true assumes that `primal_dual_norm`
150// is the Euclidean norm and the objective matrix is diagonal.
151// See `SolveDiagonalTrustRegion()` above for the meaning of
152// `diagonal_qp_trust_region_solver_tolerance`.
153//
154// In the context of primal_dual_norm, the primal norm ||.||_P is defined as
155// (||x||_P)^2 = (1 / 2) * `primal_weight` * ||x||_2^2, and the dual norm
156// ||.||_D is defined as
157// (||y||_D)^2 = (1 / 2) * (1 / `primal_weight`) * ||y||_2^2.
158//
159// Given an optimal solution (x, y) to the above problem, the lower bound is
160// computed as L(`primal_solution`, `dual_solution`) +
161// ∇_x L(`primal_solution`, `dual_solution`)^T (x - `primal_solution`)
162// and the upper bound is computed as L(`primal_solution`, `dual_solution`) +
163// ∇_y L(`primal_solution`, `dual_solution`)^T (y - `dual_solution`).
164//
165// The bounds are "localized" because they are guaranteed to bound OPT only if
166// the ||.||_PD ball contains an optimal solution.
167// `primal_product` and `dual_product` optionally specify the values of
168// `constraint_matrix` * `primal_solution` and `constraint_matrix.transpose()` *
169// `dual_solution`, respectively. If set to nullptr, they will be computed.
171 const ShardedQuadraticProgram& sharded_qp,
172 const Eigen::VectorXd& primal_solution,
173 const Eigen::VectorXd& dual_solution, PrimalDualNorm primal_dual_norm,
174 double primal_weight, double radius, const Eigen::VectorXd* primal_product,
175 const Eigen::VectorXd* dual_product,
176 bool use_diagonal_qp_trust_region_solver,
177 double diagonal_qp_trust_region_solver_tolerance);
178
179namespace internal {
180
181// These functions templated on `TrustRegionProblem` compute values useful to
182// the trust region solve. The templated `TrustRegionProblem` type should
183// provide methods:
184// `double Objective(int64_t index) const;`
185// `double LowerBound(int64_t index) const;`
186// `double UpperBound(int64_t index) const;`
187// `double CenterPoint(int64_t index) const;`
188// `double NormWeight(int64_t index) const;`
189// See trust_region.cc for more details and several implementations.
190
191// The distance (in the indexed element) from the center point to the bound, in
192// the direction that reduces the objective.
193template <typename TrustRegionProblem>
194double DistanceAtCriticalStepSize(const TrustRegionProblem& problem,
195 const int64_t index) {
196 if (problem.Objective(index) == 0.0) {
197 return 0.0;
198 }
199 if (problem.Objective(index) > 0.0) {
200 return problem.LowerBound(index) - problem.CenterPoint(index);
201 } else {
202 return problem.UpperBound(index) - problem.CenterPoint(index);
203 }
204}
205
206// The critical step size is the step size at which the indexed element hits its
207// bound (or infinity if that doesn't happen).
208template <typename TrustRegionProblem>
209double CriticalStepSize(const TrustRegionProblem& problem,
210 const int64_t index) {
211 if (problem.Objective(index) == 0.0) {
212 return std::numeric_limits<double>::infinity();
213 }
214 return -problem.NormWeight(index) *
215 DistanceAtCriticalStepSize(problem, index) / problem.Objective(index);
216}
217
218// The value of the indexed element at the given step_size, projected onto the
219// bounds.
220template <typename TrustRegionProblem>
221double ProjectedValue(const TrustRegionProblem& problem, const int64_t index,
222 const double step_size) {
223 const double full_step =
224 problem.CenterPoint(index) -
225 step_size * problem.Objective(index) / problem.NormWeight(index);
226 return std::min(std::max(full_step, problem.LowerBound(index)),
227 problem.UpperBound(index));
228}
229
230// An easy way of computing medians that's slightly off when the length of the
231// array is even. `array` is intentionally passed by value.
232// `value_function` maps an element of `array` to its (double) value. Returns
233// the value of the median element.
234template <typename ArrayType, typename ValueFunction>
235double EasyMedian(ArrayType array, ValueFunction value_function) {
236 CHECK_GT(array.size(), 0);
237 auto middle = array.begin() + (array.size() / 2);
238 absl::c_nth_element(array, middle,
239 [&](typename ArrayType::value_type lhs,
240 typename ArrayType::value_type rhs) {
241 return value_function(lhs) < value_function(rhs);
242 });
243 return value_function(*middle);
244}
245
246// Lists the undecided components (from [`start_index`, `end_index`) as those
247// with finite critical step sizes. The components with infinite critical step
248// sizes will never hit their bounds, so returns their contribution to square of
249// the radius.
250template <typename TrustRegionProblem>
252 const TrustRegionProblem& problem, int64_t start_index, int64_t end_index,
253 std::vector<int64_t>& undecided_components) {
254 // TODO(user): Evaluate dropping this `reserve()`, since it wastes space
255 // if many components are decided.
256 undecided_components.clear();
257 undecided_components.reserve(end_index - start_index);
258 double radius_coefficient = 0.0;
259 for (int64_t index = start_index; index < end_index; ++index) {
260 if (std::isfinite(internal::CriticalStepSize(problem, index))) {
261 undecided_components.push_back(index);
262 } else {
263 // Simplified from norm_weight * (objective / norm_weight)^2.
264 radius_coefficient += MathUtil::Square(problem.Objective(index)) /
265 problem.NormWeight(index);
266 }
267 }
268 return radius_coefficient;
269}
270
271template <typename TrustRegionProblem>
273 const TrustRegionProblem& problem, const double step_size,
274 const std::vector<int64_t>& undecided_components) {
275 return absl::c_accumulate(
276 undecided_components, 0.0, [&](double sum, int64_t index) {
277 const double distance_at_projected_value =
278 internal::ProjectedValue(problem, index, step_size) -
279 problem.CenterPoint(index);
280 return sum + problem.NormWeight(index) *
281 MathUtil::Square(distance_at_projected_value);
282 });
283}
284
285// Points whose critical step sizes are greater than or equal to
286// `step_size_threshold` are eliminated from the undecided components (we know
287// they'll be determined by center_point - step_size * objective /
288// norm_weights). Returns the coefficient of step_size^2 that accounts of the
289// contribution of the removed variables to the radius squared.
290template <typename TrustRegionProblem>
292 const TrustRegionProblem& problem, const double step_size_threshold,
293 std::vector<int64_t>& undecided_components) {
294 double variable_radius_coefficient = 0.0;
295 for (const int64_t index : undecided_components) {
296 if (internal::CriticalStepSize(problem, index) >= step_size_threshold) {
297 // Simplified from norm_weight * (objective / norm_weight)^2.
298 variable_radius_coefficient +=
299 MathUtil::Square(problem.Objective(index)) /
300 problem.NormWeight(index);
301 }
302 }
303 auto result =
304 std::remove_if(undecided_components.begin(), undecided_components.end(),
305 [&](const int64_t index) {
306 return internal::CriticalStepSize(problem, index) >=
307 step_size_threshold;
308 });
309 undecided_components.erase(result, undecided_components.end());
310 return variable_radius_coefficient;
311}
312
313// Points whose critical step sizes are smaller than or equal to
314// `step_size_threshold` are eliminated from the undecided components (we know
315// they'll always be at their bounds). Returns the weighted distance squared
316// from the center point for the removed components.
317template <typename TrustRegionProblem>
319 const TrustRegionProblem& problem, const double step_size_threshold,
320 std::vector<int64_t>& undecided_components) {
321 double radius_sq = 0.0;
322 for (const int64_t index : undecided_components) {
323 if (internal::CriticalStepSize(problem, index) <= step_size_threshold) {
324 radius_sq += problem.NormWeight(index) *
327 }
328 }
329 auto result =
330 std::remove_if(undecided_components.begin(), undecided_components.end(),
331 [&](const int64_t index) {
332 return internal::CriticalStepSize(problem, index) <=
333 step_size_threshold;
334 });
335 undecided_components.erase(result, undecided_components.end());
336 return radius_sq;
337}
338
339// `PrimalTrustRegionProblem` defines the primal trust region problem given a
340// `QuadraticProgram`, `primal_solution`, and `primal_gradient`. It captures
341// const references to the constructor arguments, which should outlive the class
342// instance.
343// The corresponding trust region problem is
344// min_x `primal_gradient`^T * (x - `primal_solution`)
345// s.t. `qp.variable_lower_bounds` <= x <= `qp.variable_upper_bounds`
346// || x - `primal_solution` ||_2 <= target_radius
348 public:
350 const Eigen::VectorXd* primal_solution,
351 const Eigen::VectorXd* primal_gradient,
352 const double norm_weight = 1.0)
353 : qp_(*qp),
354 primal_solution_(*primal_solution),
355 primal_gradient_(*primal_gradient),
356 norm_weight_(norm_weight) {}
357 double Objective(int64_t index) const { return primal_gradient_[index]; }
358 double LowerBound(int64_t index) const {
359 return qp_.variable_lower_bounds[index];
360 }
361 double UpperBound(int64_t index) const {
362 return qp_.variable_upper_bounds[index];
363 }
364 double CenterPoint(int64_t index) const { return primal_solution_[index]; }
365 double NormWeight(int64_t index) const { return norm_weight_; }
366
367 private:
368 const QuadraticProgram& qp_;
369 const Eigen::VectorXd& primal_solution_;
370 const Eigen::VectorXd& primal_gradient_;
371 const double norm_weight_;
372};
373
374// `DualTrustRegionProblem` defines the dual trust region problem given a
375// `QuadraticProgram`, `dual_solution`, and `dual_gradient`. It captures const
376// references to the constructor arguments, which should outlive the class
377// instance.
378// The corresponding trust region problem is
379// max_y `dual_gradient`^T * (y - `dual_solution`)
380// s.t. `qp.implicit_dual_lower_bounds` <= y <= `qp.implicit_dual_upper_bounds`
381// || y - `dual_solution` ||_2 <= target_radius
382// where the implicit dual bounds are those given in
383// https://developers.google.com/optimization/lp/pdlp_math#dual_variable_bounds
385 public:
387 const Eigen::VectorXd* dual_solution,
388 const Eigen::VectorXd* dual_gradient,
389 const double norm_weight = 1.0)
390 : qp_(*qp),
391 dual_solution_(*dual_solution),
392 dual_gradient_(*dual_gradient),
393 norm_weight_(norm_weight) {}
394 double Objective(int64_t index) const {
395 // The objective is negated because the trust region problem objective is
396 // minimize, but for the dual problem we want to maximize the gradient.
397 return -dual_gradient_[index];
398 }
399 double LowerBound(int64_t index) const {
400 return std::isfinite(qp_.constraint_upper_bounds[index])
401 ? -std::numeric_limits<double>::infinity()
402 : 0.0;
403 }
404 double UpperBound(int64_t index) const {
405 return std::isfinite(qp_.constraint_lower_bounds[index])
406 ? std::numeric_limits<double>::infinity()
407 : 0.0;
408 }
409 double CenterPoint(int64_t index) const { return dual_solution_[index]; }
410 double NormWeight(int64_t index) const { return norm_weight_; }
411
412 private:
413 const QuadraticProgram& qp_;
414 const Eigen::VectorXd& dual_solution_;
415 const Eigen::VectorXd& dual_gradient_;
416 const double norm_weight_;
417};
418
419} // namespace internal
420
421} // namespace operations_research::pdlp
422
423#endif // PDLP_TRUST_REGION_H_
static T Square(const T x)
Returns the square of x.
Definition mathutil.h:102
DualTrustRegionProblem(const QuadraticProgram *qp, const Eigen::VectorXd *dual_solution, const Eigen::VectorXd *dual_gradient, const double norm_weight=1.0)
PrimalTrustRegionProblem(const QuadraticProgram *qp, const Eigen::VectorXd *primal_solution, const Eigen::VectorXd *primal_gradient, const double norm_weight=1.0)
int index
double ComputeInitialUndecidedComponents(const TrustRegionProblem &problem, int64_t start_index, int64_t end_index, std::vector< int64_t > &undecided_components)
double ProjectedValue(const TrustRegionProblem &problem, const int64_t index, const double step_size)
double RemoveCriticalStepsAboveThreshold(const TrustRegionProblem &problem, const double step_size_threshold, std::vector< int64_t > &undecided_components)
double DistanceAtCriticalStepSize(const TrustRegionProblem &problem, const int64_t index)
double CriticalStepSize(const TrustRegionProblem &problem, const int64_t index)
double RemoveCriticalStepsBelowThreshold(const TrustRegionProblem &problem, const double step_size_threshold, std::vector< int64_t > &undecided_components)
double EasyMedian(ArrayType array, ValueFunction value_function)
double RadiusSquaredOfUndecidedComponents(const TrustRegionProblem &problem, const double step_size, const std::vector< int64_t > &undecided_components)
Validation utilities for solvers.proto.
TrustRegionResult SolveDiagonalTrustRegion(const VectorXd &objective_vector, const VectorXd &objective_matrix_diagonal, const VectorXd &variable_lower_bounds, const VectorXd &variable_upper_bounds, const VectorXd &center_point, const VectorXd &norm_weights, const double target_radius, const Sharder &sharder, const double solve_tolerance)
TrustRegionResult SolveDiagonalQpTrustRegion(const ShardedQuadraticProgram &sharded_qp, const VectorXd &primal_solution, const VectorXd &dual_solution, const VectorXd &primal_gradient, const VectorXd &dual_gradient, const double primal_weight, double target_radius, const double solve_tolerance)
@ kMaxNorm
The joint norm ||(x,y)||_PD = max{|x|_P, |y|_D}.
@ kEuclideanNorm
The joint norm (||(x,y)||_PD)^2 = (|x|_P)^2 + (|y|_D)^2.
TrustRegionResult SolveTrustRegion(const VectorXd &objective_vector, const VectorXd &variable_lower_bounds, const VectorXd &variable_upper_bounds, const VectorXd &center_point, const VectorXd &norm_weights, const double target_radius, const Sharder &sharder)
LocalizedLagrangianBounds ComputeLocalizedLagrangianBounds(const ShardedQuadraticProgram &sharded_qp, const VectorXd &primal_solution, const VectorXd &dual_solution, const PrimalDualNorm primal_dual_norm, const double primal_weight, const double radius, const VectorXd *primal_product, const VectorXd *dual_product, const bool use_diagonal_qp_trust_region_solver, const double diagonal_qp_trust_region_solver_tolerance)
double BoundGap(const LocalizedLagrangianBounds &bounds)
double lagrangian_value
The value of the Lagrangian function L(x, y) at the given solution.
double radius
The radius used when computing the bounds.
double lower_bound
A lower bound on the Lagrangian value, valid for the given radius.
double upper_bound
An upper bound on the Lagrangian value, valid for the given radius.
Eigen::VectorXd solution
The solution.
double solution_step_size
The step_size of the solution.