Google OR-Tools v9.11
a fast and portable software suite for combinatorial optimization
Loading...
Searching...
No Matches
fp_utils.cc
Go to the documentation of this file.
1// Copyright 2010-2024 Google LLC
2// Licensed under the Apache License, Version 2.0 (the "License");
3// you may not use this file except in compliance with the License.
4// You may obtain a copy of the License at
5//
6// http://www.apache.org/licenses/LICENSE-2.0
7//
8// Unless required by applicable law or agreed to in writing, software
9// distributed under the License is distributed on an "AS IS" BASIS,
10// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11// See the License for the specific language governing permissions and
12// limitations under the License.
13
15
16#include <limits.h>
17#include <stdint.h>
18
19#include <algorithm>
20#include <cmath>
21#include <cstdint>
22#include <cstdlib>
23#include <limits>
24#include <utility>
25#include <vector>
26
27#include "absl/base/casts.h"
28#include "absl/base/internal/endian.h"
29#include "absl/log/check.h"
30#include "absl/types/span.h"
31#include "ortools/util/bitset.h"
32
33namespace operations_research {
34
35namespace {
36
37void ReorderAndCapTerms(double* min, double* max) {
38 if (*min > *max) std::swap(*min, *max);
39 if (*min > 0.0) *min = 0.0;
40 if (*max < 0.0) *max = 0.0;
41}
42
43template <bool use_bounds>
44void ComputeScalingErrors(absl::Span<const double> input,
45 absl::Span<const double> lb,
46 absl::Span<const double> ub, double scaling_factor,
48 double* max_scaled_sum_error) {
49 double max_error = 0.0;
50 double min_error = 0.0;
52 const int size = input.size();
53 for (int i = 0; i < size; ++i) {
54 const double x = input[i];
55 if (x == 0.0) continue;
56 const double scaled = x * scaling_factor;
57
58 if (scaled == 0.0) {
59 *max_relative_coeff_error = std::numeric_limits<double>::infinity();
60 } else {
61 *max_relative_coeff_error = std::max(
62 *max_relative_coeff_error, std::abs(std::round(scaled) / scaled - 1));
63 }
64
65 const double error = std::round(scaled) - scaled;
66 const double error_lb = (use_bounds ? error * lb[i] : -error);
67 const double error_ub = (use_bounds ? error * ub[i] : error);
68 max_error += std::max(error_lb, error_ub);
69 min_error += std::min(error_lb, error_ub);
70 }
71 *max_scaled_sum_error = std::max(std::abs(max_error), std::abs(min_error));
72}
73
74template <bool use_bounds>
75void GetBestScalingOfDoublesToInt64(absl::Span<const double> input,
76 absl::Span<const double> lb,
77 absl::Span<const double> ub,
78 int64_t max_absolute_sum,
79 double* scaling_factor) {
80 const double kInfinity = std::numeric_limits<double>::infinity();
81
82 // We start by initializing the returns value to the "error" state.
83 *scaling_factor = 0;
84
85 // Abort in the "error" state if max_absolute_sum doesn't make sense.
86 if (max_absolute_sum < 0) return;
87
88 // Our scaling scaling_factor will be 2^factor_exponent.
89 //
90 // TODO(user): Consider using a non-power of two factor if the error can't be
91 // zero? Note however that using power of two has the extra advantage that
92 // subsequent int64_t -> double -> scaled back to int64_t will loose no extra
93 // information.
94 int factor_exponent = 0;
95 uint64_t sum_min = 0; // negated.
96 uint64_t sum_max = 0;
97 bool recompute_sum = false;
98 bool is_first_value = true;
99 const int msb = MostSignificantBitPosition64(max_absolute_sum);
100 const int size = input.size();
101 for (int i = 0; i < size; ++i) {
102 const double x = input[i];
103 double min_term = use_bounds ? x * lb[i] : -x;
104 double max_term = use_bounds ? x * ub[i] : x;
105 ReorderAndCapTerms(&min_term, &max_term);
106
107 // If min_term or max_term is not finite, then abort in the "error" state.
108 if (!(min_term > -kInfinity && max_term < kInfinity)) return;
109
110 // A value of zero can just be skipped (and needs to because the code below
111 // doesn't handle it correctly).
112 if (min_term == 0.0 && max_term == 0.0) continue;
113
114 // Compute the greatest candidate such that
115 // round(fabs(c).2^candidate) <= max_absolute_sum.
116 const double c = std::max(-min_term, max_term);
117 int candidate = msb - ilogb(c);
118 if (candidate >= std::numeric_limits<double>::max_exponent) {
119 candidate = std::numeric_limits<double>::max_exponent - 1;
120 }
121 if (std::round(ldexp(std::abs(c), candidate)) > max_absolute_sum) {
122 --candidate;
123 }
124 DCHECK_LE(std::abs(static_cast<int64_t>(round(ldexp(c, candidate)))),
125 max_absolute_sum);
126
127 // Update factor_exponent which is the min of all the candidates.
128 if (is_first_value || candidate < factor_exponent) {
129 is_first_value = false;
130 factor_exponent = candidate;
131 recompute_sum = true;
132 } else {
133 // Update the sum of absolute values of the numbers seen so far.
134 sum_min -=
135 static_cast<int64_t>(std::round(ldexp(min_term, factor_exponent)));
136 sum_max +=
137 static_cast<int64_t>(std::round(ldexp(max_term, factor_exponent)));
138 if (sum_min > static_cast<uint64_t>(max_absolute_sum) ||
139 sum_max > static_cast<uint64_t>(max_absolute_sum)) {
140 factor_exponent--;
141 recompute_sum = true;
142 }
143 }
144
145 // TODO(user): This is not super efficient, but note that in practice we
146 // will only scan the vector of values about log(size) times. It is possible
147 // to maintain an upper bound on the abs_sum in linear time instead, but the
148 // code and corner cases are a lot more involved. Also, we currently only
149 // use this code in situations where its run-time is negligeable compared to
150 // the rest.
151 while (recompute_sum) {
152 sum_min = 0;
153 sum_max = 0;
154 for (int j = 0; j <= i; ++j) {
155 const double x = input[j];
156 double min_term = use_bounds ? x * lb[j] : -x;
157 double max_term = use_bounds ? x * ub[j] : x;
158 ReorderAndCapTerms(&min_term, &max_term);
159 sum_min -=
160 static_cast<int64_t>(std::round(ldexp(min_term, factor_exponent)));
161 sum_max +=
162 static_cast<int64_t>(std::round(ldexp(max_term, factor_exponent)));
163 }
164 if (sum_min > static_cast<uint64_t>(max_absolute_sum) ||
165 sum_max > static_cast<uint64_t>(max_absolute_sum)) {
166 factor_exponent--;
167 } else {
168 recompute_sum = false;
169 }
170 }
171 }
172 *scaling_factor = ldexp(1.0, factor_exponent);
173}
174
175} // namespace
176
177void ComputeScalingErrors(absl::Span<const double> input,
178 absl::Span<const double> lb,
179 absl::Span<const double> ub, double scaling_factor,
181 double* max_scaled_sum_error) {
182 ComputeScalingErrors<true>(input, lb, ub, scaling_factor,
183 max_relative_coeff_error, max_scaled_sum_error);
184}
185
186double GetBestScalingOfDoublesToInt64(absl::Span<const double> input,
187 absl::Span<const double> lb,
188 absl::Span<const double> ub,
189 int64_t max_absolute_sum) {
190 double scaling_factor;
191 GetBestScalingOfDoublesToInt64<true>(input, lb, ub, max_absolute_sum,
192 &scaling_factor);
193 DCHECK(std::isfinite(scaling_factor));
194 return scaling_factor;
195}
196
197void GetBestScalingOfDoublesToInt64(absl::Span<const double> input,
198 int64_t max_absolute_sum,
199 double* scaling_factor,
200 double* max_relative_coeff_error) {
201 double max_scaled_sum_error;
202 GetBestScalingOfDoublesToInt64<false>(input, {}, {}, max_absolute_sum,
203 scaling_factor);
204 ComputeScalingErrors<false>(input, {}, {}, *scaling_factor,
205 max_relative_coeff_error, &max_scaled_sum_error);
206 DCHECK(std::isfinite(*scaling_factor));
207}
208
209int64_t ComputeGcdOfRoundedDoubles(absl::Span<const double> x,
210 double scaling_factor) {
211 DCHECK(std::isfinite(scaling_factor));
212 int64_t gcd = 0;
213 const int size = static_cast<int>(x.size());
214 for (int i = 0; i < size && gcd != 1; ++i) {
215 int64_t value = std::abs(std::round(x[i] * scaling_factor));
216 DCHECK_GE(value, 0);
217 if (value == 0) continue;
218 if (gcd == 0) {
219 gcd = value;
220 continue;
221 }
222 // GCD(gcd, value) = GCD(value, gcd % value);
223 while (value != 0) {
224 const int64_t r = gcd % value;
225 gcd = value;
226 value = r;
227 }
228 }
229 DCHECK_GE(gcd, 0);
230 return gcd > 0 ? gcd : 1;
231}
232
233int fast_ilogb(double value) {
234 static_assert(CHAR_BIT == 8);
235 static_assert(sizeof(double) == 8);
236 // Get little-endian bit-representation of the floating point value.
237 const uint64_t bit_rep =
238 absl::little_endian::FromHost64(absl::bit_cast<uint64_t>(value));
239 return static_cast<int>((bit_rep >> 52) & 0x7FF) - 1023;
240}
241
242void fast_scalbn_inplace(double& mutable_value, int exponent) {
243 mutable_value = fast_scalbn(mutable_value, exponent);
244}
245
246double fast_scalbn(double value, int exponent) {
247 if (value == 0.0) return 0.0;
248 uint64_t bit_rep =
249 absl::little_endian::FromHost64(absl::bit_cast<uint64_t>(value));
250 // Binary representation is: (sign-bit)(11 exponent bits)(52 mantissa bits)
251 constexpr uint64_t kExponentMask(0x7FF0000000000000);
252 // This addition relies on the fact that signed numbers are written in
253 // two-s complement, and is correct as long as the sum does not
254 // overflow/underflow the result.
255 const uint64_t value_exponent =
256 (bit_rep + (static_cast<uint64_t>(exponent) << 52)) & kExponentMask;
257 bit_rep &= ~kExponentMask;
258 bit_rep |= value_exponent;
259 return absl::bit_cast<double>(absl::little_endian::ToHost64(bit_rep));
260}
261
262} // namespace operations_research
IntegerValue size
int64_t max
int64_t min
int64_t value
In SWIG mode, we don't want anything besides these top-level includes.
int fast_ilogb(double value)
Definition fp_utils.cc:233
double fast_scalbn(double value, int exponent)
Definition fp_utils.cc:246
double GetBestScalingOfDoublesToInt64(absl::Span< const double > input, absl::Span< const double > lb, absl::Span< const double > ub, int64_t max_absolute_sum)
Definition fp_utils.cc:186
void ComputeScalingErrors(absl::Span< const double > input, absl::Span< const double > lb, absl::Span< const double > ub, double scaling_factor, double *max_relative_coeff_error, double *max_scaled_sum_error)
Definition fp_utils.cc:177
int64_t ComputeGcdOfRoundedDoubles(absl::Span< const double > x, double scaling_factor)
Definition fp_utils.cc:209
void fast_scalbn_inplace(double &mutable_value, int exponent)
Definition fp_utils.cc:242
int MostSignificantBitPosition64(uint64_t n)
Definition bitset.h:235
static int input(yyscan_t yyscanner)
const Variable x
Definition qp_tests.cc:127
double max_relative_coeff_error
Definition lp_utils.cc:737