Google OR-Tools v9.12
a fast and portable software suite for combinatorial optimization
Loading...
Searching...
No Matches
fp_utils.h
Go to the documentation of this file.
1// Copyright 2010-2025 Google LLC
2// Licensed under the Apache License, Version 2.0 (the "License");
3// you may not use this file except in compliance with the License.
4// You may obtain a copy of the License at
5//
6// http://www.apache.org/licenses/LICENSE-2.0
7//
8// Unless required by applicable law or agreed to in writing, software
9// distributed under the License is distributed on an "AS IS" BASIS,
10// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11// See the License for the specific language governing permissions and
12// limitations under the License.
13
14// Utility functions on IEEE floating-point numbers.
15// Implemented on float, double, and long double.
16//
17// Also a placeholder for tools controlling and checking FPU rounding modes.
18//
19// IMPORTANT NOTICE: you need to compile your binary with -frounding-math if
20// you want to use rounding modes.
21
22#ifndef OR_TOOLS_UTIL_FP_UTILS_H_
23#define OR_TOOLS_UTIL_FP_UTILS_H_
24
25#include <algorithm>
26#include <cmath>
27#include <cstdint>
28#include <cstdlib>
29#include <limits>
30// Needed before fenv_access. See https://github.com/microsoft/STL/issues/2613.
31#include <numeric> // IWYU pragma:keep.
32#include <vector>
33
34#include "absl/log/check.h"
35#include "absl/types/span.h"
36
37#if defined(_MSC_VER)
38#pragma fenv_access(on) // NOLINT
39#else
40#include <cfenv> // NOLINT
41#endif
42
43#ifdef __SSE__
44#include <xmmintrin.h>
45#endif
46
47#if defined(_MSC_VER)
48static inline double isnan(double value) { return _isnan(value); }
49static inline double round(double value) { return floor(value + 0.5); }
50#elif defined(__APPLE__) || __GNUC__ >= 5
51using std::isnan;
52#endif
53
54namespace operations_research {
55
56// ScopedFloatingPointEnv is used to easily enable Floating-point exceptions.
57// The initial state is automatically restored when the object is deleted.
58//
59// Note(user): For some reason, this causes an FPE exception to be triggered for
60// unknown reasons when compiled in 32 bits. Because of this, we do not turn
61// on FPE exception if __x86_64__ is not defined.
62//
63// TODO(user): Make it work on 32 bits.
64// TODO(user): Make it work on msvc, currently calls to _controlfp crash.
65
67 public:
69#if defined(_MSC_VER)
70 // saved_control_ = _controlfp(0, 0);
71#elif (defined(__GNUC__) || defined(__llvm__)) && defined(__x86_64__)
72 CHECK_EQ(0, fegetenv(&saved_fenv_));
73#endif
74 }
75
77#if defined(_MSC_VER)
78 // CHECK_EQ(saved_control_, _controlfp(saved_control_, 0xFFFFFFFF));
79#elif defined(__x86_64__) && defined(__GLIBC__)
80 CHECK_EQ(0, fesetenv(&saved_fenv_));
81#endif
82 }
83
84 void EnableExceptions(int excepts) {
85#if defined(_MSC_VER)
86 // _controlfp(static_cast<unsigned int>(excepts), _MCW_EM);
87#elif (defined(__GNUC__) || defined(__llvm__)) && defined(__x86_64__) && \
88 !defined(__ANDROID__)
89 CHECK_EQ(0, fegetenv(&fenv_));
90 excepts &= FE_ALL_EXCEPT;
91#if defined(__APPLE__)
92 fenv_.__control &= ~excepts;
93#elif (defined(__FreeBSD__) || defined(__OpenBSD__))
94 fenv_.__x87.__control &= ~excepts;
95#elif defined(__NetBSD__)
96 fenv_.x87.control &= ~excepts;
97#else // Linux
98 fenv_.__control_word &= ~excepts;
99#endif
100#if defined(__NetBSD__)
101 fenv_.mxcsr &= ~(excepts << 7);
102#else
103 fenv_.__mxcsr &= ~(excepts << 7);
104#endif
105 CHECK_EQ(0, fesetenv(&fenv_));
106#endif
107 }
108
109 private:
110#if defined(_MSC_VER)
111 // unsigned int saved_control_;
112#elif (defined(__GNUC__) || defined(__llvm__)) && defined(__x86_64__)
113 fenv_t fenv_;
114 mutable fenv_t saved_fenv_;
115#endif
116};
117
118template <typename FloatType>
119inline bool IsPositiveOrNegativeInfinity(FloatType x) {
120 return x == std::numeric_limits<FloatType>::infinity() ||
121 x == -std::numeric_limits<FloatType>::infinity();
122}
123
124// Tests whether x and y are close to one another using absolute and relative
125// tolerances.
126// Returns true if |x - y| <= a (with a being the absolute_tolerance).
127// The above case is useful for values that are close to zero.
128// Returns true if |x - y| <= max(|x|, |y|) * r. (with r being the relative
129// tolerance.)
130// The cases for infinities are treated separately to avoid generating NaNs.
131template <typename FloatType>
132bool AreWithinAbsoluteOrRelativeTolerances(FloatType x, FloatType y,
133 FloatType relative_tolerance,
134 FloatType absolute_tolerance) {
135 DCHECK_LE(0.0, relative_tolerance);
136 DCHECK_LE(0.0, absolute_tolerance);
137 DCHECK_GT(1.0, relative_tolerance);
139 return x == y;
140 }
141 const FloatType difference = fabs(x - y);
142 if (difference <= absolute_tolerance) {
143 return true;
144 }
145 const FloatType largest_magnitude = std::max(fabs(x), fabs(y));
146 return difference <= largest_magnitude * relative_tolerance;
147}
148
149// Tests whether x and y are close to one another using an absolute tolerance.
150// Returns true if |x - y| <= a (with a being the absolute_tolerance).
151// The cases for infinities are treated separately to avoid generating NaNs.
152template <typename FloatType>
153bool AreWithinAbsoluteTolerance(FloatType x, FloatType y,
154 FloatType absolute_tolerance) {
155 DCHECK_LE(0.0, absolute_tolerance);
157 return x == y;
158 }
159 return fabs(x - y) <= absolute_tolerance;
160}
161
162// Returns true if x is less than y or slighlty greater than y with the given
163// absolute or relative tolerance.
164template <typename FloatType>
165bool IsSmallerWithinTolerance(FloatType x, FloatType y, FloatType tolerance) {
166 if (IsPositiveOrNegativeInfinity(y)) return x <= y;
167 return x <= y + tolerance * std::max(1.0, std::min(std::abs(x), std::abs(y)));
168}
169
170// Returns true if x is within tolerance of any integer. Always returns
171// false for x equal to +/- infinity.
172template <typename FloatType>
173inline bool IsIntegerWithinTolerance(FloatType x, FloatType tolerance) {
174 DCHECK_LE(0.0, tolerance);
175 if (IsPositiveOrNegativeInfinity(x)) return false;
176 return std::abs(x - std::round(x)) <= tolerance;
177}
178
179// Handy alternatives to EXPECT_NEAR(), using relative and absolute tolerance
180// instead of relative tolerance only, and with a proper support for infinity.
181#define EXPECT_COMPARABLE(expected, obtained, epsilon) \
182 EXPECT_TRUE(operations_research::AreWithinAbsoluteOrRelativeTolerances( \
183 expected, obtained, epsilon, epsilon)) \
184 << obtained << " != expected value " << expected \
185 << " within epsilon = " << epsilon;
186
187#define EXPECT_NOTCOMPARABLE(expected, obtained, epsilon) \
188 EXPECT_FALSE(operations_research::AreWithinAbsoluteOrRelativeTolerances( \
189 expected, obtained, epsilon, epsilon)) \
190 << obtained << " == expected value " << expected \
191 << " within epsilon = " << epsilon;
192
193// Given an array of doubles, this computes a positive scaling factor such that
194// the scaled doubles can then be rounded to integers with little or no loss of
195// precision, and so that the L1 norm of these integers is <= max_sum. More
196// precisely, the following formulas will hold (x[i] is input[i], for brevity):
197// - For all i, |round(factor * x[i]) / factor - x[i]| <= error * |x[i]|
198// - The sum over i of |round(factor * x[i])| <= max_sum.
199//
200// The algorithm tries to minimize "error" (which is the relative error for one
201// coefficient). Note however than in really broken cases, the error might be
202// infinity and the factor zero.
203//
204// Note on the algorithm:
205// - It only uses factors of the form 2^n (i.e. ldexp(1.0, n)) for simplicity.
206// - The error will be zero in many practical instances. For example, if x
207// contains only integers with low magnitude; or if x contains doubles whose
208// exponents cover a small range.
209// - It chooses the factor as high as possible under the given constraints, as
210// a result the numbers produced may be large. To balance this, we recommend
211// to divide the scaled integers by their gcd() which will result in no loss
212// of precision and will help in many practical cases.
213//
214// TODO(user): incorporate the gcd computation here? The issue is that I am
215// not sure if I just do factor /= gcd that round(x * factor) will be the same.
216void GetBestScalingOfDoublesToInt64(absl::Span<const double> input,
217 int64_t max_absolute_sum,
218 double* scaling_factor,
219 double* max_relative_coeff_error);
220
221// Returns the scaling factor like above with the extra conditions:
222// - The sum over i of min(0, round(factor * x[i])) >= -max_sum.
223// - The sum over i of max(0, round(factor * x[i])) <= max_sum.
224// For any possible values of the x[i] such that x[i] is in [lb[i], ub[i]].
225double GetBestScalingOfDoublesToInt64(absl::Span<const double> input,
226 absl::Span<const double> lb,
227 absl::Span<const double> ub,
228 int64_t max_absolute_sum);
229// This computes:
230//
231// The max_relative_coeff_error, which is the maximum over all coeff of
232// |round(factor * x[i]) / (factor * x[i]) - 1|.
233//
234// The max_scaled_sum_error which is a bound on the maximum difference between
235// the exact scaled sum and the rounded one. One needs to divide this by
236// scaling_factor to have the maximum absolute error on the original sum.
237void ComputeScalingErrors(absl::Span<const double> input,
238 absl::Span<const double> lb,
239 absl::Span<const double> ub, double scaling_factor,
240 double* max_relative_coeff_error,
241 double* max_scaled_sum_error);
242
243// Returns the Greatest Common Divisor of the numbers
244// round(fabs(x[i] * scaling_factor)). The numbers 0 are ignored and if they are
245// all zero then the result is 1. Note that round(fabs()) is the same as
246// fabs(round()) since the numbers are rounded away from zero.
247int64_t ComputeGcdOfRoundedDoubles(absl::Span<const double> x,
248 double scaling_factor);
249
250// Returns alpha * x + (1 - alpha) * y.
251template <typename FloatType>
252inline FloatType Interpolate(FloatType x, FloatType y, FloatType alpha) {
253 return alpha * x + (1 - alpha) * y;
254}
255
256// This is a fast implementation of the C99 function ilogb for normalized
257// doubles with the caveat that it returns -1023 for zero, and 1024 for infinity
258// an NaNs.
259int fast_ilogb(double value);
260
261// This is a fast implementation of the C99 function scalbn, with the caveat
262// that it works on normalized numbers and if the result underflows, overflows,
263// or is applied to a NaN or an +-infinity, the result is undefined behavior.
264// Note that the version of the function that takes a reference, modifies the
265// given value.
266double fast_scalbn(double value, int exponent);
267void fast_scalbn_inplace(double& mutable_value, int exponent);
268
269} // namespace operations_research
270
271#endif // OR_TOOLS_UTIL_FP_UTILS_H_
In SWIG mode, we don't want anything besides these top-level includes.
int fast_ilogb(double value)
Definition fp_utils.cc:233
bool IsSmallerWithinTolerance(FloatType x, FloatType y, FloatType tolerance)
Definition fp_utils.h:165
bool IsIntegerWithinTolerance(FloatType x, FloatType tolerance)
Definition fp_utils.h:173
double fast_scalbn(double value, int exponent)
Definition fp_utils.cc:246
double GetBestScalingOfDoublesToInt64(absl::Span< const double > input, absl::Span< const double > lb, absl::Span< const double > ub, int64_t max_absolute_sum)
Definition fp_utils.cc:186
FloatType Interpolate(FloatType x, FloatType y, FloatType alpha)
Returns alpha * x + (1 - alpha) * y.
Definition fp_utils.h:252
bool AreWithinAbsoluteOrRelativeTolerances(FloatType x, FloatType y, FloatType relative_tolerance, FloatType absolute_tolerance)
Definition fp_utils.h:132
bool AreWithinAbsoluteTolerance(FloatType x, FloatType y, FloatType absolute_tolerance)
Definition fp_utils.h:153
void ComputeScalingErrors(absl::Span< const double > input, absl::Span< const double > lb, absl::Span< const double > ub, double scaling_factor, double *max_relative_coeff_error, double *max_scaled_sum_error)
Definition fp_utils.cc:177
int64_t ComputeGcdOfRoundedDoubles(absl::Span< const double > x, double scaling_factor)
Definition fp_utils.cc:209
bool IsPositiveOrNegativeInfinity(FloatType x)
Definition fp_utils.h:119
void fast_scalbn_inplace(double &mutable_value, int exponent)
Definition fp_utils.cc:242
static int input(yyscan_t yyscanner)