Google OR-Tools v9.11
a fast and portable software suite for combinatorial optimization
Loading...
Searching...
No Matches
saturated_arithmetic.h
Go to the documentation of this file.
1// Copyright 2010-2024 Google LLC
2// Licensed under the Apache License, Version 2.0 (the "License");
3// you may not use this file except in compliance with the License.
4// You may obtain a copy of the License at
5//
6// http://www.apache.org/licenses/LICENSE-2.0
7//
8// Unless required by applicable law or agreed to in writing, software
9// distributed under the License is distributed on an "AS IS" BASIS,
10// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11// See the License for the specific language governing permissions and
12// limitations under the License.
13
14#ifndef OR_TOOLS_UTIL_SATURATED_ARITHMETIC_H_
15#define OR_TOOLS_UTIL_SATURATED_ARITHMETIC_H_
16
17#include <cstdint>
18#include <limits>
19#include <type_traits>
20
21#include "absl/base/casts.h"
22#include "absl/log/check.h"
23#include "ortools/base/types.h"
24#include "ortools/util/bitset.h"
25
26// This file contains implementations for saturated addition, subtraction and
27// multiplication.
28// Currently, there are three versions of the code.
29// The code using the built-ins provided since GCC 5.0 now compiles really well
30// with clang/LLVM on both x86_64 and ARM. It should therefore be the standard,
31// but not for multiplication, see below.
32//
33// For example, on ARM, only 4 instructions are needed, two of which are
34// additions that can be executed in parallel.
35//
// On x86_64, we're keeping the code with inline assembly for GCC as GCC does
// not manage to compile the code with built-ins properly.
38// On x86_64, the product of two 64-bit registers is a 128-bit integer
39// stored in two 64-bit registers. It's the carry flag that is set when the
40// result exceeds 64 bits, not the overflow flag. Since the built-in uses the
41// overflow flag, we have to resort on the assembly-based version of the code.
42//
43// Sadly, MSVC does not support the built-ins nor does it support inline
44// assembly. We have to rely on the generic, C++ only version of the code which
45// is much slower.
46//
47// TODO(user): make this implementation the default everywhere.
48// TODO(user): investigate the code generated by MSVC.
49
50namespace operations_research {
51
52// Checks if x is equal to the min or the max value of an int64_t.
// Returns true when x is exactly kint64min or kint64max, the two sentinel
// values that the Cap* functions treat as +/- infinity.
inline bool AtMinOrMaxInt64(int64_t x) {
  constexpr int64_t kMin = std::numeric_limits<int64_t>::min();
  constexpr int64_t kMax = std::numeric_limits<int64_t>::max();
  return x == kMin || x == kMax;
}
57
58// Note(user): -kint64min != kint64max, but kint64max == ~kint64min.
59inline int64_t CapOpp(int64_t v) { return v == kint64min ? ~v : -v; }
60
// Saturated absolute value: |kint64min| does not fit in an int64_t, so it is
// capped to kint64max.
inline int64_t CapAbs(int64_t v) {
  if (v == std::numeric_limits<int64_t>::min()) {
    return std::numeric_limits<int64_t>::max();
  }
  return v < 0 ? -v : v;
}
65
66// ---------- Overflow utility functions ----------
67
68// Implement two's complement addition and subtraction on int64s.
69//
70// The C and C++ standards specify that the overflow of signed integers is
71// undefined. This is because of the different possible representations that may
72// be used for signed integers (one's complement, two's complement, sign and
73// magnitude). Such overflows are detected by Address Sanitizer with
74// -fsanitize=signed-integer-overflow.
75//
76// Simple, portable overflow detection on current machines relies on
77// these two functions. For example, if the sign of the sum of two positive
78// integers is negative, there has been an overflow.
79//
80// Note that the static assert will break if the code is compiled on machines
81// which do not use two's complement.
82inline int64_t TwosComplementAddition(int64_t x, int64_t y) {
83 static_assert(static_cast<uint64_t>(-1LL) == ~0ULL,
84 "The target architecture does not use two's complement.");
85 return absl::bit_cast<int64_t>(static_cast<uint64_t>(x) +
86 static_cast<uint64_t>(y));
87}
88
89inline int64_t TwosComplementSubtraction(int64_t x, int64_t y) {
90 static_assert(static_cast<uint64_t>(-1LL) == ~0ULL,
91 "The target architecture does not use two's complement.");
92 return absl::bit_cast<int64_t>(static_cast<uint64_t>(x) -
93 static_cast<uint64_t>(y));
94}
95
96// Helper function that returns true if an overflow has occurred in computing
97// sum = x + y. sum is expected to be computed elsewhere.
98inline bool AddHadOverflow(int64_t x, int64_t y, int64_t sum) {
99 // Overflow cannot occur if operands have different signs.
100 // It can only occur if sign(x) == sign(y) and sign(sum) != sign(x),
101 // which is equivalent to: sign(x) != sign(sum) && sign(y) != sign(sum).
102 // This is captured when the expression below is negative.
103 DCHECK_EQ(sum, TwosComplementAddition(x, y));
104 return ((x ^ sum) & (y ^ sum)) < 0;
105}
106
107inline bool SubHadOverflow(int64_t x, int64_t y, int64_t diff) {
108 // This is the same reasoning as for AddHadOverflow. We have x = diff + y.
109 // The formula is the same, with 'x' and diff exchanged.
110 DCHECK_EQ(diff, TwosComplementSubtraction(x, y));
111 return AddHadOverflow(diff, y, x);
112}
113
114// A note on overflow treatment.
115// kint64min and kint64max are treated as infinity.
116// Thus if the computation overflows, the result is always kint64m(ax/in).
117//
118// Note(user): this is actually wrong: when computing A-B, if A is kint64max
119// and B is finite, then A-B won't be kint64max: overflows aren't sticky.
120// TODO(user): consider making some operations overflow-sticky, some others
121// not, but make an explicit choice throughout.
122inline bool AddOverflows(int64_t x, int64_t y) {
124}
125
126inline int64_t SubOverflows(int64_t x, int64_t y) {
128}
129
130// Performs *b += a and returns false iff the addition overflow or underflow.
131// This function only works for typed integer type (IntType<>).
132template <typename IntegerType>
133bool SafeAddInto(IntegerType a, IntegerType* b) {
134 const int64_t x = a.value();
135 const int64_t y = b->value();
136 const int64_t sum = TwosComplementAddition(x, y);
137 if (AddHadOverflow(x, y, sum)) return false;
138 *b = sum;
139 return true;
140}
141
142// Returns kint64max if x >= 0 and kint64min if x < 0.
143inline int64_t CapWithSignOf(int64_t x) {
144 // return kint64max if x >= 0 or kint64max + 1 (== kint64min) if x < 0.
145 return TwosComplementAddition(kint64max, static_cast<int64_t>(x < 0));
146}
147
// The following implementations are here for GCC because it does not
// compile the built-ins correctly. The code is either too long without
// branches or contains jumps. These implementations are probably optimal
// on x86_64.
152#if defined(__GNUC__) && !defined(__clang_) && defined(__x86_64__)
153inline int64_t CapAddAsm(int64_t x, int64_t y) {
154 const int64_t cap = CapWithSignOf(x);
155 int64_t result = x;
156 // clang-format off
157 asm volatile( // 'volatile': ask compiler optimizer "keep as is".
158 "\t" "addq %[y],%[result]"
159 "\n\t" "cmovoq %[cap],%[result]" // Conditional move if overflow.
160 : [result] "=r"(result) // Output
161 : "[result]" (result), [y] "r"(y), [cap] "r"(cap) // Input.
162 : "cc" /* Clobbered registers */ );
163 // clang-format on
164 return result;
165}
166
167inline int64_t CapSubAsm(int64_t x, int64_t y) {
168 const int64_t cap = CapWithSignOf(x);
169 int64_t result = x;
170 // clang-format off
171 asm volatile( // 'volatile': ask compiler optimizer "keep as is".
172 "\t" "subq %[y],%[result]"
173 "\n\t" "cmovoq %[cap],%[result]" // Conditional move if overflow.
174 : [result] "=r"(result) // Output
175 : "[result]" (result), [y] "r"(y), [cap] "r"(cap) // Input.
176 : "cc" /* Clobbered registers */ );
177 // clang-format on
178 return result;
179}
180
181// Note that on x86_64, we have to use this code because it's the carry flag
182// that is set when the product of two 64-bit integers does not fit in 64-bit.
183inline int64_t CapProdAsm(int64_t x, int64_t y) {
184 // cap = kint64max if x and y have the same sign, cap = kint64min
185 // otherwise.
186 const int64_t cap = CapWithSignOf(x ^ y);
187 int64_t result = x;
188 // Here, we use the fact that imul of two signed 64-integers returns a 128-bit
189 // result -- we care about the lower 64 bits. More importantly, imul also sets
190 // the carry flag if 64 bits were not enough.
191 // We therefore use cmovc to return cap if the carry was set.
192 // clang-format off
193 asm volatile( // 'volatile': ask compiler optimizer "keep as is".
194 "\n\t" "imulq %[y],%[result]"
195 "\n\t" "cmovcq %[cap],%[result]" // Conditional move if carry.
196 : [result] "=r"(result) // Output
197 : "[result]" (result), [y] "r"(y), [cap] "r"(cap) // Input.
198 : "cc" /* Clobbered registers */);
199 // clang-format on
200 return result;
201}
202#endif
203
204// Simple implementations which use the built-ins provided by both GCC and
205// clang. clang compiles to very good code for both x86_64 and ARM. This is the
206// preferred implementation in general.
207#if defined(__clang__)
208inline int64_t CapAddBuiltIn(int64_t x, int64_t y) {
209 const int64_t cap = CapWithSignOf(x);
210 int64_t result;
211 const bool overflowed = __builtin_add_overflow(x, y, &result);
212 return overflowed ? cap : result;
213}
214
215inline int64_t CapSubBuiltIn(int64_t x, int64_t y) {
216 const int64_t cap = CapWithSignOf(x);
217 int64_t result;
218 const bool overflowed = __builtin_sub_overflow(x, y, &result);
219 return overflowed ? cap : result;
220}
221
222// As said above, this is useless on x86_64.
223inline int64_t CapProdBuiltIn(int64_t x, int64_t y) {
224 const int64_t cap = CapWithSignOf(x ^ y);
225 int64_t result;
226 const bool overflowed = __builtin_mul_overflow(x, y, &result);
227 return overflowed ? cap : result;
228}
229#endif
230
231// Generic implementations. They are very good for addition and subtraction,
232// less so for multiplication.
233inline int64_t CapAddGeneric(int64_t x, int64_t y) {
234 const int64_t result = TwosComplementAddition(x, y);
235 return AddHadOverflow(x, y, result) ? CapWithSignOf(x) : result;
236}
237
238inline int64_t CapSubGeneric(int64_t x, int64_t y) {
239 const int64_t result = TwosComplementSubtraction(x, y);
240 return SubHadOverflow(x, y, result) ? CapWithSignOf(x) : result;
241}
242
namespace cap_prod_util {
// Returns an unsigned int equal to the absolute value of n, in a way that
// will not produce overflows (in particular, |kint64min| is representable
// as a uint64_t even though it is not as an int64_t).
inline uint64_t uint_abs(int64_t n) {
  const uint64_t u = static_cast<uint64_t>(n);
  // For negative n, unsigned negation (0 - u) is the two's complement of u,
  // i.e. exactly |n| modulo 2^64.
  return n < 0 ? 0 - u : u;
}
}  // namespace cap_prod_util
250
251// The generic algorithm computes a bound on the number of bits necessary to
252// store the result. For this it uses the position of the most significant bits
253// of each of the arguments.
254// If the result needs at least 64 bits, then return a capped value.
255// If the result needs at most 63 bits, then return the product.
256// Otherwise, the result may use 63 or 64 bits: compute the product
257// as a uint64_t, and cap it if necessary.
258inline int64_t CapProdGeneric(int64_t x, int64_t y) {
259 const uint64_t a = cap_prod_util::uint_abs(x);
260 const uint64_t b = cap_prod_util::uint_abs(y);
261 // Let MSB(x) denote the most significant bit of x. We have:
262 // MSB(x) + MSB(y) <= MSB(x * y) <= MSB(x) + MSB(y) + 1
263 const int msb_sum =
265 const int kMaxBitIndexInInt64 = 63;
266 if (msb_sum <= kMaxBitIndexInInt64 - 2) return x * y;
267 // Catch a == 0 or b == 0 now, as MostSignificantBitPosition64(0) == 0.
268 // TODO(user): avoid this by writing function Log2(a) with Log2(0) == -1.
269 if (a == 0 || b == 0) return 0;
270 const int64_t cap = CapWithSignOf(x ^ y);
271 if (msb_sum >= kMaxBitIndexInInt64) return cap;
272 // The corner case is when msb_sum == 62, i.e. at least 63 bits will be
273 // needed to store the product. The following product will never overflow
274 // on uint64_t, since msb_sum == 62.
275 const uint64_t u_prod = a * b;
276 // The overflow cases are captured by one of the following conditions:
277 // (cap >= 0 && u_prod >= static_cast<uint64_t>(kint64max) or
278 // (cap < 0 && u_prod >= static_cast<uint64_t>(kint64min)).
279 // These can be optimized as follows (and if the condition is false, it is
280 // safe to compute x * y.
281 if (u_prod >= static_cast<uint64_t>(cap)) return cap;
282 const int64_t abs_result = absl::bit_cast<int64_t>(u_prod);
283 return cap < 0 ? -abs_result : abs_result;
284}
285
// A generic, safer version of CapAdd() where we don't silently convert double
// or int32_t to int64_t. When the inputs are floating-point, uses '+', else
// uses CapAdd() for int64_t, and a (slow-ish) int32_t version for int.
// Forward declaration only; the definition appears later in this file.
template <typename T>
T CapOrFloatAdd(T x, T y);
291
// Saturated addition: returns x + y, capped to kint64max/kint64min on
// overflow. Dispatches to the best implementation for the toolchain:
// inline asm on GCC/x86_64, compiler built-ins on clang, portable C++
// otherwise.
inline int64_t CapAdd(int64_t x, int64_t y) {
#if defined(__GNUC__) && !defined(__clang__) && defined(__x86_64__)
  return CapAddAsm(x, y);
#elif defined(__clang__)
  return CapAddBuiltIn(x, y);
#else
  return CapAddGeneric(x, y);
#endif
}
301
302inline void CapAddTo(int64_t x, int64_t* y) { *y = CapAdd(*y, x); }
303
// Saturated subtraction: returns x - y, capped to kint64max/kint64min on
// overflow. Same per-toolchain dispatch as CapAdd().
inline int64_t CapSub(int64_t x, int64_t y) {
#if defined(__GNUC__) && !defined(__clang__) && defined(__x86_64__)
  return CapSubAsm(x, y);
#elif defined(__clang__)
  return CapSubBuiltIn(x, y);
#else
  return CapSubGeneric(x, y);
#endif
}
313
// Saturated multiplication: returns x * y, capped to kint64max/kint64min on
// overflow (the cap carries the sign of the mathematical product).
inline int64_t CapProd(int64_t x, int64_t y) {
#if defined(__GNUC__) && defined(__x86_64__)
  // On x86_64, the product of two 64-bit registers is a 128-bit integer,
  // stored in two 64-bit registers. It's the carry flag that is set when the
  // result exceeds 64 bits, not the overflow flag. We therefore have to resort
  // to the assembly-based version of the code.
  return CapProdAsm(x, y);
#elif defined(__clang__)
  return CapProdBuiltIn(x, y);
#else
  return CapProdGeneric(x, y);
#endif
}
327
328template <typename T>
330 static_assert(std::is_floating_point_v<T> || std::is_integral_v<T>);
331 if constexpr (std::is_integral_v<T>) {
332 if constexpr (std::is_same_v<T, int64_t>) {
333 return CapAdd(x, y);
334 } else {
335 static_assert(
336 std::is_same_v<T, int32_t>,
337 "CapOrFloatAdd() on integers only works for int and int64_t");
338 const int64_t sum = int64_t{x} + y;
339 return sum > kint32max ? kint32max : sum < kint32min ? kint32min : sum;
340 }
341 } else {
342 return x + y;
343 }
344}
345
346} // namespace operations_research
347
348#endif // OR_TOOLS_UTIL_SATURATED_ARITHMETIC_H_
IntegerValue y
int64_t b
Definition table.cc:45
int64_t a
Definition table.cc:44
In SWIG mode, we don't want anything besides these top-level includes.
int64_t SubOverflows(int64_t x, int64_t y)
bool AtMinOrMaxInt64(int64_t x)
Checks if x is equal to the min or the max value of an int64_t.
bool AddHadOverflow(int64_t x, int64_t y, int64_t sum)
int64_t CapAdd(int64_t x, int64_t y)
void CapAddTo(int64_t x, int64_t *y)
int64_t CapWithSignOf(int64_t x)
Returns kint64max if x >= 0 and kint64min if x < 0.
int64_t TwosComplementAddition(int64_t x, int64_t y)
-------— Overflow utility functions -------—
int64_t CapSub(int64_t x, int64_t y)
int64_t CapAddGeneric(int64_t x, int64_t y)
bool AddOverflows(int64_t x, int64_t y)
int64_t CapProd(int64_t x, int64_t y)
bool SubHadOverflow(int64_t x, int64_t y, int64_t diff)
int64_t TwosComplementSubtraction(int64_t x, int64_t y)
int64_t CapAbs(int64_t v)
int64_t CapProdGeneric(int64_t x, int64_t y)
bool SafeAddInto(IntegerType a, IntegerType *b)
int64_t CapSubGeneric(int64_t x, int64_t y)
int64_t CapOpp(int64_t v)
Note(user): -kint64min != kint64max, but kint64max == ~kint64min.
int MostSignificantBitPosition64(uint64_t n)
Definition bitset.h:235
const Variable x
Definition qp_tests.cc:127
static const int32_t kint32min
Definition types.h:29
static const int64_t kint64max
Definition types.h:32
static const int32_t kint32max
Definition types.h:30
static const int64_t kint64min
Definition types.h:31