Google OR-Tools v9.12
a fast and portable software suite for combinatorial optimization
Loading...
Searching...
No Matches
top_n.h
Go to the documentation of this file.
1// Copyright 2010-2025 Google LLC
2// Licensed under the Apache License, Version 2.0 (the "License");
3// you may not use this file except in compliance with the License.
4// You may obtain a copy of the License at
5//
6// http://www.apache.org/licenses/LICENSE-2.0
7//
8// Unless required by applicable law or agreed to in writing, software
9// distributed under the License is distributed on an "AS IS" BASIS,
10// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11// See the License for the specific language governing permissions and
12// limitations under the License.
13
14// ref:
15// https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/lib/gtl/top_n.h
16// This simple class finds the top n elements of an incrementally provided set
17// of elements which you push one at a time. If the number of elements exceeds
18// n, the lowest elements are incrementally dropped. At the end you get
19// a vector of the top elements sorted in descending order (through Extract() or
20// ExtractNondestructive()), or a vector of the top elements but not sorted
21// (through ExtractUnsorted() or ExtractUnsortedNondestructive()).
22//
23// The value n is specified in the constructor. If there are p elements pushed
24// altogether:
25// The total storage requirements are O(min(n, p)) elements
26// The running time is O(p * log(min(n, p))) comparisons
27// If n is a constant, the total storage required is a constant and the running
28// time is linear in p.
29
30#ifndef OR_TOOLS_BASE_TOP_N_H_
31#define OR_TOOLS_BASE_TOP_N_H_
32
33#include <stddef.h>
34
35#include <algorithm>
36#include <functional>
37#include <string>
38#include <vector>
39
41
42namespace operations_research {
43namespace gtl {
44// Cmp is an stl binary predicate. Note that Cmp is the "greater" predicate,
45// not the more commonly used "less" predicate.
46//
47// If you use a "less" predicate here, the TopN will pick out the bottom N
48// elements out of the ones passed to it, and it will return them sorted in
49// ascending order.
50//
51// TopN is rule-of-zero copyable and movable if its members are.
52template <class T, class Cmp = std::greater<T> >
53class TopN {
54 public:
55 // The TopN is in one of the three states:
56 //
57 // o UNORDERED: this is the state an instance is originally in,
58 // where the elements are completely orderless.
59 //
60 // o BOTTOM_KNOWN: in this state, we keep the invariant that there
61 // is at least one element in it, and the lowest element is at
62 // position 0. The elements in other positions remain
63 // unsorted. This state is reached if the state was originally
64 // UNORDERED and a peek_bottom() function call is invoked.
65 //
66 // o HEAP_SORTED: in this state, the array is kept as a heap and
67 // there are exactly (limit_+1) elements in the array. This
68 // state is reached when at least (limit_+1) elements are
69 // pushed in.
70 //
71 // The state transition graph is at follows:
72 //
73 // peek_bottom() (limit_+1) elements
74 // UNORDERED --------------> BOTTOM_KNOWN --------------------> HEAP_SORTED
75 // | ^
76 // | (limit_+1) elements |
77 // +-----------------------------------------------------------+
78 enum State { UNORDERED, BOTTOM_KNOWN, HEAP_SORTED };
79 using UnsortedIterator = typename std::vector<T>::const_iterator;
80 // 'limit' is the maximum number of top results to return.
81 explicit TopN(size_t limit) : TopN(limit, Cmp()) {}
82 TopN(size_t limit, const Cmp& cmp) : limit_(limit), cmp_(cmp) {}
83 size_t limit() const { return limit_; }
84 // Number of elements currently held by this TopN object. This
85 // will be no greater than 'limit' passed to the constructor.
86 size_t size() const { return std::min(elements_.size(), limit_); }
87 bool empty() const { return size() == 0; }
88 // If you know how many elements you will push at the time you create the
89 // TopN object, you can call reserve to preallocate the memory that TopN
90 // will need to process all 'n' pushes. Calling this method is optional.
91 void reserve(size_t n) { elements_.reserve(std::min(n, limit_ + 1)); }
92 // Push 'v'. If the maximum number of elements was exceeded, drop the
93 // lowest element and return it in 'dropped' (if given). If the maximum is not
94 // exceeded, 'dropped' will remain unchanged. 'dropped' may be omitted or
95 // nullptr, in which case it is not filled in.
96 // Requires: T is CopyAssignable, Swappable
97 void push(const T& v) { push(v, nullptr); }
98 void push(const T& v, T* dropped) { PushInternal(v, dropped); }
99 // Move overloads of push.
100 // Requires: T is MoveAssignable, Swappable
101 void push(T&& v) { // NOLINT(build/c++11)
102 push(std::move(v), nullptr);
103 }
104 void push(T&& v, T* dropped) { // NOLINT(build/c++11)
105 PushInternal(std::move(v), dropped);
106 }
107 // Peeks the bottom result without calling Extract()
108 const T& peek_bottom();
109 // Destructively extract the elements as a vector, sorted in descending order.
110 // Leaves TopN in an empty state.
111 std::vector<T> Take();
112 // Extract the elements as a vector sorted in descending order. The caller
113 // assumes ownership of the vector and must delete it when done. This is a
114 // destructive operation. The only method that can be called immediately
115 // after Extract() is Reset().
116 std::vector<T>* Extract();
117 // Similar to Extract(), but makes no guarantees the elements are in sorted
118 // order. As with Extract(), the caller assumes ownership of the vector and
119 // must delete it when done. This is a destructive operation. The only
120 // method that can be called immediately after ExtractUnsorted() is Reset().
121 std::vector<T>* ExtractUnsorted();
122 // A non-destructive version of Extract(). Copy the elements in a new vector
123 // sorted in descending order and return it. The caller assumes ownership of
124 // the new vector and must delete it when done. After calling
125 // ExtractNondestructive(), the caller can continue to push() new elements.
126 std::vector<T>* ExtractNondestructive() const;
127 // A non-destructive version of Extract(). Copy the elements to a given
128 // vector sorted in descending order. After calling
129 // ExtractNondestructive(), the caller can continue to push() new elements.
130 // Note:
131 // 1. The given argument must to be allocated.
132 // 2. Any data contained in the vector prior to the call will be deleted
133 // from it. After the call the vector will contain only the elements
134 // from the data structure.
135 void ExtractNondestructive(std::vector<T>* output) const;
136 // A non-destructive version of ExtractUnsorted(). Copy the elements in a new
137 // vector and return it, with no guarantees the elements are in sorted order.
138 // The caller assumes ownership of the new vector and must delete it when
139 // done. After calling ExtractUnsortedNondestructive(), the caller can
140 // continue to push() new elements.
141 std::vector<T>* ExtractUnsortedNondestructive() const;
142 // A non-destructive version of ExtractUnsorted(). Copy the elements into
143 // a given vector, with no guarantees the elements are in sorted order.
144 // After calling ExtractUnsortedNondestructive(), the caller can continue
145 // to push() new elements.
146 // Note:
147 // 1. The given argument must to be allocated.
148 // 2. Any data contained in the vector prior to the call will be deleted
149 // from it. After the call the vector will contain only the elements
150 // from the data structure.
151 void ExtractUnsortedNondestructive(std::vector<T>* output) const;
152 // Return an iterator to the beginning (end) of the container,
153 // with no guarantees about the order of iteration. These iterators are
154 // invalidated by mutation of the data structure.
155 UnsortedIterator unsorted_begin() const { return elements_.begin(); }
156 UnsortedIterator unsorted_end() const { return elements_.begin() + size(); }
157 // Accessor for comparator template argument.
158 Cmp* comparator() { return &cmp_; }
159 // This removes all elements. If Extract() or ExtractUnsorted() have been
160 // called, this will put it back in an empty but useable state.
161 void Reset();
162
163 private:
164 template <typename U>
165 void PushInternal(
166 U&& v,
167 T* dropped); // NOLINT(build/c++11)
168 // elements_ can be in one of two states:
169 // elements_.size() <= limit_: elements_ is an unsorted
170 // vector of elements
171 // pushed so far.
172 // elements_.size() > limit_: The last element of
173 // elements_ is unused;
174 // the other elements of elements_ are an stl heap
175 // whose size is exactly limit_. In this case
176 // elements_.size() is exactly one greater than limit_,
177 // but don't use "elements_.size() == limit_ + 1" to
178 // check for that because you'll get a false positive
179 // if limit_ == size_t(-1).
180 std::vector<T> elements_;
181 size_t limit_; // Maximum number of elements to find
182 Cmp cmp_; // Greater-than comparison function
183 State state_ = UNORDERED;
184};
185// ----------------------------------------------------------------------
186// Implementations of non-inline functions
187template <class T, class Cmp>
188template <typename U>
189void TopN<T, Cmp>::PushInternal(U&& v, T* dropped) { // NOLINT(build/c++11)
190 if (limit_ == 0) {
191 if (dropped) *dropped = std::forward<U>(v); // NOLINT(build/c++11)
192 return;
193 }
194 if (state_ != HEAP_SORTED) {
195 elements_.push_back(std::forward<U>(v)); // NOLINT(build/c++11)
196 if (state_ == UNORDERED || cmp_(elements_.back(), elements_.front())) {
197 // Easy case: we just pushed the new element back
198 } else {
199 // To maintain the BOTTOM_KNOWN state, we need to make sure that
200 // the element at position 0 is always the smallest. So we put
201 // the new element at position 0 and push the original bottom
202 // element in the back.
203 // Warning: this code is subtle.
204 using std::swap;
205 swap(elements_.front(), elements_.back());
206 }
207 if (elements_.size() == limit_ + 1) {
208 // Transition from unsorted vector to a heap.
209 std::make_heap(elements_.begin(), elements_.end(), cmp_);
210 if (dropped) *dropped = std::move(elements_.front());
211 std::pop_heap(elements_.begin(), elements_.end(), cmp_);
212 state_ = HEAP_SORTED;
213 }
214 } else {
215 // Only insert the new element if it is greater than the least element.
216 if (cmp_(v, elements_.front())) {
217 // Store new element in the last slot of elements_. Remember from the
218 // comments on elements_ that this last slot is unused, so we don't
219 // overwrite anything useful.
220 elements_.back() = std::forward<U>(
221 v); // NOLINT(build/c++11)
222 // stp::pop_heap() swaps elements_.front() and elements_.back()
223 // and rearranges elements from [elements_.begin(),
224 // elements_.end() - 1) such that they are a heap according to
225 // cmp_. Net effect: remove elements_.front() from the heap, and
226 // add the new element instead. For more info, see
227 // https://en.cppreference.com/w/cpp/algorithm/pop_heap.
228 std::pop_heap(elements_.begin(), elements_.end(), cmp_);
229 if (dropped) *dropped = std::move(elements_.back());
230 } else {
231 if (dropped) *dropped = std::forward<U>(v); // NOLINT(build/c++11)
232 }
233 }
234}
235template <class T, class Cmp>
237 CHECK(!empty());
238 if (state_ == UNORDERED) {
239 // We need to do a linear scan to find out the bottom element
240 int min_candidate = 0;
241 for (size_t i = 1; i < elements_.size(); ++i) {
242 if (cmp_(elements_[min_candidate], elements_[i])) {
243 min_candidate = i;
244 }
245 }
246 // By swapping the element at position 0 and the minimal
247 // element, we transition to the BOTTOM_KNOWN state
248 if (min_candidate != 0) {
249 using std::swap;
250 swap(elements_[0], elements_[min_candidate]);
251 }
252 state_ = BOTTOM_KNOWN;
253 }
254 return elements_.front();
255}
256template <class T, class Cmp>
257std::vector<T> TopN<T, Cmp>::Take() {
258 std::vector<T> out = std::move(elements_);
259 if (state_ != State::HEAP_SORTED) {
260 std::sort(out.begin(), out.end(), cmp_);
261 } else {
262 out.pop_back();
263 std::sort_heap(out.begin(), out.end(), cmp_);
264 }
265 Reset();
266 return out;
267}
268
269template <class T, class Cmp>
270std::vector<T>* TopN<T, Cmp>::Extract() {
271 auto out = new std::vector<T>;
272 out->swap(elements_);
273 if (state_ != HEAP_SORTED) {
274 std::sort(out->begin(), out->end(), cmp_);
275 } else {
276 out->pop_back();
277 std::sort_heap(out->begin(), out->end(), cmp_);
278 }
279 return out;
280}
281template <class T, class Cmp>
282std::vector<T>* TopN<T, Cmp>::ExtractUnsorted() {
283 auto out = new std::vector<T>;
284 out->swap(elements_);
285 if (state_ == HEAP_SORTED) {
286 // Remove the limit_+1'th element.
287 out->pop_back();
288 }
289 return out;
290}
291template <class T, class Cmp>
292std::vector<T>* TopN<T, Cmp>::ExtractNondestructive() const {
293 auto out = new std::vector<T>;
294 ExtractNondestructive(out);
295 return out;
296}
297template <class T, class Cmp>
298void TopN<T, Cmp>::ExtractNondestructive(std::vector<T>* output) const {
299 CHECK(output);
300 *output = elements_;
301 if (state_ != HEAP_SORTED) {
302 std::sort(output->begin(), output->end(), cmp_);
303 } else {
304 output->pop_back();
305 std::sort_heap(output->begin(), output->end(), cmp_);
306 }
307}
308template <class T, class Cmp>
309std::vector<T>* TopN<T, Cmp>::ExtractUnsortedNondestructive() const {
310 auto elements = new std::vector<T>;
311 ExtractUnsortedNondestructive(elements);
312 return elements;
313}
314template <class T, class Cmp>
315void TopN<T, Cmp>::ExtractUnsortedNondestructive(std::vector<T>* output) const {
316 CHECK(output);
317 *output = elements_;
318 if (state_ == HEAP_SORTED) {
319 // Remove the limit_+1'th element.
320 output->pop_back();
321 }
322}
323template <class T, class Cmp>
324void TopN<T, Cmp>::Reset() {
325 elements_.clear();
326 state_ = UNORDERED;
327}
328} // namespace gtl
329} // namespace operations_research
330#endif // OR_TOOLS_BASE_TOP_N_H_
dual_gradient T(y - `dual_solution`) class DiagonalTrustRegionProblemFromQp
In SWIG mode, we don't want anything besides these top-level includes.