Google OR-Tools v9.14
a fast and portable software suite for combinatorial optimization
Loading...
Searching...
No Matches
top_n.h
Go to the documentation of this file.
1// Copyright 2010-2025 Google LLC
2// Licensed under the Apache License, Version 2.0 (the "License");
3// you may not use this file except in compliance with the License.
4// You may obtain a copy of the License at
5//
6// http://www.apache.org/licenses/LICENSE-2.0
7//
8// Unless required by applicable law or agreed to in writing, software
9// distributed under the License is distributed on an "AS IS" BASIS,
10// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11// See the License for the specific language governing permissions and
12// limitations under the License.
13
14// ref:
15// https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/lib/gtl/top_n.h
16// This simple class finds the top n elements of an incrementally provided set
17// of elements which you push one at a time. If the number of elements exceeds
18// n, the lowest elements are incrementally dropped. At the end you get
19// a vector of the top elements sorted in descending order (through Extract() or
20// ExtractNondestructive()), or a vector of the top elements but not sorted
21// (through ExtractUnsorted() or ExtractUnsortedNondestructive()).
22//
23// The value n is specified in the constructor. If there are p elements pushed
24// altogether:
25// The total storage requirements are O(min(n, p)) elements
26// The running time is O(p * log(min(n, p))) comparisons
27// If n is a constant, the total storage required is a constant and the running
28// time is linear in p.
29
30#ifndef OR_TOOLS_BASE_TOP_N_H_
31#define OR_TOOLS_BASE_TOP_N_H_
32
33#include <stddef.h>
34
35#include <algorithm>
36#include <functional>
37#include <vector>
38
39namespace operations_research {
40namespace gtl {
41// Cmp is an stl binary predicate. Note that Cmp is the "greater" predicate,
42// not the more commonly used "less" predicate.
43//
44// If you use a "less" predicate here, the TopN will pick out the bottom N
45// elements out of the ones passed to it, and it will return them sorted in
46// ascending order.
47//
48// TopN is rule-of-zero copyable and movable if its members are.
49template <class T, class Cmp = std::greater<T> >
50class TopN {
51 public:
52 // The TopN is in one of the three states:
53 //
54 // o UNORDERED: this is the state an instance is originally in,
55 // where the elements are completely orderless.
56 //
57 // o BOTTOM_KNOWN: in this state, we keep the invariant that there
58 // is at least one element in it, and the lowest element is at
59 // position 0. The elements in other positions remain
60 // unsorted. This state is reached if the state was originally
61 // UNORDERED and a peek_bottom() function call is invoked.
62 //
63 // o HEAP_SORTED: in this state, the array is kept as a heap and
64 // there are exactly (limit_+1) elements in the array. This
65 // state is reached when at least (limit_+1) elements are
66 // pushed in.
67 //
68 // The state transition graph is at follows:
69 //
70 // peek_bottom() (limit_+1) elements
71 // UNORDERED --------------> BOTTOM_KNOWN --------------------> HEAP_SORTED
72 // | ^
73 // | (limit_+1) elements |
74 // +-----------------------------------------------------------+
75 enum State { UNORDERED, BOTTOM_KNOWN, HEAP_SORTED };
76 using UnsortedIterator = typename std::vector<T>::const_iterator;
77 // 'limit' is the maximum number of top results to return.
78 explicit TopN(size_t limit) : TopN(limit, Cmp()) {}
79 TopN(size_t limit, const Cmp& cmp) : limit_(limit), cmp_(cmp) {}
80 size_t limit() const { return limit_; }
81 // Number of elements currently held by this TopN object. This
82 // will be no greater than 'limit' passed to the constructor.
83 size_t size() const { return std::min(elements_.size(), limit_); }
84 bool empty() const { return size() == 0; }
85 // If you know how many elements you will push at the time you create the
86 // TopN object, you can call reserve to preallocate the memory that TopN
87 // will need to process all 'n' pushes. Calling this method is optional.
88 void reserve(size_t n) { elements_.reserve(std::min(n, limit_ + 1)); }
89 // Push 'v'. If the maximum number of elements was exceeded, drop the
90 // lowest element and return it in 'dropped' (if given). If the maximum is not
91 // exceeded, 'dropped' will remain unchanged. 'dropped' may be omitted or
92 // nullptr, in which case it is not filled in.
93 // Requires: T is CopyAssignable, Swappable
94 void push(const T& v) { push(v, nullptr); }
95 void push(const T& v, T* dropped) { PushInternal(v, dropped); }
96 // Move overloads of push.
97 // Requires: T is MoveAssignable, Swappable
98 void push(T&& v) { // NOLINT(build/c++11)
99 push(std::move(v), nullptr);
100 }
101 void push(T&& v, T* dropped) { // NOLINT(build/c++11)
102 PushInternal(std::move(v), dropped);
103 }
104 // Peeks the bottom result without calling Extract()
105 const T& peek_bottom();
106 // Destructively extract the elements as a vector, sorted in descending order.
107 // Leaves TopN in an empty state.
108 std::vector<T> Take();
109 // Extract the elements as a vector sorted in descending order. The caller
110 // assumes ownership of the vector and must delete it when done. This is a
111 // destructive operation. The only method that can be called immediately
112 // after Extract() is Reset().
113 std::vector<T>* Extract();
114 // Similar to Extract(), but makes no guarantees the elements are in sorted
115 // order. As with Extract(), the caller assumes ownership of the vector and
116 // must delete it when done. This is a destructive operation. The only
117 // method that can be called immediately after ExtractUnsorted() is Reset().
118 std::vector<T>* ExtractUnsorted();
119 // A non-destructive version of Extract(). Copy the elements in a new vector
120 // sorted in descending order and return it. The caller assumes ownership of
121 // the new vector and must delete it when done. After calling
122 // ExtractNondestructive(), the caller can continue to push() new elements.
123 std::vector<T>* ExtractNondestructive() const;
124 // A non-destructive version of Extract(). Copy the elements to a given
125 // vector sorted in descending order. After calling
126 // ExtractNondestructive(), the caller can continue to push() new elements.
127 // Note:
128 // 1. The given argument must to be allocated.
129 // 2. Any data contained in the vector prior to the call will be deleted
130 // from it. After the call the vector will contain only the elements
131 // from the data structure.
132 void ExtractNondestructive(std::vector<T>* output) const;
133 // A non-destructive version of ExtractUnsorted(). Copy the elements in a new
134 // vector and return it, with no guarantees the elements are in sorted order.
135 // The caller assumes ownership of the new vector and must delete it when
136 // done. After calling ExtractUnsortedNondestructive(), the caller can
137 // continue to push() new elements.
138 std::vector<T>* ExtractUnsortedNondestructive() const;
139 // A non-destructive version of ExtractUnsorted(). Copy the elements into
140 // a given vector, with no guarantees the elements are in sorted order.
141 // After calling ExtractUnsortedNondestructive(), the caller can continue
142 // to push() new elements.
143 // Note:
144 // 1. The given argument must to be allocated.
145 // 2. Any data contained in the vector prior to the call will be deleted
146 // from it. After the call the vector will contain only the elements
147 // from the data structure.
148 void ExtractUnsortedNondestructive(std::vector<T>* output) const;
149 // Return an iterator to the beginning (end) of the container,
150 // with no guarantees about the order of iteration. These iterators are
151 // invalidated by mutation of the data structure.
152 UnsortedIterator unsorted_begin() const { return elements_.begin(); }
153 UnsortedIterator unsorted_end() const { return elements_.begin() + size(); }
154 // Accessor for comparator template argument.
155 Cmp* comparator() { return &cmp_; }
156 // This removes all elements. If Extract() or ExtractUnsorted() have been
157 // called, this will put it back in an empty but useable state.
158 void Reset();
159
160 private:
161 template <typename U>
162 void PushInternal(
163 U&& v,
164 T* dropped); // NOLINT(build/c++11)
165 // elements_ can be in one of two states:
166 // elements_.size() <= limit_: elements_ is an unsorted
167 // vector of elements
168 // pushed so far.
169 // elements_.size() > limit_: The last element of
170 // elements_ is unused;
171 // the other elements of elements_ are an stl heap
172 // whose size is exactly limit_. In this case
173 // elements_.size() is exactly one greater than limit_,
174 // but don't use "elements_.size() == limit_ + 1" to
175 // check for that because you'll get a false positive
176 // if limit_ == size_t(-1).
177 std::vector<T> elements_;
178 size_t limit_; // Maximum number of elements to find
179 Cmp cmp_; // Greater-than comparison function
180 State state_ = UNORDERED;
181};
182// ----------------------------------------------------------------------
183// Implementations of non-inline functions
184template <class T, class Cmp>
185template <typename U>
186void TopN<T, Cmp>::PushInternal(U&& v, T* dropped) { // NOLINT(build/c++11)
187 if (limit_ == 0) {
188 if (dropped) *dropped = std::forward<U>(v); // NOLINT(build/c++11)
189 return;
190 }
191 if (state_ != HEAP_SORTED) {
192 elements_.push_back(std::forward<U>(v)); // NOLINT(build/c++11)
193 if (state_ == UNORDERED || cmp_(elements_.back(), elements_.front())) {
194 // Easy case: we just pushed the new element back
195 } else {
196 // To maintain the BOTTOM_KNOWN state, we need to make sure that
197 // the element at position 0 is always the smallest. So we put
198 // the new element at position 0 and push the original bottom
199 // element in the back.
200 // Warning: this code is subtle.
201 using std::swap;
202 swap(elements_.front(), elements_.back());
203 }
204 if (elements_.size() == limit_ + 1) {
205 // Transition from unsorted vector to a heap.
206 std::make_heap(elements_.begin(), elements_.end(), cmp_);
207 if (dropped) *dropped = std::move(elements_.front());
208 std::pop_heap(elements_.begin(), elements_.end(), cmp_);
209 state_ = HEAP_SORTED;
210 }
211 } else {
212 // Only insert the new element if it is greater than the least element.
213 if (cmp_(v, elements_.front())) {
214 // Store new element in the last slot of elements_. Remember from the
215 // comments on elements_ that this last slot is unused, so we don't
216 // overwrite anything useful.
217 elements_.back() = std::forward<U>(
218 v); // NOLINT(build/c++11)
219 // stp::pop_heap() swaps elements_.front() and elements_.back()
220 // and rearranges elements from [elements_.begin(),
221 // elements_.end() - 1) such that they are a heap according to
222 // cmp_. Net effect: remove elements_.front() from the heap, and
223 // add the new element instead. For more info, see
224 // https://en.cppreference.com/w/cpp/algorithm/pop_heap.
225 std::pop_heap(elements_.begin(), elements_.end(), cmp_);
226 if (dropped) *dropped = std::move(elements_.back());
227 } else {
228 if (dropped) *dropped = std::forward<U>(v); // NOLINT(build/c++11)
229 }
230 }
231}
232template <class T, class Cmp>
234 CHECK(!empty());
235 if (state_ == UNORDERED) {
236 // We need to do a linear scan to find out the bottom element
237 int min_candidate = 0;
238 for (size_t i = 1; i < elements_.size(); ++i) {
239 if (cmp_(elements_[min_candidate], elements_[i])) {
240 min_candidate = i;
241 }
242 }
243 // By swapping the element at position 0 and the minimal
244 // element, we transition to the BOTTOM_KNOWN state
245 if (min_candidate != 0) {
246 using std::swap;
247 swap(elements_[0], elements_[min_candidate]);
248 }
249 state_ = BOTTOM_KNOWN;
250 }
251 return elements_.front();
252}
253template <class T, class Cmp>
254std::vector<T> TopN<T, Cmp>::Take() {
255 std::vector<T> out = std::move(elements_);
256 if (state_ != State::HEAP_SORTED) {
257 std::sort(out.begin(), out.end(), cmp_);
258 } else {
259 out.pop_back();
260 std::sort_heap(out.begin(), out.end(), cmp_);
261 }
262 Reset();
263 return out;
264}
265
266template <class T, class Cmp>
267std::vector<T>* TopN<T, Cmp>::Extract() {
268 auto out = new std::vector<T>;
269 out->swap(elements_);
270 if (state_ != HEAP_SORTED) {
271 std::sort(out->begin(), out->end(), cmp_);
272 } else {
273 out->pop_back();
274 std::sort_heap(out->begin(), out->end(), cmp_);
275 }
276 return out;
277}
278template <class T, class Cmp>
279std::vector<T>* TopN<T, Cmp>::ExtractUnsorted() {
280 auto out = new std::vector<T>;
281 out->swap(elements_);
282 if (state_ == HEAP_SORTED) {
283 // Remove the limit_+1'th element.
284 out->pop_back();
285 }
286 return out;
287}
288template <class T, class Cmp>
289std::vector<T>* TopN<T, Cmp>::ExtractNondestructive() const {
290 auto out = new std::vector<T>;
291 ExtractNondestructive(out);
292 return out;
293}
294template <class T, class Cmp>
295void TopN<T, Cmp>::ExtractNondestructive(std::vector<T>* output) const {
296 CHECK(output);
297 *output = elements_;
298 if (state_ != HEAP_SORTED) {
299 std::sort(output->begin(), output->end(), cmp_);
300 } else {
301 output->pop_back();
302 std::sort_heap(output->begin(), output->end(), cmp_);
303 }
304}
305template <class T, class Cmp>
306std::vector<T>* TopN<T, Cmp>::ExtractUnsortedNondestructive() const {
307 auto elements = new std::vector<T>;
308 ExtractUnsortedNondestructive(elements);
309 return elements;
310}
311template <class T, class Cmp>
312void TopN<T, Cmp>::ExtractUnsortedNondestructive(std::vector<T>* output) const {
313 CHECK(output);
314 *output = elements_;
315 if (state_ == HEAP_SORTED) {
316 // Remove the limit_+1'th element.
317 output->pop_back();
318 }
319}
320template <class T, class Cmp>
321void TopN<T, Cmp>::Reset() {
322 elements_.clear();
323 state_ = UNORDERED;
324}
325} // namespace gtl
326} // namespace operations_research
327#endif // OR_TOOLS_BASE_TOP_N_H_
!
Definition array.h:26
dual_gradient T(y - `dual_solution`) class DiagonalTrustRegionProblemFromQp
In SWIG mode, we don't want anything besides these top-level includes.