Google OR-Tools v9.11
a fast and portable software suite for combinatorial optimization
Loading...
Searching...
No Matches
top_n.h
Go to the documentation of this file.
1// Copyright 2010-2024 Google LLC
2// Licensed under the Apache License, Version 2.0 (the "License");
3// you may not use this file except in compliance with the License.
4// You may obtain a copy of the License at
5//
6// http://www.apache.org/licenses/LICENSE-2.0
7//
8// Unless required by applicable law or agreed to in writing, software
9// distributed under the License is distributed on an "AS IS" BASIS,
10// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11// See the License for the specific language governing permissions and
12// limitations under the License.
13
14// ref:
15// https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/lib/gtl/top_n.h
16// This simple class finds the top n elements of an incrementally provided set
17// of elements which you push one at a time. If the number of elements exceeds
18// n, the lowest elements are incrementally dropped. At the end you get
19// a vector of the top elements sorted in descending order (through Extract() or
20// ExtractNondestructive()), or a vector of the top elements but not sorted
21// (through ExtractUnsorted() or ExtractUnsortedNondestructive()).
22//
23// The value n is specified in the constructor. If there are p elements pushed
24// altogether:
25// The total storage requirements are O(min(n, p)) elements
26// The running time is O(p * log(min(n, p))) comparisons
27// If n is a constant, the total storage required is a constant and the running
28// time is linear in p.
29
30#ifndef OR_TOOLS_BASE_TOP_N_H_
31#define OR_TOOLS_BASE_TOP_N_H_
32
33#include <stddef.h>
34
35#include <algorithm>
36#include <functional>
37#include <string>
38#include <vector>
39
41
42namespace operations_research {
43namespace gtl {
44// Cmp is an stl binary predicate. Note that Cmp is the "greater" predicate,
45// not the more commonly used "less" predicate.
46//
47// If you use a "less" predicate here, the TopN will pick out the bottom N
48// elements out of the ones passed to it, and it will return them sorted in
49// ascending order.
50//
51// TopN is rule-of-zero copyable and movable if its members are.
52template <class T, class Cmp = std::greater<T> >
53class TopN {
54 public:
55 // The TopN is in one of the three states:
56 //
57 // o UNORDERED: this is the state an instance is originally in,
58 // where the elements are completely orderless.
59 //
60 // o BOTTOM_KNOWN: in this state, we keep the invariant that there
61 // is at least one element in it, and the lowest element is at
62 // position 0. The elements in other positions remain
63 // unsorted. This state is reached if the state was originally
64 // UNORDERED and a peek_bottom() function call is invoked.
65 //
66 // o HEAP_SORTED: in this state, the array is kept as a heap and
67 // there are exactly (limit_+1) elements in the array. This
68 // state is reached when at least (limit_+1) elements are
69 // pushed in.
70 //
71 // The state transition graph is at follows:
72 //
73 // peek_bottom() (limit_+1) elements
74 // UNORDERED --------------> BOTTOM_KNOWN --------------------> HEAP_SORTED
75 // | ^
76 // | (limit_+1) elements |
77 // +-----------------------------------------------------------+
78 enum State { UNORDERED, BOTTOM_KNOWN, HEAP_SORTED };
79 using UnsortedIterator = typename std::vector<T>::const_iterator;
80 // 'limit' is the maximum number of top results to return.
81 explicit TopN(size_t limit) : TopN(limit, Cmp()) {}
82 TopN(size_t limit, const Cmp& cmp) : limit_(limit), cmp_(cmp) {}
83 size_t limit() const { return limit_; }
84 // Number of elements currently held by this TopN object. This
85 // will be no greater than 'limit' passed to the constructor.
86 size_t size() const { return std::min(elements_.size(), limit_); }
87 bool empty() const { return size() == 0; }
88 // If you know how many elements you will push at the time you create the
89 // TopN object, you can call reserve to preallocate the memory that TopN
90 // will need to process all 'n' pushes. Calling this method is optional.
91 void reserve(size_t n) { elements_.reserve(std::min(n, limit_ + 1)); }
92 // Push 'v'. If the maximum number of elements was exceeded, drop the
93 // lowest element and return it in 'dropped' (if given). If the maximum is not
94 // exceeded, 'dropped' will remain unchanged. 'dropped' may be omitted or
95 // nullptr, in which case it is not filled in.
96 // Requires: T is CopyAssignable, Swappable
97 void push(const T& v) { push(v, nullptr); }
98 void push(const T& v, T* dropped) { PushInternal(v, dropped); }
99 // Move overloads of push.
100 // Requires: T is MoveAssignable, Swappable
101 void push(T&& v) { // NOLINT(build/c++11)
102 push(std::move(v), nullptr);
103 }
104 void push(T&& v, T* dropped) { // NOLINT(build/c++11)
105 PushInternal(std::move(v), dropped);
106 }
107 // Peeks the bottom result without calling Extract()
108 const T& peek_bottom();
109 // Extract the elements as a vector sorted in descending order. The caller
110 // assumes ownership of the vector and must delete it when done. This is a
111 // destructive operation. The only method that can be called immediately
112 // after Extract() is Reset().
113 std::vector<T>* Extract();
114 // Similar to Extract(), but makes no guarantees the elements are in sorted
115 // order. As with Extract(), the caller assumes ownership of the vector and
116 // must delete it when done. This is a destructive operation. The only
117 // method that can be called immediately after ExtractUnsorted() is Reset().
118 std::vector<T>* ExtractUnsorted();
119 // A non-destructive version of Extract(). Copy the elements in a new vector
120 // sorted in descending order and return it. The caller assumes ownership of
121 // the new vector and must delete it when done. After calling
122 // ExtractNondestructive(), the caller can continue to push() new elements.
123 std::vector<T>* ExtractNondestructive() const;
124 // A non-destructive version of Extract(). Copy the elements to a given
125 // vector sorted in descending order. After calling
126 // ExtractNondestructive(), the caller can continue to push() new elements.
127 // Note:
128 // 1. The given argument must to be allocated.
129 // 2. Any data contained in the vector prior to the call will be deleted
130 // from it. After the call the vector will contain only the elements
131 // from the data structure.
132 void ExtractNondestructive(std::vector<T>* output) const;
133 // A non-destructive version of ExtractUnsorted(). Copy the elements in a new
134 // vector and return it, with no guarantees the elements are in sorted order.
135 // The caller assumes ownership of the new vector and must delete it when
136 // done. After calling ExtractUnsortedNondestructive(), the caller can
137 // continue to push() new elements.
138 std::vector<T>* ExtractUnsortedNondestructive() const;
139 // A non-destructive version of ExtractUnsorted(). Copy the elements into
140 // a given vector, with no guarantees the elements are in sorted order.
141 // After calling ExtractUnsortedNondestructive(), the caller can continue
142 // to push() new elements.
143 // Note:
144 // 1. The given argument must to be allocated.
145 // 2. Any data contained in the vector prior to the call will be deleted
146 // from it. After the call the vector will contain only the elements
147 // from the data structure.
148 void ExtractUnsortedNondestructive(std::vector<T>* output) const;
149 // Return an iterator to the beginning (end) of the container,
150 // with no guarantees about the order of iteration. These iterators are
151 // invalidated by mutation of the data structure.
152 UnsortedIterator unsorted_begin() const { return elements_.begin(); }
153 UnsortedIterator unsorted_end() const { return elements_.begin() + size(); }
154 // Accessor for comparator template argument.
155 Cmp* comparator() { return &cmp_; }
156 // This removes all elements. If Extract() or ExtractUnsorted() have been
157 // called, this will put it back in an empty but useable state.
158 void Reset();
159
160 private:
161 template <typename U>
162 void PushInternal(
163 U&& v,
164 T* dropped); // NOLINT(build/c++11)
165 // elements_ can be in one of two states:
166 // elements_.size() <= limit_: elements_ is an unsorted
167 // vector of elements
168 // pushed so far.
169 // elements_.size() > limit_: The last element of
170 // elements_ is unused;
171 // the other elements of elements_ are an stl heap
172 // whose size is exactly limit_. In this case
173 // elements_.size() is exactly one greater than limit_,
174 // but don't use "elements_.size() == limit_ + 1" to
175 // check for that because you'll get a false positive
176 // if limit_ == size_t(-1).
177 std::vector<T> elements_;
178 size_t limit_; // Maximum number of elements to find
179 Cmp cmp_; // Greater-than comparison function
180 State state_ = UNORDERED;
181};
182// ----------------------------------------------------------------------
183// Implementations of non-inline functions
184template <class T, class Cmp>
185template <typename U>
186void TopN<T, Cmp>::PushInternal(U&& v, T* dropped) { // NOLINT(build/c++11)
187 if (limit_ == 0) {
188 if (dropped) *dropped = std::forward<U>(v); // NOLINT(build/c++11)
189 return;
190 }
191 if (state_ != HEAP_SORTED) {
192 elements_.push_back(std::forward<U>(v)); // NOLINT(build/c++11)
193 if (state_ == UNORDERED || cmp_(elements_.back(), elements_.front())) {
194 // Easy case: we just pushed the new element back
195 } else {
196 // To maintain the BOTTOM_KNOWN state, we need to make sure that
197 // the element at position 0 is always the smallest. So we put
198 // the new element at position 0 and push the original bottom
199 // element in the back.
200 // Warning: this code is subtle.
201 using std::swap;
202 swap(elements_.front(), elements_.back());
203 }
204 if (elements_.size() == limit_ + 1) {
205 // Transition from unsorted vector to a heap.
206 std::make_heap(elements_.begin(), elements_.end(), cmp_);
207 if (dropped) *dropped = std::move(elements_.front());
208 std::pop_heap(elements_.begin(), elements_.end(), cmp_);
209 state_ = HEAP_SORTED;
210 }
211 } else {
212 // Only insert the new element if it is greater than the least element.
213 if (cmp_(v, elements_.front())) {
214 // Store new element in the last slot of elements_. Remember from the
215 // comments on elements_ that this last slot is unused, so we don't
216 // overwrite anything useful.
217 elements_.back() = std::forward<U>(
218 v); // NOLINT(build/c++11)
219 // stp::pop_heap() swaps elements_.front() and elements_.back()
220 // and rearranges elements from [elements_.begin(),
221 // elements_.end() - 1) such that they are a heap according to
222 // cmp_. Net effect: remove elements_.front() from the heap, and
223 // add the new element instead. For more info, see
224 // https://en.cppreference.com/w/cpp/algorithm/pop_heap.
225 std::pop_heap(elements_.begin(), elements_.end(), cmp_);
226 if (dropped) *dropped = std::move(elements_.back());
227 } else {
228 if (dropped) *dropped = std::forward<U>(v); // NOLINT(build/c++11)
229 }
230 }
231}
232template <class T, class Cmp>
233const T& TopN<T, Cmp>::peek_bottom() {
234 CHECK(!empty());
235 if (state_ == UNORDERED) {
236 // We need to do a linear scan to find out the bottom element
237 int min_candidate = 0;
238 for (size_t i = 1; i < elements_.size(); ++i) {
239 if (cmp_(elements_[min_candidate], elements_[i])) {
240 min_candidate = i;
241 }
242 }
243 // By swapping the element at position 0 and the minimal
244 // element, we transition to the BOTTOM_KNOWN state
245 if (min_candidate != 0) {
246 using std::swap;
247 swap(elements_[0], elements_[min_candidate]);
248 }
249 state_ = BOTTOM_KNOWN;
250 }
251 return elements_.front();
252}
253template <class T, class Cmp>
254std::vector<T>* TopN<T, Cmp>::Extract() {
255 auto out = new std::vector<T>;
256 out->swap(elements_);
257 if (state_ != HEAP_SORTED) {
258 std::sort(out->begin(), out->end(), cmp_);
259 } else {
260 out->pop_back();
261 std::sort_heap(out->begin(), out->end(), cmp_);
262 }
263 return out;
264}
265template <class T, class Cmp>
266std::vector<T>* TopN<T, Cmp>::ExtractUnsorted() {
267 auto out = new std::vector<T>;
268 out->swap(elements_);
269 if (state_ == HEAP_SORTED) {
270 // Remove the limit_+1'th element.
271 out->pop_back();
272 }
273 return out;
274}
275template <class T, class Cmp>
276std::vector<T>* TopN<T, Cmp>::ExtractNondestructive() const {
277 auto out = new std::vector<T>;
278 ExtractNondestructive(out);
279 return out;
280}
281template <class T, class Cmp>
282void TopN<T, Cmp>::ExtractNondestructive(std::vector<T>* output) const {
283 CHECK(output);
284 *output = elements_;
285 if (state_ != HEAP_SORTED) {
286 std::sort(output->begin(), output->end(), cmp_);
287 } else {
288 output->pop_back();
289 std::sort_heap(output->begin(), output->end(), cmp_);
290 }
291}
292template <class T, class Cmp>
293std::vector<T>* TopN<T, Cmp>::ExtractUnsortedNondestructive() const {
294 auto elements = new std::vector<T>;
295 ExtractUnsortedNondestructive(elements);
296 return elements;
297}
298template <class T, class Cmp>
299void TopN<T, Cmp>::ExtractUnsortedNondestructive(std::vector<T>* output) const {
300 CHECK(output);
301 *output = elements_;
302 if (state_ == HEAP_SORTED) {
303 // Remove the limit_+1'th element.
304 output->pop_back();
305 }
306}
307template <class T, class Cmp>
308void TopN<T, Cmp>::Reset() {
309 elements_.clear();
310 state_ = UNORDERED;
311}
312} // namespace gtl
313} // namespace operations_research
314#endif // OR_TOOLS_BASE_TOP_N_H_
IntegerValue size
const int64_t limit_
In SWIG mode, we don't want anything besides these top-level includes.