tlx
Loading...
Searching...
No Matches
multisequence_partition.hpp
Go to the documentation of this file.
1/*******************************************************************************
2 * tlx/algorithm/multisequence_partition.hpp
3 *
4 * Implementation of multisequence partition and selection.
5 *
6 * Copied and modified from STXXL, see http://stxxl.org, which itself extracted
7 * it from MCSTL http://algo2.iti.uni-karlsruhe.de/singler/mcstl/. Both are
8 * distributed under the Boost Software License, Version 1.0.
9 *
10 * Part of tlx - http://panthema.net/tlx
11 *
12 * Copyright (C) 2007 Johannes Singler <singler@ira.uka.de>
13 * Copyright (C) 2014-2018 Timo Bingmann <tb@panthema.net>
14 *
15 * All rights reserved. Published under the Boost Software License, Version 1.0
16 ******************************************************************************/
17
18#ifndef TLX_ALGORITHM_MULTISEQUENCE_PARTITION_HEADER
19#define TLX_ALGORITHM_MULTISEQUENCE_PARTITION_HEADER
20
21#include <algorithm>
22#include <cassert>
23#include <queue>
24#include <utility>
25#include <vector>
26
29
30namespace tlx {
31
32//! \addtogroup tlx_algorithm
33//! \{
34
36
37//! Compare a pair of types lexicographically, ascending.
38template <typename T1, typename T2, typename Comparator>
40{
41protected:
42 Comparator& comp_;
43
44public:
45 explicit lexicographic(Comparator& comp) : comp_(comp) { }
46
47 inline bool operator () (const std::pair<T1, T2>& p1,
48 const std::pair<T1, T2>& p2) {
49 if (comp_(p1.first, p2.first))
50 return true;
51
52 if (comp_(p2.first, p1.first))
53 return false;
54
55 // firsts are equal
56 return p1.second < p2.second;
57 }
58};
59
60//! Compare a pair of types lexicographically, descending.
61template <typename T1, typename T2, typename Comparator>
63{
64protected:
65 Comparator& comp_;
66
67public:
68 explicit lexicographic_rev(Comparator& comp) : comp_(comp) { }
69
70 inline bool operator () (const std::pair<T1, T2>& p1,
71 const std::pair<T1, T2>& p2) {
72 if (comp_(p2.first, p1.first))
73 return true;
74
75 if (comp_(p1.first, p2.first))
76 return false;
77
78 // firsts are equal
79 return p2.second < p1.second;
80 }
81};
82
83} // namespace multisequence_partition_detail
84
85/*!
86 * Splits several sorted sequences at a certain global rank, resulting in a
87 * splitting point for each sequence. The sequences are passed via a sequence
88 * of random-access iterator pairs, none of the sequences may be empty. If
89 * there are several equal elements across the split, the ones on the left side
90 * will be chosen from sequences with smaller number.
91 *
92 * \param begin_seqs Begin of the sequence of iterator pairs.
93 * \param end_seqs End of the sequence of iterator pairs.
94 * \param rank The global rank to partition at.
95 * \param begin_offsets A random-access sequence begin where the result will be
96 * stored in. Each element of the sequence is an iterator that points to the
97 * first element on the greater part of the respective sequence.
98 * \param comp The ordering functor, defaults to std::less<T>.
99 */
100template <typename RanSeqs, typename RankType, typename RankIterator,
101 typename Comparator = std::less<
102 typename std::iterator_traits<
103 typename std::iterator_traits<RanSeqs>
104 ::value_type::first_type>::value_type>
105 >
107 const RanSeqs& begin_seqs, const RanSeqs& end_seqs,
108 const RankType& rank,
109 RankIterator begin_offsets,
110 Comparator comp = Comparator()) {
111
112 using iterator = typename std::iterator_traits<RanSeqs>
113 ::value_type::first_type;
114 using diff_type = typename std::iterator_traits<iterator>
115 ::difference_type;
116 using value_type = typename std::iterator_traits<iterator>
117 ::value_type;
118
119 using SamplePair = std::pair<value_type, diff_type>;
120
121 using namespace multisequence_partition_detail;
122
123 // comparators for SamplePair
124 lexicographic<value_type, diff_type, Comparator> lcomp(comp);
125 lexicographic_rev<value_type, diff_type, Comparator> lrcomp(comp);
126
127 // number of sequences, number of elements in total (possibly including
128 // padding)
129 const diff_type m = std::distance(begin_seqs, end_seqs);
130 diff_type nmax, n;
131 RankType N = 0;
132
133 for (diff_type i = 0; i < m; ++i)
134 {
135 N += std::distance(begin_seqs[i].first, begin_seqs[i].second);
136 assert(std::distance(begin_seqs[i].first, begin_seqs[i].second) > 0);
137 }
138
139 if (rank == N)
140 {
141 for (diff_type i = 0; i < m; ++i)
142 begin_offsets[i] = begin_seqs[i].second; // very end
143 return;
144 }
145
146 assert(m != 0 && N != 0 && rank < N);
147
148 simple_vector<diff_type> seqlen(m);
149
150 seqlen[0] = std::distance(begin_seqs[0].first, begin_seqs[0].second);
151 nmax = seqlen[0];
152 for (diff_type i = 1; i < m; ++i)
153 {
154 seqlen[i] = std::distance(begin_seqs[i].first, begin_seqs[i].second);
155 nmax = std::max(nmax, seqlen[i]);
156 }
157
158 // pad all lists to this length, at least as long as any ns[i], equality
159 // iff nmax = 2^k - 1
160 diff_type l = round_up_to_power_of_two(nmax + 1) - 1;
161
162 simple_vector<diff_type> a(m), b(m);
163
164 for (diff_type i = 0; i < m; ++i)
165 a[i] = 0, b[i] = l;
166
167 n = l / 2;
168
169 // invariants:
170 // 0 <= a[i] <= seqlen[i], 0 <= b[i] <= l
171
172 // initial partition
173
174 std::vector<SamplePair> sample;
175
176 for (diff_type i = 0; i < m; ++i) {
177 if (n < seqlen[i]) {
178 // sequence long enough
179 sample.push_back(SamplePair(begin_seqs[i].first[n], i));
180 }
181 }
182
183 std::sort(sample.begin(), sample.end(), lcomp);
184
185 for (diff_type i = 0; i < m; ++i) {
186 // conceptual infinity
187 if (n >= seqlen[i]) {
188 // sequence too short, conceptual infinity
189 sample.push_back(
190 SamplePair(begin_seqs[i].first[0] /* dummy element */, i));
191 }
192 }
193
194 size_t localrank = static_cast<size_t>(rank / l);
195
196 size_t j;
197 for (j = 0; j < localrank && ((n + 1) <= seqlen[sample[j].second]); ++j)
198 a[sample[j].second] += n + 1;
199 for ( ; j < static_cast<size_t>(m); ++j)
200 b[sample[j].second] -= n + 1;
201
202 // further refinement
203
204 while (n > 0)
205 {
206 n /= 2;
207
208 const value_type* lmax = nullptr; // impossible to avoid the warning?
209 for (diff_type i = 0; i < m; ++i)
210 {
211 if (a[i] > 0)
212 {
213 if (!lmax)
214 lmax = &(begin_seqs[i].first[a[i] - 1]);
215 else
216 {
217 // max, favor rear sequences
218 if (!comp(begin_seqs[i].first[a[i] - 1], *lmax))
219 lmax = &(begin_seqs[i].first[a[i] - 1]);
220 }
221 }
222 }
223
224 for (diff_type i = 0; i < m; ++i)
225 {
226 diff_type middle = (b[i] + a[i]) / 2;
227 if (lmax && middle < seqlen[i] &&
228 comp(begin_seqs[i].first[middle], *lmax))
229 a[i] = std::min(a[i] + n + 1, seqlen[i]);
230 else
231 b[i] -= n + 1;
232 }
233
234 diff_type leftsize = 0;
235 for (diff_type i = 0; i < m; ++i)
236 leftsize += a[i] / (n + 1);
237
238 diff_type skew = rank / (n + 1) - leftsize;
239
240 if (skew > 0)
241 {
242 // move to the left, find smallest
243 std::priority_queue<
244 SamplePair, std::vector<SamplePair>,
245 lexicographic_rev<value_type, diff_type, Comparator> >
246 pq(lrcomp);
247
248 for (diff_type i = 0; i < m; ++i) {
249 if (b[i] < seqlen[i])
250 pq.push(SamplePair(begin_seqs[i].first[b[i]], i));
251 }
252
253 for ( ; skew != 0 && !pq.empty(); --skew)
254 {
255 diff_type src = pq.top().second;
256 pq.pop();
257
258 a[src] = std::min(a[src] + n + 1, seqlen[src]);
259 b[src] += n + 1;
260
261 if (b[src] < seqlen[src])
262 pq.push(SamplePair(begin_seqs[src].first[b[src]], src));
263 }
264 }
265 else if (skew < 0)
266 {
267 // move to the right, find greatest
268 std::priority_queue<
269 SamplePair, std::vector<SamplePair>,
270 lexicographic<value_type, diff_type, Comparator> >
271 pq(lcomp);
272
273 for (diff_type i = 0; i < m; ++i) {
274 if (a[i] > 0)
275 pq.push(SamplePair(begin_seqs[i].first[a[i] - 1], i));
276 }
277
278 for ( ; skew != 0; ++skew)
279 {
280 diff_type src = pq.top().second;
281 pq.pop();
282
283 a[src] -= n + 1;
284 b[src] -= n + 1;
285
286 if (a[src] > 0)
287 pq.push(SamplePair(begin_seqs[src].first[a[src] - 1], src));
288 }
289 }
290 }
291
292 // postconditions: a[i] == b[i] in most cases, except when a[i] has been
293 // clamped because of having reached the boundary
294
295 // now return the result, calculate the offset, compare the keys on both
296 // edges of the border
297
298 // maximum of left edge, minimum of right edge
299 value_type* maxleft = nullptr, * minright = nullptr;
300 for (diff_type i = 0; i < m; ++i)
301 {
302 if (a[i] > 0)
303 {
304 if (!maxleft)
305 {
306 maxleft = &(begin_seqs[i].first[a[i] - 1]);
307 }
308 else
309 {
310 // max, favor rear sequences
311 if (!comp(begin_seqs[i].first[a[i] - 1], *maxleft))
312 maxleft = &(begin_seqs[i].first[a[i] - 1]);
313 }
314 }
315 if (b[i] < seqlen[i])
316 {
317 if (!minright)
318 {
319 minright = &(begin_seqs[i].first[b[i]]);
320 }
321 else
322 {
323 // min, favor fore sequences
324 if (comp(begin_seqs[i].first[b[i]], *minright))
325 minright = &(begin_seqs[i].first[b[i]]);
326 }
327 }
328 }
329
330 for (diff_type i = 0; i < m; ++i)
331 begin_offsets[i] = begin_seqs[i].first + a[i];
332}
333
334//! \}
335
336} // namespace tlx
337
338#endif // !TLX_ALGORITHM_MULTISEQUENCE_PARTITION_HEADER
339
340/******************************************************************************/
bool operator()(const std::pair< T1, T2 > &p1, const std::pair< T1, T2 > &p2)
bool operator()(const std::pair< T1, T2 > &p1, const std::pair< T1, T2 > &p2)
void multisequence_partition(const RanSeqs &begin_seqs, const RanSeqs &end_seqs, const RankType &rank, RankIterator begin_offsets, Comparator comp=Comparator())
Splits several sorted sequences at a certain global rank, resulting in a splitting point for each seq...
SimpleVector< T > simple_vector
make template alias due to similarity with std::vector
static int round_up_to_power_of_two(int i)
does what it says: round up to next power of two