SDSL 3.0.3
Succinct Data Structure Library
Loading...
Searching...
No Matches
suffix_array_helper.hpp
Go to the documentation of this file.
1// Copyright (c) 2016, the SDSL Project Authors. All rights reserved.
2// Please see the AUTHORS file for details. Use of this source code is governed
3// by a BSD license that can be found in the LICENSE file.
8#ifndef INCLUDED_SDSL_SUFFIX_ARRAY_HELPER
9#define INCLUDED_SDSL_SUFFIX_ARRAY_HELPER
10
11#include <cassert>
12
13#include <sdsl/iterators.hpp>
15
16namespace sdsl
17{
18
20/*
21 * \param i Position in the first row.
22 * \param csa CSA
23 * \par Time complexity
24 * \f$ \Order{\log \sigma} \f$
25 * TODO: add hinted binary search? Two way binary search?
26 */
27template <typename t_csa>
28typename t_csa::char_type first_row_symbol(const typename t_csa::size_type i, t_csa const & csa)
29{
30 assert(i < csa.size());
31 if (csa.sigma < 16)
32 { //<- if sigma is small search linear
33 typename t_csa::size_type res = 1;
34 while (res < csa.sigma and csa.C[res] <= i)
35 ++res;
36 return csa.comp2char[res - 1];
37 }
38 else
39 {
40 // binary search the character with C
41 typename t_csa::size_type upper_c = csa.sigma,
42 lower_c = 0; // lower_c inclusive, upper_c exclusive
43 typename t_csa::size_type res = 0;
44 do
45 {
46 res = (upper_c + lower_c) / 2;
47 if (i < csa.C[res])
48 {
49 upper_c = res;
50 }
51 else if (i >= csa.C[res + 1])
52 {
53 lower_c = res + 1;
54 }
55 }
56 while (i < csa.C[res] or i >= csa.C[res + 1]); // i is not in the interval
57 return csa.comp2char[res];
58 }
59}
60
61// psi[] trait
62template <typename t_csa, bool t_direction>
64{
65 typedef typename t_csa::value_type value_type;
66 typedef typename t_csa::size_type size_type;
67 static value_type access(t_csa const & csa, size_type i)
68 {
69 return csa.psi[i];
70 }
71};
72
73// lf[] trait
74template <typename t_csa>
75struct traverse_csa_psi_trait<t_csa, false>
76{
77 typedef typename t_csa::value_type value_type;
78 typedef typename t_csa::size_type size_type;
79 static value_type access(t_csa const & csa, size_type i)
80 {
81 // TODO: in case of a very sparse sampling of SA it may be faster to
82 // use \sigma binary searches on PSI function to determine the
83 // LF values.
84 return csa.isa[(csa[i] + csa.size() - 1) % csa.size()];
85 }
86};
87
88template <typename t_csa, bool t_direction>
90{
91public:
92 typedef typename t_csa::value_type value_type;
93 typedef typename t_csa::size_type size_type;
94 typedef typename t_csa::difference_type difference_type;
98
99private:
100 t_csa const & m_csa;
101
102public:
104 traverse_csa_psi(t_csa const & csa_psi) : m_csa(csa_psi)
105 {}
106
107 traverse_csa_psi(traverse_csa_psi const & tcsa) : m_csa(tcsa.m_csa)
108 {}
109
111
114 {
115 assert(i < size());
117 }
118
121 {
122 return m_csa.size();
123 }
124
127 {
128 return m_csa.empty();
129 }
130
132 {
133 return const_iterator(this, 0);
134 }
135
137
141 {
142 return const_iterator(this, size());
143 }
144};
145
146// psi[] trait
147template <typename t_csa, bool t_direction>
149{
150 typedef typename t_csa::value_type value_type;
151 typedef typename t_csa::size_type size_type;
152 static value_type access(t_csa const & csa, size_type i)
153 {
154 // \f$\Psi[i] = SA^{-1}[SA[i]+1 \mod n]\f$, where \f$n\f$ is the length of the suffix array SA
155 return csa.isa[(csa[i] + 1) % csa.size()];
156 }
157};
158
159// lf[] trait
160template <typename t_csa>
161struct traverse_csa_saisa_trait<t_csa, false>
162{
163 typedef typename t_csa::value_type value_type;
164 typedef typename t_csa::size_type size_type;
165 static value_type access(t_csa const & csa, size_type i)
166 {
167 // TODO: in case of a very sparse sampling of SA it may be faster to
168 // use \sigma binary searches on PSI function to determine the
169 // LF values.
170 return csa.isa[(csa[i] + csa.size() - 1) % csa.size()];
171 }
172};
173
176template <typename t_csa, bool t_direction>
178{
179public:
180 typedef typename t_csa::value_type value_type;
181 typedef typename t_csa::size_type size_type;
182 typedef typename t_csa::difference_type difference_type;
186
187private:
188 t_csa const & m_csa;
189
190public:
192 traverse_csa_saisa(t_csa const & csa) : m_csa(csa)
193 {}
194
195 // Copy constructor
196 traverse_csa_saisa(traverse_csa_saisa const & tcsa) : m_csa(tcsa.m_csa)
197 {}
198
200
205 {
206 assert(i < size());
208 }
209
212 {
213 return m_csa.size();
214 }
215
218 {
219 return m_csa, empty();
220 }
221
223
227 {
228 return const_iterator(this, 0);
229 }
230
232
236 {
237 return const_iterator(this, size());
238 }
239};
240
242template <typename t_csa>
244{
245public:
246 typedef typename t_csa::char_type value_type;
247 typedef typename t_csa::size_type size_type;
248 typedef typename t_csa::char_type char_type;
249 typedef typename t_csa::difference_type difference_type;
252 typedef typename t_csa::alphabet_category alphabet_category;
253
254private:
255 t_csa const & m_csa; //<- pointer to the (compressed) suffix array that is based on the \f$\Psi\f$ function.
256
257public:
259 bwt_of_csa_psi(t_csa const & csa) : m_csa(csa)
260 {}
261
263
268 {
269 assert(i < size());
270 size_type pos = m_csa.lf[i];
271 return first_row_symbol(pos, m_csa);
272 }
273
275
283 {
284 return m_csa.rank_bwt(i, c);
285 }
286
288
296 {
297 return m_csa.select_bwt(i, c);
298 }
299
302 {
303 return m_csa.size();
304 }
305
308 {
309 return m_csa.empty();
310 }
311
313
317 {
318 return const_iterator(this, 0);
319 }
320
322
326 {
327 return const_iterator(this, size());
328 }
329};
330
331// psi[] trait
332template <typename t_csa, bool t_direction>
334{
335 typedef typename t_csa::value_type value_type;
336 typedef typename t_csa::char_type char_type;
337 typedef typename t_csa::size_type size_type;
338 static value_type access(t_csa const & csa, size_type i)
339 {
340 char_type c = csa.F[i];
341 return csa.wavelet_tree.select(i - csa.C[csa.char2comp[c]] + 1, c);
342 }
343};
344
345// lf[] trait
346template <typename t_csa>
347struct traverse_csa_wt_traits<t_csa, false>
348{
349 typedef typename t_csa::value_type value_type;
350 typedef typename t_csa::char_type char_type;
351 typedef typename t_csa::size_type size_type;
352 static value_type access(t_csa const & csa, size_type i)
353 {
354 typename t_csa::char_type c;
355 auto rc = csa.wavelet_tree.inverse_select(i);
356 size_type j = rc.first;
357 c = rc.second;
358 return csa.C[csa.char2comp[c]] + j;
359 }
360};
361
364template <typename t_csa, bool t_direction>
365class traverse_csa_wt
366{
367public:
368 typedef typename t_csa::value_type value_type;
369 typedef typename t_csa::size_type size_type;
370 typedef typename t_csa::char_type char_type;
371 typedef typename t_csa::difference_type difference_type;
375
376private:
377 t_csa const & m_csa; //<- pointer to the (compressed) suffix array that is based on a wavelet tree
378 traverse_csa_wt(){}; // disable default constructor
379
380public:
382 traverse_csa_wt(t_csa const & csa_wt) : m_csa(csa_wt)
383 {}
384
385
390 {
391 assert(i < m_csa.size());
393 }
394
397 {
398 return m_csa.size();
399 }
400
402 {
403 return m_csa.empty();
404 }
405
407 {
408 return const_iterator(this, 0);
409 }
410
412 {
413 return const_iterator(this, size());
414 }
415};
416
417template <typename t_csa>
418class bwt_of_csa_wt
419{
420public:
421 typedef const typename t_csa::char_type value_type;
422 typedef typename t_csa::size_type size_type;
423 typedef typename t_csa::char_type char_type;
424 typedef typename t_csa::difference_type difference_type;
427 typedef typename t_csa::alphabet_category alphabet_category;
428
429private:
430 t_csa const & m_csa; //<- pointer to the (compressed) suffix array that is based on a wavelet tree
431 bwt_of_csa_wt(){}; // disable default constructor
432
433public:
435 bwt_of_csa_wt(t_csa const & csa_wt) : m_csa(csa_wt)
436 {}
437
438
443 {
444 assert(i < size());
445 return m_csa.wavelet_tree[i];
446 }
447
449 {
450 return m_csa.size();
451 }
452
454
462 {
463 return m_csa.rank_bwt(i, c);
464 }
465
467
475 {
476 return m_csa.select(i, c);
477 }
478
481 {
482 return m_csa.empty();
483 }
484
486 {
487 return const_iterator(this, 0);
488 }
489
491 {
492 return const_iterator(this, size());
493 }
494};
495
496template <typename t_csa>
497class isa_of_csa_wt
498{
499public:
500 typedef typename t_csa::value_type value_type;
501 typedef typename t_csa::size_type size_type;
502 typedef typename t_csa::difference_type difference_type;
506
507private:
508 t_csa const & m_csa; //<- pointer to the (compressed) suffix array that is based on a wavelet tree
509 isa_of_csa_wt(){}; // disable default constructor
510
511public:
513 isa_of_csa_wt(t_csa const & csa_wt) : m_csa(csa_wt)
514 {}
515
517
520 {
521 assert(i < size());
522 auto sample = m_csa.isa_sample.sample_qeq(i);
523 value_type result = std::get<0>(sample);
524 if (std::get<1>(sample) < i)
525 {
526 i = std::get<1>(sample) + m_csa.size() - i;
527 }
528 else
529 {
530 i = std::get<1>(sample) - i;
531 }
532 while (i--)
533 {
534 result = m_csa.lf[result];
535 }
536 return result;
537 }
538
541 {
542 return m_csa.size();
543 }
544
546 {
547 return m_csa.empty();
548 }
549
551 {
552 return const_iterator(this, 0);
553 }
554
556 {
557 return const_iterator(this, size());
558 }
559};
560
561template <typename t_csa>
562class isa_of_csa_psi
563{
564public:
565 typedef typename t_csa::value_type value_type;
566 typedef typename t_csa::size_type size_type;
567 typedef typename t_csa::difference_type difference_type;
571
572private:
573 t_csa const & m_csa; //<- pointer to the (compressed) suffix array that is based on a wavelet tree
574 isa_of_csa_psi(){}; // disable default constructor
575
576public:
578 isa_of_csa_psi(t_csa const & csa_wt) : m_csa(csa_wt)
579 {}
580
582
585 {
586 assert(i < size());
587 // get the rightmost sampled isa value to the left of i
588 auto sample = m_csa.isa_sample.sample_leq(i);
589 value_type result = std::get<0>(sample);
590 i = i - std::get<1>(sample);
591 while (i--)
592 {
593 result = m_csa.psi[result];
594 }
595 return result;
596 }
597
599 {
600 return m_csa.size();
601 }
602
604 {
605 return m_csa.empty();
606 }
607
609 {
610 return const_iterator(this, 0);
611 }
612
614 {
615 return const_iterator(this, size());
616 }
617};
618
619template <typename t_csa>
621{
622public:
623 typedef const typename t_csa::char_type value_type;
624 typedef typename t_csa::size_type size_type;
625 typedef typename t_csa::difference_type difference_type;
628 typedef typename t_csa::alphabet_category alphabet_category;
629
630private:
631 t_csa const & m_csa;
632
633public:
635 first_row_of_csa(t_csa const & csa) : m_csa(csa)
636 {}
637
638
643 {
644 assert(i < size());
645 return first_row_symbol(i, m_csa);
646 }
647
649 {
650 return m_csa.size();
651 }
652
654 {
655 return m_csa.empty();
656 }
657
659 {
660 return const_iterator(this, 0);
661 }
662
664 {
665 return const_iterator(this, size());
666 }
667};
668
669template <typename t_csa>
670class text_of_csa
671{
672public:
673 typedef typename t_csa::char_type value_type;
674 typedef typename t_csa::size_type size_type;
675 typedef typename t_csa::difference_type difference_type;
678 typedef typename t_csa::alphabet_category alphabet_category;
679
680private:
681 t_csa const & m_csa;
682 text_of_csa()
683 {}
684
685public:
687 text_of_csa(t_csa const & csa) : m_csa(csa)
688 {}
689
691
696 {
697 assert(i < size());
698 return first_row_symbol(m_csa.isa[i], m_csa);
699 }
700
703 {
704 return m_csa.size();
705 }
706
709 {
710 return m_csa.empty();
711 }
712
714
718 {
719 return const_iterator(this, 0);
720 }
721
723
727 {
728 return const_iterator(this, size());
729 }
730};
731} // namespace sdsl
732
733#endif
size_type size() const
Returns the size of the function.
value_type operator[](size_type i) const
Calculate the Burrows Wheeler Transform (BWT) at position i.
bwt_of_csa_psi(t_csa const &csa)
Constructor.
random_access_const_iterator< bwt_of_csa_psi > const_iterator
csa_bitcompressed::alphabet_category alphabet_category
size_type rank(size_type i, const char_type c) const
Calculates how many symbols c are in the prefix [0..i-1].
size_type empty() const
Returns if the bwt is empty.
size_type select(size_type i, const char_type c) const
Calculates the position of the i-th c.
const_iterator end() const
Returns a const_iterator to the element after the last element.
const_iterator begin() const
Returns a const_iterator to the first element.
csa_bitcompressed::difference_type difference_type
bwt_of_csa_wt(t_csa const &csa_wt)
Constructor.
size_type rank(size_type i, const char_type c) const
Calculates how many symbols c are in the prefix [0..i-1].
size_type select(size_type i, const char_type c) const
Calculates the position of the i-th c.
random_access_const_iterator< bwt_of_csa_wt > const_iterator
size_type empty() const
Returns if the BWT function is empty.
size_type size() const
Returns the size of the BWT function.
const_iterator begin() const
Returns a const_iterator to the first element.
value_type operator[](size_type i) const
Calculate the Burrows Wheeler Transform (BWT) at position i.
csa_wt::difference_type difference_type
csa_wt::alphabet_category alphabet_category
const_iterator end() const
Returns a const_iterator to the element after the last element.
A class for the Compressed Suffix Array (CSA) based on a Wavelet Tree (WT) of the Burrow Wheeler Tran...
Definition csa_wt.hpp:57
const_iterator end() const
Returns a const_iterator to the element after the last element.
random_access_const_iterator< first_row_of_csa > const_iterator
const csa_bitcompressed::char_type value_type
csa_bitcompressed::alphabet_category alphabet_category
size_type empty() const
Returns if the F column is empty.
csa_bitcompressed::difference_type difference_type
value_type operator[](size_type i) const
Calculate F[i].
first_row_of_csa(t_csa const &csa)
Constructor.
const_iterator begin() const
Returns a const_iterator to the first element.
size_type size() const
Returns the size of the F column.
const_iterator begin() const
Returns a const_iterator to the first element.
size_type empty() const
Returns if the CSA is empty.
const_iterator end() const
Returns a const_iterator to the element after the last element.
size_type size() const
Returns the size of the CSA.
isa_of_csa_psi(t_csa const &csa_wt)
Constructor.
value_type operator[](size_type i) const
Access operator to ISA.
random_access_const_iterator< isa_of_csa_psi > const_iterator
csa_sada::difference_type difference_type
size_type empty() const
Returns if the CSA is empty.
isa_of_csa_wt(t_csa const &csa_wt)
Constructor.
size_type size() const
Returns the size of the CSA.
csa_wt::difference_type difference_type
const_iterator begin() const
Returns a const_iterator to the first element.
const_iterator end() const
Returns a const_iterator to the element after the last element.
value_type operator[](size_type i) const
Access operator to ISA.
random_access_const_iterator< isa_of_csa_wt > const_iterator
Generic iterator for a random access container.
Definition iterators.hpp:24
value_type operator[](size_type i) const
Character at index of the original text.
size_type empty() const
Returns if text text has size 0.
csa_bitcompressed::difference_type difference_type
csa_bitcompressed::alphabet_category alphabet_category
size_type size() const
Returns the size of the original text.
random_access_const_iterator< text_of_csa > const_iterator
const_iterator begin() const
Returns a const_iterator to the first element.
const_iterator end() const
Returns a const_iterator to the element after the last element.
text_of_csa(t_csa const &csa)
Constructor.
random_access_const_iterator< traverse_csa_psi > const_iterator
traverse_csa_psi(t_csa const &csa_psi)
Constructor.
size_type size() const
Returns the size of the function.
const_iterator begin() const
traverse_csa_psi(traverse_csa_psi const &tcsa)
Copy constructor.
const_iterator end() const
Returns a const_iterator to the element after the last element.
size_type empty() const
Returns if the function is empty.
value_type operator[](size_type i) const
Calculate the or value at position i.
const_iterator end() const
Returns a const_iterator to the element after the last element.
const_iterator begin() const
Returns a const_iterator to the first element.
size_type empty() const
Returns if the function is empty.
size_type size() const
Returns the size of the function.
traverse_csa_saisa(t_csa const &csa)
Constructor.
traverse_csa_saisa(traverse_csa_saisa const &tcsa)
value_type operator[](size_type i) const
Calculate the value at position i.
random_access_const_iterator< traverse_csa_saisa > const_iterator
value_type operator[](size_type i) const
Calculate the value at position i.
const_iterator end() const
Returns a const_iterator to the element after the last element.
const_iterator begin() const
Returns a const_iterator to the first element.
size_type size() const
Returns the size of the function.
traverse_csa_wt(t_csa const &csa_wt)
Constructor.
random_access_const_iterator< traverse_csa_wt > const_iterator
size_type empty() const
Returns if the function is empty.
iterators.hpp contains an generic iterator for random access containers.
Namespace for the succinct data structure library.
t_csa::char_type first_row_symbol(const typename t_csa::size_type i, t_csa const &csa)
Get the symbol at position i in the first row of the sorted suffixes of CSA.
Contains declarations and definitions of data structure concepts.
static value_type access(t_csa const &csa, size_type i)
static value_type access(t_csa const &csa, size_type i)
static value_type access(t_csa const &csa, size_type i)
static value_type access(t_csa const &csa, size_type i)
static value_type access(t_csa const &csa, size_type i)
static value_type access(t_csa const &csa, size_type i)