SDSL 3.0.3
Succinct Data Structure Library
Loading...
Searching...
No Matches
wt_pc.hpp
Go to the documentation of this file.
1// Copyright (c) 2016, the SDSL Project Authors. All rights reserved.
2// Please see the AUTHORS file for details. Use of this source code is governed
3// by a BSD license that can be found in the LICENSE file.
9#ifndef INCLUDED_SDSL_WT_PC
10#define INCLUDED_SDSL_WT_PC
11
12#include <algorithm>
13#include <array>
14#include <assert.h>
15#include <cstdint>
16#include <functional>
17#include <iosfwd>
18#include <iterator>
19#include <stddef.h>
20#include <string>
21#include <tuple>
22#include <type_traits>
23#include <utility>
24#include <vector>
25
26#include <sdsl/bits.hpp>
27#include <sdsl/cereal.hpp>
28#include <sdsl/int_vector.hpp>
29#include <sdsl/io.hpp>
30#include <sdsl/iterators.hpp>
34#include <sdsl/util.hpp>
35#include <sdsl/wt_helper.hpp>
36
38namespace sdsl
39{
40
42
53template <class t_shape,
54 class t_bitvector = bit_vector,
55 class t_rank = typename t_bitvector::rank_1_type,
56 class t_select = typename t_bitvector::select_1_type,
57 class t_select_zero = typename t_bitvector::select_0_type,
58 class t_tree_strat = byte_tree<>>
59class wt_pc
60{
61public:
62 typedef typename t_tree_strat::template type<wt_pc> tree_strat_type;
64 typedef typename tree_strat_type::value_type value_type;
65 typedef typename t_bitvector::difference_type difference_type;
68 typedef t_bitvector bit_vector_type;
69 typedef t_rank rank_1_type;
70 typedef t_select select_1_type;
71 typedef t_select_zero select_0_type;
73 typedef typename tree_strat_type::alphabet_category alphabet_category;
74 typedef typename t_shape::template type<wt_pc> shape_type;
75 enum
76 {
77 lex_ordered = shape_type::lex_ordered
78 };
79 using node_type = typename tree_strat_type::node_type;
80
81private:
82#ifdef WT_PC_CACHE
83 mutable value_type m_last_access_answer;
84 mutable size_type m_last_access_i;
85 mutable size_type m_last_access_rl;
86#endif
87
88 size_type m_size = 0; // original text size
89 size_type m_sigma = 0; // alphabet size
90 bit_vector_type m_bv; // bit vector to store the wavelet tree
91 rank_1_type m_bv_rank; // rank support for the wavelet tree bit vector
92 select_1_type m_bv_select1; // select support for the wavelet tree bit vector
93 select_0_type m_bv_select0;
94 tree_strat_type m_tree;
95
96 // insert a character into the wavelet tree, see construct method
97 void insert_char(value_type old_chr, std::vector<uint64_t> & bv_node_pos, size_type times, bit_vector & bv)
98 {
99 uint64_t p = m_tree.bit_path(old_chr);
100 uint32_t path_len = p >> 56;
101 node_type v = m_tree.root();
102 for (uint32_t l = 0; l < path_len; ++l, p >>= 1)
103 {
104 if (p & 1)
105 {
106 bv.set_int(bv_node_pos[v], 0xFFFFFFFFFFFFFFFFULL, times);
107 }
108 bv_node_pos[v] += times;
109 v = m_tree.child(v, p & 1);
110 }
111 }
112
113 // calculates the tree shape returns the size of the WT bit vector
114 size_type construct_tree_shape(std::vector<size_type> const & C)
115 {
116 // vector for node of the tree
117 std::vector<pc_node> temp_nodes; //(2*m_sigma-1);
118 shape_type::construct_tree(C, temp_nodes);
119 // Convert code tree into BFS order in memory and
120 // calculate bv_pos values
121 size_type bv_size = 0;
122 m_tree = tree_strat_type(temp_nodes, bv_size, this);
123 return bv_size;
124 }
125
126 void construct_init_rank_select()
127 {
128 util::init_support(m_bv_rank, &m_bv);
129 util::init_support(m_bv_select0, &m_bv);
130 util::init_support(m_bv_select1, &m_bv);
131 }
132
133 // recursive internal version of the method interval_symbols
134 void _interval_symbols(size_type i,
135 size_type j,
136 size_type & k,
137 std::vector<value_type> & cs,
138 std::vector<size_type> & rank_c_i,
139 std::vector<size_type> & rank_c_j,
140 node_type v) const
141 {
142 // invariant: j>i
143 size_type i_new = (m_bv_rank(m_tree.bv_pos(v) + i) - m_tree.bv_pos_rank(v));
144 size_type j_new = (m_bv_rank(m_tree.bv_pos(v) + j) - m_tree.bv_pos_rank(v));
145 // goto left child
146 i -= i_new;
147 j -= j_new;
148 if (i != j)
149 {
150 node_type v_new = m_tree.child(v, 0);
151 if (!m_tree.is_leaf(v_new))
152 {
153 _interval_symbols(i, j, k, cs, rank_c_i, rank_c_j, v_new);
154 }
155 else
156 {
157 rank_c_i[k] = i;
158 rank_c_j[k] = j;
159 cs[k++] = m_tree.bv_pos_rank(v_new);
160 }
161 }
162 // goto right child
163 if (i_new != j_new)
164 {
165 node_type v_new = m_tree.child(v, 1);
166 if (!m_tree.is_leaf(v_new))
167 {
168 _interval_symbols(i_new, j_new, k, cs, rank_c_i, rank_c_j, v_new);
169 }
170 else
171 {
172 rank_c_i[k] = i_new;
173 rank_c_j[k] = j_new;
174 cs[k++] = m_tree.bv_pos_rank(v_new);
175 }
176 }
177 }
178
179public:
180 size_type const & sigma = m_sigma;
181 bit_vector_type const & bv = m_bv;
182
183 // Default constructor
184 wt_pc(){};
185
187
193 template <typename t_it>
194 wt_pc(t_it begin, t_it end) : m_size(std::distance(begin, end))
195 {
196 if (0 == m_size)
197 return;
198 // O(n + |\Sigma|\log|\Sigma|) algorithm for calculating node sizes
199 // TODO: C should also depend on the tree_strategy. C is just a mapping
200 // from a symbol to its frequency. So a map<uint64_t,uint64_t> could be
201 // used for integer alphabets...
202 std::vector<size_type> C;
203 // 1. Count occurrences of characters
205 // 2. Calculate effective alphabet size
207 // 3. Generate tree shape
208 size_type tree_size = construct_tree_shape(C);
209 // 4. Generate wavelet tree bit sequence m_bv
210 bit_vector temp_bv(tree_size, 0);
211
212 // Initializing starting position of wavelet tree nodes
213 std::vector<uint64_t> bv_node_pos(m_tree.size(), 0);
214 for (size_type v = 0; v < m_tree.size(); ++v)
215 {
216 bv_node_pos[v] = m_tree.bv_pos(v);
217 }
218 value_type old_chr = *begin;
219 uint32_t times = 0;
220 for (auto it = begin; it != end; ++it)
221 {
222 value_type chr = *it;
223 if (chr != old_chr)
224 {
225 insert_char(old_chr, bv_node_pos, times, temp_bv);
226 times = 1;
227 old_chr = chr;
228 }
229 else
230 { // chr == old_chr
231 ++times;
232 if (times == 64)
233 {
234 insert_char(old_chr, bv_node_pos, times, temp_bv);
235 times = 0;
236 }
237 }
238 }
239 if (times > 0)
240 {
241 insert_char(old_chr, bv_node_pos, times, temp_bv);
242 }
243 m_bv = bit_vector_type(std::move(temp_bv));
244 // 5. Initialize rank and select data structures for m_bv
245 construct_init_rank_select();
246 // 6. Finish inner nodes by precalculating the bv_pos_rank values
247 m_tree.init_node_ranks(m_bv_rank);
248 }
249
250 template <typename t_it>
251 wt_pc(t_it begin, t_it end, std::string) : wt_pc(begin, end)
252 {}
253
255 wt_pc(wt_pc const & wt) :
256 m_size(wt.m_size),
257 m_sigma(wt.m_sigma),
258 m_bv(wt.m_bv),
259 m_bv_rank(wt.m_bv_rank),
260 m_bv_select1(wt.m_bv_select1),
261 m_bv_select0(wt.m_bv_select0),
262 m_tree(wt.m_tree)
263 {
264 m_bv_rank.set_vector(&m_bv);
265 m_bv_select1.set_vector(&m_bv);
266 m_bv_select0.set_vector(&m_bv);
267 }
268
269 wt_pc(wt_pc && wt) :
270 m_size(wt.m_size),
271 m_sigma(wt.m_sigma),
272 m_bv(std::move(wt.m_bv)),
273 m_bv_rank(std::move(wt.m_bv_rank)),
274 m_bv_select1(std::move(wt.m_bv_select1)),
275 m_bv_select0(std::move(wt.m_bv_select0)),
276 m_tree(std::move(wt.m_tree))
277 {
278 m_bv_rank.set_vector(&m_bv);
279 m_bv_select1.set_vector(&m_bv);
280 m_bv_select0.set_vector(&m_bv);
281 }
282
284 wt_pc & operator=(wt_pc const & wt)
285 {
286 if (this != &wt)
287 {
288 wt_pc tmp(wt); // re-use copy-constructor
289 *this = std::move(tmp); // re-use move-assignment
290 }
291 return *this;
292 }
293
296 {
297 if (this != &wt)
298 {
299 m_size = wt.m_size;
300 m_sigma = wt.m_sigma;
301 m_bv = std::move(wt.m_bv);
302 m_bv_rank = std::move(wt.m_bv_rank);
303 m_bv_rank.set_vector(&m_bv);
304 m_bv_select1 = std::move(wt.m_bv_select1);
305 m_bv_select1.set_vector(&m_bv);
306 m_bv_select0 = std::move(wt.m_bv_select0);
307 m_bv_select0.set_vector(&m_bv);
308 m_tree = std::move(wt.m_tree);
309 }
310 return *this;
311 }
312
315 {
316 return m_size;
317 }
318
320 bool empty() const
321 {
322 return m_size == 0;
323 }
324
326
337 {
338 assert(i < size());
339 // which stores how many of the next symbols are equal
340 // with the current char
341 node_type v = m_tree.root(); // start at root node
342 while (!m_tree.is_leaf(v))
343 { // while not a leaf
344 if (m_bv[m_tree.bv_pos(v) + i])
345 { // goto right child
346 i = m_bv_rank(m_tree.bv_pos(v) + i) - m_tree.bv_pos_rank(v);
347 v = m_tree.child(v, 1);
348 }
349 else
350 { // goto the left child
351 i -= (m_bv_rank(m_tree.bv_pos(v) + i) - m_tree.bv_pos_rank(v));
352 v = m_tree.child(v, 0);
353 }
354 }
355 // if v is a leaf bv_pos_rank returns symbol itself
356 return m_tree.bv_pos_rank(v);
357 };
358
360
372 {
373 assert(i <= size());
374 if (!m_tree.is_valid(m_tree.c_to_leaf(c)))
375 {
376 return 0; // if `c` was not in the text
377 }
378 if (m_sigma == 1)
379 {
380 return i; // if m_sigma == 1 answer is trivial
381 }
382 uint64_t p = m_tree.bit_path(c);
383 uint32_t path_len = (p >> 56);
384 size_type result = i;
385 node_type v = m_tree.root();
386 for (uint32_t l = 0; l < path_len and result; ++l, p >>= 1)
387 {
388 if (p & 1)
389 {
390 result = (m_bv_rank(m_tree.bv_pos(v) + result) - m_tree.bv_pos_rank(v));
391 }
392 else
393 {
394 result -= (m_bv_rank(m_tree.bv_pos(v) + result) - m_tree.bv_pos_rank(v));
395 }
396 v = m_tree.child(v, p & 1); // goto child
397 }
398 return result;
399 };
400
402
411 std::pair<size_type, value_type> inverse_select(size_type i) const
412 {
413 assert(i < size());
414 node_type v = m_tree.root();
415 while (!m_tree.is_leaf(v))
416 { // while not a leaf
417 if (m_bv[m_tree.bv_pos(v) + i])
418 { // goto right child
419 i = (m_bv_rank(m_tree.bv_pos(v) + i) - m_tree.bv_pos_rank(v));
420 v = m_tree.child(v, 1);
421 }
422 else
423 { // goto left child
424 i -= (m_bv_rank(m_tree.bv_pos(v) + i) - m_tree.bv_pos_rank(v));
425 v = m_tree.child(v, 0);
426 }
427 }
428 // if v is a leaf bv_pos_rank returns symbol itself
429 return std::make_pair(i, (value_type)m_tree.bv_pos_rank(v));
430 }
431
433
444 {
445 assert(1 <= i and i <= rank(size(), c));
446 node_type v = m_tree.c_to_leaf(c);
447 if (!m_tree.is_valid(v))
448 { // if c was not in the text
449 return m_size; // -> return a position right to the end
450 }
451 if (m_sigma == 1)
452 {
453 return std::min(i - 1, m_size);
454 }
455 size_type result = i - 1; // otherwise
456 uint64_t p = m_tree.bit_path(c);
457 uint32_t path_len = (p >> 56);
458 // path_len > 0, since we have handled m_sigma = 1.
459 p <<= (64 - path_len);
460 for (uint32_t l = 0; l < path_len; ++l, p <<= 1)
461 {
462 if ((p & 0x8000000000000000ULL) == 0)
463 { // node was a left child
464 v = m_tree.parent(v);
465 result = m_bv_select0(m_tree.bv_pos(v) - m_tree.bv_pos_rank(v) + result + 1) - m_tree.bv_pos(v);
466 }
467 else
468 { // node was a right child
469 v = m_tree.parent(v);
470 result = m_bv_select1(m_tree.bv_pos_rank(v) + result + 1) - m_tree.bv_pos(v);
471 }
472 }
473 return result;
474 };
475
477
499 size_type j,
500 size_type & k,
501 std::vector<value_type> & cs,
502 std::vector<size_type> & rank_c_i,
503 std::vector<size_type> & rank_c_j) const
504 {
505 assert(i <= j and j <= size());
506 if (i == j)
507 {
508 k = 0;
509 }
510 else if (1 == m_sigma)
511 {
512 k = 1;
513 cs[0] = m_tree.bv_pos_rank(m_tree.root());
514 rank_c_i[0] = std::min(i, m_size);
515 rank_c_j[0] = std::min(j, m_size);
516 }
517 else if ((j - i) == 1)
518 {
519 k = 1;
520 auto rc = inverse_select(i);
521 rank_c_i[0] = rc.first;
522 cs[0] = rc.second;
523 rank_c_j[0] = rank_c_i[0] + 1;
524 }
525 else if ((j - i) == 2)
526 {
527 auto rc = inverse_select(i);
528 rank_c_i[0] = rc.first;
529 cs[0] = rc.second;
530 rc = inverse_select(i + 1);
531 rank_c_i[1] = rc.first;
532 cs[1] = rc.second;
533
534 if (cs[0] == cs[1])
535 {
536 k = 1;
537 rank_c_j[0] = rank_c_i[0] + 2;
538 }
539 else
540 {
541 k = 2;
542 if (lex_ordered and cs[0] > cs[1])
543 {
544 std::swap(cs[0], cs[1]);
545 std::swap(rank_c_i[0], rank_c_i[1]);
546 }
547 rank_c_j[0] = rank_c_i[0] + 1;
548 rank_c_j[1] = rank_c_i[1] + 1;
549 }
550 }
551 else
552 {
553 k = 0;
554 _interval_symbols(i, j, k, cs, rank_c_i, rank_c_j, 0);
555 }
556 }
557
559
573 template <class t_ret_type = std::tuple<size_type, size_type, size_type>>
574 typename std::enable_if<shape_type::lex_ordered, t_ret_type>::type
576 {
577 assert(i <= j and j <= size());
578 if (1 == m_sigma)
579 {
580 value_type _c = m_tree.bv_pos_rank(m_tree.root());
581 if (c == _c)
582 { // c is the only symbol in the wt
583 return t_ret_type{i, 0, 0};
584 }
585 else if (c < _c)
586 {
587 return t_ret_type{0, 0, j - i};
588 }
589 else
590 {
591 return t_ret_type{0, j - i, 0};
592 }
593 }
594 if (i == j)
595 {
596 return t_ret_type{rank(i, c), 0, 0};
597 }
598 uint64_t p = m_tree.bit_path(c);
599 uint32_t path_len = p >> 56;
600 if (path_len == 0)
601 { // path_len=0: => c is not present
602 value_type _c = (value_type)p;
603 if (c == _c)
604 { // c is smaller than any symbol in wt
605 return t_ret_type{0, 0, j - i};
606 }
607 auto res = lex_count(i, j, _c);
608 return t_ret_type{0, j - i - std::get<2>(res), std::get<2>(res)};
609 }
610 size_type smaller = 0, greater = 0;
611 node_type v = m_tree.root();
612 for (uint32_t l = 0; l < path_len; ++l, p >>= 1)
613 {
614 size_type r1_1 = (m_bv_rank(m_tree.bv_pos(v) + i) - m_tree.bv_pos_rank(v));
615 size_type r1_2 = (m_bv_rank(m_tree.bv_pos(v) + j) - m_tree.bv_pos_rank(v));
616
617 if (p & 1)
618 {
619 smaller += j - r1_2 - i + r1_1;
620 i = r1_1;
621 j = r1_2;
622 }
623 else
624 {
625 greater += r1_2 - r1_1;
626 i -= r1_1;
627 j -= r1_2;
628 }
629 v = m_tree.child(v, p & 1);
630 }
631 return t_ret_type{i, smaller, greater};
632 }
633
635
646 template <class t_ret_type = std::tuple<size_type, size_type>>
647 typename std::enable_if<shape_type::lex_ordered, t_ret_type>::type lex_smaller_count(size_type i,
648 value_type c) const
649 {
650 assert(i <= size());
651 if (1 == m_sigma)
652 {
653 value_type _c = m_tree.bv_pos_rank(m_tree.root());
654 if (c == _c)
655 { // c is the only symbol in the wt
656 return t_ret_type{i, 0};
657 }
658 else if (c < _c)
659 {
660 return t_ret_type{0, 0};
661 }
662 else
663 {
664 return t_ret_type{0, i};
665 }
666 }
667
668 uint64_t p = m_tree.bit_path(c);
669 uint32_t path_len = p >> 56;
670 if (path_len == 0)
671 { // path_len=0: => c is not present
672 value_type _c = (value_type)p;
673 if (c == _c)
674 { // c is smaller than any symbol in wt
675 return t_ret_type{0, 0};
676 }
677 auto res = lex_smaller_count(i, _c);
678 return t_ret_type{0, std::get<0>(res) + std::get<1>(res)};
679 }
680 size_type result = 0;
681 size_type all = i; // possible occurrences of c
682 node_type v = m_tree.root();
683 for (uint32_t l = 0; l < path_len and all; ++l, p >>= 1)
684 {
685 size_type ones = (m_bv_rank(m_tree.bv_pos(v) + all) - m_tree.bv_pos_rank(v));
686 if (p & 1)
687 {
688 result += all - ones;
689 all = ones;
690 }
691 else
692 {
693 all -= ones;
694 }
695 v = m_tree.child(v, p & 1);
696 }
697 return t_ret_type{all, result};
698 }
699
702 {
703 return const_iterator(this, 0);
704 }
705
708 {
709 return const_iterator(this, size());
710 }
711
713 size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const
714 {
715 structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this));
716 size_type written_bytes = 0;
717 written_bytes += write_member(m_size, out, child, "size");
718 written_bytes += write_member(m_sigma, out, child, "sigma");
719 written_bytes += m_bv.serialize(out, child, "bv");
720 written_bytes += m_bv_rank.serialize(out, child, "bv_rank");
721 written_bytes += m_bv_select1.serialize(out, child, "bv_select_1");
722 written_bytes += m_bv_select0.serialize(out, child, "bv_select_0");
723 written_bytes += m_tree.serialize(out, child, "tree");
724 structure_tree::add_size(child, written_bytes);
725 return written_bytes;
726 }
727
729 void load(std::istream & in)
730 {
731 read_member(m_size, in);
732 read_member(m_sigma, in);
733 m_bv.load(in);
734 m_bv_rank.load(in, &m_bv);
735 m_bv_select1.load(in, &m_bv);
736 m_bv_select0.load(in, &m_bv);
737 m_tree.load(in);
738 }
739
741 bool operator==(wt_pc const & other) const noexcept
742 {
743 return (m_size == other.m_size) && (m_sigma == other.m_sigma) && (m_bv == other.m_bv)
744 && (m_bv_rank == other.m_bv_rank) && (m_bv_select1 == other.m_bv_select1)
745 && (m_bv_select0 == other.m_bv_select0) && (m_tree == other.m_tree);
746 }
747
749 bool operator!=(wt_pc const & other) const noexcept
750 {
751 return !(*this == other);
752 }
753
754 template <typename archive_t>
755 void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const
756 {
757 ar(CEREAL_NVP(m_size));
758 ar(CEREAL_NVP(m_sigma));
759 ar(CEREAL_NVP(m_bv));
760 ar(CEREAL_NVP(m_bv_rank));
761 ar(CEREAL_NVP(m_bv_select1));
762 ar(CEREAL_NVP(m_bv_select0));
763 ar(CEREAL_NVP(m_tree));
764 }
765
766 template <typename archive_t>
767 void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar)
768 {
769 ar(CEREAL_NVP(m_size));
770 ar(CEREAL_NVP(m_sigma));
771 ar(CEREAL_NVP(m_bv));
772 ar(CEREAL_NVP(m_bv_rank));
773 m_bv_rank.set_vector(&m_bv);
774 ar(CEREAL_NVP(m_bv_select1));
775 m_bv_select1.set_vector(&m_bv);
776 ar(CEREAL_NVP(m_bv_select0));
777 m_bv_select0.set_vector(&m_bv);
778 ar(CEREAL_NVP(m_tree));
779 }
780
783 {
785 }
786
788 auto seq(node_type const & v) const -> random_access_container<std::function<value_type(size_type)>>
789 {
790 return random_access_container<std::function<value_type(size_type)>>(
791 [&v, this](size_type i)
792 {
793 node_type vv = v;
794 while (!is_leaf(vv))
795 {
796 auto vs = expand(vv);
797 auto rs = expand(vv, range_type{{0, i}});
798 bool bit = *(begin(vv) + i);
799 i = std::get<1>(rs[bit]);
800 vv = vs[bit];
801 }
802 return sym(vv);
803 },
804 size(v));
805 }
806
808 bool is_leaf(node_type const & v) const
809 {
810 return m_tree.is_leaf(v);
811 }
812
814 value_type sym(node_type const & v) const
815 {
816 return m_tree.bv_pos_rank(v);
817 }
818
820 bool empty(node_type const & v) const
821 {
822 return size(v) == 0;
823 }
824
826 auto size(node_type const & v) const -> decltype(m_tree.size(v))
827 {
828 if (is_leaf(v))
829 {
830 if (v == root())
831 return size();
832 else
833 {
834 auto parent = m_tree.parent(v);
835 auto rs = expand(parent, range_type{{0, size(parent) - 1}});
836 if (m_tree.child(parent, 0) == v)
837 return std::get<1>(std::get<0>(rs)) - std::get<0>((std::get<0>(rs))) + 1;
838 else
839 return std::get<1>(std::get<1>(rs)) - std::get<0>((std::get<1>(rs))) + 1;
840 }
841 }
842 else
843 {
844 return m_tree.size(v);
845 }
846 }
847
850 {
851 return m_tree.root();
852 }
853
855
859 std::array<node_type, 2> expand(node_type const & v) const
860 {
861 return {{m_tree.child(v, 0), m_tree.child(v, 1)}};
862 }
863
865
874 std::array<range_vec_type, 2> expand(node_type const & v, range_vec_type const & ranges) const
875 {
876 auto ranges_copy = ranges;
877 return expand(v, std::move(ranges_copy));
878 }
879
881
890 std::array<range_vec_type, 2> expand(node_type const & v, range_vec_type && ranges) const
891 {
892 auto v_sp_rank = m_tree.bv_pos_rank(v);
893 range_vec_type res(ranges.size());
894 size_t i = 0;
895 for (auto & r : ranges)
896 {
897 auto sp_rank = m_bv_rank(m_tree.bv_pos(v) + r[0]);
898 auto right_size = m_bv_rank(m_tree.bv_pos(v) + r[1] + 1) - sp_rank;
899 auto left_size = (r[1] - r[0] + 1) - right_size;
900
901 auto right_sp = sp_rank - v_sp_rank;
902 auto left_sp = r[0] - right_sp;
903
904 r = {{left_sp, left_sp + left_size - 1}};
905 res[i++] = {{right_sp, right_sp + right_size - 1}};
906 }
907 return {{ranges, std::move(res)}};
908 }
909
911
920 std::array<range_type, 2> expand(node_type const & v, range_type const & r) const
921 {
922 auto v_sp_rank = m_tree.bv_pos_rank(v);
923 auto sp_rank = m_bv_rank(m_tree.bv_pos(v) + r[0]);
924 auto right_size = m_bv_rank(m_tree.bv_pos(v) + r[1] + 1) - sp_rank;
925 auto left_size = (r[1] - r[0] + 1) - right_size;
926
927 auto right_sp = sp_rank - v_sp_rank;
928 auto left_sp = r[0] - right_sp;
929
930 return {{{{left_sp, left_sp + left_size - 1}}, {{right_sp, right_sp + right_size - 1}}}};
931 }
932
934 std::pair<uint64_t, uint64_t> path(value_type c) const
935 {
936 uint64_t path = m_tree.bit_path(c);
937 uint64_t path_len = path >> 56;
938 // reverse the path till we fix the ordering
940 path = path >> (64 - path_len); // remove the length
941 return {path_len, path};
942 }
943
945
950 std::pair<bool, value_type> symbol_gte(value_type c) const
951 {
952 return m_tree.symbol_gte(c);
953 }
954
956
961 std::pair<bool, value_type> symbol_lte(value_type c) const
962 {
963 return m_tree.symbol_lte(c);
964 }
965
966private:
968 auto begin(node_type const & v) const -> decltype(m_bv.begin() + m_tree.bv_pos(v))
969 {
970 return m_bv.begin() + m_tree.bv_pos(v);
971 }
972
974 auto end(node_type const & v) const -> decltype(m_bv.begin() + m_tree.bv_pos(v) + m_tree.size(v))
975 {
976 return m_bv.begin() + m_tree.bv_pos(v) + m_tree.size(v);
977 }
978};
979} // namespace sdsl
980
981#endif
bits.hpp contains the sdsl::bits class.
cereal.hpp offers cereal support
#define CEREAL_NVP(X)
Definition cereal.hpp:31
A generic vector class for integers of width .
Generic iterator for a random access container.
Definition iterators.hpp:24
static structure_tree_node * add_child(structure_tree_node *v, std::string const &name, std::string const &type)
static void add_size(structure_tree_node *v, uint64_t value)
A prefix code-shaped wavelet.
Definition wt_pc.hpp:60
value_type sym(node_type const &v) const
Symbol for a leaf.
Definition wt_pc.hpp:814
void interval_symbols(size_type i, size_type j, size_type &k, std::vector< value_type > &cs, std::vector< size_type > &rank_c_i, std::vector< size_type > &rank_c_j) const
For each symbol c in wt[i..j-1] get rank(i,c) and rank(j,c).
Definition wt_pc.hpp:498
wt_pc & operator=(wt_pc &&wt)
Move assignment operator.
Definition wt_pc.hpp:295
std::enable_if< shape_type::lex_ordered, t_ret_type >::type lex_smaller_count(size_type i, value_type c) const
How many symbols are lexicographic smaller than c in [0..i-1].
Definition wt_pc.hpp:647
size_type rank(size_type i, value_type c) const
Calculates how many symbols c are in the prefix [0..i-1].
Definition wt_pc.hpp:371
auto bit_vec(node_type const &v) const -> node_bv_container< t_bitvector >
Random access container to bitvector of node v.
Definition wt_pc.hpp:782
t_bitvector::difference_type difference_type
Definition wt_pc.hpp:65
std::pair< bool, value_type > symbol_gte(value_type c) const
Returns for a symbol c the next larger or equal symbol in the WT.
Definition wt_pc.hpp:950
void load(std::istream &in)
Loads the data structure from the given istream.
Definition wt_pc.hpp:729
std::array< range_type, 2 > expand(node_type const &v, range_type const &r) const
Returns for a range its left and right child ranges.
Definition wt_pc.hpp:920
int_vector ::size_type size_type
Definition wt_pc.hpp:63
wt_pc & operator=(wt_pc const &wt)
Assignment operator.
Definition wt_pc.hpp:284
auto seq(node_type const &v) const -> random_access_container< std::function< value_type(size_type)> >
Random access container to sequence of node v.
Definition wt_pc.hpp:788
bit_vector_type const & bv
Definition wt_pc.hpp:181
value_type operator[](size_type i) const
Recovers the i-th symbol of the original vector.
Definition wt_pc.hpp:336
void CEREAL_LOAD_FUNCTION_NAME(archive_t &ar)
Definition wt_pc.hpp:767
tree_strat_type::alphabet_category alphabet_category
Definition wt_pc.hpp:73
t_bitvector bit_vector_type
Definition wt_pc.hpp:68
const_iterator iterator
Definition wt_pc.hpp:67
wt_pc(t_it begin, t_it end, std::string)
Definition wt_pc.hpp:251
wt_tag index_category
Definition wt_pc.hpp:72
std::array< range_vec_type, 2 > expand(node_type const &v, range_vec_type &&ranges) const
Returns for each range its left and right child ranges.
Definition wt_pc.hpp:890
void CEREAL_SAVE_FUNCTION_NAME(archive_t &ar) const
Definition wt_pc.hpp:755
const_iterator begin() const
Returns a const_iterator to the first element.
Definition wt_pc.hpp:701
size_type size() const
Returns the size of the original vector.
Definition wt_pc.hpp:314
size_type const & sigma
Definition wt_pc.hpp:180
typename tree_strat_type::node_type node_type
Definition wt_pc.hpp:79
node_type root() const
Returns the root node.
Definition wt_pc.hpp:849
bool operator==(wt_pc const &other) const noexcept
Equality operator.
Definition wt_pc.hpp:741
auto size(node_type const &v) const -> decltype(m_tree.size(v))
Return the size of node v.
Definition wt_pc.hpp:826
std::array< range_vec_type, 2 > expand(node_type const &v, range_vec_type const &ranges) const
Returns for each range its left and right child ranges.
Definition wt_pc.hpp:874
const_iterator end() const
Returns a const_iterator to the element after the last element.
Definition wt_pc.hpp:707
std::pair< bool, value_type > symbol_lte(value_type c) const
Returns for a symbol c the previous smaller or equal symbol in the WT.
Definition wt_pc.hpp:961
wt_pc(t_it begin, t_it end)
Construct the wavelet tree from a sequence defined by two interators.
Definition wt_pc.hpp:194
t_select_zero select_0_type
Definition wt_pc.hpp:71
t_tree_strat::template type< wt_pc > tree_strat_type
Definition wt_pc.hpp:62
t_select select_1_type
Definition wt_pc.hpp:70
wt_pc(wt_pc &&wt)
Definition wt_pc.hpp:269
std::pair< size_type, value_type > inverse_select(size_type i) const
Calculates how many times symbol wt[i] occurs in the prefix [0..i-1].
Definition wt_pc.hpp:411
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
Serializes the data structure into the given ostream.
Definition wt_pc.hpp:713
tree_strat_type::value_type value_type
Definition wt_pc.hpp:64
std::enable_if< shape_type::lex_ordered, t_ret_type >::type lex_count(size_type i, size_type j, value_type c) const
How many symbols are lexicographic smaller/greater than c in [i..j-1].
Definition wt_pc.hpp:575
bool empty() const
Returns whether the wavelet tree contains no data.
Definition wt_pc.hpp:320
t_shape::template type< wt_pc > shape_type
Definition wt_pc.hpp:74
bool operator!=(wt_pc const &other) const noexcept
Inequality operator.
Definition wt_pc.hpp:749
@ lex_ordered
Definition wt_pc.hpp:77
bool is_leaf(node_type const &v) const
Checks if the node is a leaf node.
Definition wt_pc.hpp:808
size_type select(size_type i, value_type c) const
Calculates the ith occurrence of the symbol c in the supported vector.
Definition wt_pc.hpp:443
t_rank rank_1_type
Definition wt_pc.hpp:69
std::array< node_type, 2 > expand(node_type const &v) const
Returns the two child nodes of an inner node.
Definition wt_pc.hpp:859
random_access_const_iterator< wt_pc > const_iterator
Definition wt_pc.hpp:66
wt_pc(wt_pc const &wt)
Copy constructor.
Definition wt_pc.hpp:255
bool empty(node_type const &v) const
Indicates if node v is empty.
Definition wt_pc.hpp:820
std::pair< uint64_t, uint64_t > path(value_type c) const
return the path to the leaf for a given symbol
Definition wt_pc.hpp:934
int_vector.hpp contains the sdsl::int_vector class.
io.hpp contains some methods for reading/writing sdsl structures.
iterators.hpp contains an generic iterator for random access containers.
Namespace for the succinct data structure library.
void calculate_effective_alphabet_size(t_rac const &C, sigma_type &sigma)
Definition wt_helper.hpp:62
void calculate_character_occurences(t_it begin, t_it end, t_rac &C)
Count for each character the number of occurrences in rac[0..size-1].
Definition wt_helper.hpp:47
size_t write_member(T const &t, std::ostream &out, sdsl::structure_tree_node *v=nullptr, std::string name="")
Definition io.hpp:94
void read_member(T &t, std::istream &in)
Definition io.hpp:119
int_vector< 1 > bit_vector
bit_vector is a specialization of the int_vector.
std::array< int_vector<>::size_type, 2 > range_type
Definition wt_helper.hpp:27
std::vector< range_type > range_vec_type
Definition wt_helper.hpp:28
rank_support_v.hpp contains rank_support_v.
Contains declarations and definitions of data structure concepts.
static constexpr uint64_t rev(uint64_t x)
reverses a given 64 bit word
Definition bits.hpp:949
structure_tree.hpp contains a helper class which can represent the memory structure of a class.
util.hpp contains some helper methods for int_vector and other stuff like demangle class names.