SDSL 3.0.3
Succinct Data Structure Library
Loading...
Searching...
No Matches
suffix_tree_helper.hpp
Go to the documentation of this file.
1// Copyright (c) 2016, the SDSL Project Authors. All rights reserved.
2// Please see the AUTHORS file for details. Use of this source code is governed
3// by a BSD license that can be found in the LICENSE file.
4#ifndef INCLUDED_SDSL_SUFFIX_TREE_HELPER
5#define INCLUDED_SDSL_SUFFIX_TREE_HELPER
6
7#include <cassert>
8#include <iterator>
9#include <stack>
10#include <stdint.h>
11#include <type_traits>
12
13#include <sdsl/int_vector.hpp>
17#include <sdsl/util.hpp>
18
19namespace sdsl
20{
21
22template <class t_cst>
24{
25public:
26 using iterator_category = std::forward_iterator_tag;
27 using value_type = typename t_cst::node_type;
28 using difference_type = std::ptrdiff_t;
31
32 using node_type = typename t_cst::node_type;
35
36private:
37 t_cst const * m_cst;
38 node_type m_cur_node;
39
40public:
41 cst_node_child_proxy_iterator() : m_cst(nullptr){};
42 cst_node_child_proxy_iterator(t_cst const * cst, node_type v) : m_cst(cst), m_cur_node(v)
43 {}
44 cst_node_child_proxy_iterator(iterator_type const & it) : m_cst(it.m_cst), m_cur_node(it.m_cur_node)
45 {}
46
47public:
49 {
50 return m_cur_node;
51 }
53 {
54 m_cur_node = m_cst->sibling(m_cur_node);
55 return *this;
56 }
58 {
59 iterator_type it = *this;
60 ++(*this);
61 return it;
62 }
63 bool operator==(iterator_type const & it) const
64 {
65 return it.m_cur_node == m_cur_node;
66 }
67 bool operator!=(iterator_type const & it) const
68 {
69 return !(*this == it);
70 }
71};
72
73template <class t_cst>
75{
76public: // types
78 using node_type = typename t_cst::node_type;
79 using size_type = typename t_cst::size_type;
80
81private: // data
82 node_type m_parent;
83 t_cst const * m_cst;
84
85public: // constructors
87 explicit cst_node_child_proxy(t_cst const * cst, node_type v) : m_parent(v), m_cst(cst){};
88 cst_node_child_proxy(cst_node_child_proxy const & p) : m_parent(p.m_parent), m_cst(p.m_cst){};
89
90public: // methods
92 {
93 return m_cst->select_child(m_parent, i + 1);
94 } // enumeration starts with 1 not 0
96 {
97 return m_cst->degree(m_parent);
98 }
100 {
101 return iterator_type(m_cst, m_cst->select_child(m_parent, 1));
102 }
104 {
105 return iterator_type(m_cst, m_cst->root());
106 }
107};
108
110
119template <class t_rac>
120void construct_supercartesian_tree_bp(t_rac const & vec, bit_vector & bp, bool const minimum = true)
121{
122 typedef typename t_rac::size_type size_type;
123 bp.resize(2 * vec.size()); // resize bit vector for balanaced parantheses to 2 n bits
124 util::set_to_value(bp, 0);
125 std::stack<typename t_rac::value_type> vec_stack;
126
127 size_type k = 0;
128 for (size_type i = 0; i < vec.size(); ++i)
129 {
130 typename t_rac::value_type l = vec[i];
131 if (minimum)
132 {
133 while (vec_stack.size() > 0 and l < vec_stack.top())
134 {
135 vec_stack.pop();
136 ++k;
137 /*bp[k++] = 0; bp is already initialized to zero*/ // writing a closing parenthesis
138 }
139 }
140 else
141 {
142 while (vec_stack.size() > 0 and l > vec_stack.top())
143 {
144 vec_stack.pop();
145 ++k;
146 /*bp[k++] = 0; bp is already initialized to zero*/ // writing a closing parenthesis
147 }
148 }
149 vec_stack.push(l);
150 bp[k++] = 1; // writing an opening parenthesis
151 }
152 while (vec_stack.size() > 0)
153 {
154 vec_stack.pop();
155 bp[k++] = 0; // writing a closing parenthesis
156 }
157 assert(k == 2 * vec.size());
158}
159
161
170template <class t_rac>
171bit_vector construct_supercartesian_tree_bp_succinct(t_rac const & vec, bool const minimum = true)
172{
173 typedef typename t_rac::size_type size_type;
174 bit_vector bp(2 * vec.size(), 0); // initialize result
175 if (vec.size() > 0)
176 {
177 sorted_stack_support vec_stack(vec.size());
178
179 size_type k = 0;
180 if (minimum)
181 {
182 bp[k++] = 1;
183 for (size_type i = 1; i < vec.size(); ++i)
184 {
185 if (vec[i] < vec[i - 1])
186 {
187 ++k;
188 while (vec_stack.size() > 0 and vec[i] < vec[vec_stack.top()])
189 {
190 vec_stack.pop();
191 ++k; // writing a closing parenthesis, bp is already initialized to zero
192 }
193 }
194 else
195 {
196 vec_stack.push(i - 1); // "lazy stack" trick: speed-up approx. 25%
197 }
198 bp[k++] = 1; // writing an opening parenthesis
199 }
200 }
201 else
202 {
203 // no "lazy stack" trick used here
204 for (size_type i = 0; i < vec.size(); ++i)
205 {
206 while (vec_stack.size() > 0 and vec[i] > vec[vec_stack.top()])
207 {
208 vec_stack.pop();
209 ++k;
210 /*bp[k++] = 0; bp is already initialized to zero*/ // writing a closing parenthesis
211 }
212 vec_stack.push(i);
213 bp[k++] = 1; // writing an opening parenthesis
214 }
215 }
216 }
217 return bp;
218}
219
221
229template <uint8_t t_width>
231{
233 bit_vector bp(2 * lcp_buf.size(), 0); // initialize result
234 if (lcp_buf.size() > 0)
235 {
236 sorted_multi_stack_support vec_stack(lcp_buf.size());
237
238 size_type k = 0;
239 if (minimum)
240 {
241 bp[k++] = 1;
242 size_type last = lcp_buf[0];
243 for (size_type i = 1, x; i < lcp_buf.size(); ++i)
244 {
245 x = lcp_buf[i];
246 if (x < last)
247 {
248 ++k; // writing a closing parenthesis for last
249 while (!vec_stack.empty() and x < vec_stack.top())
250 {
251 vec_stack.pop();
252 ++k; // writing a closing parenthesis, bp is already initialized to zeros
253 }
254 }
255 else
256 {
257 vec_stack.push(last); // "lazy stack" trick: speed-up about 25 %
258 }
259 bp[k++] = 1; // writing an opening parenthesis
260 last = x;
261 }
262 }
263 else
264 {
265 // no "lazy stack" trick use here
266 for (size_type i = 0, x; i < lcp_buf.size(); ++i)
267 {
268 x = lcp_buf[i];
269 while (!vec_stack.empty() and x > vec_stack.top())
270 {
271 vec_stack.pop();
272 ++k; // writing a closing parenthesis, bp is already initialized to zeros
273 }
274 vec_stack.push(x);
275 bp[k++] = 1; // writing an opening parenthesis
276 }
277 }
278 }
279 return bp;
280}
281
284
293template <uint8_t t_width>
295 bit_vector & bp,
296 bit_vector & bp_fc,
297 bool const minimum = true)
298{
300 size_type n = lcp_buf.size();
301 bp.resize(2 * n); // resize bit vector for balanced parentheses to 2 n bits
302 bp_fc.resize(n);
303 if (n == 0) // if n == 0 we are done
304 return 0;
305 size_type fc_cnt = 0; // first child counter
306 util::set_to_value(bp, 0);
307 util::set_to_value(bp_fc, 0);
308 sorted_multi_stack_support vec_stack(n);
309
310 size_type k = 0;
311 size_type k_fc = 0; // first child index
312 if (minimum)
313 {
314 // no "lazy stack" trick used here
315 for (size_type i = 0, x; i < n; ++i)
316 {
317 x = lcp_buf[i];
318 while (!vec_stack.empty() and x < vec_stack.top())
319 {
320 if (vec_stack.pop())
321 {
322 bp_fc[k_fc] = 1;
323 ++fc_cnt;
324 }
325 ++k; // writing a closing parenthesis, bp is already initialized to zeros
326 ++k_fc; // write a bit in first_child
327 }
328 vec_stack.push(x);
329 bp[k++] = 1; // writing an opening parenthesis
330 }
331 }
332 else
333 {
334 // no "lazy stack" trick used here
335 for (size_type i = 0, x; i < n; ++i)
336 {
337 x = lcp_buf[i];
338 while (!vec_stack.empty() and x > vec_stack.top())
339 {
340 if (vec_stack.pop())
341 {
342 bp_fc[k_fc] = 1;
343 ++fc_cnt;
344 }
345 ++k; // writing a closing parenthesis, bp is already initialized to zeros
346 ++k_fc; // write a bit in first_child
347 }
348 vec_stack.push(x);
349 bp[k++] = 1; // writing an opening parenthesis
350 }
351 }
352 while (!vec_stack.empty())
353 {
354 if (vec_stack.pop())
355 {
356 bp_fc[k_fc] = 1;
357 ++fc_cnt;
358 }
359 // writing a closing parenthesis in bp, not necessary as bp is initialized with zeros
360 ++k;
361 ++k_fc;
362 }
363 return fc_cnt;
364}
365
366// Gets ISA[SA[idx]+d]
367// d = depth of the character 0 = first position
368template <class t_csa>
369typename t_csa::size_type get_char_pos(typename t_csa::size_type idx, typename t_csa::size_type d, t_csa const & csa)
370{
371 if (d == 0)
372 return idx;
373 // if we have to apply \f$\LF\f$ or \f$\Phi\f$ more
374 // than 2*d times to calc csa(csa[idx]+d), we opt to
375 // apply \f$ \Phi \f$ d times
376 if (csa.sa_sample_dens + csa.isa_sample_dens > 2 * d + 2)
377 {
378 for (typename t_csa::size_type i = 0; i < d; ++i)
379 idx = csa.psi[idx];
380 return idx;
381 }
382 return csa.isa[csa[idx] + d];
383}
384
385// has_id<X>::value is true if class X has
386// implement method id
387// Adapted solution from jrok's proposal:
388// http://stackoverflow.com/questions/87372/check-if-a-class-has-a-member-function-of-a-given-signature
389template <typename t_wt>
390struct has_id
391{
392 template <typename T>
393 static constexpr auto check(T *) ->
394 typename std::is_same<decltype(std::declval<T>().id(std::declval<typename T::node_type &>())),
395 typename T::size_type>::type
396 {
397 return std::true_type();
398 }
399 template <typename>
400 static constexpr std::false_type check(...)
401 {
402 return std::false_type();
403 }
404 typedef decltype(check<t_wt>(nullptr)) type;
405 static constexpr bool value = type::value;
406};
407} // namespace sdsl
408
409#endif
cst_node_child_proxy_iterator(t_cst const *cst, node_type v)
std::forward_iterator_tag iterator_category
cst_node_child_proxy_iterator< t_cst > iterator_type
cst_node_child_proxy_iterator(iterator_type const &it)
bool operator!=(iterator_type const &it) const
bool operator==(iterator_type const &it) const
typename t_cst::size_type size_type
typename t_cst::node_type node_type
cst_node_child_proxy(cst_node_child_proxy const &p)
cst_node_child_proxy_iterator< t_cst > iterator_type
cst_node_child_proxy(t_cst const *cst, node_type v)
node_type operator[](size_type i) const
uint64_t size() const
Returns the number of elements currently stored.
int_vector_size_type size_type
size_type size() const noexcept
The number of elements in the int_vector.
void resize(const size_type size)
Resize the int_vector in terms of elements.
Stack which contains elements from [0..n] in sorted order. Duplicates are possible.
bool empty() const
Returns if the stack is empty.
size_type top() const
Returns the topmost index on the stack.
bool pop()
Pop the topmost index of the stack.
bool push(size_type x)
Push the index x of vector vec onto the stack.
A stack which contains strictly increasing numbers in the range from to .
size_type top() const
Returns the topmost index on the stack.
void pop()
Pop the topmost index of the stack.
void push(size_type x)
Push the index x of vector vec onto the stack.
size_type size() const
Returns the number of element is the stack.
int_vector.hpp contains the sdsl::int_vector class.
int_vector_buffer.hpp contains the sdsl::int_vector_buffer class.
void set_to_value(t_int_vec &v, uint64_t k)
Set all entries of int_vector to value k.
Definition util.hpp:597
Namespace for the succinct data structure library.
t_csa::size_type get_char_pos(typename t_csa::size_type idx, typename t_csa::size_type d, t_csa const &csa)
bit_vector::size_type construct_supercartesian_tree_bp_succinct_and_first_child(int_vector_buffer< t_width > &lcp_buf, bit_vector &bp, bit_vector &bp_fc, bool const minimum=true)
Calculate the balanced parentheses of the Super-Cartesian tree, described in Ohlebusch and Gog (SPIRE...
void construct_supercartesian_tree_bp(t_rac const &vec, bit_vector &bp, bool const minimum=true)
Calculate the balanced parentheses of the Super-Cartesian tree, described in Ohlebusch and Gog (SPIRE...
bit_vector construct_supercartesian_tree_bp_succinct(t_rac const &vec, bool const minimum=true)
Calculate the balanced parentheses of the Super-Cartesian tree, described in Ohlebusch and Gog (SPIRE...
int_vector< 1 > bit_vector
bit_vector is a specialization of the int_vector.
sorted_multi_stack_support.hpp contains a data structure for a stack which contains elements from [0....
static constexpr std::false_type check(...)
static constexpr auto check(T *) -> typename std::is_same< decltype(std::declval< T >().id(std::declval< typename T::node_type & >())), typename T::size_type >::type
static constexpr bool value
decltype(check< t_wt >(nullptr)) type
util.hpp contains some helper methods for int_vector and other stuff like demangle class names.