8#ifndef INCLUDED_SDSL_SUFFIX_TREE_ALGORITHM
9#define INCLUDED_SDSL_SUFFIX_TREE_ALGORITHM
46 typename t_cst::node_type & v,
47 const typename t_cst::size_type d,
48 const typename t_cst::char_type c,
49 typename t_cst::size_type & char_pos,
51 typename std::enable_if<std::is_same<cst_tag, typename t_cst::index_category>::value,
cst_tag>::type x =
cst_tag())
53 auto cc = cst.csa.char2comp[c];
54 if (cc == 0 and cc != c)
56 typename t_cst::size_type depth_node = cst.depth(v);
59 char_pos = cst.csa.psi[char_pos];
60 if (char_pos < cst.csa.C[cc] or char_pos >= cst.csa.C[cc + 1])
64 else if (d == depth_node)
66 v = cst.child(v, c, char_pos);
90template <
class t_cst,
class t_pat_iter>
93 typename t_cst::node_type & v,
94 typename t_cst::size_type d,
97 typename t_cst::size_type & char_pos,
99 typename std::enable_if<std::is_same<cst_tag, typename t_cst::index_category>::value,
cst_tag>::type x =
cst_tag())
103 typename t_cst::size_type
size = 0;
104 t_pat_iter it = begin;
126template <
class t_cst,
class t_pat_iter>
127typename t_cst::size_type
count(t_cst
const & cst, t_pat_iter begin, t_pat_iter end,
cst_tag)
129 return count(cst.csa, begin, end);
147template <
class t_cst,
class t_pat_iter,
class t_rac =
int_vector<64>>
152 typename std::enable_if<std::is_same<cst_tag, typename t_cst::index_category>::value,
cst_tag>::type x =
155 return locate(cst.csa, begin, end);
169template <
class t_cst,
class t_text_iter>
172 const typename t_cst::node_type & v,
175 typename std::enable_if<std::is_same<cst_tag, typename t_cst::index_category>::value,
cst_tag>::type x =
cst_tag())
183 typename t_cst::size_type begin = cst.csa[cst.lb(v)];
185 extract(cst.csa, begin, begin + cst.depth(v) - 1, text);
197template <
class t_cst>
200 const typename t_cst::node_type & v,
202 typename std::enable_if<std::is_same<cst_tag, typename t_cst::index_category>::value,
cst_tag>::type x =
cst_tag())
204 typedef typename t_cst::csa_type::string_type t_rac;
210 typename t_cst::size_type begin = cst.csa[cst.lb(v)];
212 return extract(cst.csa, begin, begin + cst.depth(v) - 1);
222template <
class t_cst>
223double H0(
const typename t_cst::node_type & v, t_cst
const & cst)
232 auto n = cst.size(v);
233 for (
auto const & child : cst.children(v))
235 double p = ((double)cst.size(child)) / n;
248template <
class t_cst>
249std::pair<double, size_t>
Hk(t_cst
const & cst,
typename t_cst::size_type k)
253 std::set<typename t_cst::size_type> leafs_with_d_smaller_k;
254 for (
typename t_cst::size_type d = 1; d < k; ++d)
256 leafs_with_d_smaller_k.insert(cst.csa.isa[cst.csa.size() - d]);
258 for (
typename t_cst::const_iterator it = cst.begin(), end = cst.end(); it != end; ++it)
262 if (!cst.is_leaf(*it))
264 typename t_cst::size_type d = cst.depth(*it);
269 hk += cst.size(*it) *
H0(*it, cst);
278 if (leafs_with_d_smaller_k.find(cst.lb(*it)) == leafs_with_d_smaller_k.end())
286 return {hk, context};
int_vector.hpp contains the sdsl::int_vector class.
Namespace for the succinct data structure library.
t_csa::size_type forward_search(t_csa const &csa, typename t_csa::size_type l, typename t_csa::size_type r, t_pat_iter begin, t_pat_iter end, typename t_csa::size_type &l_res, typename t_csa::size_type &r_res, SDSL_UNUSED typename std::enable_if< std::is_same< csa_tag, typename t_csa::index_category >::value, csa_tag >::type x=csa_tag())
Forward search for a pattern in an -interval in the CSA.
t_csa::size_type extract(t_csa const &csa, typename t_csa::size_type begin, typename t_csa::size_type end, t_text_iter text, SDSL_UNUSED typename std::enable_if< std::is_same< csa_tag, typename t_csa::index_category >::value, csa_tag >::type x=csa_tag())
Writes the substring T[begin..end] of the original text T to text[0..end-begin+1].
std::pair< double, size_t > Hk(t_cst const &cst, typename t_cst::size_type k)
Calculate the k-th order entropy of a text.
uint64_t count(t_k2_treap const &treap, k2_treap_ns::point_type p1, k2_treap_ns::point_type p2)
Count how many points are in the rectangle (p1,p2)
double H0(const typename t_cst::node_type &v, t_cst const &cst)
Calculate the zeroth order entropy of the text that follows a certain substring s.
int_vector ::size_type size(range_type const &r)
Size of a range.
t_rac locate(t_csa const &csa, t_pat_iter begin, t_pat_iter end, SDSL_UNUSED typename std::enable_if< std::is_same< csa_tag, typename t_csa::index_category >::value, csa_tag >::type x=csa_tag())
Calculates all occurrences of a pattern pat in a CSA.
Contains declarations and definitions of data structure concepts.
suffix_array_algorithm.hpp contains algorithms on CSAs