8#ifndef INCLUDED_SDSL_CST_SCT3
9#define INCLUDED_SDSL_CST_SCT3
40template <class t_int = int_vector<>::size_type>
77template <
class t_csa = csa_wt<>,
78 class t_lcp = lcp_dac<>,
79 class t_bp_support = bp_support_sada<>,
80 class t_bv = bit_vector,
81 class t_rank =
typename std::
82 conditional<std::is_same<t_bv, bit_vector>::value, rank_support_v5<>,
typename t_bv::rank_1_type>::type,
83 class t_sel =
typename std::conditional<
84 std::is_same<t_bv, bit_vector>::value
85 and std::is_same<
typename t_csa::alphabet_category,
byte_alphabet_tag>::value,
86 select_support_scan<>,
87 typename t_bv::select_1_type>::type>
90 static_assert(std::is_same<typename index_tag<t_csa>::type,
csa_tag>::value,
91 "First template argument has to be a compressed suffix array.");
99 typedef typename t_lcp::template type<cst_sct3>
lcp_type;
109 typedef typename t_csa::alphabet_type::sigma_type
sigma_type;
140 assert(m_bp[ckpos] == 0);
141 kpos = m_bp_support.find_open(ckpos);
142 return m_bp_support.rank(kpos) - 1;
154 if (v.cipos > v.jp1pos)
156 ckpos = v.jp1pos - 1;
162 assert(m_bp[ckpos] == 0);
165 kpos = m_bp_support.find_open(ckpos);
166 return m_bp_support.rank(kpos) - 1;
171 size_type r = ckpos - m_bp_support.rank(ckpos);
178 assert(m_bp[ckpos] == 0);
179 kpos = m_bp_support.find_open(ckpos);
180 return m_bp_support.rank(kpos) - 1;
185 if (kpos < m_bp.
size())
186 ckpos = m_bp_support.find_close(kpos);
222 size_type cipos = m_bp_support.find_close(ipos);
223 size_type result = m_bp_support.rank(cipos);
243 psvcpos = m_bp.
size() - 1;
248 psvpos = m_bp_support.enclose(ipos);
249 psvcpos = m_bp_support.find_close(psvpos);
250 return m_bp_support.rank(psvpos) - 1;
253 size_type r0 = cipos - m_bp_support.rank(cipos);
255 uint64_t
const * p = m_first_child.data() + (r0 >> 6);
256 uint64_t w = (*p) >> (r0 & 0x3F);
259 next_first_child = cipos +
bits::lo(w);
260 if (cipos == next_first_child and m_bp[next_first_child + 1])
262 psvpos = m_bp_support.enclose(ipos);
263 psvcpos = m_bp_support.find_close(psvpos);
264 return m_bp_support.rank(psvpos) - 1;
272 while (!(w = *p) and steps-- > 0)
283 auto pos = m_first_child_select(m_first_child_rank(r0 + 1) + 1);
286 next_first_child = cipos + delta;
288 if (!m_bp[next_first_child + 1])
290 psvcpos = next_first_child + 1;
291 psvpos = m_bp_support.find_open(psvcpos);
292 return m_bp_support.rank(psvpos) - 1;
296 psvpos = m_bp_support.enclose(m_bp_support.find_open(next_first_child));
297 psvcpos = m_bp_support.find_close(psvpos);
298 return m_bp_support.rank(psvpos) - 1;
308 size_type i = m_bp_support.select(l + 1);
309 size_type j = m_bp_support.select(r + 1);
310 size_type fc_i = m_bp_support.find_close(i);
317 size_type ec = m_bp_support.rr_enclose(i, j);
318 if (ec == m_bp_support.size())
324 return m_bp_support.rank(ec) - 1;
357 m_bp_support(cst.m_bp_support),
358 m_first_child(cst.m_first_child),
359 m_first_child_rank(cst.m_first_child_rank),
360 m_first_child_select(cst.m_first_child_select),
364 m_bp_support.set_vector(&m_bp);
365 m_first_child_rank.set_vector(&m_first_child);
366 m_first_child_select.set_vector(&m_first_child);
374 m_csa(std::move(cst.m_csa)),
375 m_bp(std::move(cst.m_bp)),
376 m_bp_support(std::move(cst.m_bp_support)),
377 m_first_child(std::move(cst.m_first_child)),
378 m_first_child_rank(std::move(cst.m_first_child_rank)),
379 m_first_child_select(std::move(cst.m_first_child_select)),
383 m_bp_support.set_vector(&m_bp);
384 m_first_child_rank.set_vector(&m_first_child);
385 m_first_child_select.set_vector(&m_first_child);
396 return m_bp.
size() >> 1;
405 return t_csa::max_size();
414 return m_csa.empty();
423 if (0 == m_bp.
size())
431 if (0 == m_bp.
size() and
root() == v)
456 if (0 == m_bp.
size())
476 *
this = std::move(tmp);
489 m_csa = std::move(cst.m_csa);
491 m_bp = std::move(cst.m_bp);
492 m_bp_support = std::move(cst.m_bp_support);
493 m_bp_support.set_vector(&m_bp);
494 m_first_child = std::move(cst.m_first_child);
495 m_first_child_rank = std::move(cst.m_first_child_rank);
496 m_first_child_rank.set_vector(&m_first_child);
497 m_first_child_select = std::move(cst.m_first_child_select);
498 m_first_child_select.set_vector(&m_first_child);
499 m_nodes = std::move(cst.m_nodes);
513 void load(std::istream & in);
516 template <
typename archive_t>
520 template <
typename archive_t>
526 return (m_csa == other.m_csa) && (m_lcp == other.m_lcp) && (m_bp == other.m_bp)
527 && (m_bp_support == other.m_bp_support) && (m_first_child == other.m_first_child)
528 && (m_first_child_rank == other.m_first_child_rank)
529 && (m_first_child_select == other.m_first_child_select) ;
535 return !(*
this == other);
573 assert(i > 0 and i <=
size());
577 jp1pos = m_bp.
size();
578 else if (m_bp[ipos + 1])
581 jp1pos = m_bp_support.select(i + 1);
582 return node_type(i - 1, i - 1, ipos, m_bp_support.find_close(ipos), jp1pos);
593 return v.
j - v.
i + 1;
654 size_type psv_pos, psv_cpos, psv_v, nsv_v, nsv_p1pos;
655 psv_v = psv(v.
j + 1, v.
jp1pos, m_bp_support.find_close(v.
jp1pos), psv_pos, psv_cpos);
656 nsv_v = nsv(v.
j + 1, v.
jp1pos) - 1;
657 if (nsv_v ==
size() - 1)
659 nsv_p1pos = m_bp.
size();
663 nsv_p1pos = m_bp_support.select(nsv_v + 2);
665 return node_type(psv_v, nsv_v, psv_pos, psv_cpos, nsv_p1pos);
670 psv_v = psv(v.
i, v.
ipos, v.
cipos, psv_pos, psv_cpos);
704 bool last_child = m_bp[cjp1posm1];
708 size_type first_child_idx = cjp1posm1 - m_bp_support.rank(cjp1posm1);
709 last_child = m_first_child[first_child_idx];
715 if (nsv_v ==
size() - 1)
717 nsv_p1pos = m_bp.
size();
721 nsv_p1pos = m_bp_support.select(nsv_v + 2);
727 size_type new_j = m_bp_support.rank(m_bp_support.find_open(cjp1posm1)) - 2;
731 m_bp_support.find_close(v.
jp1pos),
732 m_bp_support.select(new_j + 2));
754 size_type k = 0, kpos = 0, k_find_close = 0;
756 k = select_l_index(v, 1, kpos, k_find_close);
762 k1 = select_l_index(v, i - 1, kpos1, k_find_close1);
766 k2 = select_l_index(v, i, kpos2, k_find_close2);
767 return node_type(k1, k2 - 1, kpos1, k_find_close1, kpos2);
784 size_type r = closing_pos_of_first_l_index(v);
786 uint64_t
const * p = m_first_child.data() + (r0 >> 6);
787 uint8_t offset = r0 & 0x3F;
794 else if (m_first_child.data() == p)
803 while (p > m_first_child.data() and steps-- > 0)
814 auto goal_rank = m_first_child_rank(r0);
821 return r0 - m_first_child_select(goal_rank) + 1;
843 if (cc == 0 and c != 0)
845 size_type char_ex_max_pos = m_csa.C[((
size_type)1) + cc], char_inc_min_pos = m_csa.C[cc];
851 if (char_pos >= char_ex_max_pos)
856 else if (char_pos >= char_inc_min_pos)
865 if (char_pos < char_inc_min_pos)
870 else if (char_pos < char_ex_max_pos)
876 size_type l_bound = 2, r_bound = child_cnt, mid, kpos, ckpos, l_index;
877 while (l_bound < r_bound)
879 mid = (l_bound + r_bound) >> 1;
881 l_index = select_l_index(v, mid - 1, kpos, ckpos);
884 if (char_inc_min_pos > char_pos)
888 else if (char_ex_max_pos <= char_pos)
896 size_type lp1_index = m_bp_support.rank(m_bp_support.find_open(ckpos - 1)) - 1;
898 if (lp1_index - 1 <
size() - 1)
900 jp1pos = m_bp_support.select(lp1_index + 1);
902 return node_type(l_index, lp1_index - 1, kpos, ckpos, jp1pos);
913 return child(v, c, char_pos);
928 assert(d <=
depth(v));
931 while (c_begin < c_end)
933 mid = (c_begin + c_end) >> 1;
934 if (m_csa.C[mid] <= order)
943 return m_csa.comp2char[c_begin - 1];
957 if (v.
i > w.
i or (v.
i == w.
i and v.
j < w.
j))
968 size_type min_index_pos = m_bp_support.select(min_index + 1);
969 size_type min_index_cpos = m_bp_support.find_close(min_index_pos);
971 if (min_index_cpos >= (m_bp.
size() - m_csa.sigma))
975 size_type new_j = nsv(min_index, min_index_pos) - 1;
977 size_type new_i = psv(min_index, min_index_pos, min_index_cpos, new_ipos, new_icpos);
979 if (new_j <
size() - 1)
981 jp1pos = m_bp_support.select(new_j + 2);
983 return node_type(new_i, new_j, new_ipos, new_icpos, jp1pos);
1000 else if (v.
i == v.
j)
1002 return size() - m_csa[v.
i];
1007 size_type l = select_l_index(v, 1, kpos, ckpos);
1050 if (v.
i == 0 and v.
j == 0)
1058 size_type min_index_pos = m_bp_support.select(min_index + 1);
1059 size_type min_index_cpos = m_bp_support.find_close(min_index_pos);
1060 if (min_index_cpos >= (m_bp.
size() - m_csa.sigma))
1064 size_type new_j = nsv(min_index, min_index_pos) - 1;
1066 size_type new_i = psv(min_index, min_index_pos, min_index_cpos, new_ipos, new_icpos);
1068 if (new_j <
size() - 1)
1070 jp1pos = m_bp_support.select(new_j + 2);
1072 return node_type(new_i, new_j, new_ipos, new_icpos, jp1pos);
1087 size_type c_right = m_csa.bwt.rank(v.
j + 1, c);
1088 if (c_left == c_right)
1090 if (c_left + 1 == c_right)
1091 return select_leaf(m_csa.C[m_csa.char2comp[c]] + c_left + 1);
1094 size_type left = m_csa.C[m_csa.char2comp[c]] + c_left;
1095 size_type right = m_csa.C[m_csa.char2comp[c]] + c_right - 1;
1096 assert(left < right);
1098 size_type ipos = m_bp_support.select(left + 1);
1100 if (right <
size() - 1)
1102 jp1pos = m_bp_support.select(right + 2);
1104 return node_type(left, right, ipos, m_bp_support.find_close(ipos), jp1pos);
1140 ckpos = v.
cipos - 1;
1142 assert(m_bp[ckpos] == 0);
1143 size_type r0ckpos = ckpos - m_bp_support.rank(ckpos);
1144 return size() + m_first_child_rank(r0ckpos);
1167 id =
id -
size() + 1;
1173 size_type arg = m_first_child_rank(mid);
1194 size_type arg = mid - m_bp_support.rank(mid - 1);
1195 if (arg < r0ckpos + 1)
1206 if (ckpos == m_bp.
size() - 1)
1210 if (m_bp[ckpos + 1])
1213 size_type j = m_bp_support.rank(jp1pos - 1) - 1;
1214 size_type kpos = m_bp_support.find_open(ckpos);
1215 size_type ipos = m_bp_support.enclose(kpos);
1216 size_type cipos = m_bp_support.find_close(ipos);
1217 size_type i = m_bp_support.rank(ipos - 1);
1218 return node_type(i, j, ipos, cipos, jp1pos);
1223 size_type ipos = m_bp_support.find_open(cipos);
1224 size_type i = m_bp_support.rank(ipos - 1);
1227 if (j !=
size() - 1)
1229 jp1pos = m_bp_support.select(j + 2);
1231 return node_type(i, j, ipos, cipos, jp1pos);
1255 jp1pos = m_bp.
size();
1259 jp1pos = m_bp_support.select(
rb + 2);
1261 return node_type(
lb,
rb, ipos, m_bp_support.find_close(ipos), jp1pos);
1271 size_type ipos = m_bp_support.select(i + 1);
1272 size_type cipos = m_bp_support.find_close(ipos);
1273 return m_first_child_rank.rank(((ipos + cipos - 1) >> 1) - i);
1280template <
class t_csa,
class t_lcp,
class t_bp_support,
class t_bv,
class t_rank,
class t_sel>
1287 m_nodes += m_bp.
size() / 2;
1288 if (m_bp.
size() == 2)
1295 util::init_support(m_bp_support, &m_bp);
1296 util::init_support(m_first_child_rank, &m_first_child);
1297 util::init_support(m_first_child_select, &m_first_child);
1299 if (!build_only_bps)
1306 if (!build_only_bps)
1313template <
class t_csa,
class t_lcp,
class t_bp_support,
class t_bv,
class t_rank,
class t_sel>
1320 written_bytes += m_csa.serialize(out, child,
"csa");
1321 written_bytes += m_lcp.serialize(out, child,
"lcp");
1322 written_bytes += m_bp.
serialize(out, child,
"bp");
1323 written_bytes += m_bp_support.serialize(out, child,
"bp_support");
1324 written_bytes += m_first_child.serialize(out, child,
"mark_child");
1325 written_bytes += m_first_child_rank.serialize(out, child,
"mark_child_rank");
1326 written_bytes += m_first_child_select.serialize(out, child,
"mark_child_select");
1327 written_bytes +=
write_member(m_nodes, out, child,
"node_cnt");
1329 return written_bytes;
1332template <
class t_csa,
class t_lcp,
class t_bp_support,
class t_bv,
class t_rank,
class t_sel>
1338 m_bp_support.load(in, &m_bp);
1339 m_first_child.load(in);
1340 m_first_child_rank.load(in, &m_first_child);
1341 m_first_child_select.load(in, &m_first_child);
1345template <
class t_csa,
class t_lcp,
class t_bp_support,
class t_bv,
class t_rank,
class t_sel>
1346template <
typename archive_t>
1359template <
class t_csa,
class t_lcp,
class t_bp_support,
class t_bv,
class t_rank,
class t_sel>
1360template <
typename archive_t>
1368 m_bp_support.set_vector(&m_bp);
1371 m_first_child_rank.set_vector(&m_first_child);
1373 m_first_child_select.set_vector(&m_first_child);
1377template <
class t_
int>
1402 if (
i != interval.
i)
1403 return i < interval.
i;
1404 return j < interval.
j;
1412 return i == interval.
i and
j == interval.
j;
1421 return !(*
this == interval);
1430template <
class t_
int>
1433 os <<
"-[" << interval.
i <<
"," << interval.
j <<
"](" << interval.
ipos <<
"," << interval.
cipos <<
","
1434 << interval.
jp1pos <<
")";
bits.hpp contains the sdsl::bits class.
bp_support_sada.hpp contains an implementation of a balanced parentheses support structure proposed b...
cereal.hpp offers cereal support
#define CEREAL_LOAD_FUNCTION_NAME
#define CEREAL_SAVE_FUNCTION_NAME
A forward iterator for a bottom up traversal of a suffix tree.
An forward iterator for (compressed) suffix trees.
A class for the Compressed Suffix Tree (CST) proposed by Ohlebusch and Gog.
t_csa::alphabet_type::sigma_type sigma_type
bool is_leaf(node_type const &v) const
Decide if a node is a leaf.
node_type lca(node_type v, node_type w) const
Calculate the LCA of two nodes v and w
const_iterator begin() const
Returns a const_iterator to the first element of a depth first traversal of the tree.
const_bottom_up_iterator end_bottom_up() const
Returns an iterator to the element after the last element of a bottom-up traversal of the tree.
node_type child(node_type const &v, const char_type c, size_type &char_pos) const
Get the child w of node v which edge label (v,w) starts with character c.
node_type rightmost_leaf(node_type const &v) const
Calculates the rightmost leaf in the subtree rooted at node v.
t_bp_support bp_support_type
static size_type max_size()
Returns the largest size that cst_sct3 can ever have.
void CEREAL_LOAD_FUNCTION_NAME(archive_t &ar)
Serialise (load) via cereal.
node_type select_leaf(size_type i) const
Return the i-th leaf (1-based from left to right).
size_type lb(node_type const &v) const
Calculates the index of the leftmost leaf in the corresponding suffix array.
node_type root() const
Return the root of the suffix tree.
node_type inv_id(size_type id)
Computes the node for such that id(v)=id.
size_type node_depth(node_type v) const
Returns the node depth of node v.
const_bottom_up_iterator begin_bottom_up() const
Returns an iterator to the first element of a bottom-up traversal of the tree.
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
Serialize to a stream.
node_type node(size_type lb, size_type rb) const
Get the node in the suffix tree which corresponds to the lcp-interval [lb..rb].
size_type nodes() const
Get the number of nodes of the suffix tree.
size_type size(node_type const &v) const
Calculate the number of leaves in the subtree rooted at node v.
cst_bottom_up_const_forward_iterator< cst_sct3 > const_bottom_up_iterator
node_type parent(node_type const &v) const
Calculate the parent node of a node v.
t_csa::string_type string_type
bv_type const & first_child_bv
sel_type const & first_child_select
rank_type const & first_child_rank
node_type leftmost_leaf(node_type const &v) const
Calculates the leftmost leaf in the subtree rooted at node v.
bool empty() const
Returns if the data structure is empty.
cst_sct3(cst_sct3 &&cst)
Move constructor.
void CEREAL_SAVE_FUNCTION_NAME(archive_t &ar) const
Serialise (save) via cereal.
node_type sibling(node_type const &v) const
Returns the next sibling of node v.
t_csa::char_type char_type
bool operator==(cst_sct3 const &other) const noexcept
Equality operator.
bool operator!=(cst_sct3 const &other) const noexcept
Inequality operator.
size_type size() const
Number of leaves of the suffix tree.
const_iterator end(node_type const &v) const
Returns a const_iterator to the element past the end of a depth first traversal of the subtree rooted...
node_type select_child(node_type const &v, size_type i) const
Get the i-th child of a node v.
size_type sn(node_type const &v) const
Computes the suffix number of a leaf node v.
cst_sct3 & operator=(cst_sct3 const &cst)
Assignment Operator.
cst_sct3(cst_sct3 const &cst)
Copy constructor.
node_type wl(node_type const &v, const char_type c) const
Compute the Weiner link of node v and character c.
node_type sl(node_type const &v) const
Compute the suffix link of node v.
const_iterator end() const
Returns a const_iterator to the element after the last element of a depth first traversal of the tree...
cst_dfs_const_forward_iterator< cst_sct3 > const_iterator
size_type id(node_type const &v) const
Computes a unique identification number for a node of the suffx tree in the range [0....
node_type child(node_type const &v, const char_type c) const
Get the child w of node v which edge label (v,w) starts with character c.
t_csa::size_type size_type
bp_interval< size_type > node_type
Type for the nodes in the tree.
size_type depth(node_type const &v) const
Returns the string depth of node v.
size_type rb(node_type const &v) const
Calculates the index of the rightmost leaf in the corresponding suffix array.
cst_sct3 & operator=(cst_sct3 &&cst)
Assignment Move Operator.
t_lcp::template type< cst_sct3 > lcp_type
ptrdiff_t difference_type
bp_support_type const & bp_support
const_iterator begin(node_type const &v) const
Returns a const_iterator to the first element of a depth first traversal of the subtree rooted at nod...
t_csa::alphabet_category alphabet_category
t_csa::alphabet_type::comp_char_type comp_char_type
void load(std::istream &in)
Load from a stream.
size_type degree(node_type const &v) const
Get the number of children of a node v.
size_type tlcp_idx(size_type i) const
Maps an index i to the position in TLCP where LCP[i] can be found.
cst_sct3()=default
Default constructor.
char_type edge(node_type const &v, size_type d) const
Returns the d-th character (1-based indexing) of the edge-label pointing to v.
cst_node_child_proxy< cst_sct3 > children(node_type const &v) const
Return a proxy object which allows iterating over the children of a node.
void load(std::istream &in)
Load the int_vector for a stream.
size_type size() const noexcept
The number of elements in the int_vector.
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
Serializes the int_vector to a stream.
static mm_event_proxy event(std::string const &name)
static structure_tree_node * add_child(structure_tree_node *v, std::string const &name, std::string const &type)
static void add_size(structure_tree_node *v, uint64_t value)
csa_wt.hpp contains an implementation of the compressed suffix array based on a wavelet tree.
cst_iterators.hpp contains iterator classes for traversing (compressed) suffix arrays.
int_vector.hpp contains the sdsl::int_vector class.
int_vector_buffer.hpp contains the sdsl::int_vector_buffer class.
io.hpp contains some methods for reading/writing sdsl structures.
lcp.hpp contains classes for lcp information.
lcp_dac.hpp contains an implementation of a (compressed) LCP array.
memory_tracking.hpp contains two function for allocating and deallocating memory
Namespace for the succinct data structure library.
std::ostream & operator<<(std::ostream &os, bp_interval< t_int > const &interval)
t_csa::size_type get_char_pos(typename t_csa::size_type idx, typename t_csa::size_type d, t_csa const &csa)
bit_vector::size_type construct_supercartesian_tree_bp_succinct_and_first_child(int_vector_buffer< t_width > &lcp_buf, bit_vector &bp, bit_vector &bp_fc, bool const minimum=true)
Calculate the balanced parentheses of the Super-Cartesian tree, described in Ohlebusch and Gog (SPIRE...
std::string cache_file_name(std::string const &key, cache_config const &config)
Returns the file name of the resource.
void copy_lcp(t_lcp &lcp, t_lcp const &lcp_c, t_cst const &cst)
size_t write_member(T const &t, std::ostream &out, sdsl::structure_tree_node *v=nullptr, std::string name="")
void move_lcp(t_lcp &&lcp, t_lcp &&lcp_c, t_cst const &cst)
void read_member(T &t, std::istream &in)
bool load_from_cache(T &v, std::string const &key, cache_config const &config, bool add_type_hash=false)
void load_lcp(t_lcp &lcp, std::istream &in, t_cst const &cst)
void set_lcp_pointer(t_lcp &lcp, t_cst const &cst)
void construct_lcp(t_lcp &lcp, t_cst const &cst, cache_config &config)
rank_support_v5.hpp contains rank_support_v5.5
Contains declarations and definitions of data structure concepts.
select_support_scan.hpp contains classes that support a sdsl::bit_vector with linear time select.
static constexpr uint32_t lo(uint64_t x)
Calculates the position of the rightmost 1-bit in the 64bit integer x if it exists.
static constexpr uint32_t hi(uint64_t x)
Position of the most significant set bit the 64-bit word x.
static constexpr uint64_t lo_set[65]
lo_set[i] is a 64-bit word with the i least significant bits set and the high bits not set.
bp_interval & operator=(bp_interval const &interval)=default
Assignment operator.
bp_interval(bp_interval const &iv)=default
Copy constructor.
t_int i
The left border of the lcp-interval .
bp_interval & operator=(bp_interval &&interval)=default
Move assignment.
bp_interval(t_int i=0, t_int j=0, t_int ipos=0, t_int cipos=0, t_int jp1pos=0)
Constructor.
bool operator<(bp_interval const &interval) const
t_int j
The right border of the lcp-interval .
bool operator!=(bp_interval const &interval) const
Inequality operator.
bool operator==(bp_interval const &interval) const
Equality operator.
bp_interval(bp_interval &&iv)=default
Move copy constructor.
Helper class for construction process.
structure_tree.hpp contains a helper class which can represent the memory structure of a class.
util.hpp contains some helper methods for int_vector and other stuff like demangle class names.