SDSL 3.0.3
Succinct Data Structure Library
Loading...
Searching...
No Matches
lcp_wt.hpp
Go to the documentation of this file.
1// Copyright (c) 2016, the SDSL Project Authors. All rights reserved.
2// Please see the AUTHORS file for details. Use of this source code is governed
3// by a BSD license that can be found in the LICENSE file.
8#ifndef INCLUDED_SDSL_LCP_WT
9#define INCLUDED_SDSL_LCP_WT
10
11#include <iostream>
12#include <stddef.h>
13#include <stdint.h>
14#include <string>
15#include <utility> // for pair
16#include <vector>
17
18#include <sdsl/bits.hpp>
19#include <sdsl/cereal.hpp>
20#include <sdsl/config.hpp>
21#include <sdsl/int_vector.hpp>
23#include <sdsl/io.hpp>
24#include <sdsl/iterators.hpp>
25#include <sdsl/ram_fs.hpp>
29#include <sdsl/util.hpp>
30#include <sdsl/wt_huff.hpp>
31
32namespace sdsl
33{
34
36
43template <uint8_t t_width = 0>
44class lcp_wt
45{
46public:
53 typedef const pointer const_pointer;
55 typedef ptrdiff_t difference_type;
57
60
61 enum
62 {
66 }; // as the lcp_wt is not fast for texts with long repetition
67
68 template <class Cst>
69 using type = lcp_wt;
70
71private:
72 small_lcp_type m_small_lcp; // vector for lcp values < 255
73 int_vector<t_width> m_big_lcp; // vector for lcp values > 254
74
75 typedef std::pair<size_type, size_type> tPII;
76 typedef std::vector<tPII> tVPII;
77
78public:
80 lcp_wt() = default;
82 lcp_wt(lcp_wt const &) = default;
83 lcp_wt(lcp_wt &&) = default;
84 lcp_wt & operator=(lcp_wt const &) = default;
85 lcp_wt & operator=(lcp_wt &&) = default;
86
88 lcp_wt(cache_config & config, std::string other_key = "")
89 {
90 std::string temp_file = tmp_file(config, "_lcp_sml");
91 std::string lcp_key = conf::KEY_LCP;
92 if ("" != other_key)
93 {
94 lcp_key = other_key;
95 }
96 int_vector_buffer<> lcp_buf(cache_file_name(lcp_key, config));
97 size_type l = 0, max_l = 0, big_sum = 0, n = lcp_buf.size();
98 {
99 int_vector<8> small_lcp = int_vector<8>(n);
100 for (size_type i = 0; i < n; ++i)
101 {
102 if ((l = lcp_buf[i]) < 255)
103 {
104 small_lcp[i] = l;
105 }
106 else
107 {
108 small_lcp[i] = 255;
109 if (l > max_l)
110 max_l = l;
111 ++big_sum;
112 }
113 }
114 store_to_file(small_lcp, temp_file);
115 }
116 {
117 int_vector_buffer<8> lcp_sml_buf(temp_file);
118 m_small_lcp = small_lcp_type(lcp_sml_buf.begin(), lcp_sml_buf.end(), config.dir);
119 }
120 sdsl::remove(temp_file);
121 m_big_lcp = int_vector<>(big_sum, 0, bits::hi(max_l) + 1);
122 {
123 for (size_type i = 0, ii = 0; i < n; ++i)
124 {
125 if (lcp_buf[i] >= 255)
126 {
127 m_big_lcp[ii++] = lcp_buf[i];
128 }
129 }
130 }
131 }
132
135 {
136 return m_small_lcp.size();
137 }
138
141 {
143 }
144
146 bool empty() const
147 {
148 return 0 == m_small_lcp.size();
149 }
150
153 {
154 return const_iterator(this, 0);
155 }
156
159 {
160 return const_iterator(this, size());
161 }
162
164
167 {
168 if (m_small_lcp[i] != 255)
169 {
170 return m_small_lcp[i];
171 }
172 else
173 {
174 return m_big_lcp[m_small_lcp.rank(i, 255)];
175 }
176 }
177
179 size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const
180 {
181 structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this));
182 size_type written_bytes = 0;
183 written_bytes += m_small_lcp.serialize(out, child, "small_lcp");
184 written_bytes += m_big_lcp.serialize(out, child, "large_lcp");
185 structure_tree::add_size(child, written_bytes);
186 return written_bytes;
187 }
188
190 void load(std::istream & in)
191 {
192 m_small_lcp.load(in);
193 m_big_lcp.load(in);
194 }
195
196 template <typename archive_t>
197 void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const
198 {
199 ar(CEREAL_NVP(m_small_lcp));
200 ar(CEREAL_NVP(m_big_lcp));
201 }
202
203 template <typename archive_t>
204 void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar)
205 {
206 ar(CEREAL_NVP(m_small_lcp));
207 ar(CEREAL_NVP(m_big_lcp));
208 }
209
211 bool operator==(lcp_wt const & other) const noexcept
212 {
213 return (m_small_lcp == other.m_small_lcp) && (m_big_lcp == other.m_big_lcp);
214 }
215
217 bool operator!=(lcp_wt const & other) const noexcept
218 {
219 return !(*this == other);
220 }
221};
222
223} // end namespace sdsl
224#endif
bits.hpp contains the sdsl::bits class.
cereal.hpp offers cereal support
#define CEREAL_NVP(X)
Definition cereal.hpp:31
uint64_t size() const
Returns the number of elements currently stored.
A generic vector class for integers of width .
int_vector_size_type size_type
int_vector_trait< t_width >::value_type value_type
static size_type max_size() noexcept
Maximum size of the int_vector.
random_access_const_iterator< lcp_wt > const_iterator
Definition lcp_wt.hpp:48
void CEREAL_SAVE_FUNCTION_NAME(archive_t &ar) const
Definition lcp_wt.hpp:197
lcp_plain_tag lcp_category
Definition lcp_wt.hpp:58
bool operator!=(lcp_wt const &other) const noexcept
Inequality operator.
Definition lcp_wt.hpp:217
const value_type const_reference
Definition lcp_wt.hpp:50
const_iterator iterator
Definition lcp_wt.hpp:49
static size_type max_size()
Returns the largest size that lcp_wt can ever have.
Definition lcp_wt.hpp:140
lcp_wt()=default
Default Constructor.
int_vector< t_width >::value_type value_type
Definition lcp_wt.hpp:47
lcp_wt & operator=(lcp_wt &&)=default
lcp_tag index_category
Definition lcp_wt.hpp:59
int_vector ::size_type size_type
Definition lcp_wt.hpp:54
lcp_wt type
Definition lcp_wt.hpp:69
void load(std::istream &in)
Load from a stream.
Definition lcp_wt.hpp:190
ptrdiff_t difference_type
Definition lcp_wt.hpp:55
const pointer const_pointer
Definition lcp_wt.hpp:53
value_type operator[](size_type i) const
[]-operator
Definition lcp_wt.hpp:166
lcp_wt & operator=(lcp_wt const &)=default
bool operator==(lcp_wt const &other) const noexcept
Equality operator.
Definition lcp_wt.hpp:211
bool empty() const
Returns if the data structure is empty.
Definition lcp_wt.hpp:146
const_iterator begin() const
Returns a const_iterator to the first element.
Definition lcp_wt.hpp:152
size_type size() const
Number of elements in the instance.
Definition lcp_wt.hpp:134
const_reference * pointer
Definition lcp_wt.hpp:52
const_reference reference
Definition lcp_wt.hpp:51
lcp_wt(lcp_wt const &)=default
Copy / Move constructor.
lcp_wt(cache_config &config, std::string other_key="")
Constructor.
Definition lcp_wt.hpp:88
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
Serialize to a stream.
Definition lcp_wt.hpp:179
const_iterator end() const
Returns a const_iterator to the element after the last element.
Definition lcp_wt.hpp:158
wt_huff< bit_vector, rank_support_v<>, select_support_scan< 1 >, select_support_scan< 0 > > small_lcp_type
Definition lcp_wt.hpp:56
void CEREAL_LOAD_FUNCTION_NAME(archive_t &ar)
Definition lcp_wt.hpp:204
lcp_wt(lcp_wt &&)=default
Generic iterator for a random access container.
Definition iterators.hpp:24
A class supporting linear time select queries.
static structure_tree_node * add_child(structure_tree_node *v, std::string const &name, std::string const &type)
static void add_size(structure_tree_node *v, uint64_t value)
wt_pc< huff_shape, t_bitvector, t_rank, t_select, t_select_zero, t_tree_strat > wt_huff
A Huffman-shaped wavelet tree.
Definition wt_huff.hpp:67
int_vector.hpp contains the sdsl::int_vector class.
int_vector_buffer.hpp contains the sdsl::int_vector_buffer class.
io.hpp contains some methods for reading/writing sdsl structures.
iterators.hpp contains an generic iterator for random access containers.
constexpr char KEY_LCP[]
Definition config.hpp:43
Namespace for the succinct data structure library.
int remove(std::string const &)
Remove a file.
Definition ram_fs.hpp:221
std::string tmp_file(cache_config const &config, std::string name_part="")
Returns a name for a temporary file. I.e. the name was not used before.
Definition io.hpp:759
std::string cache_file_name(std::string const &key, cache_config const &config)
Returns the file name of the resource.
Definition io.hpp:688
bool store_to_file(T const &v, std::string const &file)
Store a data structure to a file.
Definition io.hpp:874
ram_fs.hpp
Contains declarations and definitions of data structure concepts.
select_support_scan.hpp contains classes that support a sdsl::bit_vector with linear time select.
static constexpr uint32_t hi(uint64_t x)
Position of the most significant set bit the 64-bit word x.
Definition bits.hpp:653
Helper class for construction process.
Definition config.hpp:66
std::string dir
Definition config.hpp:70
structure_tree.hpp contains a helper class which can represent the memory structure of a class.
util.hpp contains some helper methods for int_vector and other stuff like demangle class names.
wt_huff.hpp contains a class for a Huffman shaped wavelet tree over byte sequences.