SDSL 3.0.3
Succinct Data Structure Library
Loading...
Searching...
No Matches
lcp_byte.hpp
Go to the documentation of this file.
1// Copyright (c) 2016, the SDSL Project Authors. All rights reserved.
2// Please see the AUTHORS file for details. Use of this source code is governed
3// by a BSD license that can be found in the LICENSE file.
8#ifndef INCLUDED_SDSL_LCP_BYTE
9#define INCLUDED_SDSL_LCP_BYTE
10
11#include <iostream>
12#include <stddef.h>
13#include <stdint.h>
14#include <string>
15#include <utility> // for pair
16#include <vector>
17
18#include <sdsl/bits.hpp>
19#include <sdsl/cereal.hpp>
20#include <sdsl/config.hpp>
21#include <sdsl/int_vector.hpp>
23#include <sdsl/io.hpp>
24#include <sdsl/iterators.hpp>
27#include <sdsl/util.hpp>
28
29namespace sdsl
30{
31
33
44template <uint8_t t_width = 0>
46{
47public:
54 typedef const pointer const_pointer;
56 typedef ptrdiff_t difference_type;
57
60
61 enum
62 {
65 sa_order = 1
66 }; // as the lcp_byte is not fast for texts with long repetition
67
68 template <class Cst>
69 using type = lcp_byte;
70
71private:
72 int_vector<8> m_small_lcp; // vector for LCP values < 255
73 int_vector<t_width> m_big_lcp; // vector for LCP values > 254
74 int_vector<t_width> m_big_lcp_idx; // index of LCP entries in the LCP array
75
76 typedef std::pair<size_type, size_type> tPII;
77 typedef std::vector<tPII> tVPII;
78
79public:
81 lcp_byte() = default;
82 lcp_byte(lcp_byte const &) = default;
83 lcp_byte(lcp_byte &&) = default;
84 lcp_byte & operator=(lcp_byte const &) = default;
85 lcp_byte & operator=(lcp_byte &&) = default;
86
89 {
90 std::string lcp_file = cache_file_name(conf::KEY_LCP, config);
91 int_vector_buffer<> lcp_buf(lcp_file);
92 m_small_lcp = int_vector<8>(lcp_buf.size());
93 size_type l = 0, max_l = 0, max_big_idx = 0, big_sum = 0;
94
95 for (size_type i = 0; i < m_small_lcp.size(); ++i)
96 {
97 if ((l = lcp_buf[i]) < 255)
98 {
99 m_small_lcp[i] = l;
100 }
101 else
102 {
103 m_small_lcp[i] = 255;
104 if (l > max_l)
105 max_l = l;
106 max_big_idx = i;
107 ++big_sum;
108 }
109 }
110 m_big_lcp = int_vector<>(big_sum, 0, bits::hi(max_l) + 1);
111 m_big_lcp_idx = int_vector<>(big_sum, 0, bits::hi(max_big_idx) + 1);
112
113 for (size_type i = 0, ii = 0; i < m_small_lcp.size(); ++i)
114 {
115 if ((l = lcp_buf[i]) >= 255)
116 {
117 m_big_lcp[ii] = l;
118 m_big_lcp_idx[ii] = i;
119 ++ii;
120 }
121 }
122 }
123
126 {
127 return m_small_lcp.size();
128 }
129
132 {
134 }
135
137 bool empty() const
138 {
139 return m_small_lcp.empty();
140 }
141
144 {
145 return const_iterator(this, 0);
146 }
147
150 {
151 return const_iterator(this, size());
152 }
153
155
159 {
160 if (m_small_lcp[i] != 255)
161 {
162 return m_small_lcp[i];
163 }
164 else
165 {
166 size_type idx = std::lower_bound(m_big_lcp_idx.begin(), m_big_lcp_idx.end(), i) - m_big_lcp_idx.begin();
167 return m_big_lcp[idx];
168 }
169 }
170
172 size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const
173 {
174 structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this));
175 size_type written_bytes = 0;
176 written_bytes += m_small_lcp.serialize(out, child, "small_lcp");
177 written_bytes += m_big_lcp.serialize(out, child, "large_lcp");
178 written_bytes += m_big_lcp_idx.serialize(out, child, "large_lcp_idx");
179 structure_tree::add_size(child, written_bytes);
180 return written_bytes;
181 }
182
184 void load(std::istream & in)
185 {
186 m_small_lcp.load(in);
187 m_big_lcp.load(in);
188 m_big_lcp_idx.load(in);
189 }
190
191 template <typename archive_t>
192 void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const
193 {
194 ar(CEREAL_NVP(m_small_lcp));
195 ar(CEREAL_NVP(m_big_lcp));
196 ar(CEREAL_NVP(m_big_lcp_idx));
197 }
198
199 template <typename archive_t>
200 void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar)
201 {
202 ar(CEREAL_NVP(m_small_lcp));
203 ar(CEREAL_NVP(m_big_lcp));
204 ar(CEREAL_NVP(m_big_lcp_idx));
205 }
206
208 bool operator==(lcp_byte const & other) const noexcept
209 {
210 return (m_small_lcp == other.m_small_lcp) && (m_big_lcp == other.m_big_lcp)
211 && (m_big_lcp_idx == other.m_big_lcp_idx);
212 }
213
215 bool operator!=(lcp_byte const & other) const noexcept
216 {
217 return !(*this == other);
218 }
219};
220
221} // end namespace sdsl
222#endif
bits.hpp contains the sdsl::bits class.
cereal.hpp offers cereal support
#define CEREAL_NVP(X)
Definition cereal.hpp:31
uint64_t size() const
Returns the number of elements currently stored.
A generic vector class for integers of width .
Definition io.hpp:36
int_vector_size_type size_type
iterator end() noexcept
Iterator that points to the element after the last element of int_vector.
bool empty() const noexcept
Equivalent to size() == 0.
void load(std::istream &in)
Load the int_vector for a stream.
static size_type max_size() noexcept
Maximum size of the int_vector.
size_type size() const noexcept
The number of elements in the int_vector.
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
Serializes the int_vector to a stream.
int_vector_trait< t_width >::value_type value_type
iterator begin() noexcept
Iterator that points to the first element of the int_vector.
A class for a simple compressed version of LCP information.
Definition lcp_byte.hpp:46
lcp_byte(cache_config &config)
Constructor.
Definition lcp_byte.hpp:88
const_iterator end() const
Returns a const_iterator to the element after the last element.
Definition lcp_byte.hpp:149
void load(std::istream &in)
Load from a stream.
Definition lcp_byte.hpp:184
lcp_byte()=default
Default Constructor.
lcp_byte & operator=(lcp_byte &&)=default
void CEREAL_LOAD_FUNCTION_NAME(archive_t &ar)
Definition lcp_byte.hpp:200
const value_type const_reference
Definition lcp_byte.hpp:51
bool operator==(lcp_byte const &other) const noexcept
Equality operator.
Definition lcp_byte.hpp:208
lcp_byte(lcp_byte const &)=default
value_type operator[](size_type i) const
[]-operator
Definition lcp_byte.hpp:158
const pointer const_pointer
Definition lcp_byte.hpp:54
const_iterator iterator
Definition lcp_byte.hpp:50
bool operator!=(lcp_byte const &other) const noexcept
Inequality operator.
Definition lcp_byte.hpp:215
ptrdiff_t difference_type
Definition lcp_byte.hpp:56
const_reference * pointer
Definition lcp_byte.hpp:53
lcp_tag index_tag
Definition lcp_byte.hpp:59
lcp_byte(lcp_byte &&)=default
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
Serialize to a stream.
Definition lcp_byte.hpp:172
lcp_byte & operator=(lcp_byte const &)=default
const_reference reference
Definition lcp_byte.hpp:52
int_vector< t_width >::value_type value_type
Definition lcp_byte.hpp:48
size_type size() const
Number of elements in the instance.
Definition lcp_byte.hpp:125
random_access_const_iterator< lcp_byte > const_iterator
Definition lcp_byte.hpp:49
static size_type max_size()
Returns the largest size that lcp_byte can ever have.
Definition lcp_byte.hpp:131
lcp_plain_tag lcp_category
Definition lcp_byte.hpp:58
const_iterator begin() const
Returns a const_iterator to the first element.
Definition lcp_byte.hpp:143
int_vector ::size_type size_type
Definition lcp_byte.hpp:55
bool empty() const
Returns if the data strucutre is empty.
Definition lcp_byte.hpp:137
void CEREAL_SAVE_FUNCTION_NAME(archive_t &ar) const
Definition lcp_byte.hpp:192
Generic iterator for a random access container.
Definition iterators.hpp:24
static structure_tree_node * add_child(structure_tree_node *v, std::string const &name, std::string const &type)
static void add_size(structure_tree_node *v, uint64_t value)
int_vector.hpp contains the sdsl::int_vector class.
int_vector_buffer.hpp contains the sdsl::int_vector_buffer class.
io.hpp contains some methods for reading/writing sdsl structures.
iterators.hpp contains an generic iterator for random access containers.
constexpr char KEY_LCP[]
Definition config.hpp:43
Namespace for the succinct data structure library.
std::string cache_file_name(std::string const &key, cache_config const &config)
Returns the file name of the resource.
Definition io.hpp:688
Contains declarations and definitions of data structure concepts.
static constexpr uint32_t hi(uint64_t x)
Position of the most significant set bit the 64-bit word x.
Definition bits.hpp:653
Helper class for construction process.
Definition config.hpp:66
structure_tree.hpp contains a helper class which can represent the memory structure of a class.
util.hpp contains some helper methods for int_vector and other stuff like demangle class names.