SDSL 3.0.3
Succinct Data Structure Library
Loading...
Searching...
No Matches
enc_vector.hpp
Go to the documentation of this file.
1// Copyright (c) 2016, the SDSL Project Authors. All rights reserved.
2// Please see the AUTHORS file for details. Use of this source code is governed
3// by a BSD license that can be found in the LICENSE file.
8#ifndef SDSL_ENC_VECTOR
9#define SDSL_ENC_VECTOR
10
11#include <assert.h>
12#include <iosfwd>
13#include <stddef.h>
14#include <stdint.h>
15#include <string>
16
17#include <sdsl/bits.hpp>
18#include <sdsl/cereal.hpp>
20#include <sdsl/int_vector.hpp>
22#include <sdsl/io.hpp>
23#include <sdsl/iterators.hpp>
26#include <sdsl/util.hpp>
27
29namespace sdsl
30{
31
32template <uint8_t t_width>
37
38template <>
43
44template <>
49
51
61template <class t_coder = coder::elias_delta<>, uint32_t t_dens = 128, uint8_t t_width = 0>
63{
64private:
65 static_assert(t_dens > 1, "enc_vector: sample density must be larger than `1`");
66
67public:
68 typedef uint64_t value_type;
71 typedef const value_type reference;
73 typedef value_type const * const_pointer;
74 typedef ptrdiff_t difference_type;
76 typedef t_coder coder;
79 static const uint32_t sample_dens = t_dens;
81
82 int_vector<0> m_z; // storage for encoded deltas
83
84private:
85 int_vector_type m_sample_vals_and_pointer; // samples and pointers
86 size_type m_size = 0; // number of vector elements
87
88 void clear()
89 {
90 m_z.resize(0);
92 m_size = 0;
93 m_sample_vals_and_pointer.resize(0);
94 m_sample_vals_and_pointer.shrink_to_fit();
95 }
96
97public:
98 enc_vector() = default;
99 enc_vector(enc_vector const &) = default;
100 enc_vector(enc_vector &&) = default;
101 enc_vector & operator=(enc_vector const &) = default;
103
105
107 template <class Container>
108 enc_vector(Container const & c);
109
111 /*
112 * \param v_buf A int_vector_buf.
113 */
114 template <uint8_t int_width>
116
119 {}
120
123 {
124 return m_size;
125 }
126
129 {
130 return int_vector<>::max_size() / 2;
131 }
132
134 bool empty() const
135 {
136 return 0 == m_size;
137 }
138
140 const const_iterator begin() const
141 {
142 return const_iterator(this, 0);
143 }
144
146 const const_iterator end() const
147 {
148 return const_iterator(this, this->m_size);
149 }
150
151 bool operator==(enc_vector const & v) const
152 {
153 return m_size && v.m_size && m_z == v.m_z && m_sample_vals_and_pointer == v.m_sample_vals_and_pointer;
154 }
155
156 bool operator!=(enc_vector const & v) const
157 {
158 return !(*this == v);
159 }
160
162
165
167
170 size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const;
171
173 void load(std::istream & in);
174
175 template <typename archive_t>
176 void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const;
177
178 template <typename archive_t>
179 void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar);
180
182
185 value_type sample(const size_type i) const;
186
187 uint32_t get_sample_dens() const
188 {
189 return t_dens;
190 }
191
196 void get_inter_sampled_values(const size_type i, uint64_t * it) const
197 {
198 *(it++) = 0;
199 if (i * t_dens + t_dens - 1 < size())
200 {
201 t_coder::template decode<true, true>(m_z.data(), m_sample_vals_and_pointer[(i << 1) + 1], t_dens - 1, it);
202 }
203 else
204 {
205 assert(i * t_dens < size());
206 t_coder::template decode<true, true>(m_z.data(),
207 m_sample_vals_and_pointer[(i << 1) + 1],
208 size() - i * t_dens - 1,
209 it);
210 }
211 };
212};
213
214template <class t_coder, uint32_t t_dens, uint8_t t_width>
217{
218 assert(i + 1 != 0);
219 assert(i < m_size);
220 size_type idx = i / get_sample_dens();
221 return m_sample_vals_and_pointer[idx << 1]
222 + t_coder::decode_prefix_sum(m_z.data(), m_sample_vals_and_pointer[(idx << 1) + 1], i - t_dens * idx);
223}
224
225template <class t_coder, uint32_t t_dens, uint8_t t_width>
228{
229 assert(i * get_sample_dens() + 1 != 0);
230 assert(i * get_sample_dens() < m_size);
231 return m_sample_vals_and_pointer[i << 1];
232}
233
234template <class t_coder, uint32_t t_dens, uint8_t t_width>
235template <class Container>
237{
238 // clear bit_vectors
239 clear();
240
241 if (c.empty()) // if c is empty there is nothing to do...
242 return;
243 typename Container::const_iterator it = c.begin(), end = c.end();
244 typename Container::value_type v1 = *it, v2, max_sample_value = 0, x;
245 size_type samples = 0;
246 size_type z_size = 0;
247 // (1) Calculate maximal value of samples and of deltas
248 for (size_type i = 0, no_sample = 0; it != end; ++it, ++i, --no_sample)
249 {
250 v2 = *it;
251 if (!no_sample)
252 { // add a sample
253 no_sample = get_sample_dens();
254 if (max_sample_value < v2)
255 max_sample_value = v2;
256 ++samples;
257 }
258 else
259 {
260 z_size += t_coder::encoding_length(v2 - v1);
261 }
262 v1 = v2;
263 }
264 // (2) Write sample values and deltas
265 {
266 if (max_sample_value > z_size + 1)
267 m_sample_vals_and_pointer.width(bits::hi(max_sample_value) + 1);
268 else
269 m_sample_vals_and_pointer.width(bits::hi(z_size + 1) + 1);
270 m_sample_vals_and_pointer.resize(2 * samples + 2); // add 2 for last entry
271 util::set_to_value(m_sample_vals_and_pointer, 0);
272 typename int_vector_type::iterator sv_it = m_sample_vals_and_pointer.begin();
273 z_size = 0;
274 size_type no_sample = 0;
275 for (it = c.begin(); it != end; ++it, --no_sample)
276 {
277 v2 = *it;
278 if (!no_sample)
279 { // add a sample
280 no_sample = get_sample_dens();
281 *sv_it = v2;
282 ++sv_it;
283 *sv_it = z_size;
284 ++sv_it;
285 }
286 else
287 {
288 x = v2 - v1;
289 z_size += t_coder::encoding_length(x);
290 }
291 v1 = v2;
292 }
293 *sv_it = 0;
294 ++sv_it; // initialize
295 *sv_it = z_size + 1;
296 ++sv_it; // last entry
297
298 m_z = int_vector<>(z_size, 0, 1);
299 uint64_t * z_data = t_coder::raw_data(m_z);
300 uint8_t offset = 0;
301 no_sample = 0;
302 for (it = c.begin(); it != end; ++it, --no_sample)
303 {
304 v2 = *it;
305 if (!no_sample)
306 { // add a sample
307 no_sample = get_sample_dens();
308 }
309 else
310 {
311 t_coder::encode(v2 - v1, z_data, offset);
312 }
313 v1 = v2;
314 }
315 }
316 m_size = c.size();
317}
318
319template <class t_coder, uint32_t t_dens, uint8_t t_width>
320template <uint8_t int_width>
322{
323 // clear bit_vectors
324 clear();
325 size_type n = v_buf.size();
326 if (n == 0) // if c is empty there is nothing to do...
327 return;
328 value_type v1 = 0, v2 = 0, max_sample_value = 0;
329 size_type samples = 0, z_size = 0;
330 const size_type sd = get_sample_dens();
331 // (1) Calculate maximal value of samples and of deltas
332 for (size_type i = 0, no_sample = 0; i < n; ++i, --no_sample)
333 {
334 v2 = v_buf[i];
335 if (!no_sample)
336 { // is sample
337 no_sample = sd;
338 if (max_sample_value < v2)
339 max_sample_value = v2;
340 ++samples;
341 }
342 else
343 {
344 z_size += t_coder::encoding_length(v2 - v1);
345 }
346 v1 = v2;
347 }
348
349 // (2) Write sample values and deltas
350 // (a) Initialize array for sample values and pointers
351 if (max_sample_value > z_size + 1)
352 m_sample_vals_and_pointer.width(bits::hi(max_sample_value) + 1);
353 else
354 m_sample_vals_and_pointer.width(bits::hi(z_size + 1) + 1);
355 m_sample_vals_and_pointer.resize(2 * samples + 2); // add 2 for last entry
356 util::set_to_value(m_sample_vals_and_pointer, 0);
357
358 // (b) Initilize bit_vector for encoded data
359 m_z = int_vector<>(z_size, 0, 1);
360 uint64_t * z_data = t_coder::raw_data(m_z);
361 uint8_t offset = 0;
362
363 // (c) Write sample values and deltas
364 z_size = 0;
365 for (size_type i = 0, j = 0, no_sample = 0; i < n; ++i, --no_sample)
366 {
367 v2 = v_buf[i];
368 if (!no_sample)
369 { // is sample
370 no_sample = sd;
371 m_sample_vals_and_pointer[j++] = v2; // write samples
372 m_sample_vals_and_pointer[j++] = z_size; // write pointers
373 }
374 else
375 {
376 z_size += t_coder::encoding_length(v2 - v1);
377 t_coder::encode(v2 - v1, z_data, offset); // write encoded values
378 }
379 v1 = v2;
380 }
381 m_size = n;
382}
383
384template <class t_coder, uint32_t t_dens, uint8_t t_width>
386enc_vector<t_coder, t_dens, t_width>::serialize(std::ostream & out, structure_tree_node * v, std::string name) const
387{
388 structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this));
389 size_type written_bytes = 0;
390 written_bytes += write_member(m_size, out, child, "size");
391 written_bytes += m_z.serialize(out, child, "encoded deltas");
392 written_bytes += m_sample_vals_and_pointer.serialize(out, child, "samples_and_pointers");
393 structure_tree::add_size(child, written_bytes);
394 return written_bytes;
395}
396
397template <class t_coder, uint32_t t_dens, uint8_t t_width>
399{
400 read_member(m_size, in);
401 m_z.load(in);
402 m_sample_vals_and_pointer.load(in);
403}
404
405template <class t_coder, uint32_t t_dens, uint8_t t_width>
406template <typename archive_t>
408{
409 ar(CEREAL_NVP(m_size));
410 ar(CEREAL_NVP(m_z));
411 ar(CEREAL_NVP(m_sample_vals_and_pointer));
412}
413
414template <class t_coder, uint32_t t_dens, uint8_t t_width>
415template <typename archive_t>
417{
418 ar(CEREAL_NVP(m_size));
419 ar(CEREAL_NVP(m_z));
420 ar(CEREAL_NVP(m_sample_vals_and_pointer));
421}
422
423} // end namespace sdsl
424#endif
bits.hpp contains the sdsl::bits class.
cereal.hpp offers cereal support
#define CEREAL_NVP(X)
Definition cereal.hpp:31
#define CEREAL_LOAD_FUNCTION_NAME
Definition cereal.hpp:34
#define CEREAL_SAVE_FUNCTION_NAME
Definition cereal.hpp:35
A generic immutable space-saving vector class for unsigned integers.
enc_vector(enc_vector const &)=default
enc_vector & operator=(enc_vector &&)=default
enc_vector enc_vec_type
const value_type const_reference
uint32_t get_sample_dens() const
enc_vector()=default
random_access_const_iterator< enc_vector > iterator
enc_vector_trait< t_width >::int_vector_type int_vector_type
const const_iterator begin() const
Iterator that points to the first element of the enc_vector.
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
Serialize the enc_vector to a stream.
void load(std::istream &in)
Load the enc_vector from a stream.
static size_type max_size()
Return the largest size that this container can ever have.
void CEREAL_SAVE_FUNCTION_NAME(archive_t &ar) const
value_type operator[](size_type i) const
operator[]
const value_type reference
uint64_t value_type
value_type sample(const size_type i) const
Returns the i-th sample of enc_vector.
int_vector ::size_type size_type
void get_inter_sampled_values(const size_type i, uint64_t *it) const
const const_iterator end() const
Iterator that points to the position after the last element of the enc_vector.
~enc_vector()
Default Destructor.
static const uint32_t sample_dens
bool operator==(enc_vector const &v) const
value_type const * const_pointer
size_type size() const
The number of elements in the enc_vector.
void CEREAL_LOAD_FUNCTION_NAME(archive_t &ar)
ptrdiff_t difference_type
enc_vector & operator=(enc_vector const &)=default
int_vector< 0 > m_z
iterator const_iterator
bool operator!=(enc_vector const &v) const
enc_vector(enc_vector &&)=default
bool empty() const
Returns if the enc_vector is empty.
uint64_t size() const
Returns the number of elements currently stored.
A generic vector class for integers of width .
uint64_t const * data() const noexcept
Pointer to the raw data of the int_vector.
void shrink_to_fit()
Free unused allocated memory.
static size_type max_size() noexcept
Maximum size of the int_vector.
void resize(const size_type size)
Resize the int_vector in terms of elements.
Generic iterator for a random access container.
Definition iterators.hpp:24
static structure_tree_node * add_child(structure_tree_node *v, std::string const &name, std::string const &type)
static void add_size(structure_tree_node *v, uint64_t value)
coder_elias_delta.hpp contains the class sdsl::coder::elias_delta
int_vector.hpp contains the sdsl::int_vector class.
int_vector_buffer.hpp contains the sdsl::int_vector_buffer class.
io.hpp contains some methods for reading/writing sdsl structures.
iterators.hpp contains an generic iterator for random access containers.
void set_to_value(t_int_vec &v, uint64_t k)
Set all entries of int_vector to value k.
Definition util.hpp:597
Namespace for the succinct data structure library.
size_t write_member(T const &t, std::ostream &out, sdsl::structure_tree_node *v=nullptr, std::string name="")
Definition io.hpp:94
void read_member(T &t, std::istream &in)
Definition io.hpp:119
Contains declarations and definitions of data structure concepts.
static constexpr uint32_t hi(uint64_t x)
Position of the most significant set bit the 64-bit word x.
Definition bits.hpp:653
int_vector< 32 > int_vector_type
int_vector< 64 > int_vector_type
int_vector< 0 > int_vector_type
structure_tree.hpp contains a helper class which can represent the memory structure of a class.
util.hpp contains some helper methods for int_vector and other stuff like demangle class names.