SDSL 3.0.3
Succinct Data Structure Library
Loading...
Searching...
No Matches
construct_lcp_helper.hpp
Go to the documentation of this file.
1// Copyright (c) 2016, the SDSL Project Authors. All rights reserved.
2// Please see the AUTHORS file for details. Use of this source code is governed
3// by a BSD license that can be found in the LICENSE file.
4#ifndef INCLUDED_SDSL_CONSTRUCT_LCP_HELPER
5#define INCLUDED_SDSL_CONSTRUCT_LCP_HELPER
6
7#include <istream>
8#include <list>
9#include <queue>
10#include <stdint.h>
11#include <string.h>
12#include <string>
13#include <vector>
14
15#include <sdsl/bits.hpp>
16#include <sdsl/int_vector.hpp>
18#include <sdsl/ram_fs.hpp>
19#include <sdsl/util.hpp>
20
21namespace sdsl
22{
23
25
34inline void insert_lcp_values(int_vector<> & partial_lcp,
35 bit_vector & index_done,
36 std::string lcp_file,
37 uint64_t max_lcp_value,
38 uint64_t lcp_value_offset)
39{
40 std::string tmp_lcp_file = lcp_file + "_TMP";
41 const uint64_t buffer_size = 1000000; // has to be a multiple of 64
43 int_vector_buffer<> lcp_buffer(lcp_file, std::ios::in, buffer_size); // open lcp_file
44 uint64_t n = lcp_buffer.size();
45
46 // open tmp_lcp_file
47 uint8_t int_width = bits::hi(max_lcp_value) + 1;
48 int_vector_buffer<> out_buf(tmp_lcp_file, std::ios::out, buffer_size, int_width); // Output buffer
49 // Write values into buffer
50 for (size_type i = 0, calc_idx = 0; i < n; ++i)
51 {
52 if (index_done[i])
53 { // If value was already calculated
54 out_buf[i] = lcp_buffer[i]; // Copy value
55 }
56 else
57 {
58 if (partial_lcp[calc_idx])
59 { // If value was calculated now
60 // Insert value
61 out_buf[i] = partial_lcp[calc_idx] + lcp_value_offset;
62 index_done[i] = true;
63 }
64 ++calc_idx;
65 }
66 }
67 lcp_buffer.close();
68 out_buf.close();
69 // Close file and replace old file with new one
70 sdsl::rename(tmp_lcp_file, lcp_file);
71}
72
73template <class tWT>
74void create_C_array(std::vector<uint64_t> & C, tWT const & wt)
75{
76 uint64_t quantity; // quantity of characters in interval
77 std::vector<unsigned char> cs(wt.sigma); // list of characters in the interval
78 std::vector<uint64_t> rank_c_i(wt.sigma); // number of occurrence of character in [0 .. i-1]
79 std::vector<uint64_t> rank_c_j(wt.sigma); // number of occurrence of character in [0 .. j-1]
80
81 C = std::vector<uint64_t>(257, 0);
82 interval_symbols(wt, 0, wt.size(), quantity, cs, rank_c_i, rank_c_j);
83 for (uint64_t i = 0; i < quantity; ++i)
84 {
85 unsigned char c = cs[i];
86 C[c + 1] = rank_c_j[i];
87 }
88 for (uint64_t i = 1; i < C.size() - 1; ++i)
89 {
90 C[i + 1] += C[i];
91 }
92}
93
95{
96 typedef bit_vector::size_type size_type;
97 typedef std::queue<uint8_t> tQ;
98
99private:
100 static const uint32_t m_buffer_size = 10000; // 409600;
101 uint8_t m_write_buf[m_buffer_size];
102 uint8_t m_read_buf[m_buffer_size];
103 size_type m_widx; // write index
104 size_type m_ridx; // read index
105 bool m_sync; // are read and write buffer the same?
106 size_type m_disk_buffered_blocks; // number of blocks written to disk and not read again yet
107 char m_c;
108 size_type m_rb; // read blocks
109 size_type m_wb; // written blocks
110
111 std::string m_file_name;
112
113 std::fstream m_stream;
114
115public:
116 buffered_char_queue() : m_widx(0), m_ridx(0), m_sync(true), m_disk_buffered_blocks(0), m_c('?'), m_rb(0), m_wb(0)
117 {}
118
119 void init(std::string const & dir, char c)
120 {
121 m_c = c;
122 m_file_name = dir + "buffered_char_queue_" + util::to_string(util::pid());
123 // m_stream.rdbuf()->pubsetbuf(0, 0);
124 }
125
127 {
128 m_stream.close();
129 sdsl::remove(m_file_name);
130 }
131
132 void push_back(uint8_t x)
133 {
134 m_write_buf[m_widx] = x;
135 if (m_sync)
136 {
137 m_read_buf[m_widx] = x;
138 }
139 ++m_widx;
140 if (m_widx == m_buffer_size)
141 {
142 if (!m_sync)
143 { // if not sync, write block to disk
144 if (!m_stream.is_open())
145 {
146 m_stream.open(m_file_name, std::ios::in | std::ios::out | std::ios::binary | std::ios::trunc);
147 }
148 m_stream.seekp(m_buffer_size * (m_wb++), std::ios::beg);
149 m_stream.write((char *)m_write_buf, m_buffer_size);
150 ++m_disk_buffered_blocks;
151 }
152 m_sync = 0;
153 m_widx = 0;
154 }
155 }
156
157 uint8_t pop_front()
158 {
159 uint8_t x = m_read_buf[m_ridx];
160 ++m_ridx;
161 if (m_ridx == m_buffer_size)
162 {
163 if (m_disk_buffered_blocks > 0)
164 {
165 m_stream.seekg(m_buffer_size * (m_rb++), std::ios::beg);
166 m_stream.read((char *)m_read_buf, m_buffer_size);
167 --m_disk_buffered_blocks;
168 }
169 else
170 { // m_disk_buffered_blocks == 0
171 m_sync = 1;
172 memcpy(m_read_buf, m_write_buf, m_widx + 1);
173 }
174 m_ridx = 0;
175 }
176 return x;
177 }
178};
179
180typedef std::list<int_vector<>::size_type> tLI;
181typedef std::vector<int_vector<>::size_type> tVI;
182
183template <class size_type_class>
184void push_front_m_index(size_type_class i,
185 uint8_t c,
186 tLI (&m_list)[256],
187 uint8_t (&m_chars)[256],
188 size_type_class & m_char_count)
189{
190 if (m_list[c].empty())
191 {
192 m_chars[m_char_count++] = c;
193 }
194 m_list[c].push_front(i);
195}
196
197template <class size_type_class>
198void push_back_m_index(size_type_class i,
199 uint8_t c,
200 tLI (&m_list)[256],
201 uint8_t (&m_chars)[256],
202 size_type_class & m_char_count)
203{
204 if (m_list[c].empty())
205 {
206 m_chars[m_char_count++] = c;
207 }
208 m_list[c].push_back(i);
209}
210
211} // namespace sdsl
212
213#endif
bits.hpp contains the sdsl::bits class.
void init(std::string const &dir, char c)
uint64_t size() const
Returns the number of elements currently stored.
void close(bool remove_file=false)
Close the int_vector_buffer.
int_vector_size_type size_type
int_vector.hpp contains the sdsl::int_vector class.
int_vector_buffer.hpp contains the sdsl::int_vector_buffer class.
uint64_t pid()
std::string to_string(T const &t, int w=1)
Namespace for the succinct data structure library.
void push_back_m_index(size_type_class i, uint8_t c, tLI(&m_list)[256], uint8_t(&m_chars)[256], size_type_class &m_char_count)
int remove(std::string const &)
Remove a file.
Definition ram_fs.hpp:221
void insert_lcp_values(int_vector<> &partial_lcp, bit_vector &index_done, std::string lcp_file, uint64_t max_lcp_value, uint64_t lcp_value_offset)
Merges a partial LCP array into the LCP array on disk.
void create_C_array(std::vector< uint64_t > &C, tWT const &wt)
int rename(std::string const &old_filename, std::string const &new_filename)
Rename a file.
Definition ram_fs.hpp:234
int_vector< 1 > bit_vector
bit_vector is a specialization of the int_vector.
void interval_symbols(t_wt const &wt, typename t_wt::size_type i, typename t_wt::size_type j, typename t_wt::size_type &k, std::vector< typename t_wt::value_type > &cs, std::vector< typename t_wt::size_type > &rank_c_i, std::vector< typename t_wt::size_type > &rank_c_j)
For each symbol c in wt[i..j-1] get rank(i,c) and rank(j,c).
std::vector< int_vector<>::size_type > tVI
bool empty(range_type const &r)
Empty range check.
std::list< int_vector<>::size_type > tLI
void push_front_m_index(size_type_class i, uint8_t c, tLI(&m_list)[256], uint8_t(&m_chars)[256], size_type_class &m_char_count)
ram_fs.hpp
static constexpr uint32_t hi(uint64_t x)
Position of the most significant set bit the 64-bit word x.
Definition bits.hpp:653
util.hpp contains some helper methods for int_vector and other stuff like demangle class names.