SDSL 3.0.3
Succinct Data Structure Library
Loading...
Searching...
No Matches
construct_sa.hpp
Go to the documentation of this file.
1// Copyright (c) 2016, the SDSL Project Authors. All rights reserved.
2// Please see the AUTHORS file for details. Use of this source code is governed
3// by a BSD license that can be found in the LICENSE file.
8
9#ifndef INCLUDED_SDSL_CONSTRUCT_SA
10#define INCLUDED_SDSL_CONSTRUCT_SA
11
12#include <iostream>
13#include <stdexcept>
14#include <stdint.h>
15#include <string>
16
17#include <sdsl/bits.hpp>
18#include <sdsl/config.hpp>
21#include <sdsl/divsufsort.hpp>
22#include <sdsl/int_vector.hpp>
25#include <sdsl/io.hpp>
26#include <sdsl/qsufsort.hpp>
27
28namespace sdsl
29{
30
32
53inline void construct_sa_se(cache_config & config)
54{
55 int_vector<8> text;
57
58 if (text.size() <= 2)
59 {
60 // If text is c$ or $ write suffix array [1, 0] or [0]
61 int_vector_buffer<> sa(cache_file_name(conf::KEY_SA, config), std::ios::out, 8, 2);
62 if (text.size() == 2)
63 {
64 sa.push_back(1);
65 }
66 sa.push_back(0);
67 }
68 else
69 {
71 }
73}
74
75namespace algorithm
76{
77
78//
79// Forward declarations
80//----------------------------------------------------------
81
83
89
90template <typename t_int_vec>
91void calculate_sa(unsigned char const * c, typename t_int_vec::size_type len, t_int_vec & sa)
92{
93 typedef typename t_int_vec::size_type size_type;
94 constexpr uint8_t t_width = t_int_vec::fixed_int_width;
95 if (len <= 1)
96 { // handle special case
97 sa.width(1);
98 sa.resize(len);
99 if (len > 0)
100 sa[0] = 0;
101 return;
102 }
103 bool small_file = (sizeof(len) <= 4 or len < 0x7FFFFFFFULL);
104 if (small_file)
105 {
106 uint8_t sa_width = sa.width();
107 if (32 == t_width or (0 == t_width and 32 >= sa_width))
108 {
109 sa.width(32);
110 sa.resize(len);
111 divsufsort(c, (int32_t *)sa.data(), (int32_t)len);
112 // copy integers back to the right positions
113 if (sa_width != 32)
114 {
115 for (size_type i = 0, p = 0; i < len; ++i, p += sa_width)
116 {
117 sa.set_int(p, sa.get_int(i << 5, 32), sa_width);
118 }
119 sa.width(sa_width);
120 sa.resize(len);
121 }
122 }
123 else
124 {
125 if (sa.width() < bits::hi(len) + 1)
126 {
127 throw std::logic_error("width of int_vector is to small for the text!!!");
128 }
129 int_vector<> sufarray(len, 0, 32);
130 divsufsort(c, (int32_t *)sufarray.data(), (int32_t)len);
131 sa.resize(len);
132 for (size_type i = 0; i < len; ++i)
133 {
134 sa[i] = sufarray[i];
135 }
136 }
137 }
138 else
139 {
140 uint8_t sa_width = sa.width();
141 sa.width(64);
142 sa.resize(len);
143 divsufsort64(c, (int64_t *)sa.data(), len);
144 // copy integers back to the right positions
145 if (sa_width != 64)
146 {
147 for (size_type i = 0, p = 0; i < len; ++i, p += sa_width)
148 {
149 sa.set_int(p, sa.get_int(i << 6, 64), sa_width);
150 }
151 sa.width(sa_width);
152 sa.resize(len);
153 }
154 }
155}
156
157} // end namespace algorithm
158
160
175template <uint8_t t_width>
177{
178 static_assert(t_width == 0 or t_width == 8,
179 "construct_sa: width must be `0` for integer alphabet and `8` for byte alphabet");
181 if (t_width == 8)
182 {
183 if (construct_config().byte_algo_sa == LIBDIVSUFSORT)
184 {
186 auto sa = write_out_mapper<0>::create(cache_file_name(conf::KEY_SA, config), 0, bits::hi(text.size()) + 1);
187 // call divsufsort
188 algorithm::calculate_sa((unsigned char const *)text.data(), text.size(), sa);
189 }
190 else if (construct_config().byte_algo_sa == SE_SAIS)
191 {
192 construct_sa_se(config);
193 }
194 }
195 else if (t_width == 0)
196 {
197 // call qsufsort
198 int_vector<> sa;
200 store_to_cache(sa, conf::KEY_SA, config);
201 }
202 else
203 {
204 std::cerr << "Unknown alphabet type" << std::endl;
205 }
206}
207
208} // end namespace sdsl
209
210#endif
bits.hpp contains the sdsl::bits class.
void push_back(const uint64_t value)
Appends the given element value to the end of the int_vector_buffer.
uint64_t const * data() const
A generic vector class for integers of width .
uint64_t const * data() const noexcept
Pointer to the raw data of the int_vector.
size_type size() const noexcept
The number of elements in the int_vector.
static int_vector_mapper< t_width > create(std::string const &key, cache_config &config)
int_vector.hpp contains the sdsl::int_vector class.
int_vector_buffer.hpp contains the sdsl::int_vector_buffer class.
io.hpp contains some methods for reading/writing sdsl structures.
void calculate_sa(unsigned char const *c, typename t_int_vec::size_type len, t_int_vec &sa)
Calculates the Suffix Array for a text.
constexpr char KEY_SA[]
Definition config.hpp:36
constexpr char KEY_TEXT[]
Definition config.hpp:40
void construct_sa(int_vector_type &sa, char const *file, uint8_t num_bytes)
Construct a suffix array for the sequence stored in a file.
Definition qsufsort.hpp:72
Namespace for the succinct data structure library.
bool store_to_cache(T const &v, std::string const &key, cache_config &config, bool add_type_hash=false)
Stores the object v as a resource in the cache.
Definition io.hpp:804
@ LIBDIVSUFSORT
Definition config.hpp:60
@ SE_SAIS
Definition config.hpp:61
int32_t divsufsort(uint8_t const *T, saidx_t *SA, saidx_t n)
std::string cache_file_name(std::string const &key, cache_config const &config)
Returns the file name of the resource.
Definition io.hpp:688
char const * key_text_trait_impl< 0, T >::KEY_TEXT
Definition config.hpp:132
void construct_sa_se(cache_config &config)
Constructs the Suffix Array (SA) from text over byte-alphabet.
bool load_from_file(T &v, std::string const &file)
Load sdsl-object v from a file.
Definition io.hpp:989
void _construct_sa_se(int_vector_type &text, std::string filename_sa, uint64_t sigma, uint64_t recursion)
void register_cache_file(std::string const &key, cache_config &config)
Register the existing resource specified by the key to the cache.
Definition io.hpp:717
int32_t divsufsort64(uint8_t const *T, int64_t *SA, int64_t n)
void construct_sa(cache_config &config)
Constructs the Suffix Array (SA) from text over byte- or integer-alphabet.
construct_config_data & construct_config()
int_vector_mapper< t_width, std::ios_base::in > const read_only_mapper
qsufsort.hpp contains the interface for the suffix array construction algorithm of Larsson.
static constexpr uint32_t hi(uint64_t x)
Position of the most significant set bit the 64-bit word x.
Definition bits.hpp:653
Helper class for construction process.
Definition config.hpp:66