SDSL 3.0.3
Succinct Data Structure Library
Loading...
Searching...
No Matches
construct_bwt.hpp
Go to the documentation of this file.
1// Copyright (c) 2016, the SDSL Project Authors. All rights reserved.
2// Please see the AUTHORS file for details. Use of this source code is governed
3// by a BSD license that can be found in the LICENSE file.
8#ifndef INCLUDED_SDSL_CONSTRUCT_BWT
9#define INCLUDED_SDSL_CONSTRUCT_BWT
10
11#include <iostream>
12#include <stdint.h>
13#include <string>
14
15#include <sdsl/config.hpp>
16#include <sdsl/int_vector.hpp>
19#include <sdsl/io.hpp>
20#include <sdsl/ram_fs.hpp>
21
22namespace sdsl
23{
24
26
37template <uint8_t t_width>
39{
40 static_assert(t_width == 0 or t_width == 8,
41 "construct_bwt: width must be `0` for integer alphabet and `8` for byte alphabet");
42
46
47 // (1) Load text from disk
49 size_type n = text.size();
50 uint8_t bwt_width = text.width();
51 std::string bwt_file = cache_file_name(KEY_BWT, config);
52
53 auto gen_bwt = [&n](auto & bwt, auto & text, auto & sa)
54 {
55 size_type to_add[2] = {(size_type)-1, n - 1};
56 for (size_type i = 0; i < n; ++i)
57 {
58 bwt[i] = text[sa[i] + to_add[sa[i] == 0]];
59 }
60 };
61 // (2) Prepare to stream SA from disc and BWT to disc
62 if (is_ram_file(bwt_file))
63 {
65 auto bwt = write_out_mapper<t_width>::create(bwt_file, n, bwt_width);
66 gen_bwt(bwt, text, sa);
67 }
68 else
69 {
70 size_type buffer_size = 1000000; // buffer_size is a multiple of 8!
71 std::string sa_file = cache_file_name(conf::KEY_SA, config);
72 int_vector_buffer<> sa_buf(sa_file, std::ios::in, buffer_size);
73 auto bwt = write_out_mapper<t_width>::create(bwt_file, n, bwt_width);
74 // (3) Construct BWT sequentially by streaming SA and random access to text
75 gen_bwt(bwt, text, sa_buf);
76 }
78}
79
80} // namespace sdsl
81
82#endif
A generic vector class for integers of width .
static int_vector_mapper< t_width > create(std::string const &key, cache_config &config)
int_vector.hpp contains the sdsl::int_vector class.
int_vector_buffer.hpp contains the sdsl::int_vector_buffer class.
io.hpp contains some methods for reading/writing sdsl structures.
constexpr char KEY_SA[]
Definition config.hpp:36
Namespace for the succinct data structure library.
char const * key_bwt_trait_impl< 0, T >::KEY_BWT
Definition config.hpp:138
std::string cache_file_name(std::string const &key, cache_config const &config)
Returns the file name of the resource.
Definition io.hpp:688
char const * key_text_trait_impl< 0, T >::KEY_TEXT
Definition config.hpp:132
void register_cache_file(std::string const &key, cache_config &config)
Register the existing resource specified by the key to the cache.
Definition io.hpp:717
bool is_ram_file(std::string const &file)
Determines if the given file is a RAM-file.
Definition ram_fs.hpp:176
void construct_bwt(cache_config &config)
Constructs the Burrows and Wheeler Transform (BWT) from text over byte- or integer-alphabet and suffi...
int_vector_mapper< t_width, std::ios_base::in > const read_only_mapper
ram_fs.hpp
Helper class for construction process.
Definition config.hpp:66