21#ifndef TLX_SORT_STRINGS_STRING_SET_HEADER
22#define TLX_SORT_STRINGS_STRING_SET_HEADER
47template <
typename StringSet,
typename Traits>
52 typename Traits::String&
at(
size_t i)
const {
53 const StringSet& ss = *
static_cast<const StringSet*
>(
this);
54 return *(ss.begin() + i);
61 bool is_equal(
const typename Traits::String& a,
62 const typename Traits::CharIterator& ai,
63 const typename Traits::String& b,
64 const typename Traits::CharIterator& bi)
const {
65 const StringSet& ss = *
static_cast<const StringSet*
>(
this);
66 return !ss.is_end(a, ai) && !ss.is_end(b, bi) && (*ai == *bi);
70 bool is_less(
const typename Traits::String& a,
71 const typename Traits::CharIterator& ai,
72 const typename Traits::String& b,
73 const typename Traits::CharIterator& bi)
const {
74 const StringSet& ss = *
static_cast<const StringSet*
>(
this);
75 return ss.is_end(a, ai) ||
76 (!ss.is_end(a, ai) && !ss.is_end(b, bi) && *ai < *bi);
80 bool is_leq(
const typename Traits::String& a,
81 const typename Traits::CharIterator& ai,
82 const typename Traits::String& b,
83 const typename Traits::CharIterator& bi)
const {
84 const StringSet& ss = *
static_cast<const StringSet*
>(
this);
85 return ss.is_end(a, ai) ||
86 (!ss.is_end(a, ai) && !ss.is_end(b, bi) && *ai <= *bi);
95 get_char(
const typename Traits::String& s,
size_t depth)
const {
96 const StringSet& ss = *
static_cast<const StringSet*
>(
this);
97 return *ss.get_chars(s, depth);
103 const typename Traits::String& s,
typename Traits::CharIterator i)
const {
104 const StringSet& ss = *
static_cast<const StringSet*
>(
this);
106 if (ss.is_end(s, i))
return 0;
107 return std::uint8_t(*i);
113 const typename Traits::String& s,
typename Traits::CharIterator i)
const {
114 const StringSet& ss = *
static_cast<const StringSet*
>(
this);
117 if (ss.is_end(s, i))
return v;
118 v = (std::uint16_t(*i) << 8);
120 if (ss.is_end(s, i))
return v;
121 v |= (std::uint16_t(*i) << 0);
128 const typename Traits::String& s,
typename Traits::CharIterator i)
const {
129 const StringSet& ss = *
static_cast<const StringSet*
>(
this);
132 if (ss.is_end(s, i))
return v;
133 v = (std::uint32_t(*i) << 24);
135 if (ss.is_end(s, i))
return v;
136 v |= (std::uint32_t(*i) << 16);
138 if (ss.is_end(s, i))
return v;
139 v |= (std::uint32_t(*i) << 8);
141 if (ss.is_end(s, i))
return v;
142 v |= (std::uint32_t(*i) << 0);
149 const typename Traits::String& s,
typename Traits::CharIterator i)
const {
150 const StringSet& ss = *
static_cast<const StringSet*
>(
this);
153 if (ss.is_end(s, i))
return v;
154 v = (std::uint64_t(*i) << 56);
156 if (ss.is_end(s, i))
return v;
157 v |= (std::uint64_t(*i) << 48);
159 if (ss.is_end(s, i))
return v;
160 v |= (std::uint64_t(*i) << 40);
162 if (ss.is_end(s, i))
return v;
163 v |= (std::uint64_t(*i) << 32);
165 if (ss.is_end(s, i))
return v;
166 v |= (std::uint64_t(*i) << 24);
168 if (ss.is_end(s, i))
return v;
169 v |= (std::uint64_t(*i) << 16);
171 if (ss.is_end(s, i))
return v;
172 v |= (std::uint64_t(*i) << 8);
174 if (ss.is_end(s, i))
return v;
175 v |= (std::uint64_t(*i) << 0);
179 std::uint8_t
get_uint8(
const typename Traits::String& s,
size_t depth)
const {
180 const StringSet& ss = *
static_cast<const StringSet*
>(
this);
181 return get_uint8(s, ss.get_chars(s, depth));
184 std::uint16_t
get_uint16(
const typename Traits::String& s,
size_t depth)
const {
185 const StringSet& ss = *
static_cast<const StringSet*
>(
this);
189 std::uint32_t
get_uint32(
const typename Traits::String& s,
size_t depth)
const {
190 const StringSet& ss = *
static_cast<const StringSet*
>(
this);
194 std::uint64_t
get_uint64(
const typename Traits::String& s,
size_t depth)
const {
195 const StringSet& ss = *
static_cast<const StringSet*
>(
this);
202 StringSet
subi(
size_t begin,
size_t end)
const {
203 const StringSet& ss = *
static_cast<const StringSet*
>(
this);
204 return ss.sub(ss.begin() + begin, ss.begin() + end);
208 const typename Traits::String& s2)
const {
209 const StringSet& ss = *
static_cast<const StringSet*
>(
this);
211 typename StringSet::CharIterator c1 = ss.get_chars(s1, 0);
212 typename StringSet::CharIterator c2 = ss.get_chars(s2, 0);
214 while (ss.is_equal(s1, c1, s2, c2))
217 return ss.is_leq(s1, c1, s2, c2);
221 const StringSet& ss = *
static_cast<const StringSet*
>(
this);
223 for (
typename Traits::Iterator pi = ss.begin();
224 pi + 1 != ss.end(); ++pi)
227 TLX_LOG1 <<
"check_order() failed at position " << pi - ss.begin();
235 const StringSet& ss = *
static_cast<const StringSet*
>(
this);
237 for (
typename Traits::Iterator pi = ss.begin(); pi != ss.end(); ++pi)
239 TLX_LOG1 <<
"[" << i++ <<
"] = " << *pi
240 <<
" = " << ss.get_string(*pi, 0);
245template <
typename Type,
typename StringSet>
247typename enable_if<
sizeof(Type) == 4, std::uint32_t>::type
249 const typename StringSet::String& s,
size_t depth) {
250 return strset.get_uint32(s, depth);
253template <
typename Type,
typename StringSet>
255typename enable_if<
sizeof(Type) == 8, std::uint64_t>::type
257 const typename StringSet::String& s,
size_t depth) {
258 return strset.get_uint64(s, depth);
261template <
typename Type,
typename StringSet>
263Type
get_key_at(
const StringSet& strset,
size_t idx,
size_t depth) {
273template <
typename CharType>
296template <
typename CharType>
300 GenericCharStringSetTraits<CharType> >
334 {
return s + depth; }
338 {
return (*i == 0); }
342 {
return std::string(
reinterpret_cast<const char*
>(s) + depth); }
350 {
return std::make_pair(
new String[n], n); }
354 {
delete[] c.first; c.first =
nullptr; }
422 {
return reinterpret_cast<CharIterator>(s.data()) + depth; }
426 {
return (i >=
reinterpret_cast<CharIterator>(s.data()) + s.size()); }
430 {
return s.substr(depth); }
438 {
return std::make_pair(
new String[n], n); }
442 {
delete[] c.first; c.first =
nullptr; }
462 typedef std::unique_ptr<std::string>
String;
480 public StringSetBase<UPtrStdStringSet, UPtrStdStringSetTraits>
506 {
return reinterpret_cast<CharIterator>(s->data()) + depth; }
510 {
return (i >=
reinterpret_cast<CharIterator>(s->data()) + s->size()); }
514 {
return s->substr(depth); }
522 {
return std::make_pair(
new String[n], n); }
526 {
delete[] c.first; c.first =
nullptr; }
532 TLX_LOG1 <<
"[" << i++ <<
"] = " << pi->get()
562 typedef typename std::vector<String>::iterator
Iterator;
568 typedef std::pair<Text, std::vector<String> >
Container;
590 sa.resize(text.size());
591 for (
size_t i = 0; i < text.size(); ++i)
617 {
return text_->substr(s + depth); }
625 {
return std::make_pair(*
text_, std::vector<String>(n)); }
629 { std::vector<String> v; v.swap(c.second); }
Traits class implementing StringSet concept for char* and unsigned char* strings.
Char * String
String reference: pointer to first character.
const Char * CharIterator
iterator of characters in a string
String * Iterator
Iterator over string references: pointer over pointers.
CharType Char
exported alias for character type
std::pair< Iterator, size_t > Container
exported alias for assumed string container
Class implementing StringSet concept for char* and unsigned char* strings.
GenericCharStringSetTraits< CharType > Traits
Traits::Container Container
size_t size() const
Return size of string array.
CharIterator get_chars(const String &s, size_t depth) const
Return CharIterator for referenced string, which belong to this set.
static void deallocate(Container &c)
Deallocate a temporary string container.
String & operator[](Iterator i) const
Iterator-based array access (readable and writable) to String objects.
GenericCharStringSet(const Container &c)
Construct from a string container.
GenericCharStringSet(Iterator begin, Iterator end)
Construct from begin and end string pointers.
bool is_end(const String &, const CharIterator &i) const
Returns true if CharIterator is at end of the given String.
static Container allocate(size_t n)
Allocate a new temporary string container with n empty Strings.
std::string get_string(const String &s, size_t depth=0) const
Return complete string (for debugging purposes)
GenericCharStringSet sub(Iterator begin, Iterator end) const
Subset this string set using iterator range.
Traits::Iterator Iterator
Traits::CharIterator CharIterator
Class implementing StringSet concept for a std::string objects.
const Char * CharIterator
iterator of characters in a string
String * Iterator
Iterator over string references: pointer to std::string.
std::pair< Iterator, size_t > Container
exported alias for assumed string container
std::string String
String reference: std::string, which should be reference counted.
std::uint8_t Char
exported alias for character type
Iterator begin() const
Iterator representing first String position.
size_t size() const
Return size of string array.
CharIterator get_chars(const String &s, size_t depth) const
Return CharIterator for referenced string, which belongs to this set.
static void deallocate(Container &c)
Deallocate a temporary string container.
StdStringSet(Container &c)
Construct from a string container.
Iterator end() const
Iterator representing beyond last String position.
static Container allocate(size_t n)
Allocate a new temporary string container with n empty Strings.
std::string get_string(const String &s, size_t depth=0) const
Return complete string (for debugging purposes)
bool is_end(const String &s, const CharIterator &i) const
Returns true if CharIterator is at end of the given String.
StdStringSet sub(Iterator begin, Iterator end) const
Subset this string set using iterator range.
StdStringSet(const Iterator &begin, const Iterator &end)
Construct from begin and end string pointers.
Iterator begin_
pointers to std::string objects
String & operator[](const Iterator &i) const
Array access (readable and writable) to String objects.
Base class for common string set functions, included via CRTP.
Traits::String & at(size_t i) const
index-based array access (readable and writable) to String objects.
std::uint8_t get_uint8(const typename Traits::String &s, size_t depth) const
bool is_less(const typename Traits::String &a, const typename Traits::CharIterator &ai, const typename Traits::String &b, const typename Traits::CharIterator &bi) const
check if string a is less or equal to string b at iterators ai and bi.
std::uint64_t get_uint64(const typename Traits::String &s, size_t depth) const
std::uint16_t get_uint16(const typename Traits::String &s, typename Traits::CharIterator i) const
Return up to 2 characters of string s at iterator i packed into a uint16_t (only works correctly for ...
bool is_equal(const typename Traits::String &a, const typename Traits::CharIterator &ai, const typename Traits::String &b, const typename Traits::CharIterator &bi) const
check equality of two strings a and b at char iterators ai and bi.
Traits::Char get_char(const typename Traits::String &s, size_t depth) const
StringSet subi(size_t begin, size_t end) const
Subset this string set using index range.
bool check_order(const typename Traits::String &s1, const typename Traits::String &s2) const
bool is_leq(const typename Traits::String &a, const typename Traits::CharIterator &ai, const typename Traits::String &b, const typename Traits::CharIterator &bi) const
check if string a is less or equal to string b at iterators ai and bi.
std::uint16_t get_uint16(const typename Traits::String &s, size_t depth) const
std::uint8_t get_uint8(const typename Traits::String &s, typename Traits::CharIterator i) const
Return up to 1 characters of string s at iterator i packed into a uint8_t (only works correctly for 8...
std::uint64_t get_uint64(const typename Traits::String &s, typename Traits::CharIterator i) const
Return up to 8 characters of string s at iterator i packed into a uint64_t (only works correctly for ...
std::uint32_t get_uint32(const typename Traits::String &s, size_t depth) const
std::uint32_t get_uint32(const typename Traits::String &s, typename Traits::CharIterator i) const
Return up to 4 characters of string s at iterator i packed into a uint32_t (only works correctly for ...
Class implementing StringSet concept for suffix sorting indexes of a std::string text object.
std::string Text
exported alias for assumed string container
std::pair< Text, std::vector< String > > Container
exported alias for assumed string container
Text::size_type String
String reference: suffix index of the text.
const Char * CharIterator
iterator of characters in a string
std::vector< String >::iterator Iterator
Iterator over string references: using std::vector's iterator over suffix array vector.
std::uint8_t Char
exported alias for character type
Class implementing StringSet concept for suffix sorting indexes of a std::string text object.
Iterator begin() const
Iterator representing first String position.
size_t size() const
Return size of string array.
CharIterator get_chars(const String &s, size_t depth) const
Return CharIterator for referenced string, which belongs to this set.
static void deallocate(Container &c)
Deallocate a temporary string container.
Iterator end() const
Iterator representing beyond last String position.
bool is_end(const String &, const CharIterator &i) const
Returns true if CharIterator is at end of the given String.
StringSuffixSet(const Text &text, const Iterator &begin, const Iterator &end)
Construct from begin and end string pointers.
std::string get_string(const String &s, size_t depth=0) const
Return complete string (for debugging purposes)
StringSuffixSet(Container &c)
Construct from a string container.
Container allocate(size_t n) const
Allocate a new temporary string container with n empty Strings.
const Text * text_
reference to base text
Iterator begin_
iterators inside the output suffix array.
StringSuffixSet sub(Iterator begin, Iterator end) const
Subset this string set using iterator range.
static StringSuffixSet Initialize(const Text &text, std::vector< String > &sa)
Initializing constructor which fills output vector sa with indices.
String & operator[](const Iterator &i) const
Array access (readable and writable) to String objects.
Class implementing StringSet concept for a std::unique_ptr<std::string objects, which are non-copyabl...
const Char * CharIterator
iterator of characters in a string
String * Iterator
Iterator over string references: using std::vector's iterator.
std::unique_ptr< std::string > String
String reference: std::string, which should be reference counted.
std::pair< Iterator, size_t > Container
exported alias for assumed string container
std::uint8_t Char
exported alias for character type
Iterator begin() const
Iterator representing first String position.
size_t size() const
Return size of string array.
CharIterator get_chars(const String &s, size_t depth) const
Return CharIterator for referenced string, which belongs to this set.
UPtrStdStringSet(Container &c)
Construct from a string container.
static void deallocate(Container &c)
Deallocate a temporary string container.
Iterator end() const
Iterator representing beyond last String position.
static Container allocate(size_t n)
Allocate a new temporary string container with n empty Strings.
UPtrStdStringSet(const Iterator &begin, const Iterator &end)
Construct from begin and end string pointers.
std::string get_string(const String &s, size_t depth=0) const
Return complete string (for debugging purposes)
bool is_end(const String &s, const CharIterator &i) const
Returns true if CharIterator is at end of the given String.
UPtrStdStringSet sub(Iterator begin, Iterator end) const
Subset this string set using iterator range.
Iterator begin_
vector of std::string objects
String & operator[](const Iterator &i) const
Array access (readable and writable) to String objects.
Type get_key_at(const StringSet &strset, size_t idx, size_t depth)
GenericCharStringSet< const char > CCharStringSet
enable_if< sizeof(Type)==4, std::uint32_t >::type get_key(const StringSet &strset, const typename StringSet::String &s, size_t depth)
GenericCharStringSet< char > CharStringSet
GenericCharStringSet< const unsigned char > CUCharStringSet
GenericCharStringSet< unsigned char > UCharStringSet
SFINAE enable_if – copy of std::enable_if<> with less extra cruft.