41#include <initializer_list>
48#ifndef TINY_UTF8_THROW
49 #if defined(__cpp_exceptions) && !defined(TINY_UTF8_NOEXCEPT)
51 #define TINY_UTF8_THROW( LOCATION , FAILING_PREDICATE ) throw std::out_of_range( LOCATION ": " #FAILING_PREDICATE )
53 #define TINY_UTF8_THROW( ... ) void()
58#if defined(_MSC_VER) && defined(_MSVC_LANG)
59 #define TINY_UTF8_CPLUSPLUS _MSVC_LANG
61 #define TINY_UTF8_CPLUSPLUS __cplusplus
65#if TINY_UTF8_CPLUSPLUS >= 201703L
66 #define TINY_UTF8_FALLTHROUGH [[fallthrough]];
67#elif defined(__clang__)
69 #define TINY_UTF8_FALLTHROUGH
70#elif defined(__GNUC__) && __GNUG__ > 6
71 #define TINY_UTF8_FALLTHROUGH [[gnu::fallthrough]];
73 #define TINY_UTF8_FALLTHROUGH
78 #pragma clang diagnostic push
80#elif defined(__GNUC__)
81 #pragma GCC diagnostic push
82#elif defined(_MSC_VER)
84 #pragma warning(disable:4701)
85 #pragma warning(disable:4702)
86 #pragma warning(disable:4703)
87 #pragma warning(disable:26819)
91#if TINY_UTF8_CPLUSPLUS >= 201703L
92 #define TINY_UTF8_CPP17( ... ) __VA_ARGS__
94 #define TINY_UTF8_CPP17( ... )
98#if defined(TINY_UTF8_NOEXCEPT)
99 #undef TINY_UTF8_NOEXCEPT
100 #define TINY_UTF8_NOEXCEPT true
101#elif !defined(__cpp_exceptions)
102 #define TINY_UTF8_NOEXCEPT true
104 #define TINY_UTF8_NOEXCEPT false
108#ifdef TINY_UTF8_GLOBAL_NAMESPACE
116 typename ValueType = char32_t
117 ,
typename DataType = char
118 ,
typename Allocator = std::allocator<DataType>
127 #if defined(__cpp_char8_t)
134 namespace tiny_utf8_detail
141 #if defined(__GNUC__)
142 #define TINY_UTF8_HAS_CLZ true
// Counts the leading zero bits of an `unsigned int` via the GCC/Clang builtin.
// The builtin's result is undefined for value == 0, so callers must pass a non-zero value.
static inline unsigned int clz( unsigned int value ) noexcept
{
	const int leading_zeros = __builtin_clz( value );
	return static_cast<unsigned int>( leading_zeros );
}
// Counts the leading zero bits of an `unsigned long int` via the GCC/Clang builtin.
// The builtin's result is undefined for value == 0, so callers must pass a non-zero value.
static inline unsigned int clz( unsigned long int value ) noexcept
{
	const int leading_zeros = __builtin_clzl( value );
	return static_cast<unsigned int>( leading_zeros );
}
145 static inline unsigned int clz(
char32_t value )
noexcept {
146 return sizeof(char32_t) ==
sizeof(
unsigned long int) ? (
unsigned int)__builtin_clzl( value ) : (
unsigned int)__builtin_clz( value );
148 #elif defined(_MSC_VER)
149 #define TINY_UTF8_HAS_CLZ true
151 static inline unsigned int lzcnt( T value )
noexcept {
152 unsigned long value_log2;
153 #if INTPTR_MAX >= INT64_MAX
154 _BitScanReverse64( &value_log2 , value );
156 _BitScanReverse( &value_log2 , value );
158 return sizeof(T) * 8 - value_log2 - 1;
160 static inline unsigned int clz( std::uint16_t value )
noexcept {
return lzcnt( value ); }
161 static inline unsigned int clz( std::uint32_t value )
noexcept {
return lzcnt( value ); }
162 #if INTPTR_MAX >= INT64_MAX
163 static inline unsigned int clz( std::uint64_t value )
noexcept {
return lzcnt( value ); }
165 static inline unsigned int clz(
char32_t value )
noexcept {
return lzcnt( value ); }
167 #define TINY_UTF8_HAS_CLZ false
174 constexpr static std::uint32_t u4 = 1;
175 constexpr static std::uint8_t u1 = (
const std::uint8_t &) u4;
181 template<
typename T, std::
size_t = sizeof(T)>
201 inline std::size_t
strlen(
const T* str ){ std::size_t len = 0u;
while( *str++ ) ++len;
return len; }
202 template<>
inline std::size_t
strlen<char>(
const char* str ){
return std::strlen( str ); }
206 template<
typename Container,
bool RangeCheck>
221 operator typename Container::value_type() const noexcept(
TINY_UTF8_NOEXCEPT || RangeCheck == false ) {
239 template<
typename Container,
bool RangeCheck>
254 t_index( reference.t_instance->get_num_bytes_from_start( reference.t_index ) )
259 operator typename Container::value_type() const noexcept(
TINY_UTF8_NOEXCEPT || RangeCheck == false ) {
263 return static_cast<const Container*
>(
t_instance)->raw_at(
t_index , std::nothrow );
282 template<
typename Container,
bool Raw>
285 template<
typename,
typename,
typename>
343 template<
typename Container>
346 template<
typename,
typename,
typename>
376 t_index( other.get_raw_index() )
// Forward declarations of the four iterator templates.
// Each one is parameterized on the container type and a boolean `Raw` flag.
template<class Container, bool Raw> struct iterator;
template<class Container, bool Raw> struct const_iterator;
template<class Container, bool Raw> struct reverse_iterator;
template<class Container, bool Raw> struct const_reverse_iterator;
425 template<
typename Container,
bool Raw = false>
493 template<
typename Container,
bool Raw>
518 template<
typename Container,
bool Raw>
594 template<
typename Container,
bool Raw>
633 template<
typename Container,
bool Raw>
634 static inline bool operator>(
const const_iterator<Container, false>& lhs ,
const const_iterator<Container, Raw>& rhs )
noexcept {
return lhs.get_index() > rhs.get_index(); }
635 template<
typename Container,
bool Raw>
636 static inline bool operator>(
const const_reverse_iterator<Container, false>& lhs ,
const const_reverse_iterator<Container, Raw>& rhs )
noexcept {
return lhs.get_index() < rhs.get_index(); }
637 template<
typename Container,
bool Raw>
638 static inline bool operator>=(
const const_iterator<Container, false>& lhs ,
const const_iterator<Container, Raw>& rhs )
noexcept {
return lhs.get_index() >= rhs.get_index(); }
639 template<
typename Container,
bool Raw>
640 static inline bool operator>=(
const const_reverse_iterator<Container, false>& lhs ,
const const_reverse_iterator<Container, Raw>& rhs )
noexcept {
return lhs.get_index() <= rhs.get_index(); }
641 template<
typename Container,
bool Raw>
642 static inline bool operator<(
const const_iterator<Container, false>& lhs ,
const const_iterator<Container, Raw>& rhs )
noexcept {
return lhs.get_index() < rhs.get_index(); }
643 template<
typename Container,
bool Raw>
644 static inline bool operator<(
const const_reverse_iterator<Container, false>& lhs ,
const const_reverse_iterator<Container, Raw>& rhs )
noexcept {
return lhs.get_index() > rhs.get_index(); }
645 template<
typename Container,
bool Raw>
646 static inline bool operator<=(
const const_iterator<Container, false>& lhs ,
const const_iterator<Container, Raw>& rhs )
noexcept {
return lhs.get_index() <= rhs.get_index(); }
647 template<
typename Container,
bool Raw>
648 static inline bool operator<=(
const const_reverse_iterator<Container, false>& lhs ,
const const_reverse_iterator<Container, Raw>& rhs )
noexcept {
return lhs.get_index() >= rhs.get_index(); }
650 template<
typename Container,
bool Raw>
651 static inline bool operator>(
const const_iterator<Container, true>& lhs ,
const const_iterator<Container, Raw>& rhs )
noexcept {
return lhs.get_raw_index() > rhs.get_raw_index(); }
652 template<
typename Container,
bool Raw>
653 static inline bool operator>(
const const_reverse_iterator<Container, true>& lhs ,
const const_reverse_iterator<Container, Raw>& rhs )
noexcept {
return lhs.get_raw_index() < rhs.get_raw_index(); }
654 template<
typename Container,
bool Raw>
655 static inline bool operator>=(
const const_iterator<Container, true>& lhs ,
const const_iterator<Container, Raw>& rhs )
noexcept {
return lhs.get_raw_index() >= rhs.get_raw_index(); }
656 template<
typename Container,
bool Raw>
657 static inline bool operator>=(
const const_reverse_iterator<Container, true>& lhs ,
const const_reverse_iterator<Container, Raw>& rhs )
noexcept {
return lhs.get_raw_index() <= rhs.get_raw_index(); }
658 template<
typename Container,
bool Raw>
659 static inline bool operator<(
const const_iterator<Container, true>& lhs ,
const const_iterator<Container, Raw>& rhs )
noexcept {
return lhs.get_raw_index() < rhs.get_raw_index(); }
660 template<
typename Container,
bool Raw>
661 static inline bool operator<(
const const_reverse_iterator<Container, true>& lhs ,
const const_reverse_iterator<Container, Raw>& rhs )
noexcept {
return lhs.get_raw_index() > rhs.get_raw_index(); }
662 template<
typename Container,
bool Raw>
663 static inline bool operator<=(
const const_iterator<Container, true>& lhs ,
const const_iterator<Container, Raw>& rhs )
noexcept {
return lhs.get_raw_index() <= rhs.get_raw_index(); }
664 template<
typename Container,
bool Raw>
665 static inline bool operator<=(
const const_reverse_iterator<Container, true>& lhs ,
const const_reverse_iterator<Container, Raw>& rhs )
noexcept {
return lhs.get_raw_index() >= rhs.get_raw_index(); }
671 template<
typename Container>
673 return lhs.get_index() - rhs.get_index();
675 template<
typename Container>
677 return rhs.get_index() - lhs.get_index();
680 template<
typename Container>
685 return max_index == lhs.
get_raw_index() ? num_codepoints : -num_codepoints;
687 template<
typename Container>
692 return max_index == rhs.
get_raw_index() ? num_codepoints : -num_codepoints;
707 typedef typename std::allocator_traits<Allocator>::size_type
size_type;
775 template<
size_type L>
777 template<
size_type L>
781 template<
typename T,
typename CharType,
typename _DataType =
bool>
783 std::is_pointer<typename std::remove_reference<T>::type>::value
787 ,
typename std::remove_cv<
788 typename std::remove_pointer<
789 typename std::remove_reference<T>::type
797 static inline bool is_lut_active(
const data_type* lut_base_ptr )
noexcept {
return *((
const unsigned char*)lut_base_ptr) & 0x1; }
810 *(
indicator_type*)lut_base_ptr = active ? ( lut_len << 1 ) | 0x1 : 0;
820 return buffer_size <= (
size_type)std::numeric_limits<std::uint8_t>::max() + 1
821 ?
sizeof(std::uint8_t)
822 : buffer_size <= (
size_type)std::numeric_limits<std::uint16_t>::max() + 1
823 ?
sizeof(std::uint16_t)
824 : buffer_size <= (
size_type)std::numeric_limits<std::uint32_t>::max() + 1
825 ?
sizeof(std::uint32_t)
826 :
sizeof(std::uint64_t)
832 size_type threshold = biased ? ( lut_present ? string_len / 3u : string_len / 6u ) : string_len / 4u;
834 return size_type( pot_lut_len - 1 ) < threshold;
840 data_len += lut_len * width_guess;
841 data_len += lut_len * ( ( *lut_width =
get_lut_width( data_len ) ) - width_guess );
861 case sizeof(std::uint8_t):
return *(
const std::uint8_t*)iter;
862 case sizeof(std::uint16_t):
return *(
const std::uint16_t*)iter;
863 case sizeof(std::uint32_t):
return *(
const std::uint32_t*)iter;
865 return (
size_type)*(
const std::uint64_t*)iter;
869 case sizeof(std::uint8_t): *(std::uint8_t*)iter = (std::uint8_t)value;
break;
870 case sizeof(std::uint16_t): *(std::uint16_t*)iter = (std::uint16_t)value;
break;
871 case sizeof(std::uint32_t): *(std::uint32_t*)iter = (std::uint32_t)value;
break;
872 case sizeof(std::uint64_t): *(std::uint64_t*)iter = (std::uint64_t)value;
break;
885 #if TINY_UTF8_HAS_CLZ
890 size_type codepoint_bytes = tiny_utf8_detail::clz( ~((
unsigned int)first_byte << (
sizeof(
unsigned int)-1)*8 ) );
908 #if TINY_UTF8_HAS_CLZ
912 1 , 1 , 1 , 1 , 1 , 1 , 1 , 2 , 2 , 2 , 2 , 3 , 3 , 3 , 3 , 3
913 , 4 , 4 , 4 , 4 , 4 , 5 , 5 , 5 , 5 , 5 , 6 , 6 , 6 , 6 , 6 , 7
915 return lut[ 31 - tiny_utf8_detail::clz( cp ) ];
919 else if( cp <= 0x7FF )
921 else if( cp <= 0xFFFF )
923 else if( cp <= 0x1FFFFF )
925 else if( cp <= 0x3FFFFFF )
927 else if( cp <= 0x7FFFFFFF )
941 cp &= 0x7F >> num_bytes;
943 cp = ( cp << 6 ) | ( (
unsigned char)
data[i] & 0x3F );
970 case 2: dest[cp_bytes-1] = 0x80 | ((cp >> 0) & 0x3F);
971 dest[0] = (
unsigned char)( ( std::uint_least16_t(0xFF00uL) >> cp_bytes ) | ( cp >> ( 6 * cp_bytes - 6 ) ) );
974 dest[0] = (
unsigned char)cp;
1040 template<
typename T>
1056 std::basic_string<data_type>
cpp_str_bom() const noexcept ;
1060 using appropriate_allocator =
typename std::allocator_traits<Allocator>::template rebind_alloc<size_type>;
1061 appropriate_allocator casted_allocator = (
const Allocator&)*
this;
1063 std::allocator_traits<appropriate_allocator>::allocate(
1072 using appropriate_allocator =
typename std::allocator_traits<Allocator>::template rebind_alloc<size_type>;
1073 appropriate_allocator casted_allocator = (
const Allocator&)*
this;
1074 std::allocator_traits<appropriate_allocator>::deallocate(
1076 ,
reinterpret_cast<size_type*
>( buffer )
1104 : Allocator( alloc )
1116 template<
typename T>
1155 template<
size_type LITLEN>
1158 : Allocator( alloc )
1160 std::memcpy(
t_sso.
data , str , LITLEN );
1161 if( str[LITLEN-1] ){
1168 template<
size_type LITLEN>
1171 :
basic_string( str , LITLEN - ( str[LITLEN-1] ? 0 : 1 ) , alloc , tiny_utf8_detail::read_bytes_tag() )
1181 template<
size_type LITLEN>
1184 :
basic_string( str , 0 , len , LITLEN - ( str[LITLEN-1] ? 0 : 1 ) , alloc , tiny_utf8_detail::read_codepoints_tag() )
1195 template<
size_type LITLEN>
1198 :
basic_string( str , pos , len , LITLEN - ( str[LITLEN-1] ? 0 : 1 ) , alloc , tiny_utf8_detail::read_codepoints_tag() )
1207 template<
typename C,
typename A>
1220 template<
typename C,
typename A>
1223 :
basic_string( str.
data() , 0 , len , str.
size() , alloc , tiny_utf8_detail::read_codepoints_tag() )
1225 template<
typename C,
typename A>
1228 :
basic_string( str.
data() , pos , len , str.
size() , alloc , tiny_utf8_detail::read_codepoints_tag() )
1268 template<
typename InputIt>
1271 : Allocator( alloc )
1274 while( first != last )
push_back( *first++ );
1286 std::memcpy( (
void*)&this->
t_sso , (
void*)&str.t_sso ,
sizeof(
SSO) );
1289 if( str.sso_inactive() ){
1292 std::memcpy(
t_non_sso.
data , str.t_non_sso.data , total_buffer_size );
1304 : Allocator( alloc )
1306 std::memcpy( (
void*)&this->
t_sso , (
void*)&str.t_sso ,
sizeof(
SSO) );
1309 if( str.sso_inactive() ){
1312 std::memcpy(
t_non_sso.
data , str.t_non_sso.data , total_buffer_size );
1324 template<typename T>
1329 template<
size_type LITLEN>
1332 :
basic_string( str , LITLEN - ( str[LITLEN-1] ? 0 : 1 ) , alloc )
1352 : Allocator( alloc )
1365 : Allocator( alloc )
1379 std::memcpy( (
void*)&this->
t_sso , (
void*)&str.t_sso ,
sizeof(
SSO) );
1380 str.set_sso_data_len( 0u );
1391 : Allocator( alloc )
1393 std::memcpy( (
void*)&this->
t_sso , (
void*)&str.t_sso ,
sizeof(
SSO) );
1394 str.set_sso_data_len( 0u );
1426 std::memcpy( (
void*)&this->
t_sso , (
void*)&str.t_sso ,
sizeof(
SSO) );
1427 str.set_sso_data_len(0);
1469 std::memcpy( &tmp , (
void*)&str.t_sso ,
sizeof(
SSO) );
1470 std::memcpy( (
void*)&str , (
void*)&this->
t_sso ,
sizeof(
SSO) );
1471 std::memcpy( (
void*)&this->t_sso , &tmp ,
sizeof(
SSO) );
1513 if( byte_index >=
size ){
1723 return decode_utf8( buffer + my_size - bytes , bytes );
1737 return replace( index , 1 , repl , n );
1764 return raw_replace( first.get_raw_index() , last.get_raw_index() - first.get_raw_index() ,
basic_string( n , repl ) );
1767 return raw_replace( first.get_raw_index() , last.get_raw_index() - first.get_raw_index() ,
basic_string( repl ) );
1784 return raw_replace( first.get_raw_index() , last.get_raw_index() - first.get_raw_index() , repl );
1910 return *
this = std::move(str);
1918 template<
typename T>
1930 template<
size_type LITLEN>
1940 template<
typename T>
1952 template<
size_type LITLEN>
1963 template<
typename InputIt>
2016 return raw_insert( it.get_raw_index() , str );
2066 return raw_erase( first.get_raw_index() , last.get_raw_index() - first.get_raw_index() );
2102 size_type byte_count = last.get_raw_index() - first.get_raw_index();
2103 return raw_substr( first.get_raw_index() , byte_count );
2120 return raw_substr( byte_start , byte_count );
2146 return start_codepoint;
2161 const data_type* result = std::strstr( buffer + actual_start , pattern.
data() );
2164 return start_codepoint +
get_num_codepoints( actual_start , result - ( buffer + actual_start ) );
2178 const data_type* result = std::strstr( buffer + actual_start , pattern );
2181 return start_codepoint +
get_num_codepoints( actual_start , result - ( buffer + actual_start ) );
2192 if( start_byte >= my_size )
2196 return it -
begin();
2207 if( start_byte >=
size() )
2210 const data_type* result = std::strstr( buffer + start_byte , pattern.
data() );
2213 return result - buffer;
2223 if( start_byte >=
size() )
2226 const data_type* result = std::strstr( buffer + start_byte , pattern );
2229 return result - buffer;
2243 if( start_codepoint >= string_len )
2244 it =
crbegin(), start_codepoint = string_len - 1;
2246 it =
rget( start_codepoint );
2247 for( ; it !=
end ; ++it, --start_codepoint )
2249 return start_codepoint;
2283 return my_size >= str_size && std::memcmp(
data() , str.data() , str_size ) == 0;
2293 return my_size >= str_size && std::memcmp(
data() , str.data() , str_size ) == 0;
2310 template<
typename T>
2313 if( my_size < str_size )
2315 for(
const data_type* my_data =
data() ; *str && *str == *my_data ; ++str, ++my_data );
2324 template<
size_type LITLEN>
2326 size_type my_size =
size(), str_size = str[LITLEN-1] ? LITLEN : LITLEN-1;
2327 return my_size >= str_size && std::memcmp(
data() , str , str_size ) == 0;
2335 template<
typename T>
2346 template<
size_type LITLEN>
2348 size_type str_len = str[LITLEN-1] ? LITLEN : LITLEN-1;
2350 while( it !=
end && str_len ){
2353 ++it, ++str, --str_len;
2367 return my_size >= str_size && std::memcmp(
data() + my_size - str_size , str.data() , str_size ) == 0;
2375 inline bool ends_with(
const std::string& str )
const noexcept {
2377 return my_size >= str_size && std::memcmp(
data() + my_size - str_size , str.data() , str_size ) == 0;
2394 template<
typename T>
2397 return my_size >= str_size && std::memcmp(
data() + my_size - str_size , str , str_size ) == 0;
2405 template<
size_type LITLEN>
2407 size_type my_size =
size(), str_size = str[LITLEN-1] ? LITLEN : LITLEN-1;
2408 return my_size >= str_size && std::memcmp(
data() + my_size - str_size , str , str_size ) == 0;
2416 template<
typename T>
2420 while( it !=
end && str_len ){
2421 if( *it != str[--str_len] )
2433 template<
size_type LITLEN>
2435 size_type str_len = str[LITLEN-1] ? LITLEN : LITLEN-1;
2437 while( it !=
end && str_len ){
2438 if( *it != str[--str_len] )
2458 int result = std::memcmp(
data() , str.data() , my_size < str_size ? my_size : str_size );
2459 if( !result && my_size != str_size )
2460 result = my_size < str_size ? -1 : 1;
2473 inline int compare(
const std::string& str )
const noexcept {
2475 int result = std::memcmp(
data() , str.data() , my_size < str_size ? my_size : str_size );
2476 if( !result && my_size != str_size )
2477 result = my_size < str_size ? -1 : 1;
2492 template<
typename T>
2495 while( it !=
end && *str ){
2497 return *it < *str ? -1 : 1;
2500 return *str ? -1 : it ==
end ? 0 : 1;
2512 template<
size_type LITLEN>
2517 if( *it != str[index] )
2518 return *it < str[index] ? -1 : 1;
2521 return index <
length ? -1 : it ==
end ? 0 : 1;
2535 template<
typename T>
2538 while( it !=
end && *str ){
2540 return *it < *str ? -1 : 1;
2543 return *str ? -1 : it ==
end ? 0 : 1;
2555 template<
size_type LITLEN>
2560 if( *it != str[index] )
2561 return *it < str[index] ? -1 : 1;
2564 return index <
length ? -1 : it ==
end ? 0 : 1;
2702 template<
typename V,
typename D,
typename A>
2707 std::hash<data_type> hasher;
2708 std::size_t size =
string.size();
2709 std::size_t result = 0;
2710 const data_type* buffer =
string.data();
2711 for( std::size_t iterator = 0 ; iterator < size ; ++iterator )
2712 result = result * 31u + hasher( buffer[iterator] );
2719template<
typename V,
typename D,
typename A>
2721 return stream << str.cpp_str();
2723template<
typename V,
typename D,
typename A>
2735 template<
typename V,
typename D,
typename A>
2736 basic_string<V, D, A>::basic_string(
basic_string<V, D, A>::size_type count ,
basic_string<V, D, A>::value_type cp ,
const typename basic_string<V, D, A>::allocator_type& alloc )
2744 width_type num_bytes_per_cp = get_codepoint_bytes( cp );
2745 size_type data_len = num_bytes_per_cp * count;
2752 size_type buffer_size = determine_main_buffer_size( data_len );
2753 buffer = this->allocate( determine_total_buffer_size( buffer_size ) );
2754 #if defined(TINY_UTF8_NOEXCEPT)
2758 t_non_sso.data = buffer;
2761 set_lut_indiciator( buffer + buffer_size , num_bytes_per_cp == 1 , 0 );
2762 t_non_sso.buffer_size = buffer_size;
2763 t_non_sso.data_len = data_len;
2764 set_non_sso_string_len( count );
2767 buffer = t_sso.data;
2770 set_sso_data_len( (
unsigned char)data_len );
2774 if( num_bytes_per_cp > 1 ){
2776 for( data_type* buffer_iter = buffer ; --count > 0 ; )
2777 std::memcpy( buffer_iter += num_bytes_per_cp , buffer , num_bytes_per_cp );
2780 std::memset( buffer , cp , count );
2783 buffer[data_len] = 0;
2786 template<
typename V,
typename D,
typename A>
2800 size_type buffer_size = determine_main_buffer_size( count );
2801 buffer = this->allocate( determine_total_buffer_size( buffer_size ) );
2802 #if defined(TINY_UTF8_NOEXCEPT)
2806 t_non_sso.data = buffer;
2809 set_lut_indiciator( buffer + buffer_size ,
true , 0 );
2810 t_non_sso.buffer_size = buffer_size;
2811 t_non_sso.data_len = count;
2812 set_non_sso_string_len( count );
2815 buffer = t_sso.data;
2816 set_sso_data_len( (
unsigned char)count );
2820 std::memset( buffer , cp , count );
2826 template<
typename V,
typename D,
typename A>
2829 : basic_string( alloc )
2834 size_type num_multibytes = 0;
2835 size_type data_len = 0;
2836 size_type string_len = 0;
2840 while( *str && string_len < pos && data_left != 0u ){
2841 width_type bytes = get_codepoint_bytes( str[data_len] , data_left );
2849 while( str[data_len] && string_len < count )
2852 width_type bytes = get_codepoint_bytes( str[data_len] , data_left );
2856 num_multibytes += bytes > 1 ? 1 : 0;
2865 width_type lut_width;
2866 size_type buffer_size = determine_main_buffer_size( data_len , num_multibytes , &lut_width );
2867 buffer = this->allocate( determine_total_buffer_size( buffer_size ) );
2868 #if defined(TINY_UTF8_NOEXCEPT)
2872 t_non_sso.data = buffer;
2879 data_type* buffer_iter = buffer;
2880 const data_type* str_iter = str;
2881 const data_type* str_end = str + data_len;
2882 while( str_iter < str_end )
2884 width_type bytes = get_codepoint_bytes( *str_iter , str_end - str_iter );
2892 case 2: buffer_iter[1] = str_iter[1];
2896 case 1: buffer_iter[0] = str_iter[0];
break;
2898 buffer_iter += bytes;
2901 *buffer_iter =
'\0';
2904 t_non_sso.buffer_size = buffer_size;
2905 t_non_sso.data_len = data_len;
2906 set_non_sso_string_len( string_len );
2911 size_type buffer_size = determine_main_buffer_size( data_len );
2912 buffer = t_non_sso.data = this->allocate( determine_total_buffer_size( buffer_size ) );
2919 t_non_sso.buffer_size = buffer_size;
2920 t_non_sso.data_len = data_len;
2921 set_non_sso_string_len( string_len );
2924 buffer = t_sso.data;
2927 set_sso_data_len( (
unsigned char)data_len );
2934 std::memcpy( buffer , str , data_len );
2935 buffer[data_len] =
'\0';
2938 template<
typename V,
typename D,
typename A>
2941 : basic_string( alloc )
2946 size_type num_multibytes = 0;
2947 size_type index = 0;
2948 size_type string_len = 0;
2951 while( index < data_len )
2957 num_multibytes += bytes > 1 ? 1 : 0;
2968 width_type lut_width;
2969 size_type buffer_size = determine_main_buffer_size( data_len , num_multibytes , &lut_width );
2970 buffer = this->allocate( determine_total_buffer_size( buffer_size ) );
2971 #if defined(TINY_UTF8_NOEXCEPT)
2975 t_non_sso.data = buffer;
2982 data_type* buffer_iter = buffer;
2983 const data_type* str_iter = str;
2984 const data_type* str_end = str + data_len;
2985 while( str_iter < str_end )
2987 width_type bytes = get_codepoint_bytes( *str_iter , str_end - str_iter );
2995 case 2: buffer_iter[1] = str_iter[1];
2999 case 1: buffer_iter[0] = str_iter[0];
break;
3001 buffer_iter += bytes;
3004 *buffer_iter =
'\0';
3007 t_non_sso.buffer_size = buffer_size;
3008 t_non_sso.data_len = data_len;
3009 set_non_sso_string_len( string_len );
3014 size_type buffer_size = determine_main_buffer_size( data_len );
3015 buffer = t_non_sso.data = this->allocate( determine_total_buffer_size( buffer_size ) );
3022 t_non_sso.buffer_size = buffer_size;
3023 t_non_sso.data_len = data_len;
3024 set_non_sso_string_len( string_len );
3027 buffer = t_sso.data;
3030 set_sso_data_len( (
unsigned char)data_len );
3037 std::memcpy( buffer , str , data_len );
3038 buffer[data_len] =
'\0';
3041 template<
typename V,
typename D,
typename A>
3044 : basic_string( alloc )
3049 size_type num_multibytes = 0;
3050 size_type data_len = 0;
3051 size_type string_len = 0;
3057 width_type bytes = get_codepoint_bytes( str[string_len] );
3061 num_multibytes += bytes > 1 ;
3072 width_type lut_width;
3073 size_type buffer_size = determine_main_buffer_size( data_len , num_multibytes , &lut_width );
3074 buffer = this->allocate( determine_total_buffer_size( buffer_size ) );
3075 #if defined(TINY_UTF8_NOEXCEPT)
3079 t_non_sso.data = buffer;
3083 data_type* buffer_iter = buffer;
3087 for( size_type i = 0 ; i < string_len ; i++ )
3093 if( codepoint_bytes > 1 )
3097 buffer_iter += codepoint_bytes;
3099 *buffer_iter =
'\0';
3102 t_non_sso.buffer_size = buffer_size;
3103 t_non_sso.data_len = data_len;
3104 set_non_sso_string_len( string_len );
3109 size_type buffer_size = determine_main_buffer_size( data_len );
3110 buffer = t_non_sso.data = this->allocate( determine_total_buffer_size( buffer_size ) );
3117 t_non_sso.buffer_size = buffer_size;
3118 t_non_sso.data_len = data_len;
3119 set_non_sso_string_len( string_len );
3122 buffer = t_sso.data;
3125 set_sso_data_len( (
unsigned char)data_len );
3131 data_type* buffer_iter = buffer;
3134 for( size_type i = 0 ; i < string_len ; i++ )
3138 *buffer_iter =
'\0';
3141 template<
typename V,
typename D,
typename A>
3144 data_start += index;
3149 if( ((
unsigned char)data_start[-7] & 0xFE ) == 0xFC )
3153 if( ((
unsigned char)data_start[-6] & 0xFE ) == 0xFC )
3157 if( ((
unsigned char)data_start[-5] & 0xFC ) == 0xF8 )
3161 if( ((
unsigned char)data_start[-4] & 0xF8 ) == 0xF0 )
3165 if( ((
unsigned char)data_start[-3] & 0xF0 ) == 0xE0 )
3169 if( ((
unsigned char)data_start[-2] & 0xE0 ) == 0xC0 )
3178 #if !TINY_UTF8_HAS_CLZ
3179 template<
typename V,
typename D,
typename A>
3186 if( ( (
unsigned char)first_byte & 0xFFu ) == 0xFEu )
3189 if( ( (
unsigned char)first_byte & 0xFEu ) == 0xFCu )
3192 if( ( (
unsigned char)first_byte & 0xFCu ) == 0xF8u )
3195 if( ( (
unsigned char)first_byte & 0xF8u ) == 0xF0u )
3198 if( ( (
unsigned char)first_byte & 0xF0u ) == 0xE0u )
3201 if( ( (
unsigned char)first_byte & 0xE0u ) == 0xC0u )
3210 template<
typename V,
typename D,
typename A>
3217 switch( sso_inactive() + str.sso_inactive() * 2 )
3226 width_type lut_width = get_lut_width( t_non_sso.buffer_size );
3232 width_type str_lut_width = get_lut_width( str.t_non_sso.buffer_size );
3235 if( lut_width == str_lut_width ){
3236 str_lut_len *= str_lut_width;
3239 , str_lut_base_ptr - str_lut_len
3240 , str_lut_len +
sizeof(indicator_type)
3245 for( ; str_lut_len > 0 ; --str_lut_len )
3247 lut_iter -= lut_width
3254 goto lbl_replicate_whole_buffer;
3258 goto lbl_replicate_whole_buffer;
3261 std::memcpy( t_non_sso.data , str.t_non_sso.data , str.t_non_sso.data_len + 1 );
3266 t_non_sso.data_len = str.t_non_sso.data_len;
3267 t_non_sso.string_len = str.t_non_sso.string_len;
3268 (allocator_type&)*
this = (
const allocator_type&)str;
3271 lbl_replicate_whole_buffer:
3272 this->deallocate( t_non_sso.data , t_non_sso.buffer_size );
3276 (allocator_type&)*
this = (
const allocator_type&)str;
3278 std::memcpy( t_non_sso.data , str.t_non_sso.data , str.t_non_sso.buffer_size +
sizeof(indicator_type) );
3279 t_non_sso.buffer_size = str.t_non_sso.buffer_size;
3280 t_non_sso.data_len = str.t_non_sso.data_len;
3281 t_non_sso.string_len = str.t_non_sso.string_len;
3284 this->deallocate( t_non_sso.data , t_non_sso.buffer_size );
3288 (allocator_type&)*
this = (
const allocator_type&)str;
3289 std::memcpy( (
void*)&this->t_sso , &str.t_sso ,
sizeof(basic_string::SSO) );
3296 template<
typename V,
typename D,
typename A>
3307 size_type buffer_size = get_buffer_size();
3312 if( is_lut_active( lut_base_ptr ) )
3314 size_type lut_len = get_lut_len( lut_base_ptr );
3316 required_buffer_size = determine_main_buffer_size( data_len , lut_len , &new_lut_width );
3319 if( buffer_size < std::max<size_type>( required_buffer_size + 10 , required_buffer_size >> 2 ) )
3323 t_non_sso.data = this->allocate( determine_total_buffer_size( required_buffer_size ) );
3328 if( old_lut_width != new_lut_width ){
3330 for(
size_type i = 0 ; i < lut_len ; i++ )
3332 new_lut_base_ptr -= new_lut_width
3334 , get_lut( lut_base_ptr -= old_lut_width , old_lut_width )
3338 size_type lut_size = lut_len * old_lut_width;
3339 std::memcpy( new_lut_base_ptr - lut_size , lut_base_ptr - lut_size , lut_size +
sizeof(
indicator_type) );
3344 required_buffer_size = determine_main_buffer_size( data_len );
3347 if( buffer_size < std::max<size_type>( required_buffer_size + 10 , required_buffer_size >> 2 ) )
3350 t_non_sso.data = this->allocate( determine_total_buffer_size( required_buffer_size ) );
3354 std::memcpy( t_non_sso.data , buffer , data_len + 1 );
3355 t_non_sso.buffer_size = required_buffer_size;
3358 this->deallocate( buffer , buffer_size );
3361 template<
typename V,
typename D,
typename A>
3364 size_type data_len = t_non_sso.data_len;
3365 size_type buffer_size = t_non_sso.buffer_size;
3369 return buffer_size - 1;
3371 const data_type* buffer = t_non_sso.data;
3372 size_type string_len = get_non_sso_string_len();
3380 return ( buffer_size - 1 ) * string_len / data_len;
3383 template<
typename V,
typename D,
typename A>
3386 constexpr size_type mask = get_msb_mask<size_type>();
3387 size_type data_len = get_sso_data_len();
3391 for( ; i < data_len /
sizeof(
size_type) ; i++ )
3392 if( ((
size_type*)t_sso.data)[i] & mask )
3397 for( ; i < data_len ; i++ )
3398 if( t_sso.data[i] & 0x80 )
3404 template<
typename V,
typename D,
typename A>
3408 std::basic_string<data_type> result = std::basic_string<data_type>( size() + 3 ,
' ' );
3412 tmp_buffer[0] =
static_cast<data_type>(0xEF);
3413 tmp_buffer[1] =
static_cast<data_type>(0xBB);
3414 tmp_buffer[2] =
static_cast<data_type>(0xBF);
3417 std::memcpy( tmp_buffer + 3 , get_buffer() , size() + 1 );
3422 template<
typename V,
typename D,
typename A>
3428 if( sso_inactive() )
3430 buffer = t_non_sso.data;
3431 data_len = t_non_sso.data_len;
3432 size_type buffer_size = t_non_sso.buffer_size;
3444 const data_type* lut_begin = lut_iter - lut_len * lut_width;
3445 size_type end_index = index + byte_count;
3448 while( lut_iter >= lut_begin ){
3449 lut_iter -= lut_width;
3455 while( lut_iter >= lut_begin ){
3457 if( multibyte_index >= end_index )
3460 lut_iter -= lut_width;
3468 buffer = t_sso.data;
3469 data_len = get_sso_data_len();
3473 const data_type* buffer_iter = buffer + index;
3474 const data_type* fragment_end = buffer_iter + byte_count;
3477 while( buffer_iter < fragment_end ){
3479 buffer_iter += bytes;
3480 byte_count -= bytes - 1;
3487 template<
typename V,
typename D,
typename A>
3493 if( sso_inactive() )
3495 buffer = t_non_sso.data;
3496 data_len = t_non_sso.data_len;
3497 size_type buffer_size = t_non_sso.buffer_size;
3510 if( multibyte_index >= cp_count )
3519 buffer = t_sso.data;
3520 data_len = get_sso_data_len();
3524 while( cp_count-- > 0 && num_bytes <= data_len )
3525 num_bytes += get_codepoint_bytes( buffer[num_bytes] , data_len - num_bytes );
3530 template<
typename V,
typename D,
typename A>
3533 size_type potential_end_index = index + cp_count;
3538 if( sso_inactive() )
3540 buffer = t_non_sso.data;
3541 data_len = t_non_sso.data_len;
3542 size_type buffer_size = t_non_sso.buffer_size;
3546 if( potential_end_index > data_len || potential_end_index < index )
3547 return data_len - index;
3560 const data_type* lut_begin = lut_iter - lut_len * lut_width;
3563 for( lut_iter -= lut_width ; lut_iter >= lut_begin ; lut_iter -= lut_width )
3571 while( lut_iter >= lut_begin ){
3573 if( multibyte_index >= index )
3576 lut_iter -= lut_width;
3579 return index - orig_index;
3583 buffer = t_sso.data;
3584 data_len = get_sso_data_len();
3587 if( potential_end_index > data_len || potential_end_index < index )
3588 return data_len - index;
3594 while( cp_count-- > 0 && index <= data_len )
3595 index += get_codepoint_bytes( buffer[index] , data_len - index );
3597 return index - orig_index;
3600 template<
typename V,
typename D,
typename A>
3605 if( index > data_len ){
3606 TINY_UTF8_THROW(
"tiny_utf8::basic_string::(raw_)substr" , index > data_len );
3609 size_type end_index = index + byte_count;
3610 if( end_index > data_len || end_index < index ){
3611 end_index = data_len;
3612 byte_count = end_index - index;
3616 if( byte_count == data_len )
3625 std::memcpy( result.
t_sso.
data , get_buffer() + index , byte_count );
3639 size_type buffer_size = t_non_sso.buffer_size;
3640 const data_type* buffer = t_non_sso.data;
3651 const data_type* lut_iter = lut_base_ptr;
3652 for( lut_iter -= lut_width; lut_iter >= lut_begin ; lut_iter -= lut_width ){
3657 substr_cps = byte_count;
3658 for( ; lut_iter >= lut_begin ; lut_iter -= lut_width ){
3660 if( multibyte_index >= end_index )
3665 substr_cps += substr_mbs;
3671 while( iter < end_index ){
3672 width_type bytes = get_codepoint_bytes( buffer[iter] , data_len - iter );
3673 substr_mbs += bytes > 1; iter += bytes; ++substr_cps;
3682 substr_buffer_size = determine_main_buffer_size( byte_count , substr_mbs , &substr_lut_width );
3684 substr_lut_width = 0;
3685 substr_buffer_size = determine_main_buffer_size( byte_count );
3688 data_type* substr_buffer = this->allocate( determine_total_buffer_size( substr_buffer_size ) );
3692 std::memcpy( substr_buffer , buffer + index , byte_count );
3693 substr_buffer[byte_count] =
'\0';
3695 if( substr_lut_width )
3704 if( index == 0 && substr_lut_width == lut_width )
3706 substr_lut_base_ptr - substr_mbs * lut_width
3707 , lut_base_ptr - ( mb_index + substr_mbs ) * lut_width
3708 , substr_mbs * lut_width
3711 for(
const data_type* lut_iter = lut_base_ptr - mb_index * lut_width; substr_mbs-- > 0 ; )
3713 substr_lut_base_ptr -= substr_lut_width
3719 for(
size_type substr_iter = 0 ; substr_iter < byte_count ; ){
3720 width_type bytes = get_codepoint_bytes( substr_buffer[substr_iter] , byte_count - substr_iter );
3723 substr_iter += bytes;
3739 template<
typename V,
typename D,
typename A>
3743 bool app_sso_inactive = app.sso_inactive();
3744 size_type app_data_len = app_sso_inactive ? app.t_non_sso.data_len : app.get_sso_data_len();
3745 if( app_data_len == 0 )
3750 size_type new_data_len = old_data_len + app_data_len;
3754 std::memcpy( t_sso.data + old_data_len , app.t_sso.data , app_data_len );
3755 t_sso.data[new_data_len] =
'\0';
3756 set_sso_data_len( (
unsigned char)new_data_len );
3764 bool app_lut_active;
3770 if( app.sso_inactive() )
3772 app_buffer_size = app.t_non_sso.buffer_size;
3773 app_buffer = app.t_non_sso.data;
3774 app_string_len = app.get_non_sso_string_len();
3779 if( app_lut_active )
3783 for(
size_type iter = 0 ; iter < app_data_len ; ){
3784 width_type bytes = get_codepoint_bytes( app_buffer[iter] , app_data_len - iter );
3785 app_lut_len += bytes > 1; iter += bytes;
3791 app_lut_active =
false;
3793 app_buffer = app.t_sso.data;
3796 for(
size_type iter = 0 ; iter < app_data_len ; ){
3797 width_type bytes = get_codepoint_bytes( app_buffer[iter] , app_data_len - iter );
3798 app_lut_len += bytes > 1; iter += bytes; ++app_string_len;
3807 bool old_lut_active;
3809 bool old_sso_inactive = sso_inactive();
3810 if( old_sso_inactive )
3812 old_buffer_size = t_non_sso.buffer_size;
3813 old_buffer = t_non_sso.data;
3814 old_string_len = get_non_sso_string_len();
3819 if( old_lut_active )
3823 for(
size_type iter = 0 ; iter < old_data_len ; ){
3824 width_type bytes = get_codepoint_bytes( old_buffer[iter] , old_data_len - iter );
3825 old_lut_len += bytes > 1; iter += bytes;
3831 old_buffer = t_sso.data;
3836 old_lut_active =
false;
3837 while( iter < old_data_len ){
3838 width_type bytes = get_codepoint_bytes( old_buffer[iter] , old_data_len - iter );
3839 old_lut_len += bytes > 1; iter += bytes; ++old_string_len;
3845 size_type new_lut_len = old_lut_len + app_lut_len;
3846 size_type new_string_len = old_string_len + app_string_len;
3854 new_buffer_size = determine_main_buffer_size( new_data_len , new_lut_len , &new_lut_width );
3857 new_buffer_size = determine_main_buffer_size( new_data_len );
3861 if( new_buffer_size <= old_buffer_size )
3873 data_type* lut_dest_iter = old_lut_base_ptr - old_lut_len * new_lut_width;
3874 if( app_lut_active )
3877 const data_type* app_lut_iter = app_lut_base_ptr;
3878 while( app_lut_len-- > 0 )
3880 lut_dest_iter -= new_lut_width
3887 while( iter < app_data_len ){
3888 width_type bytes = get_codepoint_bytes( app_buffer[iter] , app_data_len - iter );
3902 std::memcpy( old_buffer + old_data_len , app_buffer , app_data_len );
3903 old_buffer[new_data_len] =
'\0';
3907 new_buffer_size <<= 1;
3908 data_type* new_buffer = this->allocate( determine_total_buffer_size( new_buffer_size ) );
3912 std::memcpy( new_buffer , old_buffer , old_data_len );
3913 std::memcpy( new_buffer + old_data_len , app_buffer , app_data_len );
3914 new_buffer[new_data_len] =
'\0';
3923 if( old_lut_active )
3928 if( new_lut_width != old_lut_width )
3931 data_type* new_lut_iter = new_lut_base_ptr;
3933 while( num_indices-- > 0 )
3935 new_lut_iter -= new_lut_width
3942 new_lut_base_ptr - old_lut_len * new_lut_width
3943 , old_lut_base_ptr - old_lut_len * old_lut_width
3944 , old_lut_len * old_lut_width
3949 data_type* new_lut_iter = new_lut_base_ptr;
3951 while( iter < old_data_len ){
3952 width_type bytes = get_codepoint_bytes( old_buffer[iter] , old_data_len - iter );
3960 data_type* lut_dest_iter = new_lut_base_ptr - old_lut_len * new_lut_width;
3961 if( app_lut_active )
3964 const data_type* app_lut_iter = app_lut_base_ptr;
3965 while( app_lut_len-- > 0 )
3967 lut_dest_iter -= new_lut_width
3974 while( app_iter < app_data_len ){
3975 width_type bytes = get_codepoint_bytes( app_buffer[app_iter] , app_data_len - app_iter );
3988 if( old_sso_inactive )
3989 this->deallocate( old_buffer , old_buffer_size );
3992 t_non_sso.data = new_buffer;
3993 t_non_sso.buffer_size = new_buffer_size;
3997 t_non_sso.data_len = new_data_len;
3998 set_non_sso_string_len( new_string_len );
4003 template<
typename V,
typename D,
typename A>
4007 size_type old_data_len = size();
4008 if( index > old_data_len ){
4009 TINY_UTF8_THROW(
"tiny_utf8::basic_string::(raw_)insert" , index > old_data_len );
4014 size_type str_data_len = str.size();
4015 size_type new_data_len = old_data_len + str_data_len;
4018 if( str_data_len == 0 )
4025 std::memmove( t_sso.data + index + str_data_len , t_sso.data + index , old_data_len - index );
4028 std::memcpy( t_sso.data + index , str.t_sso.data , str_data_len );
4031 t_sso.data[new_data_len] =
'\0';
4032 set_sso_data_len( (
unsigned char)new_data_len );
4041 bool str_lut_active;
4042 const data_type* str_buffer;
4043 const data_type* str_lut_base_ptr;
4044 size_type str_buffer_size;
4045 size_type str_string_len;
4046 size_type str_lut_len;
4047 if( str.sso_inactive() )
4049 str_buffer_size = str.t_non_sso.buffer_size;
4050 str_buffer = str.t_non_sso.data;
4051 str_string_len = str.get_non_sso_string_len();
4056 if( str_lut_active )
4060 for( size_type iter = 0 ; iter < str_data_len ; ){
4061 width_type bytes = get_codepoint_bytes( str_buffer[iter] , str_data_len - iter );
4062 str_lut_len += bytes > 1; iter += bytes;
4068 str_lut_active =
false;
4070 str_buffer = str.t_sso.data;
4073 for( size_type iter = 0 ; iter < str_data_len ; ){
4074 width_type bytes = get_codepoint_bytes( str_buffer[iter] , str_data_len - iter );
4075 str_lut_len += bytes > 1; iter += bytes; ++str_string_len;
4080 data_type* old_buffer;
4081 data_type* old_lut_base_ptr;
4082 size_type old_buffer_size;
4083 size_type old_string_len;
4084 bool old_lut_active;
4085 size_type mb_index = 0;
4086 size_type old_lut_len;
4087 bool old_sso_inactive = sso_inactive();
4088 if( old_sso_inactive )
4090 old_buffer_size = t_non_sso.buffer_size;
4091 old_buffer = t_non_sso.data;
4092 old_string_len = get_non_sso_string_len();
4094 while( iter < index ){
4095 width_type bytes = get_codepoint_bytes( old_buffer[iter] , old_data_len - iter );
4096 mb_index += bytes > 1; iter += bytes;
4101 if( old_lut_active )
4104 old_lut_len = mb_index;
4105 while( iter < old_data_len ){
4106 width_type bytes = get_codepoint_bytes( old_buffer[iter] , old_data_len - iter );
4107 old_lut_len += bytes > 1; iter += bytes;
4113 old_buffer = t_sso.data;
4118 old_lut_active =
false;
4119 while( iter < index ){
4120 width_type bytes = get_codepoint_bytes( old_buffer[iter] , old_data_len - iter );
4121 mb_index += bytes > 1; iter += bytes; ++old_string_len;
4123 old_lut_len = mb_index;
4124 while( iter < old_data_len ){
4125 width_type bytes = get_codepoint_bytes( old_buffer[iter] , old_data_len - iter );
4126 old_lut_len += bytes > 1; iter += bytes; ++old_string_len;
4132 size_type new_lut_len = old_lut_len + str_lut_len;
4133 size_type new_string_len = old_string_len + str_string_len;
4134 size_type new_buffer_size;
4135 width_type new_lut_width;
4139 new_buffer_size = determine_main_buffer_size( new_data_len , new_lut_len , &new_lut_width );
4142 new_buffer_size = determine_main_buffer_size( new_data_len );
4146 if( new_buffer_size <= old_buffer_size )
4158 if( old_lut_active )
4161 data_type* lut_iter = old_lut_base_ptr - mb_index * new_lut_width;
4162 size_type num_indices = old_lut_len - mb_index;
4163 while( num_indices-- > 0 ){
4164 lut_iter -= new_lut_width;
4174 old_lut_base_ptr - new_lut_len * new_lut_width
4175 , old_lut_base_ptr - old_lut_len * new_lut_width
4176 , ( old_lut_len - mb_index ) * new_lut_width
4185 data_type* lut_iter = old_lut_base_ptr;
4186 while( iter < index ){
4187 width_type bytes = get_codepoint_bytes( old_buffer[iter] , old_data_len - iter );
4194 lut_iter -= str_lut_len * new_lut_width;
4195 while( iter < old_data_len ){
4196 width_type bytes = get_codepoint_bytes( old_buffer[iter] , old_data_len - iter );
4206 data_type* lut_dest_iter = old_lut_base_ptr - mb_index * new_lut_width;
4207 if( str_lut_active )
4210 const data_type* str_lut_iter = str_lut_base_ptr;
4211 while( str_lut_len-- > 0 )
4213 lut_dest_iter -= new_lut_width
4220 while( iter < str_data_len ){
4221 width_type bytes = get_codepoint_bytes( str_buffer[iter] , str_data_len - iter );
4232 std::memmove( old_buffer + index + str_data_len , old_buffer + index , old_data_len - index );
4233 old_buffer[new_data_len] =
'\0';
4236 std::memcpy( old_buffer + index , str_buffer , str_data_len );
4240 new_buffer_size <<= 1;
4241 data_type* new_buffer = this->allocate( determine_total_buffer_size( new_buffer_size ) );
4245 std::memcpy( new_buffer , old_buffer , index );
4248 std::memcpy( new_buffer + index , str_buffer , str_data_len );
4251 std::memcpy( new_buffer + index + str_data_len , old_buffer + index , old_data_len - index );
4252 new_buffer[new_data_len] =
'\0';
4261 if( old_lut_active )
4266 if( new_lut_width != old_lut_width )
4268 data_type* lut_iter = old_lut_base_ptr;
4269 data_type* new_lut_iter = new_lut_base_ptr;
4270 size_type num_indices = mb_index;
4271 while( num_indices-- > 0 )
4273 new_lut_iter -= new_lut_width
4280 new_lut_base_ptr - mb_index * new_lut_width
4281 , old_lut_base_ptr - mb_index * old_lut_width
4282 , mb_index * old_lut_width
4287 data_type* lut_iter = old_lut_base_ptr - mb_index * old_lut_width;
4288 data_type* new_lut_iter = new_lut_base_ptr - ( mb_index + str_lut_len ) * new_lut_width;
4289 size_type num_indices = old_lut_len - mb_index;
4290 while( num_indices-- > 0 )
4292 new_lut_iter -= new_lut_width
4301 data_type* lut_iter = new_lut_base_ptr;
4302 while( iter < index ){
4303 width_type bytes = get_codepoint_bytes( old_buffer[iter] , old_data_len - iter );
4310 lut_iter -= str_lut_len * new_lut_width;
4311 while( iter < old_data_len ){
4312 width_type bytes = get_codepoint_bytes( old_buffer[iter] , old_data_len - iter );
4320 data_type* lut_dest_iter = new_lut_base_ptr - mb_index * new_lut_width;
4321 if( str_lut_active )
4324 const data_type* str_lut_iter = str_lut_base_ptr;
4325 while( str_lut_len-- > 0 )
4327 lut_dest_iter -= new_lut_width
4333 size_type str_iter = 0;
4334 while( str_iter < str_data_len ){
4335 width_type bytes = get_codepoint_bytes( str_buffer[str_iter] , str_data_len - str_iter );
4348 if( old_sso_inactive )
4349 this->deallocate( old_buffer , old_buffer_size );
4352 t_non_sso.data = new_buffer;
4353 t_non_sso.buffer_size = new_buffer_size;
4357 t_non_sso.data_len = new_data_len;
4358 set_non_sso_string_len( new_string_len );
4363 template<
typename V,
typename D,
typename A>
4364 basic_string<V, D, A>&
basic_string<V, D, A>::raw_replace(
typename basic_string<V, D, A>::size_type index ,
typename basic_string<V, D, A>::size_type replaced_len ,
const basic_string<V, D, A>& repl )
noexcept(
TINY_UTF8_NOEXCEPT)
4368 if( index > old_data_len ){
4369 TINY_UTF8_THROW(
"tiny_utf8::basic_string::(raw_)replace" , index > old_data_len );
4372 size_type end_index = index + replaced_len;
4373 if( end_index > old_data_len || end_index < index ){
4374 end_index = old_data_len;
4375 replaced_len = end_index - index;
4381 size_type new_data_len = old_data_len + delta_len;
4384 if( !new_data_len ){
4388 else if( replaced_len == 0 && repl_data_len == 0 )
4392 bool old_sso_inactive = sso_inactive();
4396 if( old_sso_inactive )
4399 size_type old_buffer_size = t_non_sso.buffer_size;
4402 std::memcpy( t_sso.data , old_buffer , index );
4405 std::memcpy( t_sso.data + index + repl_data_len , old_buffer + end_index , old_data_len - end_index );
4407 this->deallocate( old_buffer , old_buffer_size );
4410 else if( new_data_len != old_data_len )
4411 std::memmove( t_sso.data + index + repl_data_len , t_sso.data + index + replaced_len , old_data_len - index );
4414 std::memcpy( t_sso.data + index , repl.t_sso.data , repl_data_len );
4417 t_sso.data[new_data_len] =
'\0';
4418 set_sso_data_len( (
unsigned char)new_data_len );
4426 bool repl_lut_active;
4432 if( repl.sso_inactive() )
4434 repl_buffer_size = repl.t_non_sso.buffer_size;
4435 repl_buffer = repl.t_non_sso.data;
4436 repl_string_len = repl.get_non_sso_string_len();
4441 if( repl_lut_active )
4445 for(
size_type iter = 0 ; iter < repl_data_len ; ){
4446 width_type bytes = get_codepoint_bytes( repl_buffer[iter] , repl_data_len - iter );
4447 repl_lut_len += bytes > 1; iter += bytes;
4453 repl_lut_active =
false;
4454 repl_string_len = 0;
4455 repl_buffer = repl.t_sso.data;
4458 for(
size_type iter = 0 ; iter < repl_data_len ; ){
4459 width_type bytes = get_codepoint_bytes( repl_buffer[iter] , repl_data_len - iter );
4460 repl_lut_len += bytes > 1; iter += bytes; ++repl_string_len;
4469 bool old_lut_active;
4474 if( old_sso_inactive )
4476 old_buffer_size = t_non_sso.buffer_size;
4477 old_buffer = t_non_sso.data;
4478 old_string_len = get_non_sso_string_len();
4480 while( iter < index ){
4481 width_type bytes = get_codepoint_bytes( old_buffer[iter] , old_data_len - iter );
4482 mb_index += bytes > 1; iter += bytes;
4484 while( iter < end_index ){
4485 width_type bytes = get_codepoint_bytes( old_buffer[iter] , old_data_len - iter );
4486 replaced_mbs += bytes > 1; iter += bytes; ++replaced_cps;
4491 if( old_lut_active )
4494 old_lut_len = mb_index + replaced_mbs;
4495 while( iter < old_data_len ){
4496 width_type bytes = get_codepoint_bytes( old_buffer[iter] , old_data_len - iter );
4497 old_lut_len += bytes > 1; iter += bytes;
4503 old_buffer = t_sso.data;
4508 old_lut_active =
false;
4509 while( iter < index ){
4510 width_type bytes = get_codepoint_bytes( old_buffer[iter] , old_data_len - iter );
4511 mb_index += bytes > 1; iter += bytes; ++old_string_len;
4513 while( iter < end_index ){
4514 width_type bytes = get_codepoint_bytes( old_buffer[iter] , old_data_len - iter );
4515 replaced_mbs += bytes > 1; iter += bytes; ++replaced_cps;
4517 old_lut_len = mb_index + replaced_mbs;
4518 while( iter < old_data_len ){
4519 width_type bytes = get_codepoint_bytes( old_buffer[iter] , old_data_len - iter );
4520 old_lut_len += bytes > 1; iter += bytes; ++old_string_len;
4522 old_string_len += replaced_cps;
4527 size_type new_lut_len = old_lut_len - replaced_mbs + repl_lut_len;
4528 size_type new_string_len = old_string_len - replaced_cps + repl_string_len;
4535 new_buffer_size = determine_main_buffer_size( new_data_len , new_lut_len , &new_lut_width );
4538 new_buffer_size = determine_main_buffer_size( new_data_len );
4542 if( new_buffer_size <= old_buffer_size )
4554 if( old_lut_active )
4556 size_type mb_end_index = mb_index + replaced_mbs;
4560 data_type* lut_iter = old_lut_base_ptr - mb_end_index * new_lut_width;
4561 size_type num_indices = old_lut_len - mb_end_index;
4562 while( num_indices-- > 0 ){
4563 lut_iter -= new_lut_width;
4570 if( replaced_mbs != repl_lut_len )
4574 old_lut_base_ptr - new_lut_len * new_lut_width
4575 , old_lut_base_ptr - old_lut_len * new_lut_width
4576 , ( old_lut_len - mb_end_index ) * new_lut_width
4587 while( iter < index ){
4588 width_type bytes = get_codepoint_bytes( old_buffer[iter] , old_data_len - iter );
4595 iter += replaced_len;
4596 lut_iter -= repl_lut_len * new_lut_width;
4597 while( iter < old_data_len ){
4598 width_type bytes = get_codepoint_bytes( old_buffer[iter] , old_data_len - iter );
4608 data_type* lut_dest_iter = old_lut_base_ptr - mb_index * new_lut_width;
4609 if( repl_lut_active )
4612 const data_type* repl_lut_iter = repl_lut_base_ptr;
4613 while( repl_lut_len-- > 0 )
4615 lut_dest_iter -= new_lut_width
4622 while( iter < repl_data_len ){
4623 width_type bytes = get_codepoint_bytes( repl_buffer[iter] , repl_data_len - iter );
4634 if( new_data_len != old_data_len ){
4635 std::memmove( old_buffer + index + repl_data_len , old_buffer + end_index , old_data_len - end_index );
4636 t_non_sso.data_len = new_data_len;
4637 old_buffer[new_data_len] =
'\0';
4641 std::memcpy( old_buffer + index , repl_buffer , repl_data_len );
4645 new_buffer_size <<= 1;
4646 data_type* new_buffer = this->allocate( determine_total_buffer_size( new_buffer_size ) );
4650 std::memcpy( new_buffer , old_buffer , index );
4653 std::memcpy( new_buffer + index , repl_buffer , repl_data_len );
4656 std::memcpy( new_buffer + index + repl_data_len , old_buffer + end_index , old_data_len - end_index );
4657 new_buffer[new_data_len] =
'\0';
4666 if( old_lut_active )
4668 size_type mb_end_index = mb_index + replaced_mbs;
4672 if( new_lut_width != old_lut_width )
4675 data_type* new_lut_iter = new_lut_base_ptr;
4677 while( num_indices-- > 0 )
4679 new_lut_iter -= new_lut_width
4686 new_lut_base_ptr - mb_index * new_lut_width
4687 , old_lut_base_ptr - mb_index * old_lut_width
4688 , mb_index * old_lut_width
4693 if( delta_len || new_lut_width != old_lut_width ){
4694 data_type* lut_iter = old_lut_base_ptr - mb_end_index * old_lut_width;
4695 data_type* new_lut_iter = new_lut_base_ptr - ( mb_index + repl_lut_len ) * new_lut_width;
4696 size_type num_indices = old_lut_len - mb_end_index;
4697 while( num_indices-- > 0 )
4699 new_lut_iter -= new_lut_width
4706 new_lut_base_ptr - new_lut_len * new_lut_width
4707 , old_lut_base_ptr - old_lut_len * old_lut_width
4708 , ( old_lut_len - mb_end_index ) * old_lut_width
4716 while( iter < index ){
4717 width_type bytes = get_codepoint_bytes( old_buffer[iter] , old_data_len - iter );
4724 iter += replaced_len;
4725 lut_iter -= repl_lut_len * new_lut_width;
4726 while( iter < old_data_len ){
4727 width_type bytes = get_codepoint_bytes( old_buffer[iter] , old_data_len - iter );
4735 data_type* lut_dest_iter = new_lut_base_ptr - mb_index * new_lut_width;
4736 if( repl_lut_active )
4739 const data_type* repl_lut_iter = repl_lut_base_ptr;
4740 while( repl_lut_len-- > 0 )
4742 lut_dest_iter -= new_lut_width
4749 while( repl_iter < repl_data_len ){
4750 width_type bytes = get_codepoint_bytes( repl_buffer[repl_iter] , repl_data_len - repl_iter );
4763 if( old_sso_inactive )
4764 this->deallocate( old_buffer , old_buffer_size );
4767 t_non_sso.data = new_buffer;
4768 t_non_sso.data_len = new_data_len;
4769 t_non_sso.buffer_size = new_buffer_size;
4773 set_non_sso_string_len( new_string_len );
4778 template<
typename V,
typename D,
typename A>
4783 if( index > old_data_len ){
4784 TINY_UTF8_THROW(
"tiny_utf8::basic_string::(raw_)erase" , index > old_data_len );
4790 if( end_index > old_data_len || end_index < index ){
4791 end_index = old_data_len;
4792 len = end_index - index;
4796 size_type new_data_len = old_data_len - len;
4799 if( !new_data_len ){
4805 bool old_sso_inactive = sso_inactive();
4809 if( old_sso_inactive )
4812 size_type old_buffer_size = t_non_sso.buffer_size;
4815 std::memcpy( t_sso.data , old_buffer , index );
4818 std::memcpy( t_sso.data + index , old_buffer + end_index , old_data_len - end_index );
4820 this->deallocate( old_buffer , old_buffer_size );
4823 else if( new_data_len != old_data_len )
4824 std::memmove( t_sso.data + index , t_sso.data + index + len , old_data_len - index );
4827 t_sso.data[new_data_len] =
'\0';
4828 set_sso_data_len( (
unsigned char)new_data_len );
4838 size_type old_buffer_size = t_non_sso.buffer_size;
4844 t_non_sso.data_len -= len;
4847 if( old_lut_active )
4854 while( iter < index ){
4855 width_type bytes = get_codepoint_bytes( old_buffer[iter] , old_data_len - iter );
4856 mb_end_index += bytes > 1; iter += bytes;
4858 while( iter < end_index ){
4859 width_type bytes = get_codepoint_bytes( old_buffer[iter] , old_data_len - iter );
4860 replaced_mbs += bytes > 1; iter += bytes; ++replaced_cps;
4862 mb_end_index += replaced_mbs;
4865 data_type* lut_iter = old_lut_base_ptr - mb_end_index * old_lut_width;
4866 size_type num_indices = old_lut_len - mb_end_index;
4867 while( num_indices-- > 0 ){
4868 lut_iter -= old_lut_width;
4876 size_type new_lut_len = old_lut_len - replaced_mbs;
4879 old_lut_base_ptr - new_lut_len * old_lut_width
4880 , old_lut_base_ptr - old_lut_len * old_lut_width
4881 , ( old_lut_len - mb_end_index ) * old_lut_width
4890 while( iter < index )
4891 iter += get_codepoint_bytes( old_buffer[iter] , old_data_len - iter );
4892 while( iter < end_index ){
4893 iter += get_codepoint_bytes( old_buffer[iter] , old_data_len - iter );
4899 std::memmove( old_buffer + index , old_buffer + end_index , old_data_len - end_index + 1 );
4902 set_non_sso_string_len( get_non_sso_string_len() - replaced_cps );
4907 template<
typename V,
typename D,
typename A>
4909 if( index >= size() )
4910 index = raw_back_index();
4911 for(
difference_type it = index ; it >= 0 ; it -= get_index_pre_bytes( it ) )
4912 if( raw_at(it) == cp )
4917 template<
typename V,
typename D,
typename A>
4920 if( start_pos >= length() )
4923 for(
const_iterator it = get( start_pos ), end = cend() ; it < end ; ++it, ++start_pos )
4936 template<
typename V,
typename D,
typename A>
4939 if( index >= size() )
4942 for(
const_iterator it = raw_get(index), end = cend() ; it < end ; ++it )
4955 template<
typename V,
typename D,
typename A>
4960 if( start_pos >= string_len ){
4962 start_pos = string_len - 1;
4965 it = rget( start_pos );
4979 template<
typename V,
typename D,
typename A>
4985 if( index >= size() )
4986 index = raw_back_index();
4988 for(
difference_type it = index ; it >= 0 ; it -= get_index_pre_bytes( it ) ){
5000 template<
typename V,
typename D,
typename A>
5003 if( start_pos >= length() )
5006 for(
const_iterator it = get(start_pos) , end = cend() ; it != end ; ++it, ++start_pos ){
5020 template<
typename V,
typename D,
typename A>
5023 if( index >= size() )
5026 for(
const_iterator it = raw_get(index), end = cend() ; it < end ; ++it )
5041 template<
typename V,
typename D,
typename A>
5049 if( start_pos >= string_len ){
5051 start_pos = string_len - 1;
5054 it = rget( start_pos );
5056 for( ; it < end ; ++it, --start_pos ){
5070 template<
typename V,
typename D,
typename A>
5076 if( index >= size() )
5077 index = raw_back_index();
5079 for(
difference_type it = index ; it >= 0 ; it -= get_index_pre_bytes( it ) )
5099#if defined (__clang__)
5100#pragma clang diagnostic pop
5101#elif defined (__GNUC__)
5102#pragma GCC diagnostic pop
5103#elif defined (_MSC_VER)
Definition tinyutf8.h:703
raw_const_iterator raw_end() const noexcept
Definition tinyutf8.h:1646
size_type get_num_bytes(size_type byte_start, size_type cp_count) const noexcept
Definition tinyutf8.h:3531
void set_sso_data_len(unsigned char data_len=0) noexcept
Set the data length (also enables SSO)
Definition tinyutf8.h:1026
basic_string(std::basic_string< data_type, C, A > str, const allocator_type &alloc=allocator_type()) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1208
basic_string & erase(iterator first, iterator last) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:2070
const_iterator cend() const noexcept
Definition tinyutf8.h:1684
basic_string & assign(const data_type *str, size_type len) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1922
basic_string(T &&str, const allocator_type &alloc=allocator_type(), enable_if_ptr< T, data_type > *={}) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1117
static constexpr T get_msb_mask(width_type bytes=sizeof(T)) noexcept
Definition tinyutf8.h:1041
bool operator!=(const basic_string &str) const noexcept
Definition tinyutf8.h:2569
basic_string(value_type cp, const allocator_type &alloc=allocator_type()) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1350
static size_type determine_main_buffer_size(size_type data_len, size_type lut_len, width_type lut_width) noexcept
Determine the needed buffer size if the lut width is known (excluding the trailing LUT indicator)
Definition tinyutf8.h:845
basic_string operator+(basic_string summand) const &noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1853
basic_string substr(iterator first, iterator last) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:2107
basic_string(basic_string &&str) noexcept(TINY_UTF8_NOEXCEPT &&std::is_nothrow_move_constructible< Allocator >())
Definition tinyutf8.h:1375
reverse_iterator rbegin() noexcept
Definition tinyutf8.h:1654
const data_type * c_str() const noexcept
Definition tinyutf8.h:1592
static bool is_lut_active(const data_type *lut_base_ptr) noexcept
Check whether the LUT is active, using the LUT base pointer.
Definition tinyutf8.h:797
static size_type determine_total_buffer_size(size_type main_buffer_size) noexcept
Same as above but this time including the LUT indicator.
Definition tinyutf8.h:854
size_type raw_find_last_of(const value_type *str, size_type start_byte=basic_string::npos) const noexcept
Definition tinyutf8.h:4980
reverse_iterator rget(size_type n) noexcept
Definition tinyutf8.h:1552
basic_string & replace(raw_iterator first, raw_iterator last, value_type repl) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1766
basic_string & replace(raw_iterator first, raw_iterator last, const basic_string &repl) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1783
const_iterator begin() const noexcept
Definition tinyutf8.h:1635
basic_string & replace(raw_iterator first, raw_iterator last, value_type repl, size_type n) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1763
const_reverse_iterator rbegin() const noexcept
Definition tinyutf8.h:1655
tiny_utf8::iterator< basic_string, true > raw_iterator
Definition tinyutf8.h:720
bool operator!=(const std::string &str) const noexcept
Definition tinyutf8.h:2571
size_type raw_find(value_type cp, size_type start_byte=0) const noexcept
Definition tinyutf8.h:2190
int compare(T str, enable_if_ptr< T, data_type > *={}) const noexcept
Definition tinyutf8.h:2493
size_type raw_find_first_of(const value_type *str, size_type start_byte=0) const noexcept
Definition tinyutf8.h:4937
size_type find_last_not_of(const value_type *str, size_type start_codepoint=basic_string::npos) const noexcept
Definition tinyutf8.h:5042
bool operator==(const basic_string &str) const noexcept
Equality Comparison Operators.
Definition tinyutf8.h:2568
basic_string & replace(iterator first, raw_iterator last, const basic_string &repl) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1787
basic_string substr(raw_iterator first, raw_iterator last) const noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:2101
bool operator>=(const value_type(&str)[LITLEN]) const noexcept
Definition tinyutf8.h:2603
int compare(T str, enable_if_ptr< T, value_type > *={}) const noexcept
Definition tinyutf8.h:2536
basic_string & erase(iterator first, raw_iterator last) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:2069
basic_string(InputIt first, InputIt last, const allocator_type &alloc=allocator_type()) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1269
basic_string(const data_type *str, size_type len, const allocator_type &alloc=allocator_type()) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1129
tiny_utf8::const_reverse_iterator< basic_string, false > const_reverse_iterator
Definition tinyutf8.h:719
@ npos
Definition tinyutf8.h:726
basic_string & assign(size_type count, value_type cp) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1880
size_type get_buffer_size() const noexcept
Get buffer size (excluding the trailing LUT indicator)
Definition tinyutf8.h:1051
tiny_utf8::reverse_iterator< basic_string, true > raw_reverse_iterator
Definition tinyutf8.h:722
typename std::enable_if<(L<=SSO::size), bool >::type enable_if_small_string
SFINAE helpers for constructors.
Definition tinyutf8.h:776
tiny_utf8::reverse_iterator< basic_string, false > reverse_iterator
Definition tinyutf8.h:718
void shrink_to_fit() noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:3297
data_type * allocate(size_type total_buffer_size) const noexcept
Allocates size_type-aligned storage (make sure, total_buffer_size is a multiple of sizeof(size_type)!...
Definition tinyutf8.h:1059
enable_if_ptr< T, value_type > operator!=(T &&str) const noexcept
Definition tinyutf8.h:2575
bool starts_with(const value_type(&str)[LITLEN]) const noexcept
Definition tinyutf8.h:2347
raw_const_reverse_iterator raw_crbegin() const noexcept
Definition tinyutf8.h:1695
raw_reference back() noexcept
Definition tinyutf8.h:1718
bool operator<=(const std::string &str) const noexcept
Definition tinyutf8.h:2589
static data_type * get_lut_base_ptr(data_type *buffer, size_type buffer_size) noexcept
Get the LUT base pointer from buffer and buffer size.
Definition tinyutf8.h:805
basic_string & replace(iterator first, raw_iterator last, value_type repl) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1773
bool operator>=(const basic_string &str) const noexcept
Definition tinyutf8.h:2583
bool starts_with(const data_type(&str)[LITLEN]) const noexcept
Definition tinyutf8.h:2325
raw_reference front() noexcept
Definition tinyutf8.h:1711
const_reverse_iterator rget(size_type n) const noexcept
Definition tinyutf8.h:1553
static width_type get_num_bytes_of_utf8_char_before(const data_type *data_start, size_type index) noexcept
Returns the number of bytes to expect before this one (including this one) that belong to this utf8 c...
Definition tinyutf8.h:3142
static constexpr size_type get_sso_capacity() noexcept
Static helper methods.
Definition tinyutf8.h:772
ValueType value_type
Definition tinyutf8.h:709
bool starts_with(const basic_string &str) const noexcept
Definition tinyutf8.h:2281
bool operator<=(const data_type(&str)[LITLEN]) const noexcept
Definition tinyutf8.h:2601
bool starts_with(T str, enable_if_ptr< T, data_type > *={}) const noexcept
Definition tinyutf8.h:2311
basic_string & replace(size_type index, size_type len, value_type repl) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1751
basic_string & replace(iterator first, iterator last, const basic_string &repl) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1788
basic_string & assign(std::initializer_list< value_type > ilist) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1973
basic_string & erase(size_type pos, size_type len=1) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:2078
void clear() noexcept
Definition tinyutf8.h:1438
bool empty() const noexcept
Definition tinyutf8.h:1623
width_type get_index_bytes(size_type byte_index) const noexcept
Get the number of bytes of codepoint in basic_string.
Definition tinyutf8.h:2609
basic_string(const value_type(&str)[LITLEN], const allocator_type &alloc=allocator_type()) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1330
static void set_lut(data_type *iter, width_type lut_width, size_type value) noexcept
Definition tinyutf8.h:867
bool requires_unicode() const noexcept
tinyutf8-specific features
Definition tinyutf8.h:2654
raw_const_reverse_iterator raw_rbegin() const noexcept
Definition tinyutf8.h:1657
size_type find_first_not_of(const value_type *str, size_type start_codepoint=0) const noexcept
Find absence of characters in string.
Definition tinyutf8.h:5001
bool operator==(const value_type(&str)[LITLEN]) const noexcept
Definition tinyutf8.h:2578
friend enable_if_ptr< T, value_type, basic_string > operator+(T &&lhs, basic_string rhs) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1868
bool starts_with(T str, enable_if_ptr< T, value_type > *={}) const noexcept
Definition tinyutf8.h:2336
basic_string & insert(size_type pos, value_type cp) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1985
value_type at(size_type n, std::nothrow_t) const noexcept
Definition tinyutf8.h:1496
bool sso_inactive() const noexcept
Check whether SSO is inactive (this operation doesn't require a negation and is faster)
Definition tinyutf8.h:1037
typename std::enable_if<(L > SSO::size), bool >::type enable_if_not_small_string
Definition tinyutf8.h:778
static void copy_lut_indicator(data_type *dest, const data_type *source) noexcept
Copy lut indicator.
Definition tinyutf8.h:813
friend enable_if_ptr< T, data_type, basic_string > operator+(T &&lhs, basic_string rhs) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1867
bool starts_with(value_type cp) const noexcept
Definition tinyutf8.h:2301
basic_string & assign(basic_string &&str) noexcept(TINY_UTF8_NOEXCEPT &&std::is_nothrow_move_assignable< Allocator >())
Definition tinyutf8.h:1909
size_type length() const noexcept
Definition tinyutf8.h:1604
bool ends_with(const basic_string &str) const noexcept
Definition tinyutf8.h:2365
bool operator<(const data_type(&str)[LITLEN]) const noexcept
Definition tinyutf8.h:2600
bool operator<(const basic_string &str) const noexcept
Definition tinyutf8.h:2584
basic_string(size_type n, value_type cp, const allocator_type &alloc=allocator_type()) noexcept(TINY_UTF8_NOEXCEPT)
static width_type get_lut_width(size_type buffer_size) noexcept
Definition tinyutf8.h:819
allocator_type get_allocator() const noexcept(TINY_UTF8_NOEXCEPT &&std::is_nothrow_copy_constructible< Allocator >())
Definition tinyutf8.h:1451
bool ends_with(const data_type(&str)[LITLEN]) const noexcept
Definition tinyutf8.h:2406
basic_string(std::initializer_list< value_type > ilist, const allocator_type &alloc=allocator_type()) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1340
value_type operator[](size_type n) const noexcept
Definition tinyutf8.h:1573
bool ends_with(value_type cp) const noexcept
Definition tinyutf8.h:2385
friend basic_string operator+(basic_string lhs, const value_type(&rhs)[LITLEN]) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1862
friend basic_string operator+(const data_type(&lhs)[LITLEN], basic_string rhs) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1869
friend basic_string operator+(const value_type(&lhs)[LITLEN], basic_string rhs) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1870
reference operator[](size_type n) noexcept
Definition tinyutf8.h:1572
raw_const_iterator raw_get(size_type n) const noexcept
Definition tinyutf8.h:1543
std::uint_fast8_t width_type
Definition tinyutf8.h:715
size_type get_num_codepoints(size_type byte_start, size_type byte_count) const noexcept
Definition tinyutf8.h:3423
friend basic_string operator+(value_type lhs, basic_string rhs) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1866
basic_string & replace(size_type index, size_type count, const basic_string &repl) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1797
typename std::enable_if< std::is_pointer< typename std::remove_reference< T >::type >::value && std::is_same< CharType, typename std::remove_cv< typename std::remove_pointer< typename std::remove_reference< T >::type >::type >::type >::value, _DataType >::type enable_if_ptr
Definition tinyutf8.h:782
basic_string & assign(const value_type *str, size_type len) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1944
basic_string raw_substr(size_type start_byte, size_type byte_count) const noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:3601
raw_iterator raw_begin() noexcept
Definition tinyutf8.h:1636
raw_codepoint_reference< basic_string, true > raw_checked_reference
Definition tinyutf8.h:713
static void set_lut_indiciator(data_type *lut_base_ptr, bool active, size_type lut_len=0) noexcept
Construct the lut mode indicator.
Definition tinyutf8.h:809
size_type find_first_of(const value_type *str, size_type start_codepoint=0) const noexcept
Find characters in string.
Definition tinyutf8.h:4918
friend basic_string operator+(basic_string lhs, const data_type(&rhs)[LITLEN]) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1861
size_type rfind(value_type cp, size_type start_codepoint=basic_string::npos) const noexcept
Definition tinyutf8.h:2240
void to_wide_literal(value_type *dest) const noexcept
Definition tinyutf8.h:2681
basic_string & operator=(basic_string &&str) noexcept(TINY_UTF8_NOEXCEPT &&std::is_nothrow_move_assignable< Allocator >())
Definition tinyutf8.h:1422
basic_string & assign(const basic_string &str) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1889
enable_if_ptr< T, data_type > operator>(T &&str) const noexcept
Definition tinyutf8.h:2590
void swap(basic_string &str) noexcept(TINY_UTF8_NOEXCEPT &&std::is_nothrow_move_assignable< Allocator >())
Definition tinyutf8.h:1466
basic_string & operator+=(value_type cp) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1843
int compare(const basic_string &str) const noexcept
Definition tinyutf8.h:2456
raw_const_reverse_iterator raw_crend() const noexcept
Definition tinyutf8.h:1703
basic_string & raw_insert(size_type pos, value_type cp) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:2037
basic_string & raw_replace(size_type start_byte, size_type byte_count, const basic_string &repl) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:4364
int compare(const std::string &str) const noexcept
Definition tinyutf8.h:2473
basic_string & replace(raw_iterator first, iterator last, const basic_string &repl) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1786
DataType data_type
Definition tinyutf8.h:706
bool requires_unicode_sso() const noexcept
Check whether the string contains codepoints > 127.
Definition tinyutf8.h:3384
raw_checked_reference raw_at(size_type byte_index) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1509
friend enable_if_ptr< T, value_type, basic_string > operator+(basic_string lhs, T &&rhs) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1860
raw_reverse_iterator raw_rbegin() noexcept
Definition tinyutf8.h:1656
basic_string & assign(T &&str, enable_if_ptr< T, value_type > *={}) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1941
static void encode_utf8(value_type cp, data_type *dest, width_type cp_bytes) noexcept
Definition tinyutf8.h:963
codepoint_reference< basic_string, false > reference
Definition tinyutf8.h:710
size_type raw_rfind(value_type cp, size_type start_byte=basic_string::npos) const noexcept
Definition tinyutf8.h:4908
size_type find_last_of(const value_type *str, size_type start_codepoint=basic_string::npos) const noexcept
Definition tinyutf8.h:4956
friend enable_if_ptr< T, data_type, basic_string > operator+(basic_string lhs, T &&rhs) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1859
raw_const_reverse_iterator raw_rend() const noexcept
Definition tinyutf8.h:1667
size_type get_sso_data_len() const noexcept
Get the data length (when SSO is active)
Definition tinyutf8.h:1031
basic_string & replace(size_type index, size_type len, value_type repl, size_type n) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1748
basic_string substr(iterator first, raw_iterator last) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:2106
basic_string & erase(raw_iterator first, raw_iterator last) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:2065
bool operator>=(const std::string &str) const noexcept
Definition tinyutf8.h:2587
enable_if_ptr< T, value_type > operator==(T &&str) const noexcept
Definition tinyutf8.h:2574
basic_string(data_type ch, const allocator_type &alloc=allocator_type()) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1363
basic_string & replace(raw_iterator first, iterator last, value_type repl) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1772
~basic_string() noexcept
Definition tinyutf8.h:1403
friend basic_string operator+(basic_string lhs, data_type rhs) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1857
static value_type decode_utf8(const data_type *data, width_type num_bytes) noexcept
Decodes a given input of rle utf8 data to a unicode codepoint, given the number of bytes it's made of...
Definition tinyutf8.h:938
bool operator<(const value_type(&str)[LITLEN]) const noexcept
Definition tinyutf8.h:2604
checked_reference at(size_type n) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1499
tiny_utf8::iterator< basic_string, false > iterator
Definition tinyutf8.h:716
bool operator>=(const data_type(&str)[LITLEN]) const noexcept
Definition tinyutf8.h:2599
width_type get_index_pre_bytes(size_type byte_index) const noexcept
Get the number of bytes before a codepoint, that build up a new codepoint.
Definition tinyutf8.h:2618
basic_string & assign(const basic_string &str, size_type pos, size_type count) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1900
basic_string(std::basic_string< data_type, C, A > str, size_type len, const allocator_type &alloc=allocator_type()) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1221
basic_string(const data_type *str, size_type count, const allocator_type &alloc, tiny_utf8_detail::read_bytes_tag) noexcept(TINY_UTF8_NOEXCEPT)
basic_string & raw_insert(size_type pos, const basic_string &str) noexcept(TINY_UTF8_NOEXCEPT)
basic_string & append(const basic_string &appendix) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:3740
tiny_utf8::const_iterator< basic_string, true > raw_const_iterator
Definition tinyutf8.h:721
size_type find(const basic_string &pattern, size_type start_codepoint=0) const noexcept
Definition tinyutf8.h:2156
raw_reverse_iterator raw_rend() noexcept
Definition tinyutf8.h:1666
static width_type get_codepoint_bytes(value_type cp) noexcept
Definition tinyutf8.h:906
raw_const_iterator raw_begin() const noexcept
Definition tinyutf8.h:1637
const data_type * get_buffer() const noexcept
Get buffer.
Definition tinyutf8.h:1047
bool operator==(const data_type(&str)[LITLEN]) const noexcept
Definition tinyutf8.h:2576
value_type back() const noexcept
Definition tinyutf8.h:1719
raw_codepoint_reference< basic_string, false > raw_reference
Definition tinyutf8.h:712
size_type get_non_sso_capacity() const noexcept
Return a good guess of how many codepoints the currently allocated buffer can hold.
Definition tinyutf8.h:3362
data_type * data() noexcept
Definition tinyutf8.h:1594
bool ends_with(T str, enable_if_ptr< T, value_type > *={}) const noexcept
Definition tinyutf8.h:2417
const_iterator cbegin() const noexcept
Definition tinyutf8.h:1676
bool lut_active() const noexcept
Definition tinyutf8.h:2672
bool operator>(const std::string &str) const noexcept
Definition tinyutf8.h:2586
raw_const_reverse_iterator raw_rget(size_type n) const noexcept
Definition tinyutf8.h:1563
value_type operator()(size_type n) const noexcept
Definition tinyutf8.h:1583
size_type raw_find_last_not_of(const value_type *str, size_type start_byte=basic_string::npos) const noexcept
Definition tinyutf8.h:5071
static width_type get_codepoint_bytes(data_type first_byte, size_type data_left) noexcept
basic_string & replace(iterator first, iterator last, value_type repl) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1774
basic_string & erase(raw_iterator first, iterator last) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:2068
size_type get_num_bytes_from_start(size_type cp_count) const noexcept
Definition tinyutf8.h:3488
basic_string(std::basic_string< data_type, C, A > str, size_type pos, size_type len, const allocator_type &alloc=allocator_type()) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1226
std::allocator_traits< Allocator >::size_type size_type
Definition tinyutf8.h:707
basic_string & raw_erase(size_type pos, size_type len) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:4779
iterator begin() noexcept
Definition tinyutf8.h:1634
basic_string & assign(T &&str, enable_if_ptr< T, data_type > *={}) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1919
basic_string(const basic_string &str) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1282
const_reverse_iterator crend() const noexcept
Definition tinyutf8.h:1702
basic_string & replace(size_type index, value_type repl, size_type n=1) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1736
NON_SSO t_non_sso
Definition tinyutf8.h:766
static size_type determine_main_buffer_size(size_type data_len, size_type lut_len, width_type *lut_width) noexcept
Determine the needed buffer size and the needed lut width (excluding the trailing LUT indicator)
Definition tinyutf8.h:838
basic_string operator+(const basic_string &summand) &&noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1854
basic_string(const data_type(&str)[LITLEN], const allocator_type &alloc=allocator_type(), enable_if_small_string< LITLEN >={}) noexcept(TINY_UTF8_NOEXCEPT &&std::is_nothrow_copy_constructible< Allocator >())
Definition tinyutf8.h:1156
bool operator<=(const value_type(&str)[LITLEN]) const noexcept
Definition tinyutf8.h:2605
static width_type encode_utf8(value_type cp, data_type *dest) noexcept
Definition tinyutf8.h:983
basic_string substr(size_type pos, size_type len=basic_string::npos) const noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:2115
static size_type get_lut_len(const data_type *lut_base_ptr) noexcept
Get the LUT size (given the lut is active!)
Definition tinyutf8.h:877
raw_const_iterator raw_cbegin() const noexcept
Definition tinyutf8.h:1677
basic_string substr(raw_iterator first, iterator last) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:2105
basic_string & assign(const value_type(&str)[LITLEN]) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1953
const data_type * data() const noexcept
Definition tinyutf8.h:1593
basic_string(const data_type *str, size_type pos, size_type len, const allocator_type &alloc=allocator_type()) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1142
size_type find(const data_type *pattern, size_type start_codepoint=0) const noexcept
Definition tinyutf8.h:2173
basic_string & assign(InputIt first, InputIt last) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1964
basic_string() noexcept(TINY_UTF8_NOEXCEPT &&std::is_nothrow_default_constructible< Allocator >())
Definition tinyutf8.h:1092
basic_string & pop_back() noexcept(TINY_UTF8_NOEXCEPT)
Removes the last codepoint in the basic_string.
Definition tinyutf8.h:2043
size_type find(value_type cp, size_type start_codepoint=0) const noexcept
Definition tinyutf8.h:2141
bool operator<(const std::string &str) const noexcept
Definition tinyutf8.h:2588
raw_reference raw_at(size_type byte_index, std::nothrow_t) noexcept
Definition tinyutf8.h:1510
friend basic_string operator+(data_type lhs, basic_string rhs) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1865
static bool is_lut_worth(size_type pot_lut_len, size_type string_len, bool lut_present, bool biased=true) noexcept
Determine whether or not a LUT is worth setting up. General case: worth below 25%....
Definition tinyutf8.h:831
bool ends_with(const std::string &str) const noexcept
Definition tinyutf8.h:2375
const value_type & const_reference
Definition tinyutf8.h:714
raw_iterator raw_end() noexcept
Definition tinyutf8.h:1645
size_type raw_find(const data_type *pattern, size_type start_byte=0) const noexcept
Definition tinyutf8.h:2222
raw_reference operator()(size_type n) noexcept
Definition tinyutf8.h:1582
int compare(const value_type(&str)[LITLEN]) const noexcept
Definition tinyutf8.h:2556
basic_string & insert(raw_iterator it, const basic_string &str) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:2015
size_type get_non_sso_string_len() const noexcept
Get buffer size, if SSO is disabled.
Definition tinyutf8.h:1011
basic_string(const data_type(&str)[LITLEN], size_type pos, size_type len, const allocator_type &alloc=allocator_type()) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1196
static size_type get_lut(const data_type *iter, width_type lut_width) noexcept
Get the nth index within a multibyte index table.
Definition tinyutf8.h:859
void set_non_sso_string_len(size_type string_len) noexcept
Non-static helper methods.
Definition tinyutf8.h:992
value_type raw_at(size_type byte_index) const noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1511
bool operator>(const data_type(&str)[LITLEN]) const noexcept
Definition tinyutf8.h:2598
value_type front() const noexcept
Definition tinyutf8.h:1712
basic_string(const value_type *str, size_type len, const allocator_type &alloc=allocator_type()) noexcept(TINY_UTF8_NOEXCEPT)
enable_if_ptr< T, data_type > operator==(T &&str) const noexcept
Definition tinyutf8.h:2572
basic_string & assign(const data_type(&str)[LITLEN]) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1931
iterator end() noexcept
Definition tinyutf8.h:1643
iterator get(size_type n) noexcept
Definition tinyutf8.h:1532
basic_string & erase(raw_iterator pos) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:2055
const_reverse_iterator rend() const noexcept
Definition tinyutf8.h:1665
enable_if_ptr< T, value_type > operator>(T &&str) const noexcept
Definition tinyutf8.h:2594
size_type raw_find(const basic_string &pattern, size_type start_byte=0) const noexcept
Definition tinyutf8.h:2206
static size_type round_up_to_align(size_type val) noexcept
Rounds the supplied value to a multiple of sizeof(size_type)
Definition tinyutf8.h:800
codepoint_reference< basic_string, true > checked_reference
Definition tinyutf8.h:711
size_type indicator_type
Definition tinyutf8.h:725
bool operator!=(const value_type(&str)[LITLEN]) const noexcept
Definition tinyutf8.h:2579
width_type get_codepoint_bytes(size_type codepoint_index) const noexcept
Definition tinyutf8.h:2612
SSO t_sso
Definition tinyutf8.h:765
size_type raw_back_index() const noexcept
Get the byte index of the last codepoint.
Definition tinyutf8.h:2628
raw_reverse_iterator raw_rget(size_type n) noexcept
Definition tinyutf8.h:1562
const_reverse_iterator crbegin() const noexcept
Definition tinyutf8.h:1694
width_type get_codepoint_pre_bytes(size_type codepoint_index) const noexcept
Definition tinyutf8.h:2622
bool sso_active() const noexcept
Definition tinyutf8.h:2664
reverse_iterator rend() noexcept
Definition tinyutf8.h:1664
basic_string(const basic_string &str, const allocator_type &alloc) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1302
size_type raw_find_first_not_of(const value_type *str, size_type start_byte=0) const noexcept
Definition tinyutf8.h:5021
int compare(const data_type(&str)[LITLEN]) const noexcept
Definition tinyutf8.h:2513
static size_type determine_main_buffer_size(size_type data_len) noexcept
Determine the needed buffer size if the lut is empty (excluding the trailing LUT indicator)
Definition tinyutf8.h:849
bool operator==(const std::string &str) const noexcept
Definition tinyutf8.h:2570
const_iterator get(size_type n) const noexcept
Definition tinyutf8.h:1533
static width_type decode_utf8_and_len(const data_type *data, value_type &dest, size_type data_left) noexcept
Definition tinyutf8.h:952
basic_string & operator+=(const basic_string &appendix) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1833
std::basic_string< data_type > cpp_str(bool prepend_bom=false) const noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:2694
bool ends_with(T str, enable_if_ptr< T, data_type > *={}) const noexcept
Definition tinyutf8.h:2395
size_type capacity() const noexcept
Definition tinyutf8.h:1482
bool ends_with(const value_type(&str)[LITLEN]) const noexcept
Definition tinyutf8.h:2434
bool operator!=(const data_type(&str)[LITLEN]) const noexcept
Definition tinyutf8.h:2577
basic_string(const data_type(&str)[LITLEN], const allocator_type &alloc=allocator_type(), enable_if_not_small_string< LITLEN >={}) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1169
bool starts_with(const std::string &str) const noexcept
Definition tinyutf8.h:2291
void deallocate(data_type *buffer, size_type buffer_size) const noexcept
Allocates size_type-aligned storage (make sure, buffer_size is a multiple of sizeof(size_type)!...
Definition tinyutf8.h:1071
basic_string & insert(raw_iterator it, value_type cp) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:2005
const_iterator end() const noexcept
Definition tinyutf8.h:1644
data_type * get_buffer() noexcept
Definition tinyutf8.h:1048
basic_string & replace(raw_iterator first, iterator last, value_type repl, size_type n) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1769
basic_string & prepend(const basic_string &prependix) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1824
basic_string(basic_string &&str, const allocator_type &alloc) noexcept(TINY_UTF8_NOEXCEPT &&std::is_nothrow_copy_constructible< Allocator >())
Definition tinyutf8.h:1389
basic_string & operator=(const basic_string &str) noexcept(TINY_UTF8_NOEXCEPT)
basic_string & insert(size_type pos, const basic_string &str) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1995
std::allocator_traits< Allocator >::difference_type difference_type
Definition tinyutf8.h:708
basic_string(const data_type(&str)[LITLEN], size_type len, const allocator_type &alloc=allocator_type()) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1182
bool operator>(const value_type(&str)[LITLEN]) const noexcept
Definition tinyutf8.h:2602
raw_iterator raw_get(size_type n) noexcept
Definition tinyutf8.h:1542
value_type raw_at(size_type byte_index, std::nothrow_t) const noexcept
Definition tinyutf8.h:1520
std::basic_string< data_type > cpp_str_bom() const noexcept
Returns an std::string with the UTF-8 BOM prepended.
Definition tinyutf8.h:3405
basic_string & replace(iterator first, iterator last, value_type repl, size_type n) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1771
reference at(size_type n, std::nothrow_t) noexcept
Definition tinyutf8.h:1500
size_type size() const noexcept
Definition tinyutf8.h:1614
Allocator allocator_type
Definition tinyutf8.h:724
tiny_utf8::const_iterator< basic_string, false > const_iterator
Definition tinyutf8.h:717
bool operator>(const basic_string &str) const noexcept
Lexicographical comparison Operators.
Definition tinyutf8.h:2582
basic_string & replace(iterator first, raw_iterator last, value_type repl, size_type n) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1770
friend basic_string operator+(basic_string lhs, value_type rhs) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1858
enable_if_ptr< T, data_type > operator!=(T &&str) const noexcept
Definition tinyutf8.h:2573
tiny_utf8::const_reverse_iterator< basic_string, true > raw_const_reverse_iterator
Definition tinyutf8.h:723
raw_const_iterator raw_cend() const noexcept
Definition tinyutf8.h:1685
bool operator<=(const basic_string &str) const noexcept
Definition tinyutf8.h:2585
static const data_type * get_lut_base_ptr(const data_type *buffer, size_type buffer_size) noexcept
Definition tinyutf8.h:806
basic_string(const data_type *str, size_type pos, size_type count, size_type data_left, const allocator_type &alloc, tiny_utf8_detail::read_codepoints_tag) noexcept(TINY_UTF8_NOEXCEPT)
Constructs a basic_string from a character literal.
basic_string(const allocator_type &alloc) noexcept(TINY_UTF8_NOEXCEPT &&std::is_nothrow_copy_constructible< Allocator >())
Definition tinyutf8.h:1102
value_type at(size_type n) const noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1493
basic_string & push_back(value_type cp) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1842
Helper to detect little endian.
Definition tinyutf8.h:173
static constexpr bool value
Definition tinyutf8.h:177
std::size_t strlen< char >(const char *str)
Definition tinyutf8.h:202
std::size_t strlen(const T *str)
strlen for different character types
Definition tinyutf8.h:201
Want global declarations?
Definition tinyutf8.h:113
iterator< Container, true >::difference_type operator-(const iterator< Container, false > &lhs, const iterator< Container, false > &rhs) noexcept
Definition tinyutf8.h:672
basic_string< char32_t, char > utf8_string
Definition tinyutf8.h:124
std::size_t operator()(const tiny_utf8::basic_string< V, D, A > &string) const noexcept
Definition tinyutf8.h:2705
Layout specifications.
Definition tinyutf8.h:738
size_type data_len
Definition tinyutf8.h:740
size_type buffer_size
Definition tinyutf8.h:741
size_type string_len
Definition tinyutf8.h:742
data_type * data
Definition tinyutf8.h:739
Definition tinyutf8.h:747
SSO() noexcept
Definition tinyutf8.h:756
SSO(data_type value) noexcept
Definition tinyutf8.h:752
data_type data[size]
Definition tinyutf8.h:749
unsigned char data_len
Definition tinyutf8.h:750
@ size
Definition tinyutf8.h:748
Definition tinyutf8.h:208
Container::size_type t_index
Definition tinyutf8.h:209
codepoint_reference(typename Container::size_type index, Container *instance) noexcept
Ctor.
Definition tinyutf8.h:215
codepoint_reference & operator=(const codepoint_reference &ref) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:236
codepoint_reference & operator*() const noexcept
Dereference operator to act as pointer type.
Definition tinyutf8.h:229
Container * t_instance
Definition tinyutf8.h:210
codepoint_reference & operator=(typename Container::value_type cp) noexcept(TINY_UTF8_NOEXCEPT)
Assignment operator.
Definition tinyutf8.h:232
Definition tinyutf8.h:495
const_iterator(typename iterator_base< Container, Raw >::difference_type index, const Container *instance) noexcept
Ctor.
Definition tinyutf8.h:497
const_iterator(const iterator< Container, Raw > &other) noexcept
Ctor from non const.
Definition tinyutf8.h:502
const_iterator(const iterator< Container, !Raw > &other) noexcept
Definition tinyutf8.h:505
const_iterator() noexcept=default
Default Functions.
Definition tinyutf8.h:596
const_reverse_iterator(const reverse_iterator< Container, Raw > &other) noexcept
Ctor from non const.
Definition tinyutf8.h:603
const_reverse_iterator(typename iterator_base< Container, Raw >::difference_type index, const Container *instance) noexcept
Ctor.
Definition tinyutf8.h:598
const_reverse_iterator(const const_iterator< Container, Raw > &other) noexcept
Ctor from normal iterator.
Definition tinyutf8.h:611
const_reverse_iterator(const const_iterator< Container, !Raw > &other) noexcept
Definition tinyutf8.h:614
const_reverse_iterator(const reverse_iterator< Container, !Raw > &other) noexcept
Definition tinyutf8.h:606
const_iterator< Container, Raw > base() const noexcept
Get the underlying iterator instance.
Definition tinyutf8.h:627
const_reverse_iterator() noexcept=default
Default Functions.
iterator_base(iterator_base< Container, false > other) noexcept
Constructor from non-raw iterator.
Definition tinyutf8.h:375
iterator_base(difference_type index, Container *instance) noexcept
Ctor.
Definition tinyutf8.h:361
bool operator!=(const iterator_base &it) const noexcept
Definition tinyutf8.h:358
void * pointer
Definition tinyutf8.h:354
iterator_base() noexcept=default
Default function.
raw_codepoint_reference< Container, false > reference
Definition tinyutf8.h:353
Container::value_type value_type
Definition tinyutf8.h:351
difference_type t_index
Definition tinyutf8.h:397
void advance(difference_type n) noexcept
Advance the iterator n times (negative values allowed!)
Definition tinyutf8.h:403
value_type get_value() const noexcept
Get the value that the iterator points to.
Definition tinyutf8.h:393
void increment() noexcept
Move the iterator one codepoint ahead.
Definition tinyutf8.h:414
difference_type get_index() const noexcept
Definition tinyutf8.h:384
void decrement() noexcept
Move the iterator one codepoint backwards.
Definition tinyutf8.h:417
bool operator==(const iterator_base &it) const noexcept
Definition tinyutf8.h:357
difference_type get_raw_index() const noexcept
Get the index of the codepoint the iterator points to.
Definition tinyutf8.h:387
std::bidirectional_iterator_tag iterator_category
Definition tinyutf8.h:355
Container::difference_type difference_type
Definition tinyutf8.h:352
reference get_reference() const noexcept
Get a reference to the codepoint the iterator points to.
Definition tinyutf8.h:390
Definition tinyutf8.h:284
iterator_base(difference_type index, Container *instance) noexcept
Ctor.
Definition tinyutf8.h:300
difference_type get_raw_index() const noexcept
Get the index of the codepoint the iterator points to.
Definition tinyutf8.h:317
difference_type get_index() const noexcept
Definition tinyutf8.h:314
difference_type t_index
Definition tinyutf8.h:327
Container::difference_type difference_type
Definition tinyutf8.h:291
void increment() noexcept
Move the iterator one codepoint ahead.
Definition tinyutf8.h:336
Container * t_instance
Definition tinyutf8.h:328
std::random_access_iterator_tag iterator_category
Definition tinyutf8.h:294
iterator_base() noexcept=default
Default function.
Container * get_instance() const noexcept
Getter for the instance.
Definition tinyutf8.h:311
void * pointer
Definition tinyutf8.h:293
codepoint_reference< Container, false > reference
Definition tinyutf8.h:292
Container::value_type value_type
Definition tinyutf8.h:290
bool operator==(const iterator_base &it) const noexcept
Definition tinyutf8.h:296
bool operator!=(const iterator_base &it) const noexcept
Definition tinyutf8.h:297
value_type get_value() const noexcept
Get the value that the iterator points to.
Definition tinyutf8.h:323
reference get_reference() const noexcept
Get a reference to the codepoint the iterator points to.
Definition tinyutf8.h:320
void advance(difference_type n) noexcept
Advance the iterator n times (negative values allowed!)
Definition tinyutf8.h:333
void decrement() noexcept
Move the iterator one codepoint backwards.
Definition tinyutf8.h:339
Definition tinyutf8.h:427
iterator operator+(typename iterator_base< Container, Raw >::difference_type n) const noexcept
Increase the Iterator n times.
Definition tinyutf8.h:468
iterator() noexcept=default
Default Functions.
iterator::reference operator*() const noexcept
Returns the value of the codepoint behind the iterator.
Definition tinyutf8.h:490
iterator & operator--() noexcept
Decrease the iterator by one.
Definition tinyutf8.h:457
iterator & operator-=(typename iterator_base< Container, Raw >::difference_type n) noexcept
Definition tinyutf8.h:484
iterator operator--(int) noexcept
Definition tinyutf8.h:461
iterator & operator+=(typename iterator_base< Container, Raw >::difference_type n) noexcept
Definition tinyutf8.h:473
iterator operator-(typename iterator_base< Container, Raw >::difference_type n) const noexcept
Decrease the Iterator n times.
Definition tinyutf8.h:479
iterator(const iterator< Container, !Raw > &other) noexcept
Definition tinyutf8.h:432
iterator(typename iterator_base< Container, Raw >::difference_type index, Container *instance) noexcept
Ctor.
Definition tinyutf8.h:429
iterator operator++(int) noexcept
Definition tinyutf8.h:450
Definition tinyutf8.h:241
raw_codepoint_reference(const codepoint_reference< Container, RC > &reference) noexcept
Definition tinyutf8.h:253
raw_codepoint_reference & operator=(typename Container::value_type cp) noexcept(TINY_UTF8_NOEXCEPT)
Assignment operator.
Definition tinyutf8.h:274
Container::size_type t_index
Definition tinyutf8.h:242
raw_codepoint_reference & operator*() const noexcept
Dereference operator to act as pointer type.
Definition tinyutf8.h:267
Container * t_instance
Definition tinyutf8.h:243
raw_codepoint_reference(typename Container::size_type raw_index, Container *instance) noexcept
Ctors.
Definition tinyutf8.h:248
raw_codepoint_reference & operator=(const raw_codepoint_reference &ref) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:278
Definition tinyutf8.h:520
reverse_iterator & operator+=(typename iterator_base< Container, Raw >::difference_type n) noexcept
Definition tinyutf8.h:571
reverse_iterator operator++(int) noexcept
Definition tinyutf8.h:548
reverse_iterator operator--(int) noexcept
Definition tinyutf8.h:559
reverse_iterator(const iterator< Container, !Raw > &other) noexcept
Definition tinyutf8.h:530
reverse_iterator & operator-=(typename iterator_base< Container, Raw >::difference_type n) noexcept
Definition tinyutf8.h:582
reverse_iterator(typename iterator_base< Container, Raw >::difference_type index, Container *instance) noexcept
Ctor.
Definition tinyutf8.h:522
reverse_iterator(const iterator< Container, Raw > &other) noexcept
Ctor from normal iterator.
Definition tinyutf8.h:527
reverse_iterator() noexcept=default
Default Functions.
reverse_iterator & operator--() noexcept
Decrease the Iterator by one.
Definition tinyutf8.h:555
reverse_iterator operator+(typename iterator_base< Container, Raw >::difference_type n) const noexcept
Increase the Iterator n times.
Definition tinyutf8.h:566
iterator< Container, Raw > base() const noexcept
Get the underlying iterator instance.
Definition tinyutf8.h:591
iterator< Container, Raw >::reference operator*() const noexcept
Returns the value of the codepoint behind the iterator.
Definition tinyutf8.h:588
reverse_iterator operator-(typename iterator_base< Container, Raw >::difference_type n) const noexcept
Decrease the Iterator n times.
Definition tinyutf8.h:577
Definition tinyutf8.h:138
Definition tinyutf8.h:137
#define TINY_UTF8_FALLTHROUGH
Determine the way to inform about fallthrough behavior.
Definition tinyutf8.h:73
#define TINY_UTF8_CPP17(...)
Remove Warnings, since it is wrong for all cases in this file.
Definition tinyutf8.h:94
std::istream & operator>>(std::istream &stream, tiny_utf8::basic_string< V, D, A > &str) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:2724
#define TINY_UTF8_NOEXCEPT
Determine noexcept specifications.
Definition tinyutf8.h:102
std::ostream & operator<<(std::ostream &stream, const tiny_utf8::basic_string< V, D, A > &str) noexcept(TINY_UTF8_NOEXCEPT)
Stream Operations.
Definition tinyutf8.h:2720
#define TINY_UTF8_THROW(...)
Determine the mode of error handling.
Definition tinyutf8.h:53
char last
Definition tinyutf8.h:195
T number
Definition tinyutf8.h:193
Helper to modify the last (address-wise) byte of a little endian value of type 'T'.
Definition tinyutf8.h:183
struct tiny_utf8::tiny_utf8_detail::last_byte::@0 bytes
T number
Definition tinyutf8.h:184
char last
Definition tinyutf8.h:187
char dummy[sizeof(T) -1]
Definition tinyutf8.h:186