tinyutf8 4.0.2
Unicode (UTF-8) capable std::string
Loading...
Searching...
No Matches
tinyutf8.h
Go to the documentation of this file.
1
28#ifndef _TINY_UTF8_H_
29#define _TINY_UTF8_H_
30
31// Includes
32#include <memory> // for std::unique_ptr
33#include <cstring> // for std::memcpy, std::memmove
34#include <string> // for std::string
35#include <limits> // for std::numeric_limits
36#include <functional> // for std::hash
37#include <algorithm> // for std::min, std::max
38#include <type_traits> // for std::is_*
39#include <cstddef> // for std::size_t and offsetof
40#include <cstdint> // for std::uint8_t, std::uint16_t, std::uint32_t, std::uint_least16_t, std::uint_fast32_t
41#include <initializer_list> // for std::initializer_list
42#include <iosfwd> // for std::ostream and std::istream forward declarations
43#ifdef _MSC_VER
44#include <intrin.h> // for _BitScanReverse, _BitScanReverse64
45#endif
46
48#ifndef TINY_UTF8_THROW
49 #if defined(__cpp_exceptions) && !defined(TINY_UTF8_NOEXCEPT)
50 #include <stdexcept> // for std::out_of_range
51 #define TINY_UTF8_THROW( LOCATION , FAILING_PREDICATE ) throw std::out_of_range( LOCATION ": " #FAILING_PREDICATE )
52 #else
53 #define TINY_UTF8_THROW( ... ) void()
54 #endif
55#endif
56
58#if defined(_MSC_VER) && defined(_MSVC_LANG)
59 #define TINY_UTF8_CPLUSPLUS _MSVC_LANG
60#else
61 #define TINY_UTF8_CPLUSPLUS __cplusplus
62#endif
63
65#if TINY_UTF8_CPLUSPLUS >= 201703L
66 #define TINY_UTF8_FALLTHROUGH [[fallthrough]];
67#elif defined(__clang__)
68 // Clang does not warn about implicit fallthrough
69 #define TINY_UTF8_FALLTHROUGH
70#elif defined(__GNUC__) && __GNUG__ > 6
71 #define TINY_UTF8_FALLTHROUGH [[gnu::fallthrough]];
72#else
73 #define TINY_UTF8_FALLTHROUGH /* fall through */
74#endif
75
77#if defined(__clang__)
78 #pragma clang diagnostic push
79 // #pragma clang diagnostic ignored "-Wmaybe-uninitialized" // Clang is missing it. See https://bugs.llvm.org/show_bug.cgi?id=24979
80#elif defined(__GNUC__)
81 #pragma GCC diagnostic push
82#elif defined(_MSC_VER)
83 #pragma warning(push)
84 #pragma warning(disable:4701) // Maybe uninitialized
85 #pragma warning(disable:4702) // Unreachable code after call to TINY_UTF8_THROW()
86 #pragma warning(disable:4703) // Maybe uninitialized
87 #pragma warning(disable:26819) // Implicit Fallthrough
88#endif
89
91#if TINY_UTF8_CPLUSPLUS >= 201703L
92 #define TINY_UTF8_CPP17( ... ) __VA_ARGS__
93#else
94 #define TINY_UTF8_CPP17( ... )
95#endif
96
98#if defined(TINY_UTF8_NOEXCEPT)
99 #undef TINY_UTF8_NOEXCEPT
100 #define TINY_UTF8_NOEXCEPT true
101#elif !defined(__cpp_exceptions)
102 #define TINY_UTF8_NOEXCEPT true
103#else
104 #define TINY_UTF8_NOEXCEPT false
105#endif
106
108#ifdef TINY_UTF8_GLOBAL_NAMESPACE
109inline
110#endif
111
112namespace tiny_utf8
113{
114 // Forward Declaration
115 template<
116 typename ValueType = char32_t
117 , typename DataType = char
118 , typename Allocator = std::allocator<DataType>
119 >
120 class basic_string;
121
124 using utf8_string = basic_string<char32_t, char>; // For backwards compatibility
125
127 #if defined(__cpp_char8_t)
129 #else
131 #endif
132
134 namespace tiny_utf8_detail
135 {
136 // Used for tag dispatching in constructor
139
141 #if defined(__GNUC__)
142 #define TINY_UTF8_HAS_CLZ true
// Count-leading-zeros helpers forwarding to the GCC/Clang builtins.
// NOTE: the builtins leave the result undefined for value == 0, so callers must not pass 0.
static inline unsigned int clz( unsigned int value ) noexcept {
	return static_cast<unsigned int>( __builtin_clz( value ) );
}
static inline unsigned int clz( unsigned long int value ) noexcept {
	return static_cast<unsigned int>( __builtin_clzl( value ) );
}
static inline unsigned int clz( char32_t value ) noexcept {
	// Pick the builtin whose operand has the same width as char32_t on this platform
	if( sizeof(char32_t) == sizeof(unsigned long int) )
		return static_cast<unsigned int>( __builtin_clzl( value ) );
	return static_cast<unsigned int>( __builtin_clz( value ) );
}
148 #elif defined(_MSC_VER)
149 #define TINY_UTF8_HAS_CLZ true
150 template<typename T>
151 static inline unsigned int lzcnt( T value ) noexcept {
152 unsigned long value_log2;
153 #if INTPTR_MAX >= INT64_MAX
154 _BitScanReverse64( &value_log2 , value );
155 #else
156 _BitScanReverse( &value_log2 , value );
157 #endif
158 return sizeof(T) * 8 - value_log2 - 1;
159 }
160 static inline unsigned int clz( std::uint16_t value ) noexcept { return lzcnt( value ); }
161 static inline unsigned int clz( std::uint32_t value ) noexcept { return lzcnt( value ); }
162 #if INTPTR_MAX >= INT64_MAX
163 static inline unsigned int clz( std::uint64_t value ) noexcept { return lzcnt( value ); }
164 #endif
165 static inline unsigned int clz( char32_t value ) noexcept { return lzcnt( value ); }
166 #else
167 #define TINY_UTF8_HAS_CLZ false
168 #endif
169
170
173 {
174 constexpr static std::uint32_t u4 = 1;
175 constexpr static std::uint8_t u1 = (const std::uint8_t &) u4;
176 public:
177 constexpr static bool value = u1;
178 };
179
181 template<typename T, std::size_t = sizeof(T)>
183 {
185 struct{
186 char dummy[sizeof(T)-1];
187 char last;
189 };
190 template<typename T>
191 union last_byte<T, 1>
192 {
194 struct{
195 char last;
197 };
198
// Length of a zero-terminated sequence of code units of any type (the terminator is not counted)
template<typename T>
inline std::size_t strlen( const T* str ){
	const T* cursor = str;
	while( *cursor )
		++cursor;
	return static_cast<std::size_t>( cursor - str );
}
// Plain 'char' strings are delegated to the C library implementation
template<> inline std::size_t strlen<char>( const char* str ){ return std::strlen( str ); }
203 }
204
205
206 template<typename Container, bool RangeCheck>
208 {
209 typename Container::size_type t_index;
210 Container* t_instance;
211
212 public:
213
215 codepoint_reference( typename Container::size_type index , Container* instance ) noexcept :
216 t_index( index )
217 , t_instance( instance )
218 {}
219
221 operator typename Container::value_type() const noexcept( TINY_UTF8_NOEXCEPT || RangeCheck == false ) {
222 if TINY_UTF8_CPP17(constexpr) ( RangeCheck )
223 return static_cast<const Container*>(t_instance)->at( t_index );
224 else
225 return static_cast<const Container*>(t_instance)->at( t_index , std::nothrow );
226 }
227
229 codepoint_reference& operator*() const noexcept { return *this; }
230
232 codepoint_reference& operator=( typename Container::value_type cp ) noexcept(TINY_UTF8_NOEXCEPT) {
233 t_instance->replace( t_index , cp );
234 return *this;
235 }
236 codepoint_reference& operator=( const codepoint_reference& ref ) noexcept(TINY_UTF8_NOEXCEPT) { return *this = (typename Container::value_type)ref; }
237 };
238
239 template<typename Container, bool RangeCheck>
241 {
242 typename Container::size_type t_index;
243 Container* t_instance;
244
245 public:
246
248 raw_codepoint_reference( typename Container::size_type raw_index , Container* instance ) noexcept :
249 t_index( raw_index )
250 , t_instance( instance )
251 {}
252 template<bool RC>
253 explicit raw_codepoint_reference( const codepoint_reference<Container, RC>& reference ) noexcept :
254 t_index( reference.t_instance->get_num_bytes_from_start( reference.t_index ) )
255 , t_instance( reference.t_instance )
256 {}
257
259 operator typename Container::value_type() const noexcept( TINY_UTF8_NOEXCEPT || RangeCheck == false ) {
260 if TINY_UTF8_CPP17(constexpr) ( RangeCheck )
261 return static_cast<const Container*>(t_instance)->raw_at( t_index );
262 else
263 return static_cast<const Container*>(t_instance)->raw_at( t_index , std::nothrow );
264 }
265
267 raw_codepoint_reference& operator*() const noexcept { return *this; }
268
270 template<bool RC>
271 explicit operator codepoint_reference<Container, RC>() const noexcept { return { t_instance->get_num_codepoints( 0 , t_index ) , t_instance }; }
272
274 raw_codepoint_reference& operator=( typename Container::value_type cp ) noexcept(TINY_UTF8_NOEXCEPT) {
275 t_instance->raw_replace( t_index , t_instance->get_index_bytes( t_index ) , Container( cp ) );
276 return *this;
277 }
278 raw_codepoint_reference& operator=( const raw_codepoint_reference& ref ) noexcept(TINY_UTF8_NOEXCEPT) { return *this = (typename Container::value_type)ref; }
279 };
280
281 // Codepoint-based iterator base
282 template<typename Container, bool Raw>
284 {
285 template<typename, typename, typename>
286 friend class basic_string;
287
288 public:
289
290 typedef typename Container::value_type value_type;
291 typedef typename Container::difference_type difference_type;
293 typedef void* pointer;
294 typedef std::random_access_iterator_tag iterator_category;
295
296 bool operator==( const iterator_base& it ) const noexcept { return t_index == it.t_index; }
297 bool operator!=( const iterator_base& it ) const noexcept { return t_index != it.t_index; }
298
300 iterator_base( difference_type index , Container* instance ) noexcept :
301 t_index( index )
302 , t_instance( instance )
303 {}
304
306 iterator_base() noexcept = default;
307 iterator_base( const iterator_base& ) noexcept = default;
308 iterator_base& operator=( const iterator_base& ) noexcept = default;
309
311 Container* get_instance() const noexcept { return t_instance; }
312
313 // Getter for the iterator index
314 difference_type get_index() const noexcept { return t_index; }
315
317 difference_type get_raw_index() const noexcept { return t_instance->get_num_bytes_from_start( t_index ); }
318
320 reference get_reference() const noexcept { return t_instance->at( t_index , std::nothrow ); }
321
323 value_type get_value() const noexcept { return static_cast<const Container*>(t_instance)->at( t_index ); }
324
325 protected:
326
328 Container* t_instance = nullptr;
329
330 protected:
331
333 void advance( difference_type n ) noexcept { t_index += n; }
334
336 void increment() noexcept { t_index++; }
337
339 void decrement() noexcept { t_index--; }
340 };
341
342 // (Raw) Byte-based iterator base
343 template<typename Container>
344 struct iterator_base<Container, true>
345 {
346 template<typename, typename, typename>
347 friend class basic_string;
348
349 public:
350
351 typedef typename Container::value_type value_type;
352 typedef typename Container::difference_type difference_type;
354 typedef void* pointer;
355 typedef std::bidirectional_iterator_tag iterator_category;
356
357 bool operator==( const iterator_base& it ) const noexcept { return t_index == it.t_index; }
358 bool operator!=( const iterator_base& it ) const noexcept { return t_index != it.t_index; }
359
361 iterator_base( difference_type index , Container* instance ) noexcept :
362 t_index( index )
363 , t_instance( instance )
364 {}
365
367 iterator_base() noexcept = default;
368 iterator_base( const iterator_base& ) noexcept = default;
369 iterator_base& operator=( const iterator_base& ) noexcept = default;
370
372 Container* get_instance() const noexcept { return t_instance; }
373
376 t_index( other.get_raw_index() )
377 , t_instance( other.get_instance() )
378 {}
379
381 operator iterator_base<Container, false>() const noexcept { return { this->get_index() , t_instance }; }
382
383 // Getter for the iterator index
384 difference_type get_index() const noexcept { return t_instance->get_num_codepoints( 0 , t_index ); }
385
387 difference_type get_raw_index() const noexcept { return t_index; }
388
390 reference get_reference() const noexcept { return t_instance->raw_at( t_index , std::nothrow ); }
391
393 value_type get_value() const noexcept { return static_cast<const Container*>(t_instance)->raw_at( t_index ); }
394
395 protected:
396
398 Container* t_instance = nullptr;
399
400 protected:
401
403 void advance( difference_type n ) noexcept {
404 if( n > 0 )
405 do
406 increment();
407 while( --n > 0 );
408 else
409 while( n++ < 0 )
410 decrement();
411 }
412
414 void increment() noexcept { t_index += t_instance->get_index_bytes( t_index ); }
415
417 void decrement() noexcept { t_index -= t_instance->get_index_pre_bytes( t_index ); }
418 };
419
420 template<typename Container, bool Raw> struct iterator;
421 template<typename Container, bool Raw> struct const_iterator;
422 template<typename Container, bool Raw> struct reverse_iterator;
423 template<typename Container, bool Raw> struct const_reverse_iterator;
424
425 template<typename Container, bool Raw = false>
426 struct iterator : iterator_base<Container, Raw>
427 {
429 iterator( typename iterator_base<Container, Raw>::difference_type index , Container* instance ) noexcept :
430 iterator_base<Container, Raw>( index , instance )
431 {}
432 iterator( const iterator<Container, !Raw>& other ) noexcept :
434 {}
435
437 iterator() noexcept = default;
438 iterator( const iterator& ) noexcept = default;
439 iterator& operator=( const iterator& ) noexcept = default;
440
442 iterator( const const_iterator<Container, Raw>& ) = delete;
443 iterator( const const_reverse_iterator<Container, Raw>& ) = delete;
444
446 iterator& operator++() noexcept { // prefix ++iter
447 this->increment();
448 return *this;
449 }
450 iterator operator++( int ) noexcept { // postfix iter++
451 iterator tmp{ this->t_index , this->t_instance };
452 this->increment();
453 return tmp;
454 }
455
457 iterator& operator--() noexcept { // prefix --iter
458 this->decrement();
459 return *this;
460 }
461 iterator operator--( int ) noexcept { // postfix iter--
462 iterator tmp{ this->t_index , this->t_instance };
463 this->decrement();
464 return tmp;
465 }
466
469 iterator it{*this};
470 it.advance( n );
471 return it;
472 }
474 this->advance( n );
475 return *this;
476 }
477
480 iterator it{*this};
481 it.advance( -n );
482 return it;
483 }
485 this->advance( -n );
486 return *this;
487 }
488
490 typename iterator::reference operator*() const noexcept { return this->get_reference(); }
491 };
492
493 template<typename Container, bool Raw>
494 struct const_iterator : iterator<Container, Raw>
495 {
497 const_iterator( typename iterator_base<Container, Raw>::difference_type index , const Container* instance ) noexcept :
498 iterator<Container, Raw>( index , const_cast<Container*>(instance) )
499 {}
500
502 const_iterator( const iterator<Container, Raw>& other ) noexcept :
504 {}
505 const_iterator( const iterator<Container, !Raw>& other ) noexcept :
507 {}
508
510 const_iterator() noexcept = default;
511 const_iterator( const const_iterator& ) noexcept = default;
512 const_iterator& operator=( const const_iterator& ) noexcept = default;
513
515 typename iterator<Container, Raw>::value_type operator*() const noexcept { return this->get_value(); }
516 };
517
518 template<typename Container, bool Raw>
519 struct reverse_iterator : iterator_base<Container, Raw>
520 {
522 reverse_iterator( typename iterator_base<Container, Raw>::difference_type index , Container* instance ) noexcept :
523 iterator_base<Container, Raw>( index , instance )
524 {}
525
529 {}
532 {}
533
535 reverse_iterator() noexcept = default;
536 reverse_iterator( const reverse_iterator& ) noexcept = default;
537 reverse_iterator& operator=( const reverse_iterator& ) noexcept = default;
538
540 reverse_iterator( const const_iterator<Container, Raw>& ) = delete;
541 reverse_iterator( const const_reverse_iterator<Container, Raw>& ) = delete;
542
544 reverse_iterator& operator++() noexcept { // prefix ++iter
545 this->decrement();
546 return *this;
547 }
548 reverse_iterator operator++( int ) noexcept { // postfix iter++
549 reverse_iterator tmp{ this->t_index , this->t_instance };
550 this->decrement();
551 return tmp;
552 }
553
555 reverse_iterator& operator--() noexcept { // prefix --iter
556 this->increment();
557 return *this;
558 }
559 reverse_iterator operator--( int ) noexcept { // postfix iter--
560 reverse_iterator tmp{ this->t_index , this->t_instance };
561 this->increment();
562 return tmp;
563 }
564
567 reverse_iterator it{*this};
568 it.advance( -n );
569 return it;
570 }
572 this->advance( -n );
573 return *this;
574 }
575
578 reverse_iterator it{*this};
579 it.advance( n );
580 return it;
581 }
583 this->advance( n );
584 return *this;
585 }
586
588 typename iterator<Container, Raw>::reference operator*() const noexcept { return this->get_reference(); }
589
591 iterator<Container, Raw> base() const noexcept { return { this->t_index , this->t_instance }; }
592 };
593
594 template<typename Container, bool Raw>
596 {
598 const_reverse_iterator( typename iterator_base<Container, Raw>::difference_type index , const Container* instance ) noexcept :
599 reverse_iterator<Container, Raw>( index , const_cast<Container*>(instance) )
600 {}
601
609
617
619 const_reverse_iterator() noexcept = default;
620 const_reverse_iterator( const const_reverse_iterator& ) noexcept = default;
621 const_reverse_iterator& operator=( const const_reverse_iterator& ) noexcept = default;
622
624 typename iterator<Container, Raw>::value_type operator*() const noexcept { return this->get_value(); }
625
627 const_iterator<Container, Raw> base() const noexcept { return { this->t_index , this->t_instance }; }
628 };
629
630
632 // Non-raw and "don't care"
633 template<typename Container, bool Raw>
634 static inline bool operator>( const const_iterator<Container, false>& lhs , const const_iterator<Container, Raw>& rhs ) noexcept { return lhs.get_index() > rhs.get_index(); }
635 template<typename Container, bool Raw>
636 static inline bool operator>( const const_reverse_iterator<Container, false>& lhs , const const_reverse_iterator<Container, Raw>& rhs ) noexcept { return lhs.get_index() < rhs.get_index(); }
637 template<typename Container, bool Raw>
638 static inline bool operator>=( const const_iterator<Container, false>& lhs , const const_iterator<Container, Raw>& rhs ) noexcept { return lhs.get_index() >= rhs.get_index(); }
639 template<typename Container, bool Raw>
640 static inline bool operator>=( const const_reverse_iterator<Container, false>& lhs , const const_reverse_iterator<Container, Raw>& rhs ) noexcept { return lhs.get_index() <= rhs.get_index(); }
641 template<typename Container, bool Raw>
642 static inline bool operator<( const const_iterator<Container, false>& lhs , const const_iterator<Container, Raw>& rhs ) noexcept { return lhs.get_index() < rhs.get_index(); }
643 template<typename Container, bool Raw>
644 static inline bool operator<( const const_reverse_iterator<Container, false>& lhs , const const_reverse_iterator<Container, Raw>& rhs ) noexcept { return lhs.get_index() > rhs.get_index(); }
645 template<typename Container, bool Raw>
646 static inline bool operator<=( const const_iterator<Container, false>& lhs , const const_iterator<Container, Raw>& rhs ) noexcept { return lhs.get_index() <= rhs.get_index(); }
647 template<typename Container, bool Raw>
648 static inline bool operator<=( const const_reverse_iterator<Container, false>& lhs , const const_reverse_iterator<Container, Raw>& rhs ) noexcept { return lhs.get_index() >= rhs.get_index(); }
649 // Raw and "don't care"
650 template<typename Container, bool Raw>
651 static inline bool operator>( const const_iterator<Container, true>& lhs , const const_iterator<Container, Raw>& rhs ) noexcept { return lhs.get_raw_index() > rhs.get_raw_index(); }
652 template<typename Container, bool Raw>
653 static inline bool operator>( const const_reverse_iterator<Container, true>& lhs , const const_reverse_iterator<Container, Raw>& rhs ) noexcept { return lhs.get_raw_index() < rhs.get_raw_index(); }
654 template<typename Container, bool Raw>
655 static inline bool operator>=( const const_iterator<Container, true>& lhs , const const_iterator<Container, Raw>& rhs ) noexcept { return lhs.get_raw_index() >= rhs.get_raw_index(); }
656 template<typename Container, bool Raw>
657 static inline bool operator>=( const const_reverse_iterator<Container, true>& lhs , const const_reverse_iterator<Container, Raw>& rhs ) noexcept { return lhs.get_raw_index() <= rhs.get_raw_index(); }
658 template<typename Container, bool Raw>
659 static inline bool operator<( const const_iterator<Container, true>& lhs , const const_iterator<Container, Raw>& rhs ) noexcept { return lhs.get_raw_index() < rhs.get_raw_index(); }
660 template<typename Container, bool Raw>
661 static inline bool operator<( const const_reverse_iterator<Container, true>& lhs , const const_reverse_iterator<Container, Raw>& rhs ) noexcept { return lhs.get_raw_index() > rhs.get_raw_index(); }
662 template<typename Container, bool Raw>
663 static inline bool operator<=( const const_iterator<Container, true>& lhs , const const_iterator<Container, Raw>& rhs ) noexcept { return lhs.get_raw_index() <= rhs.get_raw_index(); }
664 template<typename Container, bool Raw>
665 static inline bool operator<=( const const_reverse_iterator<Container, true>& lhs , const const_reverse_iterator<Container, Raw>& rhs ) noexcept { return lhs.get_raw_index() >= rhs.get_raw_index(); }
666
670 // Non-raw iterators
671 template<typename Container>
673 return lhs.get_index() - rhs.get_index();
674 }
675 template<typename Container>
677 return rhs.get_index() - lhs.get_index();
678 }
679 // Raw Iterators
680 template<typename Container>
682 typename iterator<Container, true>::difference_type minIndex = std::min( lhs.get_raw_index() , rhs.get_raw_index() );
683 typename iterator<Container, true>::difference_type max_index = std::max( lhs.get_raw_index() , rhs.get_raw_index() );
684 typename iterator<Container, true>::difference_type num_codepoints = lhs.get_instance()->get_num_codepoints( minIndex , max_index - minIndex );
685 return max_index == lhs.get_raw_index() ? num_codepoints : -num_codepoints;
686 }
687 template<typename Container>
689 typename reverse_iterator<Container, true>::difference_type minIndex = std::min( lhs.get_raw_index() , rhs.get_raw_index() );
690 typename reverse_iterator<Container, true>::difference_type max_index = std::max( lhs.get_raw_index() , rhs.get_raw_index() );
691 typename reverse_iterator<Container, true>::difference_type num_codepoints = lhs.get_instance()->get_num_codepoints( minIndex , max_index - minIndex );
692 return max_index == rhs.get_raw_index() ? num_codepoints : -num_codepoints;
693 }
694
695
696 // Base class for basic_string
697 template<
698 typename ValueType
699 , typename DataType
700 , typename Allocator
701 >
702 class basic_string : private Allocator
703 {
704 public:
705
706 typedef DataType data_type;
707 typedef typename std::allocator_traits<Allocator>::size_type size_type;
708 typedef typename std::allocator_traits<Allocator>::difference_type difference_type;
709 typedef ValueType value_type;
715 typedef std::uint_fast8_t width_type; // Data type capable of holding the number of code units in a codepoint
724 typedef Allocator allocator_type;
725 typedef size_type indicator_type; // Typedef for the lut indicator. Note: Don't change this, because else the buffer will not be a multiple of sizeof(size_type)
726 enum : size_type{ npos = (size_type)-1 };
727
728 protected:
729
730 /*
731 * To determine, which layout is active, read either t_sso.data_len, or the last byte of t_non_sso.buffer_size:
732 * LSB == 0 => SSO
733 * LSB == 1 => NON-SSO
734 */
735
736 // Layout used, if sso is inactive
737 struct NON_SSO
738 {
739 data_type* data; // Points to [ <data::char>... | '0'::char | <index::rle>... | <lut_indicator::size_type> ]
740 size_type data_len; // In bytes, excluding the trailing '\0'
741 size_type buffer_size; // Indicates the size of '::data' minus 'lut_width'
742 size_type string_len; // Shadows data_len on the last byte
743 };
744
745 // Layout used, if sso is active
746 struct SSO
747 {
748 enum : size_type{ size = sizeof(NON_SSO)-1 };
750 unsigned char data_len; // This field holds ( size - num_characters ) << 1
751
752 SSO( data_type value ) noexcept :
753 data{ value , '\0' }
754 , data_len( (unsigned char)( size - 1u ) << 1 )
755 {}
756 SSO() noexcept :
757 data{ '\0' }
758 , data_len( (unsigned char)( size - 0 ) << 1 )
759 {}
760 };
761
762 protected:
763
764 union{
767 };
768
769 protected:
770
772 static constexpr inline size_type get_sso_capacity() noexcept { return SSO::size; }
773
775 template<size_type L>
776 using enable_if_small_string = typename std::enable_if<( L <= SSO::size ), bool>::type;
777 template<size_type L>
778 using enable_if_not_small_string = typename std::enable_if<( L > SSO::size ), bool>::type;
779
780 // Template to enable overloads, if the supplied type T is a character array without known bounds
781 template<typename T, typename CharType, typename _DataType = bool>
782 using enable_if_ptr = typename std::enable_if<
783 std::is_pointer<typename std::remove_reference<T>::type>::value
784 &&
785 std::is_same<
786 CharType
787 , typename std::remove_cv<
788 typename std::remove_pointer<
789 typename std::remove_reference<T>::type
790 >::type
791 >::type
792 >::value
793 , _DataType
794 >::type;
795
797 static inline bool is_lut_active( const data_type* lut_base_ptr ) noexcept { return *((const unsigned char*)lut_base_ptr) & 0x1; }
798
800 static inline size_type round_up_to_align( size_type val ) noexcept {
801 return ( val + sizeof(size_type) - 1 ) & ~( sizeof(size_type) - 1 );
802 }
803
805 static inline data_type* get_lut_base_ptr( data_type* buffer , size_type buffer_size ) noexcept { return buffer + buffer_size; }
806 static inline const data_type* get_lut_base_ptr( const data_type* buffer , size_type buffer_size ) noexcept { return buffer + buffer_size; }
807
809 static inline void set_lut_indiciator( data_type* lut_base_ptr , bool active , size_type lut_len = 0 ) noexcept {
810 *(indicator_type*)lut_base_ptr = active ? ( lut_len << 1 ) | 0x1 : 0;
811 }
813 static inline void copy_lut_indicator( data_type* dest , const data_type* source ) noexcept {
814 *(indicator_type*)dest = *(indicator_type*)source;
815 }
816
819 static inline width_type get_lut_width( size_type buffer_size ) noexcept {
820 return buffer_size <= (size_type)std::numeric_limits<std::uint8_t>::max() + 1
821 ? sizeof(std::uint8_t)
822 : buffer_size <= (size_type)std::numeric_limits<std::uint16_t>::max() + 1
823 ? sizeof(std::uint16_t)
824 : buffer_size <= (size_type)std::numeric_limits<std::uint32_t>::max() + 1
825 ? sizeof(std::uint32_t)
826 : sizeof(std::uint64_t)
827 ;
828 }
829
831 static inline bool is_lut_worth( size_type pot_lut_len , size_type string_len , bool lut_present , bool biased = true ) noexcept {
832 size_type threshold = biased ? ( lut_present ? string_len / 3u : string_len / 6u ) : string_len / 4u;
833 // Note pot_lut_len is supposed to underflow at '0'
834 return size_type( pot_lut_len - 1 ) < threshold;
835 }
836
838 static inline size_type determine_main_buffer_size( size_type data_len , size_type lut_len , width_type* lut_width ) noexcept {
839 size_type width_guess = get_lut_width( ++data_len ); // Don't forget, we need a terminating '\0', distinct from the lut indicator
840 data_len += lut_len * width_guess; // Add the estimated number of bytes from the lut
841 data_len += lut_len * ( ( *lut_width = get_lut_width( data_len ) ) - width_guess ); // Adjust the added bytes from the lut
842 return round_up_to_align( data_len ); // Make the buffer size_type-aligned
843 }
845 static inline size_type determine_main_buffer_size( size_type data_len , size_type lut_len , width_type lut_width ) noexcept {
846 return round_up_to_align( data_len + 1 + lut_len * lut_width ); // Compute the size_type-aligned buffer size
847 }
849 static inline size_type determine_main_buffer_size( size_type data_len ) noexcept {
850 return round_up_to_align( data_len + 1 ); // Make the buffer size_type-aligned
851 }
852
854 static inline size_type determine_total_buffer_size( size_type main_buffer_size ) noexcept {
855 return main_buffer_size + sizeof(indicator_type); // Add the lut indicator
856 }
857
859 static inline size_type get_lut( const data_type* iter , width_type lut_width ) noexcept {
860 switch( lut_width ){
861 case sizeof(std::uint8_t): return *(const std::uint8_t*)iter;
862 case sizeof(std::uint16_t): return *(const std::uint16_t*)iter;
863 case sizeof(std::uint32_t): return *(const std::uint32_t*)iter;
864 }
865 return (size_type)*(const std::uint64_t*)iter;
866 }
867 static inline void set_lut( data_type* iter , width_type lut_width , size_type value ) noexcept {
868 switch( lut_width ){
869 case sizeof(std::uint8_t): *(std::uint8_t*)iter = (std::uint8_t)value; break;
870 case sizeof(std::uint16_t): *(std::uint16_t*)iter = (std::uint16_t)value; break;
871 case sizeof(std::uint32_t): *(std::uint32_t*)iter = (std::uint32_t)value; break;
872 case sizeof(std::uint64_t): *(std::uint64_t*)iter = (std::uint64_t)value; break;
873 }
874 }
875
877 static inline size_type get_lut_len( const data_type* lut_base_ptr ) noexcept {
878 return *(indicator_type*)lut_base_ptr >> 1;
879 }
880
884 // Data left is the number of bytes left in the buffer INCLUDING this one
885 #if TINY_UTF8_HAS_CLZ
886 static inline width_type get_codepoint_bytes( data_type first_byte , size_type data_left ) noexcept
887 {
888 if( first_byte ){
889 // Before counting the leading one's we need to shift the byte into the most significant part of the integer
890 size_type codepoint_bytes = tiny_utf8_detail::clz( ~((unsigned int)first_byte << (sizeof(unsigned int)-1)*8 ) );
891
892 // The test below would actually be ( codepoint_bytes <= data_left && codepoint_bytes ),
893 // but codepoint_bytes is unsigned and thus wraps around zero, which makes the following faster:
894 if( size_type( codepoint_bytes - 1 ) < size_type(data_left) )
895 return (width_type)codepoint_bytes;
896 }
897 return 1;
898 }
899 #else
900 static width_type get_codepoint_bytes( data_type first_byte , size_type data_left ) noexcept ; // Defined in source file
901 #endif
902
906 static inline width_type get_codepoint_bytes( value_type cp ) noexcept
907 {
908 #if TINY_UTF8_HAS_CLZ
909 if( !cp )
910 return 1;
911 static const width_type lut[32] = {
912 1 , 1 , 1 , 1 , 1 , 1 , 1 , 2 , 2 , 2 , 2 , 3 , 3 , 3 , 3 , 3
913 , 4 , 4 , 4 , 4 , 4 , 5 , 5 , 5 , 5 , 5 , 6 , 6 , 6 , 6 , 6 , 7
914 };
915 return lut[ 31 - tiny_utf8_detail::clz( cp ) ];
916 #else
917 if( cp <= 0x7F )
918 return 1;
919 else if( cp <= 0x7FF )
920 return 2;
921 else if( cp <= 0xFFFF )
922 return 3;
923 else if( cp <= 0x1FFFFF )
924 return 4;
925 else if( cp <= 0x3FFFFFF )
926 return 5;
927 else if( cp <= 0x7FFFFFFF )
928 return 6;
929 else
930 return 7;
931 #endif
932 }
933
935 static width_type get_num_bytes_of_utf8_char_before( const data_type* data_start , size_type index ) noexcept ;
936
938 static inline value_type decode_utf8( const data_type* data , width_type num_bytes ) noexcept {
939 value_type cp = (unsigned char)*data;
940 if( num_bytes > 1 ){
941 cp &= 0x7F >> num_bytes; // Mask out the header bits
942 for( width_type i = 1 ; i < num_bytes ; i++ )
943 cp = ( cp << 6 ) | ( (unsigned char)data[i] & 0x3F );
944 }
945 return cp;
946 }
947
952 static inline width_type decode_utf8_and_len( const data_type* data , value_type& dest , size_type data_left ) noexcept {
953 // See 'get_codepoint_bytes' for 'data_left'
954 width_type num_bytes = basic_string::get_codepoint_bytes( *data , data_left );
955 dest = decode_utf8( data , num_bytes );
956 return num_bytes;
957 }
958
963 inline static void encode_utf8( value_type cp , data_type* dest , width_type cp_bytes ) noexcept {
964 switch( cp_bytes ){
965 case 7: dest[cp_bytes-6] = 0x80 | ((cp >> 30) & 0x3F); TINY_UTF8_FALLTHROUGH
966 case 6: dest[cp_bytes-5] = 0x80 | ((cp >> 24) & 0x3F); TINY_UTF8_FALLTHROUGH
967 case 5: dest[cp_bytes-4] = 0x80 | ((cp >> 18) & 0x3F); TINY_UTF8_FALLTHROUGH
968 case 4: dest[cp_bytes-3] = 0x80 | ((cp >> 12) & 0x3F); TINY_UTF8_FALLTHROUGH
969 case 3: dest[cp_bytes-2] = 0x80 | ((cp >> 6) & 0x3F); TINY_UTF8_FALLTHROUGH
970 case 2: dest[cp_bytes-1] = 0x80 | ((cp >> 0) & 0x3F);
971 dest[0] = (unsigned char)( ( std::uint_least16_t(0xFF00uL) >> cp_bytes ) | ( cp >> ( 6 * cp_bytes - 6 ) ) );
972 break;
973 case 1:
974 dest[0] = (unsigned char)cp;
975 break;
976 }
977 }
978
983 inline static width_type encode_utf8( value_type cp , data_type* dest ) noexcept {
984 width_type width = get_codepoint_bytes( cp );
985 basic_string::encode_utf8( cp , dest , width );
986 return width;
987 }
988
989 protected:
990
992 inline void set_non_sso_string_len( size_type string_len ) noexcept
993 {
994 // Check, if NON_SSO is larger than its members, in which case it's not ambiguated by SSO::data_len
995 if TINY_UTF8_CPP17(constexpr) ( offsetof(SSO, data_len) > offsetof(NON_SSO, string_len) + sizeof(NON_SSO::string_len) - 1 ){
996 t_non_sso.string_len = string_len;
997 t_sso.data_len = 0x1; // Manually set flag to deactivate SSO
998 }
1001 lb.number = string_len;
1002 lb.bytes.last <<= 1;
1003 lb.bytes.last |= 0x1;
1005 }
1006 else
1007 t_non_sso.string_len = ( string_len << 1 ) | size_type(0x1);
1008 }
1009
1011 inline size_type get_non_sso_string_len() const noexcept {
1012 // Check, if NON_SSO is larger than its members, in which case it's not ambiguated by SSO::data_len
1013 if( offsetof(SSO, data_len) > offsetof(NON_SSO, string_len) + sizeof(NON_SSO::string_len) - 1 )
1014 return t_non_sso.string_len;
1018 lb.bytes.last >>= 1;
1019 return lb.number;
1020 }
1021 else
1022 return t_non_sso.string_len >> 1;
1023 }
1024
1026 inline void set_sso_data_len( unsigned char data_len = 0 ) noexcept {
1027 t_sso.data_len = (unsigned char)( SSO::size - data_len ) << 1;
1028 }
1029
1031 inline size_type get_sso_data_len() const noexcept { return get_sso_capacity() - ( t_sso.data_len >> 1 ); }
1032
1034 size_type get_non_sso_capacity() const noexcept ;
1035
1037 inline bool sso_inactive() const noexcept { return t_sso.data_len & 0x1; }
1038
1039 // Helper for requires_unicode_sso that generates masks of the form 10000000 10000000...
1040 template<typename T>
1041 static constexpr T get_msb_mask( width_type bytes = sizeof(T) ) noexcept { return bytes ? ( T(1) << ( 8 * bytes - 1 ) ) | get_msb_mask<T>( bytes - 1 ) : T(0); }
1042
1044 bool requires_unicode_sso() const noexcept ;
1045
1047 inline const data_type* get_buffer() const noexcept { return sso_inactive() ? t_non_sso.data : t_sso.data; }
1048 inline data_type* get_buffer() noexcept { return sso_inactive() ? t_non_sso.data : t_sso.data; }
1049
1051 inline size_type get_buffer_size() const noexcept {
1053 }
1054
1056 std::basic_string<data_type> cpp_str_bom() const noexcept ;
1057
1059 inline data_type* allocate( size_type total_buffer_size ) const noexcept {
1060 using appropriate_allocator = typename std::allocator_traits<Allocator>::template rebind_alloc<size_type>;
1061 appropriate_allocator casted_allocator = (const Allocator&)*this;
1062 return reinterpret_cast<data_type*>(
1063 std::allocator_traits<appropriate_allocator>::allocate(
1064 casted_allocator
1065 , total_buffer_size / sizeof(size_type) * sizeof(data_type)
1066 )
1067 );
1068 }
1069
1071 inline void deallocate( data_type* buffer , size_type buffer_size ) const noexcept {
1072 using appropriate_allocator = typename std::allocator_traits<Allocator>::template rebind_alloc<size_type>;
1073 appropriate_allocator casted_allocator = (const Allocator&)*this;
1074 std::allocator_traits<appropriate_allocator>::deallocate(
1075 casted_allocator
1076 , reinterpret_cast<size_type*>( buffer )
1077 , basic_string::determine_total_buffer_size( buffer_size ) / sizeof(size_type) * sizeof(data_type)
1078 );
1079 }
1080
1084
1085 public:
1086
1093 noexcept(TINY_UTF8_NOEXCEPT && std::is_nothrow_default_constructible<Allocator>())
1094 : Allocator()
1095 , t_sso()
1096 {}
1102 explicit basic_string( const allocator_type& alloc )
1103 noexcept(TINY_UTF8_NOEXCEPT && std::is_nothrow_copy_constructible<Allocator>())
1104 : Allocator( alloc )
1105 , t_sso()
1106 {}
1116 template<typename T>
1117 inline basic_string( T&& str , const allocator_type& alloc = allocator_type() , enable_if_ptr<T, data_type>* = {} )
1118 noexcept(TINY_UTF8_NOEXCEPT)
1119 : basic_string( str , 0 , basic_string::npos , basic_string::npos , alloc , tiny_utf8_detail::read_codepoints_tag() )
1120 {}
1129 inline basic_string( const data_type* str , size_type len , const allocator_type& alloc = allocator_type() )
1130 noexcept(TINY_UTF8_NOEXCEPT)
1131 : basic_string( str , 0 , len , basic_string::npos , alloc , tiny_utf8_detail::read_codepoints_tag() )
1132 {}
1142 inline basic_string( const data_type* str , size_type pos , size_type len , const allocator_type& alloc = allocator_type() )
1143 noexcept(TINY_UTF8_NOEXCEPT)
1144 : basic_string( str , pos , len , basic_string::npos , alloc , tiny_utf8_detail::read_codepoints_tag() )
1145 {}
1155 template<size_type LITLEN>
1156 inline basic_string( const data_type (&str)[LITLEN] , const allocator_type& alloc = allocator_type() , enable_if_small_string<LITLEN> = {} )
1157 noexcept(TINY_UTF8_NOEXCEPT && std::is_nothrow_copy_constructible<Allocator>())
1158 : Allocator( alloc )
1159 {
1160 std::memcpy( t_sso.data , str , LITLEN );
1161 if( str[LITLEN-1] ){
1162 t_sso.data[LITLEN] = '\0';
1163 set_sso_data_len( LITLEN );
1164 }
1165 else
1166 set_sso_data_len( LITLEN - 1 );
1167 }
1168 template<size_type LITLEN>
1169 inline basic_string( const data_type (&str)[LITLEN] , const allocator_type& alloc = allocator_type() , enable_if_not_small_string<LITLEN> = {} )
1170 noexcept(TINY_UTF8_NOEXCEPT)
1171 : basic_string( str , LITLEN - ( str[LITLEN-1] ? 0 : 1 ) , alloc , tiny_utf8_detail::read_bytes_tag() )
1172 {}
1181 template<size_type LITLEN>
1182 inline basic_string( const data_type (&str)[LITLEN] , size_type len , const allocator_type& alloc = allocator_type() )
1183 noexcept(TINY_UTF8_NOEXCEPT)
1184 : basic_string( str , 0 , len , LITLEN - ( str[LITLEN-1] ? 0 : 1 ) , alloc , tiny_utf8_detail::read_codepoints_tag() )
1185 {}
1195 template<size_type LITLEN>
1196 inline basic_string( const data_type (&str)[LITLEN] , size_type pos , size_type len , const allocator_type& alloc = allocator_type() )
1197 noexcept(TINY_UTF8_NOEXCEPT)
1198 : basic_string( str , pos , len , LITLEN - ( str[LITLEN-1] ? 0 : 1 ) , alloc , tiny_utf8_detail::read_codepoints_tag() )
1199 {}
1207 template<typename C, typename A>
1208 inline basic_string( std::basic_string<data_type, C, A> str , const allocator_type& alloc = allocator_type() )
1209 noexcept(TINY_UTF8_NOEXCEPT)
1210 : basic_string( str.data() , str.size() , alloc , tiny_utf8_detail::read_bytes_tag() )
1211 {}
1220 template<typename C, typename A>
1221 inline basic_string( std::basic_string<data_type, C, A> str , size_type len , const allocator_type& alloc = allocator_type() )
1222 noexcept(TINY_UTF8_NOEXCEPT)
1223 : basic_string( str.data() , 0 , len , str.size() , alloc , tiny_utf8_detail::read_codepoints_tag() )
1224 {}
1225 template<typename C, typename A>
1226 inline basic_string( std::basic_string<data_type, C, A> str , size_type pos , size_type len , const allocator_type& alloc = allocator_type() )
1227 noexcept(TINY_UTF8_NOEXCEPT)
1228 : basic_string( str.data() , pos , len , str.size() , alloc , tiny_utf8_detail::read_codepoints_tag() )
1229 {}
1257 noexcept(TINY_UTF8_NOEXCEPT)
1258 : basic_string( str.substr( pos , count ) , alloc )
1259 {}
1268 template<typename InputIt>
1269 basic_string( InputIt first , InputIt last , const allocator_type& alloc = allocator_type() )
1270 noexcept(TINY_UTF8_NOEXCEPT)
1271 : Allocator( alloc )
1272 , t_sso()
1273 {
1274 while( first != last ) push_back( *first++ );
1275 }
1283 noexcept(TINY_UTF8_NOEXCEPT)
1284 : Allocator( (const allocator_type&)str )
1285 {
1286 std::memcpy( (void*)&this->t_sso , (void*)&str.t_sso , sizeof(SSO) ); // Copy data
1287
1288 // Create a new buffer, if sso is not active
1289 if( str.sso_inactive() ){
1291 t_non_sso.data = this->allocate( total_buffer_size );
1292 std::memcpy( t_non_sso.data , str.t_non_sso.data , total_buffer_size );
1293 }
1294 }
1302 basic_string( const basic_string& str , const allocator_type& alloc )
1303 noexcept(TINY_UTF8_NOEXCEPT)
1304 : Allocator( alloc )
1305 {
1306 std::memcpy( (void*)&this->t_sso , (void*)&str.t_sso , sizeof(SSO) ); // Copy data
1307
1308 // Create a new buffer, if sso is not active
1309 if( str.sso_inactive() ){
1311 t_non_sso.data = this->allocate( total_buffer_size );
1312 std::memcpy( t_non_sso.data , str.t_non_sso.data , total_buffer_size );
1313 }
1314 }
1323 basic_string( const value_type* str , size_type len , const allocator_type& alloc = allocator_type() ) noexcept(TINY_UTF8_NOEXCEPT) ;
1324 template<typename T>
1325 basic_string( T&& str , const allocator_type& alloc = allocator_type() , enable_if_ptr<T, value_type>* = {} )
1326 noexcept(TINY_UTF8_NOEXCEPT)
1327 : basic_string( str , basic_string::npos , alloc )
1328 {}
1329 template<size_type LITLEN>
1330 inline basic_string( const value_type (&str)[LITLEN] , const allocator_type& alloc = allocator_type() )
1331 noexcept(TINY_UTF8_NOEXCEPT)
1332 : basic_string( str , LITLEN - ( str[LITLEN-1] ? 0 : 1 ) , alloc )
1333 {}
1340 inline basic_string( std::initializer_list<value_type> ilist , const allocator_type& alloc = allocator_type() )
1341 noexcept(TINY_UTF8_NOEXCEPT)
1342 : basic_string( ilist.begin() , ilist.end() , alloc )
1343 {}
1350 explicit inline basic_string( value_type cp , const allocator_type& alloc = allocator_type() )
1351 noexcept(TINY_UTF8_NOEXCEPT)
1352 : Allocator( alloc )
1353 {
1354 t_sso.data[ cp = encode_utf8( cp , t_sso.data ) ] = '\0';
1355 set_sso_data_len( cp );
1356 }
1363 explicit inline basic_string( data_type ch , const allocator_type& alloc = allocator_type() )
1364 noexcept(TINY_UTF8_NOEXCEPT)
1365 : Allocator( alloc )
1366 , t_sso( ch )
1367 {}
1376 noexcept(TINY_UTF8_NOEXCEPT && std::is_nothrow_move_constructible<Allocator>())
1377 : Allocator( (allocator_type&&)str )
1378 {
1379 std::memcpy( (void*)&this->t_sso , (void*)&str.t_sso , sizeof(SSO) ); // Copy data
1380 str.set_sso_data_len( 0u ); // Reset old string and enable its SSO-mode (which makes it not care about the buffer anymore)
1381 }
1389 inline basic_string( basic_string&& str , const allocator_type& alloc )
1390 noexcept(TINY_UTF8_NOEXCEPT && std::is_nothrow_copy_constructible<Allocator>())
1391 : Allocator( alloc )
1392 {
1393 std::memcpy( (void*)&this->t_sso , (void*)&str.t_sso , sizeof(SSO) ); // Copy data
1394 str.set_sso_data_len( 0u ); // Reset old string and enable its SSO-mode (which makes it not care about the buffer anymore)
1395 }
1396
1397
1403 inline ~basic_string() noexcept { clear(); }
1404
1405
1422 inline basic_string& operator=( basic_string&& str ) noexcept(TINY_UTF8_NOEXCEPT && std::is_nothrow_move_assignable<Allocator>()) {
1423 if( &str != this ){
1424 clear(); // Reset old data
1425 (allocator_type&)*this = (allocator_type&&)str; // Move allocator
1426 std::memcpy( (void*)&this->t_sso , (void*)&str.t_sso , sizeof(SSO) ); // Copy data
1427 str.set_sso_data_len(0); // Reset old string and enable its SSO-mode (which makes it not care about the buffer anymore)
1428 }
1429 return *this;
1430 }
1431
1432
1438 inline void clear() noexcept {
1439 if( sso_inactive() )
1441 set_sso_data_len( 0 );
1442 t_sso.data[0] = 0;
1443 }
1444
1445
1451 allocator_type get_allocator() const noexcept(TINY_UTF8_NOEXCEPT && std::is_nothrow_copy_constructible<Allocator>()) { return (const allocator_type&)*this; }
1452
1453
1457 void shrink_to_fit() noexcept(TINY_UTF8_NOEXCEPT) ;
1458
1459
1466 inline void swap( basic_string& str ) noexcept(TINY_UTF8_NOEXCEPT && std::is_nothrow_move_assignable<Allocator>()) {
1467 if( &str != this ){
1468 data_type tmp[sizeof(SSO)];
1469 std::memcpy( &tmp , (void*)&str.t_sso , sizeof(SSO) );
1470 std::memcpy( (void*)&str , (void*)&this->t_sso , sizeof(SSO) );
1471 std::memcpy( (void*)&this->t_sso , &tmp , sizeof(SSO) );
1472 std::swap( (allocator_type&)*this , (allocator_type&)str ); // Swap Allocators
1473 }
1474 }
1475
1476
1482 inline size_type capacity() const noexcept {
1484 }
1485
1486
1493 inline value_type at( size_type n ) const noexcept(TINY_UTF8_NOEXCEPT) {
1494 return raw_at( get_num_bytes_from_start( n ) );
1495 }
1496 inline value_type at( size_type n , std::nothrow_t ) const noexcept {
1497 return raw_at( get_num_bytes_from_start( n ) , std::nothrow );
1498 }
1499 inline checked_reference at( size_type n ) noexcept(TINY_UTF8_NOEXCEPT) { return { n , this }; }
1500 inline reference at( size_type n , std::nothrow_t ) noexcept { return { n , this }; }
1509 inline raw_checked_reference raw_at( size_type byte_index ) noexcept(TINY_UTF8_NOEXCEPT) { return { byte_index , this }; }
1510 inline raw_reference raw_at( size_type byte_index , std::nothrow_t ) noexcept { return { byte_index , this }; }
1511 value_type raw_at( size_type byte_index ) const noexcept(TINY_UTF8_NOEXCEPT) {
1512 size_type size = this->size();
1513 if( byte_index >= size ){
1514 TINY_UTF8_THROW( "tiny_utf8::basic_string::(raw_)at" , byte_index >= size );
1515 return 0;
1516 }
1517 const data_type* pos = get_buffer() + byte_index;
1518 return *pos ? decode_utf8( pos , basic_string::get_codepoint_bytes( *pos , size - byte_index ) ) : 0;
1519 }
1520 value_type raw_at( size_type byte_index , std::nothrow_t ) const noexcept {
1521 const data_type* pos = get_buffer() + byte_index;
1522 return *pos ? decode_utf8( pos , basic_string::get_codepoint_bytes( *pos , size() - byte_index ) ) : 0;
1523 }
1524
1525
1532 inline iterator get( size_type n ) noexcept { return { (difference_type)n , this }; }
1533 inline const_iterator get( size_type n ) const noexcept { return { (difference_type)n , this }; }
1542 inline raw_iterator raw_get( size_type n ) noexcept { return { (difference_type)n , this }; }
1543 inline raw_const_iterator raw_get( size_type n ) const noexcept { return { (difference_type)n , this }; }
1544
1545
1552 inline reverse_iterator rget( size_type n ) noexcept { return { (difference_type)n , this }; }
1553 inline const_reverse_iterator rget( size_type n ) const noexcept { return { (difference_type)n , this }; }
1562 inline raw_reverse_iterator raw_rget( size_type n ) noexcept { return { (difference_type)n , this }; }
1563 inline raw_const_reverse_iterator raw_rget( size_type n ) const noexcept { return { (difference_type)n , this }; }
1564
1565
1572 inline reference operator[]( size_type n ) noexcept { return { n , this }; }
1573 inline value_type operator[]( size_type n ) const noexcept { return at( n , std::nothrow ); }
1582 inline raw_reference operator()( size_type n ) noexcept { return { n , this }; }
1583 inline value_type operator()( size_type n ) const noexcept { return raw_at( n , std::nothrow ); }
1584
1585
1592 inline const data_type* c_str() const noexcept { return get_buffer(); }
1593 inline const data_type* data() const noexcept { return get_buffer(); }
1594 inline data_type* data() noexcept { return get_buffer(); }
1595
1596
1604 inline size_type length() const noexcept { return sso_inactive() ? get_non_sso_string_len() : get_num_codepoints( 0 , get_sso_data_len() ); }
1605
1606
1614 inline size_type size() const noexcept { return sso_inactive() ? t_non_sso.data_len : get_sso_data_len(); }
1615
1616
1623 inline bool empty() const noexcept { return sso_inactive() ? !t_non_sso.data_len : t_sso.data_len == (get_sso_capacity() << 1); }
1624
1625
1626
1627
1628
1634 inline iterator begin() noexcept { return { 0 , this }; }
1635 inline const_iterator begin() const noexcept { return { 0 , this }; }
1636 inline raw_iterator raw_begin() noexcept { return { 0 , this }; }
1637 inline raw_const_iterator raw_begin() const noexcept { return { 0 , this }; }
1643 inline iterator end() noexcept { return { (difference_type)length() , this }; }
1644 inline const_iterator end() const noexcept { return { (difference_type)length() , this }; }
1645 inline raw_iterator raw_end() noexcept { return { (difference_type)size() , this }; }
1646 inline raw_const_iterator raw_end() const noexcept { return { (difference_type)size() , this }; }
1647
1654 inline reverse_iterator rbegin() noexcept { return { (difference_type)length() - 1 , this }; }
1655 inline const_reverse_iterator rbegin() const noexcept { return { (difference_type)length() - 1 , this }; }
1656 inline raw_reverse_iterator raw_rbegin() noexcept { return { (difference_type)raw_back_index() , this }; }
1657 inline raw_const_reverse_iterator raw_rbegin() const noexcept { return { (difference_type)raw_back_index() , this }; }
1664 inline reverse_iterator rend() noexcept { return { -1 , this }; }
1665 inline const_reverse_iterator rend() const noexcept { return { -1 , this }; }
1666 inline raw_reverse_iterator raw_rend() noexcept { return { -1 , this }; }
1667 inline raw_const_reverse_iterator raw_rend() const noexcept { return { -1 , this }; }
1668
1669
1676 inline const_iterator cbegin() const noexcept { return { 0 , this }; }
1677 inline raw_const_iterator raw_cbegin() const noexcept { return { 0 , this }; }
1684 inline const_iterator cend() const noexcept { return { (difference_type)length() , this }; }
1685 inline raw_const_iterator raw_cend() const noexcept { return { (difference_type)size() , this }; }
1686
1687
1694 inline const_reverse_iterator crbegin() const noexcept { return { (difference_type)length() - 1 , this }; }
1695 inline raw_const_reverse_iterator raw_crbegin() const noexcept { return { (difference_type)raw_back_index() , this }; }
1702 inline const_reverse_iterator crend() const noexcept { return { -1 , this }; }
1703 inline raw_const_reverse_iterator raw_crend() const noexcept { return { -1 , this }; }
1704
1705
1711 inline raw_reference front() noexcept { return { 0 , this }; }
1712 inline value_type front() const noexcept { return raw_at( 0 , std::nothrow ); }
1718 inline raw_reference back() noexcept { return { raw_back_index() , this }; }
1719 inline value_type back() const noexcept {
1720 size_type my_size = size();
1721 const data_type* buffer = get_buffer();
1722 width_type bytes = get_num_bytes_of_utf8_char_before( buffer , my_size );
1723 return decode_utf8( buffer + my_size - bytes , bytes );
1724 }
1725
1726
1736 inline basic_string& replace( size_type index , value_type repl , size_type n = 1 ) noexcept(TINY_UTF8_NOEXCEPT) {
1737 return replace( index , 1 , repl , n );
1738 }
1748 inline basic_string& replace( size_type index , size_type len , value_type repl , size_type n ) noexcept(TINY_UTF8_NOEXCEPT) {
1749 return replace( index , len , basic_string( n , repl ) );
1750 }
1751 inline basic_string& replace( size_type index , size_type len , value_type repl ) noexcept(TINY_UTF8_NOEXCEPT) {
1752 return replace( index , len , basic_string( repl ) );
1753 }
1764 return raw_replace( first.get_raw_index() , last.get_raw_index() - first.get_raw_index() , basic_string( n , repl ) );
1765 }
1767 return raw_replace( first.get_raw_index() , last.get_raw_index() - first.get_raw_index() , basic_string( repl ) );
1768 }
1769 inline basic_string& replace( raw_iterator first , iterator last , value_type repl , size_type n ) noexcept(TINY_UTF8_NOEXCEPT) { return replace( first , (raw_iterator)last , repl , n ); }
1770 inline basic_string& replace( iterator first , raw_iterator last , value_type repl , size_type n ) noexcept(TINY_UTF8_NOEXCEPT) { return replace( (raw_iterator)first , last , repl , n ); }
1771 inline basic_string& replace( iterator first , iterator last , value_type repl , size_type n ) noexcept(TINY_UTF8_NOEXCEPT) { return replace( (raw_iterator)first , (raw_iterator)last , repl , n ); }
1772 inline basic_string& replace( raw_iterator first , iterator last , value_type repl ) noexcept(TINY_UTF8_NOEXCEPT) { return replace( first , (raw_iterator)last , repl ); }
1773 inline basic_string& replace( iterator first , raw_iterator last , value_type repl ) noexcept(TINY_UTF8_NOEXCEPT) { return replace( (raw_iterator)first , last , repl ); }
1774 inline basic_string& replace( iterator first , iterator last , value_type repl ) noexcept(TINY_UTF8_NOEXCEPT) { return replace( (raw_iterator)first , (raw_iterator)last , repl ); }
1783 inline basic_string& replace( raw_iterator first , raw_iterator last , const basic_string& repl ) noexcept(TINY_UTF8_NOEXCEPT) {
1784 return raw_replace( first.get_raw_index() , last.get_raw_index() - first.get_raw_index() , repl );
1785 }
1786 inline basic_string& replace( raw_iterator first , iterator last , const basic_string& repl ) noexcept(TINY_UTF8_NOEXCEPT) { return replace( first , (raw_iterator)last , repl ); }
1787 inline basic_string& replace( iterator first , raw_iterator last , const basic_string& repl ) noexcept(TINY_UTF8_NOEXCEPT) { return replace( (raw_iterator)first , last , repl ); }
1788 inline basic_string& replace( iterator first , iterator last , const basic_string& repl ) noexcept(TINY_UTF8_NOEXCEPT) { return replace( (raw_iterator)first , (raw_iterator)last , repl ); }
1797 inline basic_string& replace( size_type index , size_type count , const basic_string& repl ) noexcept(TINY_UTF8_NOEXCEPT) {
1798 size_type start_byte = get_num_bytes_from_start( index );
1799 return raw_replace(
1800 start_byte
1801 , count == basic_string::npos ? basic_string::npos : get_num_bytes( start_byte , count )
1802 , repl
1803 );
1804 }
1815 basic_string& raw_replace( size_type start_byte , size_type byte_count , const basic_string& repl ) noexcept(TINY_UTF8_NOEXCEPT) ;
1816
1817
1824 inline basic_string& prepend( const basic_string& prependix ) noexcept(TINY_UTF8_NOEXCEPT) { return raw_insert( 0 , prependix ); }
1825
1832 basic_string& append( const basic_string& appendix ) noexcept(TINY_UTF8_NOEXCEPT) ;
1833 inline basic_string& operator+=( const basic_string& appendix ) noexcept(TINY_UTF8_NOEXCEPT) { return append( appendix ); }
1834
1835
1842 inline basic_string& push_back( value_type cp ) noexcept(TINY_UTF8_NOEXCEPT) { return append( basic_string( cp ) ); }
1843 inline basic_string& operator+=( value_type cp ) noexcept(TINY_UTF8_NOEXCEPT) { return append( basic_string( cp ) ); }
1844
1845
1852 // with basic_string in both operands
1853 inline basic_string operator+( basic_string summand ) const & noexcept(TINY_UTF8_NOEXCEPT) { summand.prepend( *this ); return summand; }
1854 inline basic_string operator+( const basic_string& summand ) && noexcept(TINY_UTF8_NOEXCEPT) { append( summand ); return static_cast<basic_string&&>( *this ); }
1855
1856 // with basic_string as first operand
1857 friend inline basic_string operator+( basic_string lhs , data_type rhs ) noexcept(TINY_UTF8_NOEXCEPT) { lhs.push_back( rhs ); return lhs; }
1858 friend inline basic_string operator+( basic_string lhs , value_type rhs ) noexcept(TINY_UTF8_NOEXCEPT) { lhs.push_back( rhs ); return lhs; }
1859 template<typename T> friend inline enable_if_ptr<T, data_type, basic_string> operator+( basic_string lhs , T&& rhs ) noexcept(TINY_UTF8_NOEXCEPT) { lhs.append( basic_string( rhs ) ); return lhs; }
1860 template<typename T> friend inline enable_if_ptr<T, value_type, basic_string> operator+( basic_string lhs , T&& rhs ) noexcept(TINY_UTF8_NOEXCEPT) { lhs.append( basic_string( rhs ) ); return lhs; }
1861 template<size_type LITLEN> friend inline basic_string operator+( basic_string lhs , const data_type (&rhs)[LITLEN] ) noexcept(TINY_UTF8_NOEXCEPT) { lhs.append( basic_string( rhs ) ); return lhs; }
1862 template<size_type LITLEN> friend inline basic_string operator+( basic_string lhs , const value_type (&rhs)[LITLEN] ) noexcept(TINY_UTF8_NOEXCEPT) { lhs.append( basic_string( rhs ) ); return lhs; }
1863
1864 // With basic_string as second operand
1865 friend inline basic_string operator+( data_type lhs , basic_string rhs ) noexcept(TINY_UTF8_NOEXCEPT) { rhs.raw_insert( 0 , lhs ); return rhs; }
1866 friend inline basic_string operator+( value_type lhs , basic_string rhs ) noexcept(TINY_UTF8_NOEXCEPT) { rhs.raw_insert( 0 , lhs ); return rhs; }
1867 template<typename T> friend inline enable_if_ptr<T, data_type, basic_string> operator+( T&& lhs , basic_string rhs ) noexcept(TINY_UTF8_NOEXCEPT) { rhs.prepend( basic_string( lhs ) ); return rhs; }
1868 template<typename T> friend inline enable_if_ptr<T, value_type, basic_string> operator+( T&& lhs , basic_string rhs ) noexcept(TINY_UTF8_NOEXCEPT) { rhs.prepend( basic_string( lhs ) ); return rhs; }
1869 template<size_type LITLEN> friend inline basic_string operator+( const data_type (&lhs)[LITLEN] , basic_string rhs ) noexcept(TINY_UTF8_NOEXCEPT) { rhs.prepend( basic_string( lhs ) ); return rhs; }
1870 template<size_type LITLEN> friend inline basic_string operator+( const value_type (&lhs)[LITLEN] , basic_string rhs ) noexcept(TINY_UTF8_NOEXCEPT) { rhs.prepend( basic_string( lhs ) ); return rhs; }
1871
1872
1881 return *this = basic_string( count , cp );
1882 }
1889 inline basic_string& assign( const basic_string& str ) noexcept(TINY_UTF8_NOEXCEPT) {
1890 return *this = str;
1891 }
1900 inline basic_string& assign( const basic_string& str , size_type pos , size_type count ) noexcept(TINY_UTF8_NOEXCEPT) {
1901 return *this = basic_string( str , pos , count );
1902 }
1909 inline basic_string& assign( basic_string&& str ) noexcept(TINY_UTF8_NOEXCEPT && std::is_nothrow_move_assignable<Allocator>()) {
1910 return *this = std::move(str);
1911 }
1918 template<typename T>
1920 return *this = basic_string( str );
1921 }
1922 inline basic_string& assign( const data_type* str , size_type len ) noexcept(TINY_UTF8_NOEXCEPT) {
1923 return *this = basic_string( str , len );
1924 }
1930 template<size_type LITLEN>
1931 inline basic_string& assign( const data_type (&str)[LITLEN] ) noexcept(TINY_UTF8_NOEXCEPT) {
1932 return *this = basic_string( str );
1933 }
1940 template<typename T>
1942 return *this = basic_string( str );
1943 }
1944 inline basic_string& assign( const value_type* str , size_type len ) noexcept(TINY_UTF8_NOEXCEPT) {
1945 return *this = basic_string( str , len );
1946 }
1952 template<size_type LITLEN>
1953 inline basic_string& assign( const value_type (&str)[LITLEN] ) noexcept(TINY_UTF8_NOEXCEPT) {
1954 return *this = basic_string( str );
1955 }
1963 template<typename InputIt>
1964 inline basic_string& assign( InputIt first , InputIt last ) noexcept(TINY_UTF8_NOEXCEPT) {
1965 return *this = basic_string( first , last );
1966 }
1973 inline basic_string& assign( std::initializer_list<value_type> ilist ) noexcept(TINY_UTF8_NOEXCEPT) {
1974 return *this = basic_string( std::move(ilist) );
1975 }
1976
1977
1986 return raw_insert( get_num_bytes_from_start( pos ) , cp );
1987 }
1995 inline basic_string& insert( size_type pos , const basic_string& str ) noexcept(TINY_UTF8_NOEXCEPT) {
1996 return raw_insert( get_num_bytes_from_start( pos ) , str );
1997 }
2006 return raw_insert( it.get_raw_index() , basic_string( cp ) );
2007 }
2015 inline basic_string& insert( raw_iterator it , const basic_string& str ) noexcept(TINY_UTF8_NOEXCEPT) {
2016 return raw_insert( it.get_raw_index() , str );
2017 }
2038 return raw_insert( pos , basic_string( cp ) );
2039 }
2040
2041
2044 size_type pos = raw_back_index();
2045 return raw_erase( pos , get_index_bytes( pos ) );
2046 }
2047
2048
2056 return raw_erase( pos.get_raw_index() , get_index_bytes( pos.get_raw_index() ) );
2057 }
2066 return raw_erase( first.get_raw_index() , last.get_raw_index() - first.get_raw_index() );
2067 }
2068 inline basic_string& erase( raw_iterator first , iterator last ) noexcept(TINY_UTF8_NOEXCEPT) { return erase( first , (raw_iterator)last ); }
2069 inline basic_string& erase( iterator first , raw_iterator last ) noexcept(TINY_UTF8_NOEXCEPT) { return erase( (raw_iterator)first , last ); }
2070 inline basic_string& erase( iterator first , iterator last ) noexcept(TINY_UTF8_NOEXCEPT) { return erase( (raw_iterator)first , (raw_iterator)last ); }
2078 inline basic_string& erase( size_type pos , size_type len = 1 ) noexcept(TINY_UTF8_NOEXCEPT) {
2079 size_type start_byte = get_num_bytes_from_start( pos );
2080 return raw_erase( start_byte , get_num_bytes( start_byte , len ) );
2081 }
2092
2093
2101 inline basic_string substr( raw_iterator first , raw_iterator last ) const noexcept(TINY_UTF8_NOEXCEPT) {
2102 size_type byte_count = last.get_raw_index() - first.get_raw_index();
2103 return raw_substr( first.get_raw_index() , byte_count );
2104 }
2105 inline basic_string substr( raw_iterator first , iterator last ) noexcept(TINY_UTF8_NOEXCEPT) { return substr( first , (raw_iterator)last ); }
2106 inline basic_string substr( iterator first , raw_iterator last ) noexcept(TINY_UTF8_NOEXCEPT) { return substr( (raw_iterator)first , last ); }
2107 inline basic_string substr( iterator first , iterator last ) noexcept(TINY_UTF8_NOEXCEPT) { return substr( (raw_iterator)first , (raw_iterator)last ); }
2116 size_type byte_start = get_num_bytes_from_start( pos );
2117 if( len == basic_string::npos )
2118 return raw_substr( byte_start , basic_string::npos );
2119 size_type byte_count = get_num_bytes( byte_start , len );
2120 return raw_substr( byte_start , byte_count );
2121 }
2131 basic_string raw_substr( size_type start_byte , size_type byte_count ) const noexcept(TINY_UTF8_NOEXCEPT) ;
2132
2133
2141 size_type find( value_type cp , size_type start_codepoint = 0 ) const noexcept {
2142 if( sso_inactive() && start_codepoint >= length() ) // length() is only O(1), if sso is inactive
2143 return basic_string::npos;
2144 for( const_iterator it = get(start_codepoint) , end = cend() ; it != end ; ++it, ++start_codepoint )
2145 if( *it == cp )
2146 return start_codepoint;
2147 return basic_string::npos;
2148 }
2156 size_type find( const basic_string& pattern , size_type start_codepoint = 0 ) const noexcept {
2157 if( sso_inactive() && start_codepoint >= length() ) // length() is only O(1), if sso is inactive
2158 return basic_string::npos;
2159 size_type actual_start = get_num_bytes_from_start( start_codepoint );
2160 const data_type* buffer = get_buffer();
2161 const data_type* result = std::strstr( buffer + actual_start , pattern.data() );
2162 if( !result )
2163 return basic_string::npos;
2164 return start_codepoint + get_num_codepoints( actual_start , result - ( buffer + actual_start ) );
2165 }
2173 size_type find( const data_type* pattern , size_type start_codepoint = 0 ) const noexcept {
2174 if( sso_inactive() && start_codepoint >= length() ) // length() is only O(1), if sso is inactive
2175 return basic_string::npos;
2176 size_type actual_start = get_num_bytes_from_start( start_codepoint );
2177 const data_type* buffer = get_buffer();
2178 const data_type* result = std::strstr( buffer + actual_start , pattern );
2179 if( !result )
2180 return basic_string::npos;
2181 return start_codepoint + get_num_codepoints( actual_start , result - ( buffer + actual_start ) );
2182 }
2190 size_type raw_find( value_type cp , size_type start_byte = 0 ) const noexcept {
2191 size_type my_size = size();
2192 if( start_byte >= my_size )
2193 return basic_string::npos;
2194 for( const_iterator it = raw_get(start_byte) , end = raw_get(my_size) ; it != end ; ++it )
2195 if( *it == cp )
2196 return it - begin();
2197 return basic_string::npos;
2198 }
2206 size_type raw_find( const basic_string& pattern , size_type start_byte = 0 ) const noexcept {
2207 if( start_byte >= size() )
2208 return basic_string::npos;
2209 const data_type* buffer = get_buffer();
2210 const data_type* result = std::strstr( buffer + start_byte , pattern.data() );
2211 if( !result )
2212 return basic_string::npos;
2213 return result - buffer;
2214 }
2222 size_type raw_find( const data_type* pattern , size_type start_byte = 0 ) const noexcept {
2223 if( start_byte >= size() )
2224 return basic_string::npos;
2225 const data_type* buffer = get_buffer();
2226 const data_type* result = std::strstr( buffer + start_byte , pattern );
2227 if( !result )
2228 return basic_string::npos;
2229 return result - buffer;
2230 }
2231
2240 size_type rfind( value_type cp , size_type start_codepoint = basic_string::npos ) const noexcept {
2242 size_type string_len = length();
2243 if( start_codepoint >= string_len )
2244 it = crbegin(), start_codepoint = string_len - 1;
2245 else
2246 it = rget( start_codepoint );
2247 for( ; it != end ; ++it, --start_codepoint )
2248 if( *it == cp )
2249 return start_codepoint;
2250 return basic_string::npos;
2251 }
2260 size_type raw_rfind( value_type cp , size_type start_byte = basic_string::npos ) const noexcept ;
2261
2263 size_type find_first_of( const value_type* str , size_type start_codepoint = 0 ) const noexcept ;
2264 size_type raw_find_first_of( const value_type* str , size_type start_byte = 0 ) const noexcept ;
2265 size_type find_last_of( const value_type* str , size_type start_codepoint = basic_string::npos ) const noexcept ;
2266 size_type raw_find_last_of( const value_type* str , size_type start_byte = basic_string::npos ) const noexcept ;
2267
// Declarations only; the definitions are not part of this section.
// Same parameter layout as the find_*_of family: a pointer to codepoints ('str') plus a start
// position that defaults to 0 for the forward and to basic_string::npos for the backward variants.
// NOTE(review): presumably these locate the first/last codepoint that is NOT contained in 'str';
// the 'raw_' variants presumably work with byte indices -- confirm against the definitions.
2269 size_type find_first_not_of( const value_type* str , size_type start_codepoint = 0 ) const noexcept ;
2270 size_type raw_find_first_not_of( const value_type* str , size_type start_byte = 0 ) const noexcept ;
2271 size_type find_last_not_of( const value_type* str , size_type start_codepoint = basic_string::npos ) const noexcept ;
2272 size_type raw_find_last_not_of( const value_type* str , size_type start_byte = basic_string::npos ) const noexcept ;
2273
2274
2281 inline bool starts_with( const basic_string& str ) const noexcept {
2282 size_type my_size = size(), str_size = str.size();
2283 return my_size >= str_size && std::memcmp( data() , str.data() , str_size ) == 0;
2284 }
2291 inline bool starts_with( const std::string& str ) const noexcept {
2292 size_type my_size = size(), str_size = str.size();
2293 return my_size >= str_size && std::memcmp( data() , str.data() , str_size ) == 0;
2294 }
2301 inline bool starts_with( value_type cp ) const noexcept {
2302 return !empty() && front() == cp;
2303 }
2310 template<typename T>
2311 bool starts_with( T str , enable_if_ptr<T, data_type>* = {} ) const noexcept {
2312 size_type my_size = size(), str_size = std::strlen( str );
2313 if( my_size < str_size )
2314 return false;
2315 for( const data_type* my_data = data() ; *str && *str == *my_data ; ++str, ++my_data );
2316 return !*str;
2317 }
2324 template<size_type LITLEN>
2325 bool starts_with( const data_type (&str)[LITLEN] ) const noexcept {
2326 size_type my_size = size(), str_size = str[LITLEN-1] ? LITLEN : LITLEN-1;
2327 return my_size >= str_size && std::memcmp( data() , str , str_size ) == 0;
2328 }
2335 template<typename T>
2336 bool starts_with( T str , enable_if_ptr<T, value_type>* = {} ) const noexcept {
2337 for( const_iterator it = cbegin(), end = cend() ; *str && it != end && *str == *it ; ++str, ++it );
2338 return !*str;
2339 }
2346 template<size_type LITLEN>
2347 bool starts_with( const value_type (&str)[LITLEN] ) const noexcept {
2348 size_type str_len = str[LITLEN-1] ? LITLEN : LITLEN-1;
2349 const_iterator it = cbegin(), end = cend();
2350 while( it != end && str_len ){
2351 if( *it != *str )
2352 return false;
2353 ++it, ++str, --str_len;
2354 }
2355 return !str_len;
2356 }
2357
2358
2365 inline bool ends_with( const basic_string& str ) const noexcept {
2366 size_type my_size = size(), str_size = str.size();
2367 return my_size >= str_size && std::memcmp( data() + my_size - str_size , str.data() , str_size ) == 0;
2368 }
2375 inline bool ends_with( const std::string& str ) const noexcept {
2376 size_type my_size = size(), str_size = str.size();
2377 return my_size >= str_size && std::memcmp( data() + my_size - str_size , str.data() , str_size ) == 0;
2378 }
2385 inline bool ends_with( value_type cp ) const noexcept {
2386 return !empty() && back() == cp;
2387 }
2394 template<typename T>
2395 bool ends_with( T str , enable_if_ptr<T, data_type>* = {} ) const noexcept {
2396 size_type my_size = size(), str_size = std::strlen(str);
2397 return my_size >= str_size && std::memcmp( data() + my_size - str_size , str , str_size ) == 0;
2398 }
2405 template<size_type LITLEN>
2406 bool ends_with( const data_type (&str)[LITLEN] ) const noexcept {
2407 size_type my_size = size(), str_size = str[LITLEN-1] ? LITLEN : LITLEN-1;
2408 return my_size >= str_size && std::memcmp( data() + my_size - str_size , str , str_size ) == 0;
2409 }
2416 template<typename T>
2417 bool ends_with( T str , enable_if_ptr<T, value_type>* = {} ) const noexcept {
2418 size_type str_len = tiny_utf8_detail::strlen( str );
2420 while( it != end && str_len ){
2421 if( *it != str[--str_len] )
2422 return false;
2423 ++it;
2424 }
2425 return !str_len;
2426 }
2433 template<size_type LITLEN>
2434 bool ends_with( const value_type (&str)[LITLEN] ) const noexcept {
2435 size_type str_len = str[LITLEN-1] ? LITLEN : LITLEN-1;
2437 while( it != end && str_len ){
2438 if( *it != str[--str_len] )
2439 return false;
2440 ++it;
2441 }
2442 return !str_len;
2443 }
2444
2445
2456 inline int compare( const basic_string& str ) const noexcept {
2457 size_type my_size = size(), str_size = str.size();
2458 int result = std::memcmp( data() , str.data() , my_size < str_size ? my_size : str_size );
2459 if( !result && my_size != str_size )
2460 result = my_size < str_size ? -1 : 1;
2461 return result;
2462 }
2473 inline int compare( const std::string& str ) const noexcept {
2474 size_type my_size = size(), str_size = str.size();
2475 int result = std::memcmp( data() , str.data() , my_size < str_size ? my_size : str_size );
2476 if( !result && my_size != str_size )
2477 result = my_size < str_size ? -1 : 1;
2478 return result;
2479 }
2492 template<typename T>
2493 int compare( T str , enable_if_ptr<T, data_type>* = {} ) const noexcept {
2494 const data_type* it = data(), *end = it + size();
2495 while( it != end && *str ){
2496 if( *it != *str )
2497 return *it < *str ? -1 : 1;
2498 ++it, ++str;
2499 }
2500 return *str ? -1 : it == end ? 0 : 1;
2501 }
2512 template<size_type LITLEN>
2513 int compare( const data_type (&str)[LITLEN] ) const noexcept {
2514 const data_type* it = data(), *end = it + size();
2515 size_type index = 0, length = str[LITLEN-1] ? LITLEN : LITLEN-1;
2516 while( it != end && index < length ){
2517 if( *it != str[index] )
2518 return *it < str[index] ? -1 : 1;
2519 ++it, ++index;
2520 }
2521 return index < length ? -1 : it == end ? 0 : 1;
2522 }
2535 template<typename T>
2536 int compare( T str , enable_if_ptr<T, value_type>* = {} ) const noexcept {
2537 const_iterator it = cbegin(), end = cend();
2538 while( it != end && *str ){
2539 if( *it != *str )
2540 return *it < *str ? -1 : 1;
2541 ++it, ++str;
2542 }
2543 return *str ? -1 : it == end ? 0 : 1;
2544 }
2555 template<size_type LITLEN>
2556 int compare( const value_type (&str)[LITLEN] ) const noexcept {
2557 const_iterator it = cbegin(), end = cend();
2558 size_type index = 0, length = str[LITLEN-1] ? LITLEN : LITLEN-1;
2559 while( it != end && index < length ){
2560 if( *it != str[index] )
2561 return *it < str[index] ? -1 : 1;
2562 ++it, ++index;
2563 }
2564 return index < length ? -1 : it == end ? 0 : 1;
2565 }
2566
2568 inline bool operator==( const basic_string& str ) const noexcept { return compare( str ) == 0; }
2569 inline bool operator!=( const basic_string& str ) const noexcept { return compare( str ) != 0; }
2570 inline bool operator==( const std::string& str ) const noexcept { return compare( str ) == 0; }
2571 inline bool operator!=( const std::string& str ) const noexcept { return compare( str ) != 0; }
2572 template<typename T> inline enable_if_ptr<T, data_type> operator==( T&& str ) const noexcept { return compare( str ) == 0; }
2573 template<typename T> inline enable_if_ptr<T, data_type> operator!=( T&& str ) const noexcept { return compare( str ) != 0; }
2574 template<typename T> inline enable_if_ptr<T, value_type> operator==( T&& str ) const noexcept { return compare( str ) == 0; }
2575 template<typename T> inline enable_if_ptr<T, value_type> operator!=( T&& str ) const noexcept { return compare( str ) != 0; }
2576 template<size_type LITLEN> inline bool operator==( const data_type (&str)[LITLEN] ) const noexcept { return compare( str ) == 0; }
2577 template<size_type LITLEN> inline bool operator!=( const data_type (&str)[LITLEN] ) const noexcept { return compare( str ) != 0; }
2578 template<size_type LITLEN> inline bool operator==( const value_type (&str)[LITLEN] ) const noexcept { return compare( str ) == 0; }
2579 template<size_type LITLEN> inline bool operator!=( const value_type (&str)[LITLEN] ) const noexcept { return compare( str ) != 0; }
2580
2582 inline bool operator>( const basic_string& str ) const noexcept { return compare( str ) > 0; }
2583 inline bool operator>=( const basic_string& str ) const noexcept { return compare( str ) >= 0; }
2584 inline bool operator<( const basic_string& str ) const noexcept { return compare( str ) < 0; }
2585 inline bool operator<=( const basic_string& str ) const noexcept { return compare( str ) <= 0; }
2586 inline bool operator>( const std::string& str ) const noexcept { return compare( str ) > 0; }
2587 inline bool operator>=( const std::string& str ) const noexcept { return compare( str ) >= 0; }
2588 inline bool operator<( const std::string& str ) const noexcept { return compare( str ) < 0; }
2589 inline bool operator<=( const std::string& str ) const noexcept { return compare( str ) <= 0; }
2590 template<typename T> inline enable_if_ptr<T, data_type> operator>( T&& str ) const noexcept { return compare( str ) > 0; }
2591 template<typename T> inline enable_if_ptr<T, data_type> operator>=( T&& str ) const noexcept { return compare( str ) >= 0; }
2592 template<typename T> inline enable_if_ptr<T, data_type> operator<( T&& str ) const noexcept { return compare( str ) < 0; }
2593 template<typename T> inline enable_if_ptr<T, data_type> operator<=( T&& str ) const noexcept { return compare( str ) <= 0; }
2594 template<typename T> inline enable_if_ptr<T, value_type> operator>( T&& str ) const noexcept { return compare( str ) > 0; }
2595 template<typename T> inline enable_if_ptr<T, value_type> operator>=( T&& str ) const noexcept { return compare( str ) >= 0; }
2596 template<typename T> inline enable_if_ptr<T, value_type> operator<( T&& str ) const noexcept { return compare( str ) < 0; }
2597 template<typename T> inline enable_if_ptr<T, value_type> operator<=( T&& str ) const noexcept { return compare( str ) <= 0; }
2598 template<size_type LITLEN> inline bool operator>( const data_type (&str)[LITLEN] ) const noexcept { return compare( str ) > 0; }
2599 template<size_type LITLEN> inline bool operator>=( const data_type (&str)[LITLEN] ) const noexcept { return compare( str ) >= 0; }
2600 template<size_type LITLEN> inline bool operator<( const data_type (&str)[LITLEN] ) const noexcept { return compare( str ) < 0; }
2601 template<size_type LITLEN> inline bool operator<=( const data_type (&str)[LITLEN] ) const noexcept { return compare( str ) <= 0; }
2602 template<size_type LITLEN> inline bool operator>( const value_type (&str)[LITLEN] ) const noexcept { return compare( str ) > 0; }
2603 template<size_type LITLEN> inline bool operator>=( const value_type (&str)[LITLEN] ) const noexcept { return compare( str ) >= 0; }
2604 template<size_type LITLEN> inline bool operator<( const value_type (&str)[LITLEN] ) const noexcept { return compare( str ) < 0; }
2605 template<size_type LITLEN> inline bool operator<=( const value_type (&str)[LITLEN] ) const noexcept { return compare( str ) <= 0; }
2606
2607
2609 inline width_type get_index_bytes( size_type byte_index ) const noexcept {
2610 return get_codepoint_bytes( get_buffer()[byte_index] , size() - byte_index );
2611 }
2612 inline width_type get_codepoint_bytes( size_type codepoint_index ) const noexcept {
2613 return get_index_bytes( get_num_bytes_from_start( codepoint_index ) );
2614 }
2615
2616
2618 inline width_type get_index_pre_bytes( size_type byte_index ) const noexcept {
2619 const data_type* buffer = get_buffer();
2620 return get_num_bytes_of_utf8_char_before( buffer , byte_index );
2621 }
2622 inline width_type get_codepoint_pre_bytes( size_type codepoint_index ) const noexcept {
2623 return get_index_pre_bytes( get_num_bytes_from_start( codepoint_index ) );
2624 }
2625
2626
2628 inline size_type raw_back_index() const noexcept { size_type s = size(); return s - get_index_pre_bytes( s ); }
2629
// Declaration only; the definition is not part of this section.
// NOTE(review): presumably counts the codepoints stored in the 'byte_count' bytes that start at
// byte offset 'byte_start' -- confirm against the definition.
2634 size_type get_num_codepoints( size_type byte_start , size_type byte_count ) const noexcept ;
2635
// Declarations only; the definitions are not part of this section.
// get_num_bytes_from_start() is used by get_codepoint_bytes() and get_codepoint_pre_bytes() to turn
// a codepoint index into the byte offset handed to the byte-index based helpers.
// NOTE(review): get_num_bytes() presumably returns the number of bytes covered by 'cp_count'
// codepoints starting at byte offset 'byte_start' -- confirm against the definition.
2640 size_type get_num_bytes( size_type byte_start , size_type cp_count ) const noexcept ;
2641 size_type get_num_bytes_from_start( size_type cp_count ) const noexcept ;
2642
2643
2644 public:
2645
2646
2654 inline bool requires_unicode() const noexcept {
2656 }
2657
2658
2664 inline bool sso_active() const noexcept { return sso_inactive() == false; }
2665
2666
2673
2674
2681 void to_wide_literal( value_type* dest ) const noexcept {
2682 for( const data_type* data = get_buffer(), * data_end = data + size() ; data < data_end ; )
2683 data += decode_utf8_and_len( data , *dest++ , data_end - data );
2684 *dest = 0;
2685 }
2686
2687
2694 inline std::basic_string<data_type> cpp_str( bool prepend_bom = false ) const noexcept(TINY_UTF8_NOEXCEPT) { return prepend_bom ? cpp_str_bom() : std::basic_string<DataType>( c_str() , size() ); }
2695 };
2696} // Namespace 'tiny_utf8'
2697
2698
2700namespace std
2701{
2702 template<typename V, typename D, typename A>
2703 struct hash<tiny_utf8::basic_string<V, D, A> >
2704 {
2705 std::size_t operator()( const tiny_utf8::basic_string<V, D, A>& string ) const noexcept {
2706 using data_type = typename tiny_utf8::basic_string<V, D, A>::data_type;
2707 std::hash<data_type> hasher;
2708 std::size_t size = string.size();
2709 std::size_t result = 0;
2710 const data_type* buffer = string.data();
2711 for( std::size_t iterator = 0 ; iterator < size ; ++iterator )
2712 result = result * 31u + hasher( buffer[iterator] );
2713 return result;
2714 }
2715 };
2716}
2717
2719template<typename V, typename D, typename A>
2720std::ostream& operator<<( std::ostream& stream , const tiny_utf8::basic_string<V, D, A>& str ) noexcept(TINY_UTF8_NOEXCEPT) {
2721 return stream << str.cpp_str();
2722}
2723template<typename V, typename D, typename A>
2724std::istream& operator>>( std::istream& stream , tiny_utf8::basic_string<V, D, A>& str ) noexcept(TINY_UTF8_NOEXCEPT) {
2725 std::string tmp;
2726 stream >> tmp;
2727 str = move(tmp);
2728 return stream;
2729}
2730
2731
2732// Implementation
2733namespace tiny_utf8
2734{
2735 template<typename V, typename D, typename A>
2737 noexcept(TINY_UTF8_NOEXCEPT)
2738 : A( alloc )
2739 , t_sso()
2740 {
2741 if( !count )
2742 return;
2743
2744 width_type num_bytes_per_cp = get_codepoint_bytes( cp );
2745 size_type data_len = num_bytes_per_cp * count;
2746 data_type* buffer;
2747
2748 // Need to allocate a buffer?
2749 if( data_len > basic_string::get_sso_capacity() )
2750 {
2751 // Determine the buffer size
2752 size_type buffer_size = determine_main_buffer_size( data_len );
2753 buffer = this->allocate( determine_total_buffer_size( buffer_size ) );
2754 #if defined(TINY_UTF8_NOEXCEPT)
2755 if( !buffer )
2756 return;
2757 #endif
2758 t_non_sso.data = buffer;
2759
2760 // Set Attributes
2761 set_lut_indiciator( buffer + buffer_size , num_bytes_per_cp == 1 , 0 ); // Set lut indicator
2762 t_non_sso.buffer_size = buffer_size;
2763 t_non_sso.data_len = data_len;
2764 set_non_sso_string_len( count ); // This also disables SSO
2765 }
2766 else{
2767 buffer = t_sso.data;
2768
2769 // Set Attributes
2770 set_sso_data_len( (unsigned char)data_len );
2771 }
2772
2773 // Fill the buffer
2774 if( num_bytes_per_cp > 1 ){
2775 basic_string::encode_utf8( cp , buffer , num_bytes_per_cp );
2776 for( data_type* buffer_iter = buffer ; --count > 0 ; )
2777 std::memcpy( buffer_iter += num_bytes_per_cp , buffer , num_bytes_per_cp );
2778 }
2779 else
2780 std::memset( buffer , cp , count );
2781
2782 // Set trailling zero (in case sso is inactive, this trailing zero also indicates, that no multibyte-tracking is enabled)
2783 buffer[data_len] = 0;
2784 }
2785
2786 template<typename V, typename D, typename A>
2788 noexcept(TINY_UTF8_NOEXCEPT)
2789 : A( alloc )
2790 , t_sso()
2791 {
2792 if( !count )
2793 return;
2794
2795 data_type* buffer;
2796
2797 // Need to allocate a buffer?
2798 if( count > basic_string::get_sso_capacity() )
2799 {
2800 size_type buffer_size = determine_main_buffer_size( count );
2801 buffer = this->allocate( determine_total_buffer_size( buffer_size ) );
2802 #if defined(TINY_UTF8_NOEXCEPT)
2803 if( !buffer )
2804 return;
2805 #endif
2806 t_non_sso.data = buffer;
2807
2808 // Set Attributes
2809 set_lut_indiciator( buffer + buffer_size , true , 0 ); // Set lut indicator
2810 t_non_sso.buffer_size = buffer_size;
2811 t_non_sso.data_len = count;
2812 set_non_sso_string_len( count ); // This also disables SSO
2813 }
2814 else{
2815 buffer = t_sso.data;
2816 set_sso_data_len( (unsigned char)count );
2817 }
2818
2819 // Fill the buffer
2820 std::memset( buffer , cp , count );
2821
2822 // Set trailling zero (in case sso is inactive, this trailing zero also indicates, that no multibyte-tracking is enabled)
2823 buffer[count] = 0;
2824 }
2825
2826 template<typename V, typename D, typename A>
2827 basic_string<V, D, A>::basic_string( const data_type* str , size_type pos , size_type count , size_type data_left , const typename basic_string<V, D, A>::allocator_type& alloc , tiny_utf8_detail::read_codepoints_tag )
2828 noexcept(TINY_UTF8_NOEXCEPT)
2829 : basic_string( alloc )
2830 {
2831 if( !count )
2832 return;
2833
2834 size_type num_multibytes = 0;
2835 size_type data_len = 0;
2836 size_type string_len = 0;
2837 data_type* buffer;
2838
2839 // Iterate to the nth codepoint marking the start of the string to copy
2840 while( *str && string_len < pos && data_left != 0u ){
2841 width_type bytes = get_codepoint_bytes( str[data_len] , data_left ); // Read number of bytes of current codepoint
2842 data_left -= bytes;
2843 str += bytes;
2844 ++string_len;
2845 }
2846 string_len = 0;
2847
2848 // Count bytes, multibytes and string length
2849 while( str[data_len] && string_len < count )
2850 {
2851 // Read number of bytes of current codepoint
2852 width_type bytes = get_codepoint_bytes( str[data_len] , data_left );
2853 data_len += bytes; // Increase number of bytes
2854 data_left -= bytes; // Decrease amount of bytes left
2855 string_len += 1; // Increase number of codepoints
2856 num_multibytes += bytes > 1 ? 1 : 0; // Increase number of occoured multibytes?
2857 }
2858
2859 // Need heap memory?
2860 if( data_len > basic_string::get_sso_capacity() )
2861 {
2862 if( basic_string::is_lut_worth( num_multibytes , string_len , false , false ) )
2863 {
2864 // Determine the buffer size (excluding the lut indicator) and the lut width
2865 width_type lut_width;
2866 size_type buffer_size = determine_main_buffer_size( data_len , num_multibytes , &lut_width );
2867 buffer = this->allocate( determine_total_buffer_size( buffer_size ) );
2868 #if defined(TINY_UTF8_NOEXCEPT)
2869 if( !buffer )
2870 return;
2871 #endif
2872 t_non_sso.data = buffer;
2873
2874 // Set up LUT
2875 data_type* lut_iter = basic_string::get_lut_base_ptr( buffer , buffer_size );
2876 basic_string::set_lut_indiciator( lut_iter , true , num_multibytes ); // Set the LUT indicator
2877
2878 // Fill the lut and copy bytes
2879 data_type* buffer_iter = buffer;
2880 const data_type* str_iter = str;
2881 const data_type* str_end = str + data_len; // Compute End of the string
2882 while( str_iter < str_end )
2883 {
2884 width_type bytes = get_codepoint_bytes( *str_iter , str_end - str_iter );
2885 switch( bytes )
2886 {
2887 case 7: buffer_iter[6] = str_iter[6]; TINY_UTF8_FALLTHROUGH // Copy data byte
2888 case 6: buffer_iter[5] = str_iter[5]; TINY_UTF8_FALLTHROUGH // Copy data byte
2889 case 5: buffer_iter[4] = str_iter[4]; TINY_UTF8_FALLTHROUGH // Copy data byte
2890 case 4: buffer_iter[3] = str_iter[3]; TINY_UTF8_FALLTHROUGH // Copy data byte
2891 case 3: buffer_iter[2] = str_iter[2]; TINY_UTF8_FALLTHROUGH // Copy data byte
2892 case 2: buffer_iter[1] = str_iter[1]; // Copy data byte
2893 // Set next entry in the LUT!
2894 basic_string::set_lut( lut_iter -= lut_width , lut_width , str_iter - str );
2896 case 1: buffer_iter[0] = str_iter[0]; break; // Copy data byte
2897 }
2898 buffer_iter += bytes;
2899 str_iter += bytes;
2900 }
2901 *buffer_iter = '\0'; // Set trailing '\0'
2902
2903 // Set Attributes
2904 t_non_sso.buffer_size = buffer_size;
2905 t_non_sso.data_len = data_len;
2906 set_non_sso_string_len( string_len ); // This also disables SSO
2907
2908 return; // We have already done everything!
2909 }
2910
2911 size_type buffer_size = determine_main_buffer_size( data_len );
2912 buffer = t_non_sso.data = this->allocate( determine_total_buffer_size( buffer_size ) );
2913
2914 // Set up LUT
2915 data_type* lut_iter = basic_string::get_lut_base_ptr( buffer , buffer_size );
2916 basic_string::set_lut_indiciator( lut_iter , num_multibytes == 0 , 0 ); // Set the LUT indicator
2917
2918 // Set Attributes
2919 t_non_sso.buffer_size = buffer_size;
2920 t_non_sso.data_len = data_len;
2921 set_non_sso_string_len( string_len );
2922 }
2923 else{
2924 buffer = t_sso.data;
2925
2926 // Set Attrbutes
2927 set_sso_data_len( (unsigned char)data_len );
2928
2929 // Set up LUT: Not necessary, since the LUT is automatically inactive,
2930 // since SSO is active and the LUT indicator shadows 't_sso.data_len', which has the LSB = 0 (=> LUT inactive).
2931 }
2932
2934 std::memcpy( buffer , str , data_len );
2935 buffer[data_len] = '\0';
2936 }
2937
2938 template<typename V, typename D, typename A>
2939 basic_string<V, D, A>::basic_string( const data_type* str , size_type data_len , const typename basic_string<V, D, A>::allocator_type& alloc , tiny_utf8_detail::read_bytes_tag )
2940 noexcept(TINY_UTF8_NOEXCEPT)
2941 : basic_string( alloc )
2942 {
2943 if( !data_len )
2944 return;
2945
2946 size_type num_multibytes = 0;
2947 size_type index = 0;
2948 size_type string_len = 0;
2949
2950 // Count bytes, multibytes and string length
2951 while( index < data_len )
2952 {
2953 // Read number of bytes of current codepoint
2954 width_type bytes = get_codepoint_bytes( str[index] , basic_string::npos );
2955 index += bytes; // Increase number of bytes
2956 string_len += 1; // Increase number of codepoints
2957 num_multibytes += bytes > 1 ? 1 : 0; // Increase number of occoured multibytes?
2958 }
2959
2960 data_type* buffer;
2961
2962 // Need heap memory?
2963 if( data_len > basic_string::get_sso_capacity() )
2964 {
2965 if( basic_string::is_lut_worth( num_multibytes , string_len , false , false ) )
2966 {
2967 // Determine the buffer size (excluding the lut indicator) and the lut width
2968 width_type lut_width;
2969 size_type buffer_size = determine_main_buffer_size( data_len , num_multibytes , &lut_width );
2970 buffer = this->allocate( determine_total_buffer_size( buffer_size ) );
2971 #if defined(TINY_UTF8_NOEXCEPT)
2972 if( !buffer )
2973 return;
2974 #endif
2975 t_non_sso.data = buffer;
2976
2977 // Set up LUT
2978 data_type* lut_iter = basic_string::get_lut_base_ptr( buffer , buffer_size );
2979 basic_string::set_lut_indiciator( lut_iter , true , num_multibytes ); // Set the LUT indicator
2980
2981 // Fill the lut and copy bytes
2982 data_type* buffer_iter = buffer;
2983 const data_type* str_iter = str;
2984 const data_type* str_end = str + data_len; // Compute End of the string
2985 while( str_iter < str_end )
2986 {
2987 width_type bytes = get_codepoint_bytes( *str_iter , str_end - str_iter );
2988 switch( bytes )
2989 {
2990 case 7: buffer_iter[6] = str_iter[6]; TINY_UTF8_FALLTHROUGH // Copy data byte
2991 case 6: buffer_iter[5] = str_iter[5]; TINY_UTF8_FALLTHROUGH // Copy data byte
2992 case 5: buffer_iter[4] = str_iter[4]; TINY_UTF8_FALLTHROUGH // Copy data byte
2993 case 4: buffer_iter[3] = str_iter[3]; TINY_UTF8_FALLTHROUGH // Copy data byte
2994 case 3: buffer_iter[2] = str_iter[2]; TINY_UTF8_FALLTHROUGH // Copy data byte
2995 case 2: buffer_iter[1] = str_iter[1]; // Copy data byte
2996 // Set next entry in the LUT!
2997 basic_string::set_lut( lut_iter -= lut_width , lut_width , str_iter - str );
2999 case 1: buffer_iter[0] = str_iter[0]; break; // Copy data byte
3000 }
3001 buffer_iter += bytes;
3002 str_iter += bytes;
3003 }
3004 *buffer_iter = '\0'; // Set trailing '\0'
3005
3006 // Set Attributes
3007 t_non_sso.buffer_size = buffer_size;
3008 t_non_sso.data_len = data_len;
3009 set_non_sso_string_len( string_len ); // This also disables SSO
3010
3011 return; // We have already done everything!
3012 }
3013
3014 size_type buffer_size = determine_main_buffer_size( data_len );
3015 buffer = t_non_sso.data = this->allocate( determine_total_buffer_size( buffer_size ) );
3016
3017 // Set up LUT
3018 data_type* lut_iter = basic_string::get_lut_base_ptr( buffer , buffer_size );
3019 basic_string::set_lut_indiciator( lut_iter , num_multibytes == 0 , 0 ); // Set the LUT indicator
3020
3021 // Set Attributes
3022 t_non_sso.buffer_size = buffer_size;
3023 t_non_sso.data_len = data_len;
3024 set_non_sso_string_len( string_len );
3025 }
3026 else{
3027 buffer = t_sso.data;
3028
3029 // Set Attrbutes
3030 set_sso_data_len( (unsigned char)data_len );
3031
3032 // Set up LUT: Not necessary, since the LUT is automatically inactive,
3033 // since SSO is active and the LUT indicator shadows 't_sso.data_len', which has the LSB = 0 (=> LUT inactive).
3034 }
3035
3037 std::memcpy( buffer , str , data_len );
3038 buffer[data_len] = '\0';
3039 }
3040
3041 template<typename V, typename D, typename A>
3042 basic_string<V, D, A>::basic_string( const value_type* str , size_type len , const typename basic_string<V, D, A>::allocator_type& alloc )
3043 noexcept(TINY_UTF8_NOEXCEPT)
3044 : basic_string( alloc )
3045 {
3046 if( !len )
3047 return;
3048
3049 size_type num_multibytes = 0;
3050 size_type data_len = 0;
3051 size_type string_len = 0;
3052
3053 // Count bytes, mutlibytes and string length
3054 while( string_len < len && ( str[string_len] || len != basic_string::npos ) )
3055 {
3056 // Read number of bytes of current codepoint
3057 width_type bytes = get_codepoint_bytes( str[string_len] );
3058
3059 data_len += bytes; // Increase number of bytes
3060 string_len += 1; // Increase number of codepoints
3061 num_multibytes += bytes > 1 ; // Increase number of occoured multibytes?
3062 }
3063
3064 data_type* buffer;
3065
3066 // Need heap memory?
3067 if( data_len > basic_string::get_sso_capacity() )
3068 {
3069 if( basic_string::is_lut_worth( num_multibytes , string_len , false , false ) )
3070 {
3071 // Determine the buffer size (excluding the lut indicator) and the lut width
3072 width_type lut_width;
3073 size_type buffer_size = determine_main_buffer_size( data_len , num_multibytes , &lut_width );
3074 buffer = this->allocate( determine_total_buffer_size( buffer_size ) );
3075 #if defined(TINY_UTF8_NOEXCEPT)
3076 if( !buffer )
3077 return;
3078 #endif
3079 t_non_sso.data = buffer;
3080
3082 data_type* lut_iter = basic_string::get_lut_base_ptr( buffer , buffer_size );
3083 data_type* buffer_iter = buffer;
3084 basic_string::set_lut_indiciator( lut_iter , true , num_multibytes ); // Set the LUT indicator
3085
3086 // Iterate through wide char literal
3087 for( size_type i = 0 ; i < string_len ; i++ )
3088 {
3089 // Encode wide char to utf8
3090 width_type codepoint_bytes = basic_string::encode_utf8( str[i] , buffer_iter );
3091
3092 // Push position of character to 'indices'
3093 if( codepoint_bytes > 1 )
3094 basic_string::set_lut( lut_iter -= lut_width , lut_width , buffer_iter - buffer );
3095
3096 // Step forward with copying to internal utf8 buffer
3097 buffer_iter += codepoint_bytes;
3098 }
3099 *buffer_iter = '\0'; // Set trailing '\0'
3100
3101 // Set Attributes
3102 t_non_sso.buffer_size = buffer_size;
3103 t_non_sso.data_len = data_len;
3104 set_non_sso_string_len( string_len );
3105
3106 return; // We have already done everything!
3107 }
3108
3109 size_type buffer_size = determine_main_buffer_size( data_len );
3110 buffer = t_non_sso.data = this->allocate( determine_total_buffer_size( buffer_size ) );
3111
3112 // Set up LUT
3113 data_type* lut_iter = basic_string::get_lut_base_ptr( buffer , buffer_size );
3114 basic_string::set_lut_indiciator( lut_iter , num_multibytes == 0 , 0 ); // Set the LUT indicator
3115
3116 // Set Attributes
3117 t_non_sso.buffer_size = buffer_size;
3118 t_non_sso.data_len = data_len;
3119 set_non_sso_string_len( string_len );
3120 }
3121 else{
3122 buffer = t_sso.data;
3123
3124 // Set Attrbutes
3125 set_sso_data_len( (unsigned char)data_len );
3126
3127 // Set up LUT: Not necessary, since the LUT is automatically inactive,
3128 // since SSO is active and the LUT indicator shadows 't_sso.data_len', which has the LSB = 0 (=> LUT inactive).
3129 }
3130
3131 data_type* buffer_iter = buffer;
3132
3133 // Iterate through wide char literal
3134 for( size_type i = 0 ; i < string_len ; i++ )
3135 // Encode wide char to utf8 and step forward the number of bytes it took
3136 buffer_iter += basic_string::encode_utf8( str[i] , buffer_iter );
3137
3138 *buffer_iter = '\0'; // Set trailing '\0'
3139 }
3140
3141 template<typename V, typename D, typename A>
3143 {
3144 data_start += index;
3145 // Only Check the possibilities, that could appear
3146 switch( index )
3147 {
3148 default:
3149 if( ((unsigned char)data_start[-7] & 0xFE ) == 0xFC ) // 11111110 seven bytes
3150 return 7;
3152 case 6:
3153 if( ((unsigned char)data_start[-6] & 0xFE ) == 0xFC ) // 1111110X six bytes
3154 return 6;
3156 case 5:
3157 if( ((unsigned char)data_start[-5] & 0xFC ) == 0xF8 ) // 111110XX five bytes
3158 return 5;
3160 case 4:
3161 if( ((unsigned char)data_start[-4] & 0xF8 ) == 0xF0 ) // 11110XXX four bytes
3162 return 4;
3164 case 3:
3165 if( ((unsigned char)data_start[-3] & 0xF0 ) == 0xE0 ) // 1110XXXX three bytes
3166 return 3;
3168 case 2:
3169 if( ((unsigned char)data_start[-2] & 0xE0 ) == 0xC0 ) // 110XXXXX two bytes
3170 return 2;
3172 case 1:
3173 case 0:
3174 return 1;
3175 }
3176 }
3177
3178 #if !TINY_UTF8_HAS_CLZ
3179 template<typename V, typename D, typename A>
3181 {
3182 // Only Check the possibilities, that could appear
3183 switch( data_left )
3184 {
3185 default:
3186 if( ( (unsigned char)first_byte & 0xFFu ) == 0xFEu ) // 11111110 -> seven bytes
3187 return 7;
3188 case 6:
3189 if( ( (unsigned char)first_byte & 0xFEu ) == 0xFCu ) // 1111110X -> six bytes
3190 return 6;
3191 case 5:
3192 if( ( (unsigned char)first_byte & 0xFCu ) == 0xF8u ) // 111110XX -> five bytes
3193 return 5;
3194 case 4:
3195 if( ( (unsigned char)first_byte & 0xF8u ) == 0xF0u ) // 11110XXX -> four bytes
3196 return 4;
3197 case 3:
3198 if( ( (unsigned char)first_byte & 0xF0u ) == 0xE0u ) // 1110XXXX -> three bytes
3199 return 3;
3200 case 2:
3201 if( ( (unsigned char)first_byte & 0xE0u ) == 0xC0u ) // 110XXXXX -> two bytes
3202 return 2;
3203 case 1:
3204 case 0:
3205 return 1; // one byte
3206 }
3207 }
3208 #endif // !TINY_UTF8_HAS_CLZ
3209
// Copy assignment: makes '*this' a copy of 'str' and returns *this.
// The switch dispatches on sso_inactive() of both operands ('*this' -> bit 0, 'str' -> bit 1):
//   3: both heap-backed, 2: only 'str' heap-backed, 1: only '*this' heap-backed, 0: both SSO.
// In case 3 the existing buffer is kept when the size checks below pass; otherwise it is
// deallocated and control continues into case 2, which allocates a buffer sized like str's
// and copies str's whole buffer (data, LUT and indicator) into it.
// NOTE(review): listing lines 3246, 3262, 3274 and 3285 are absent from this extract
// (dropped by the HTML extraction); see the note at each gap and restore them from the original header.
3210 template<typename V, typename D, typename A>
3211 basic_string<V, D, A>& basic_string<V, D, A>::operator=( const basic_string<V, D, A>& str ) noexcept(TINY_UTF8_NOEXCEPT)
3212 {
3213 // Note: Self assignment is expected to be very rare. We tolerate overhead in this situation.
3214 // Therefore, we right away check for sso states in 'this' and 'str'.
3215 // If they are equal, perform the check then.
3216
3217 switch( sso_inactive() + str.sso_inactive() * 2 )
3218 {
3219 case 3: // [sso-inactive] = [sso-inactive]
3220 {
3221 if( &str == this )
3222 return *this;
3223 const data_type* str_lut_base_ptr = basic_string::get_lut_base_ptr( str.t_non_sso.data , str.t_non_sso.buffer_size );
3224 if( basic_string::is_lut_active( str_lut_base_ptr ) )
3225 {
3226 width_type lut_width = get_lut_width( t_non_sso.buffer_size ); // Lut width, if the current buffer is used
3227 size_type str_lut_len = basic_string::get_lut_len( str_lut_base_ptr );
3228
3229 // Can the current buffer hold the data and the lut?
3230 if( basic_string::determine_main_buffer_size( str.t_non_sso.data_len , str_lut_len , lut_width ) < t_non_sso.buffer_size )
3231 {
3232 width_type str_lut_width = get_lut_width( str.t_non_sso.buffer_size );
3233
3234 // How to copy indices?
// Same entry width in both buffers: the LUT (which is addressed downwards from its base pointer)
// can be copied as one contiguous byte range together with the indicator.
3235 if( lut_width == str_lut_width ){
3236 str_lut_len *= str_lut_width; // Compute the size in bytes of the lut
3237 std::memcpy(
3238 basic_string::get_lut_base_ptr( t_non_sso.data , t_non_sso.buffer_size ) - str_lut_len
3239 , str_lut_base_ptr - str_lut_len
3240 , str_lut_len + sizeof(indicator_type) // Also copy lut indicator
3241 );
3242 }
// Different entry widths: every index has to be read with the source width and re-written with the destination width.
3243 else{
3244 data_type* lut_iter = basic_string::get_lut_base_ptr( t_non_sso.data , t_non_sso.buffer_size );
3245 for( ; str_lut_len > 0 ; --str_lut_len )
// NOTE(review): listing line 3246 is missing here; the orphaned argument list below presumably
// belongs to a 'basic_string::set_lut(' call (destination, width, value) - confirm against the original header.
3247 lut_iter -= lut_width
3248 , lut_width
3249 , basic_string::get_lut( str_lut_base_ptr -= str_lut_width , str_lut_width )
3250 );
3251 }
3252 }
3253 else
3254 goto lbl_replicate_whole_buffer;
3255 }
3256 // Is the current buffer too small to hold just the data?
3257 else if( basic_string::determine_main_buffer_size( str.t_non_sso.data_len ) > t_non_sso.buffer_size )
3258 goto lbl_replicate_whole_buffer;
3259
3260 // Copy data and lut indicator only
3261 std::memcpy( t_non_sso.data , str.t_non_sso.data , str.t_non_sso.data_len + 1 );
// NOTE(review): listing line 3262 is missing here; the two arguments below (destination and source
// LUT base pointer) presumably belong to a call that copies the LUT indicator - confirm against the original header.
3263 basic_string::get_lut_base_ptr( t_non_sso.data , t_non_sso.buffer_size )
3264 , str_lut_base_ptr
3265 );
3266 t_non_sso.data_len = str.t_non_sso.data_len;
3267 t_non_sso.string_len = str.t_non_sso.string_len; // Copy the string_len bit pattern
3268 (allocator_type&)*this = (const allocator_type&)str; // Copy allocator
3269 return *this;
3270
3271 lbl_replicate_whole_buffer: // Replicate the whole buffer
3272 this->deallocate( t_non_sso.data , t_non_sso.buffer_size );
3273 }
// NOTE(review): listing line 3274 is missing here; since no return/break is visible, control runs on
// into case 2 - the dropped line is presumably the fall-through annotation (TINY_UTF8_FALLTHROUGH).
3275 case 2: // [sso-active] = [sso-inactive]
3276 (allocator_type&)*this = (const allocator_type&)str; // Copy allocator
3277 t_non_sso.data = this->allocate( basic_string::determine_total_buffer_size( str.t_non_sso.buffer_size ) );
3278 std::memcpy( t_non_sso.data , str.t_non_sso.data , str.t_non_sso.buffer_size + sizeof(indicator_type) ); // Copy data
3279 t_non_sso.buffer_size = str.t_non_sso.buffer_size;
3280 t_non_sso.data_len = str.t_non_sso.data_len;
3281 t_non_sso.string_len = str.t_non_sso.string_len; // This also disables SSO
3282 return *this;
3283 case 1: // [sso-inactive] = [sso-active]
3284 this->deallocate( t_non_sso.data , t_non_sso.buffer_size );
// NOTE(review): listing line 3285 is missing here; as above, presumably the fall-through annotation into case 0.
3286 case 0: // [sso-active] = [sso-active]
3287 if( &str != this ){
3288 (allocator_type&)*this = (const allocator_type&)str; // Copy allocator
3289 std::memcpy( (void*)&this->t_sso , &str.t_sso , sizeof(basic_string::SSO) ); // Copy data
3290 }
3291 return *this;
3292 }
3293 return *this;
3294 }
3295
// NOTE(review): the declarator line (listing line 3297) is missing from this extract. Judging by the body this
// is the routine that shrinks the heap buffer to fit the current contents - confirm name and signature upstream.
// Behaviour visible below:
//  - returns immediately if SSO is active or the string is empty;
//  - computes the main buffer size required for the current data (plus the LUT, if one is active);
//  - returns without changes if the current buffer is below the reallocation threshold;
//  - otherwise allocates a new buffer, copies LUT (if active) and data into it, records the new
//    buffer size and deallocates the old buffer.
3296 template<typename V, typename D, typename A>
3298 {
3299 if( sso_active() )
3300 return;
3301
3302 size_type data_len = size();
3303
3304 if( !data_len )
3305 return;
3306
3307 size_type buffer_size = get_buffer_size();
3308 data_type* buffer = get_buffer();
3309 data_type* lut_base_ptr = basic_string::get_lut_base_ptr( buffer , buffer_size );
3310 size_type required_buffer_size;
3311
3312 if( is_lut_active( lut_base_ptr ) )
3313 {
3314 size_type lut_len = get_lut_len( lut_base_ptr );
3315 width_type new_lut_width;
3316 required_buffer_size = determine_main_buffer_size( data_len , lut_len , &new_lut_width );
3317
// Reallocation threshold: keep the buffer unless it is at least this much larger than required.
// NOTE(review): 'required_buffer_size >> 2' can never exceed 'required_buffer_size + 10' (barring wrap-around),
// so std::max always selects the first argument here; possibly 'required + (required >> 2)' was intended - confirm.
3319 if( buffer_size < std::max<size_type>( required_buffer_size + 10 , required_buffer_size >> 2 ) )
3320 return;
3321
3322 // Allocate new buffer
3323 t_non_sso.data = this->allocate( determine_total_buffer_size( required_buffer_size ) );
3324 width_type old_lut_width = basic_string::get_lut_width( buffer_size );
3325 data_type* new_lut_base_ptr = basic_string::get_lut_base_ptr( t_non_sso.data , required_buffer_size );
3326
3327 // Does the data type width change?
3328 if( old_lut_width != new_lut_width ){ // Copy indices one at a time
3329 basic_string::set_lut_indiciator( new_lut_base_ptr , true , lut_len );
// Both LUTs are walked downwards from their base pointers, converting each index to the new width.
3330 for( size_type i = 0 ; i < lut_len ; i++ )
3331 set_lut(
3332 new_lut_base_ptr -= new_lut_width
3333 , new_lut_width
3334 , get_lut( lut_base_ptr -= old_lut_width , old_lut_width )
3335 );
3336 }
3337 else{ // Copy the lut as well as the lut indicator in one action
3338 size_type lut_size = lut_len * old_lut_width;
3339 std::memcpy( new_lut_base_ptr - lut_size , lut_base_ptr - lut_size , lut_size + sizeof(indicator_type) );
3340 }
3341 }
3342 else
3343 {
3344 required_buffer_size = determine_main_buffer_size( data_len );
3345
// Same threshold as in the LUT branch above (the same NOTE(review) about std::max applies).
3347 if( buffer_size < std::max<size_type>( required_buffer_size + 10 , required_buffer_size >> 2 ) )
3348 return;
3349
// NOTE(review): on this path no LUT indicator is written into the new buffer by the code visible here
// (only 'data_len + 1' bytes are copied below) - verify that the indicator is initialised elsewhere.
3350 t_non_sso.data = this->allocate( determine_total_buffer_size( required_buffer_size ) ); // Allocate new buffer
3351 }
3352
3353 // Copy BUFFER
// 'data_len + 1' bytes: the string data plus one extra byte (the trailing '\0' that other routines in this file write after the data).
3354 std::memcpy( t_non_sso.data , buffer , data_len + 1 );
3355 t_non_sso.buffer_size = required_buffer_size; // Set new buffer size
3356
3357 // Delete old buffer
3358 this->deallocate( buffer , buffer_size );
3359 }
3360
// NOTE(review): the declarator line (listing line 3362) is missing from this extract; the body estimates a
// capacity in codepoints for the heap (non-SSO) buffer - confirm name and signature upstream.
// Reads 't_non_sso' unconditionally, so it presumably must only be called while SSO is inactive - verify against callers.
// Returns 'buffer_size - 1' for an empty string; otherwise '(buffer_size - 1)' scaled by the ratio of
// codepoints to bytes currently in use (data bytes plus LUT bytes, if a LUT is active).
3361 template<typename V, typename D, typename A>
3363 {
3364 size_type data_len = t_non_sso.data_len;
3365 size_type buffer_size = t_non_sso.buffer_size;
3366
3367 // If empty, assume an average number of bytes per codepoint of '1' (and an empty lut)
3368 if( !data_len )
3369 return buffer_size - 1;
3370
3371 const data_type* buffer = t_non_sso.data;
3372 size_type string_len = get_non_sso_string_len();
3373 const data_type* lut_base_ptr = basic_string::get_lut_base_ptr( buffer , buffer_size );
3374
3375 // If the lut is active, add the number of additional bytes to the current data length
3376 if( basic_string::is_lut_active( lut_base_ptr ) )
3377 data_len += basic_string::get_lut_width( buffer_size ) * basic_string::get_lut_len( lut_base_ptr );
3378
3379 // Return the buffer size (excluding the potential trailing '\0') divided by the average number of bytes per codepoint
// The multiplication is performed before the (integer) division.
// NOTE(review): for very large buffers the product could wrap around size_type - confirm whether that is a concern.
3380 return ( buffer_size - 1 ) * string_len / data_len;
3381 }
3382
// NOTE(review): the declarator line (listing line 3384) is missing from this extract; the body returns a bool
// telling whether the SSO-stored data contains a byte with its most significant bit set - confirm name/signature upstream.
// Strategy: test sizeof(size_type) bytes per step against a mask first, then test the remaining bytes one by one.
3383 template<typename V, typename D, typename A>
3385 {
// presumably a size_type value with the top bit of every byte set (helper is defined outside this chunk) - TODO confirm
3386 constexpr size_type mask = get_msb_mask<size_type>();
3387 size_type data_len = get_sso_data_len();
3388 size_type i = 0;
3389
3390 // Search sizeof(size_type) bytes at once
// NOTE(review): the cast below assumes 't_sso.data' is suitably aligned for size_type loads - verify.
3391 for( ; i < data_len / sizeof(size_type) ; i++ )
3392 if( ((size_type*)t_sso.data)[i] & mask )
3393 return true;
3394
3395 // Search byte-wise
// Convert the word index into the byte index of the first byte not covered by the loop above.
3396 i *= sizeof(size_type);
3397 for( ; i < data_len ; i++ )
3398 if( t_sso.data[i] & 0x80 )
3399 return true;
3400
3401 return false;
3402 }
3403
3404 template<typename V, typename D, typename A>
3405 std::basic_string<typename basic_string<V, D, A>::data_type> basic_string<V, D, A>::cpp_str_bom() const noexcept
3406 {
3407 // Create std::string
3408 std::basic_string<data_type> result = std::basic_string<data_type>( size() + 3 , ' ' );
3409 data_type* tmp_buffer = const_cast<data_type*>( result.data() );
3410
3411 // Write BOM
3412 tmp_buffer[0] = static_cast<data_type>(0xEF);
3413 tmp_buffer[1] = static_cast<data_type>(0xBB);
3414 tmp_buffer[2] = static_cast<data_type>(0xBF);
3415
3416 // Copy my data into it
3417 std::memcpy( tmp_buffer + 3 , get_buffer() , size() + 1 );
3418
3419 return result;
3420 }
3421
// NOTE(review): the declarator line (listing line 3423) is missing from this extract; the body uses the
// parameters 'index' and 'byte_count' and returns a codepoint count - confirm name and signature upstream.
// Counts the codepoints contained in the byte range [index, index + byte_count):
// starting from 'byte_count', every multibyte codepoint in the range reduces the count by its
// number of bytes minus one. With an active LUT only the LUT entries inside the range are visited;
// otherwise (no LUT, or SSO) the range is decoded byte by byte.
3422 template<typename V, typename D, typename A>
3424 {
3425 const data_type* buffer;
3426 size_type data_len;
3427
3428 if( sso_inactive() )
3429 {
3430 buffer = t_non_sso.data;
3431 data_len = t_non_sso.data_len;
3432 size_type buffer_size = t_non_sso.buffer_size;
3433 const data_type* lut_iter = basic_string::get_lut_base_ptr( buffer , buffer_size );
3434
3435 // Is the LUT active?
3436 if( basic_string::is_lut_active( lut_iter ) )
3437 {
3438 size_type lut_len = basic_string::get_lut_len( lut_iter );
3439
// An active but empty LUT means there are no multibyte codepoints: bytes == codepoints.
3440 if( !lut_len )
3441 return byte_count;
3442
3443 width_type lut_width = basic_string::get_lut_width( buffer_size );
// The LUT is laid out downwards from its base pointer; 'lut_begin' addresses the last (lowest) entry.
3444 const data_type* lut_begin = lut_iter - lut_len * lut_width;
3445 size_type end_index = index + byte_count;
3446
3447 // Iterate to the start of the relevant part of the multibyte table
// NOTE(review): the cursor is decremented before it is read, while the loop condition is checked before the
// decrement. If every entry is smaller than 'index', the last iteration therefore reads one entry beyond
// 'lut_begin'. The second loop is then skipped, so the result is unaffected, but the extra read looks
// unintended (compare the for-loop form used for the same scan in the byte-count routine further below).
3448 while( lut_iter >= lut_begin ){
3449 lut_iter -= lut_width; // Move cursor to the next lut entry
3450 if( basic_string::get_lut( lut_iter , lut_width ) >= index )
3451 break;
3452 }
3453
3454 // Iterate over relevant multibyte indices
3455 while( lut_iter >= lut_begin ){
3456 size_type multibyte_index = basic_string::get_lut( lut_iter , lut_width );
3457 if( multibyte_index >= end_index )
3458 break;
3459 byte_count -= basic_string::get_codepoint_bytes( buffer[multibyte_index] , data_len - multibyte_index ) - 1; // Subtract only the utf8 data bytes
3460 lut_iter -= lut_width; // Move cursor to the next lut entry
3461 }
3462
3463 // Now byte_count is the number of codepoints
3464 return byte_count;
3465 }
3466 }
3467 else{
3468 buffer = t_sso.data;
3469 data_len = get_sso_data_len();
3470 }
3471
3472 // Procedure: Reduce the byte count by the number of data bytes within multibytes
3473 const data_type* buffer_iter = buffer + index;
3474 const data_type* fragment_end = buffer_iter + byte_count;
3475
3476 // Iterate the data byte by byte...
3477 while( buffer_iter < fragment_end ){
3478 width_type bytes = basic_string::get_codepoint_bytes( *buffer_iter , fragment_end - buffer_iter );
3479 buffer_iter += bytes;
3480 byte_count -= bytes - 1;
3481 }
3482
3483 // Now byte_count is the number of codepoints
3484 return byte_count;
3485 }
3486
// NOTE(review): the declarator line (listing line 3488) is missing from this extract; the body uses the
// parameter 'cp_count' and returns a byte count measured from the start of the string - confirm name/signature upstream.
// Converts a number of codepoints (counted from the beginning of the string) into the number of bytes they occupy.
// With an active LUT, 'cp_count' itself is grown by the extra bytes of every multibyte codepoint that lies
// before the running byte position; otherwise the data is decoded codepoint by codepoint.
3487 template<typename V, typename D, typename A>
3489 {
3490 const data_type* buffer;
3491 size_type data_len;
3492
3493 if( sso_inactive() )
3494 {
3495 buffer = t_non_sso.data;
3496 data_len = t_non_sso.data_len;
3497 size_type buffer_size = t_non_sso.buffer_size;
3498 const data_type* lut_iter = basic_string::get_lut_base_ptr( buffer , buffer_size );
3499
3500 // Is the lut active?
3501 if( basic_string::is_lut_active( lut_iter ) )
3502 {
3503 // Increase the byte count by the number of additional data bytes within multibytes
3504 width_type lut_width = basic_string::get_lut_width( buffer_size );
3505
3506 // Iterate over relevant multibyte indices
3507 for( size_type lut_len = basic_string::get_lut_len( lut_iter ) ; lut_len-- > 0 ; )
3508 {
3509 size_type multibyte_index = basic_string::get_lut( lut_iter -= lut_width , lut_width );
// 'cp_count' doubles as the running byte position: multibytes at or beyond it are outside the requested range.
3510 if( multibyte_index >= cp_count )
3511 break;
3512 cp_count += basic_string::get_codepoint_bytes( buffer[multibyte_index] , data_len - multibyte_index ) - 1; // Add only the additional utf8 data bytes
3513 }
3514
3515 return cp_count;
3516 }
3517 }
3518 else{
3519 buffer = t_sso.data;
3520 data_len = get_sso_data_len();
3521 }
3522
// No LUT available: walk the data one codepoint at a time.
// NOTE(review): the condition uses '<=', so the byte at index 'data_len' may be inspected as well - confirm that this is intended.
3523 size_type num_bytes = 0;
3524 while( cp_count-- > 0 && num_bytes <= data_len )
3525 num_bytes += get_codepoint_bytes( buffer[num_bytes] , data_len - num_bytes );
3526
3527 return num_bytes;
3528 }
3529
// NOTE(review): the declarator line (listing line 3531) is missing from this extract; the body uses the
// parameters 'index' and 'cp_count' and returns a byte count - confirm name and signature upstream.
// Returns the number of bytes occupied by 'cp_count' codepoints that start at byte position 'index'.
// If 'index + cp_count' exceeds the data length (or the sum wraps around), the number of bytes from
// 'index' to the end of the data is returned. With an active LUT only the relevant LUT entries are
// visited; otherwise the data is decoded codepoint by codepoint.
3530 template<typename V, typename D, typename A>
3532 {
3533 size_type potential_end_index = index + cp_count;
3534 const data_type* buffer;
3535 size_type data_len;
3536
3537 // Procedure: Reduce the byte count by the number of utf8 data bytes
3538 if( sso_inactive() )
3539 {
3540 buffer = t_non_sso.data;
3541 data_len = t_non_sso.data_len;
3542 size_type buffer_size = t_non_sso.buffer_size;
3543 const data_type* lut_iter = basic_string::get_lut_base_ptr( buffer , buffer_size );
3544
3545 // 'potential_end_index < index' is needed because of potential integer overflow in sum
3546 if( potential_end_index > data_len || potential_end_index < index )
3547 return data_len - index;
3548
3549 // Is the lut active?
3550 if( basic_string::is_lut_active( lut_iter ) )
3551 {
3552 size_type orig_index = index;
3553 size_type lut_len = basic_string::get_lut_len( lut_iter );
3554
// An active but empty LUT means there are no multibyte codepoints: codepoints == bytes.
3555 if( !lut_len )
3556 return cp_count;
3557
3558 // Increase the byte count by the number of additional data bytes within multibytes
3559 width_type lut_width = basic_string::get_lut_width( buffer_size );
// The LUT is laid out downwards from its base pointer; 'lut_begin' addresses the last (lowest) entry.
3560 const data_type* lut_begin = lut_iter - lut_len * lut_width;
3561
3562 // Iterate to the start of the relevant part of the multibyte table
3563 for( lut_iter -= lut_width /* Move to first entry */ ; lut_iter >= lut_begin ; lut_iter -= lut_width )
3564 if( basic_string::get_lut( lut_iter , lut_width ) >= index )
3565 break;
3566
3567 // Add at least as many bytes as codepoints
3568 index += cp_count;
3569
3570 // Iterate over relevant multibyte indices
// From here on 'index' is the running end position; each multibyte in front of it pushes it further back.
3571 while( lut_iter >= lut_begin ){
3572 size_type multibyte_index = basic_string::get_lut( lut_iter , lut_width );
3573 if( multibyte_index >= index )
3574 break;
3575 index += basic_string::get_codepoint_bytes( buffer[multibyte_index] , data_len - multibyte_index ) - 1; // Add only the additional utf8 data bytes
3576 lut_iter -= lut_width; // Move cursor to the next lut entry
3577 }
3578
3579 return index - orig_index;
3580 }
3581 }
3582 else{
3583 buffer = t_sso.data;
3584 data_len = get_sso_data_len();
3585
3586 // 'potential_end_index < index' is needed because of potential integer overflow in sum
3587 if( potential_end_index > data_len || potential_end_index < index )
3588 return data_len - index;
3589 }
3590
3591 size_type orig_index = index;
3592
3593 // Procedure: Advance 'index' by the byte width of each of the 'cp_count' codepoints
3594 while( cp_count-- > 0 && index <= data_len )
3595 index += get_codepoint_bytes( buffer[index] , data_len - index );
3596
3597 return index - orig_index;
3598 }
3599
// NOTE(review): the declarator line (listing line 3601) is missing from this extract. The error location string
// below names this routine "(raw_)substr"; the body uses the parameters 'index' and 'byte_count' - confirm the signature upstream.
// Returns a new string holding the bytes [index, index + byte_count) of this string.
//  - 'index' beyond the data length: TINY_UTF8_THROW is invoked (throws, or is a no-op when exceptions
//    are disabled - see the macro definition at the top of the file) and an empty string is returned;
//  - a 'byte_count' reaching past the end (or wrapping around) is clamped to the end of the data;
//  - if the requested range covers all bytes, a copy of '*this' is returned;
//  - a result that fits into the SSO capacity is built in place, otherwise a heap buffer is allocated
//    and, if judged worthwhile, equipped with a multibyte index table (LUT).
3600 template<typename V, typename D, typename A>
3602 {
3603 // Bound checks...
3604 size_type data_len = size();
3605 if( index > data_len ){
3606 TINY_UTF8_THROW( "tiny_utf8::basic_string::(raw_)substr" , index > data_len );
3607 return {};
3608 }
3609 size_type end_index = index + byte_count;
3610 if( end_index > data_len || end_index < index ){ // 'end_index < index' is needed because of potential integer overflow in sum
3611 end_index = data_len;
3612 byte_count = end_index - index;
3613 }
3614
3615 // If the substring is not a substring
3616 if( byte_count == data_len )
3617 return *this;
3618
3619 // If the new string is a small string
3620 if( byte_count <= basic_string::get_sso_capacity() )
3621 {
3622 basic_string result;
3623
3624 // Copy data
3625 std::memcpy( result.t_sso.data , get_buffer() + index , byte_count );
3626 result.t_sso.data[byte_count] = '\0';
3627
3628 // Set length
3629 result.set_sso_data_len( (unsigned char)byte_count );
3630
3631 return result;
3632 }
3633
3634 // At this point, sso must be inactive, because a substring can only be smaller than this string
3635
// mb_index:   number of LUT entries (multibytes) located before 'index' (only set when the LUT is active)
// substr_cps: number of codepoints inside the requested range
// substr_mbs: number of multibyte codepoints inside the requested range
3636 size_type mb_index;
3637 size_type substr_cps;
3638 size_type substr_mbs = 0;
3639 size_type buffer_size = t_non_sso.buffer_size;
3640 const data_type* buffer = t_non_sso.data;
3641 const data_type* lut_base_ptr = basic_string::get_lut_base_ptr( buffer , buffer_size );
3642 bool lut_active = basic_string::is_lut_active( lut_base_ptr );
3643 width_type lut_width; // Ignore uninitialized warning, see [5]
3644
3645 // Count the number of SUBSTRING Multibytes and codepoints
3646 if( lut_active )
3647 {
3648 lut_width = basic_string::get_lut_width( buffer_size );
3649 mb_index = 0;
3650 const data_type* lut_begin = lut_base_ptr - lut_width * basic_string::get_lut_len( lut_base_ptr );
3651 const data_type* lut_iter = lut_base_ptr;
// Skip (and count) all LUT entries that refer to positions in front of the substring.
3652 for( lut_iter -= lut_width; lut_iter >= lut_begin ; lut_iter -= lut_width ){
3653 if( basic_string::get_lut( lut_iter , lut_width ) >= index )
3654 break;
3655 mb_index++;
3656 }
3657 substr_cps = byte_count; // Add at least as many bytes as codepoints
3658 for( ; lut_iter >= lut_begin ; lut_iter -= lut_width ){ // Iterate over relevant multibyte indices
3659 size_type multibyte_index = basic_string::get_lut( lut_iter , lut_width );
3660 if( multibyte_index >= end_index )
3661 break;
3662 substr_cps -= basic_string::get_codepoint_bytes( buffer[multibyte_index] , data_len - multibyte_index ); // Actually '- 1', but see[4]
3663 ++substr_mbs;
3664 }
3665 substr_cps += substr_mbs; // [4]: We subtracted all bytes of the relevant multibytes. We therefore need to re-add substr_mbs codepoints.
3666 }
3667 else
3668 {
3669 substr_cps = 0;
3670 size_type iter = index;
3671 while( iter < end_index ){ // Count multibytes and codepoints of the substring
3672 width_type bytes = get_codepoint_bytes( buffer[iter] , data_len - iter );
3673 substr_mbs += bytes > 1; iter += bytes; ++substr_cps;
3674 }
3675 }
3676
3677 size_type substr_buffer_size;
3678 width_type substr_lut_width;
3679
3680 // Indices Table worth the memory loss?
// i.e. a LUT is created only if there is at least one multibyte and their number does not exceed half the codepoints.
3681 if( size_type( substr_mbs - 1 ) < size_type( substr_cps / 2 ) ) // Note: substr_mbs is intended to underflow at '0'
3682 substr_buffer_size = determine_main_buffer_size( byte_count , substr_mbs , &substr_lut_width );
3683 else{
3684 substr_lut_width = 0;
3685 substr_buffer_size = determine_main_buffer_size( byte_count );
3686 }
3687
3688 data_type* substr_buffer = this->allocate( determine_total_buffer_size( substr_buffer_size ) );
3689 data_type* substr_lut_base_ptr = basic_string::get_lut_base_ptr( substr_buffer , substr_buffer_size );
3690
3691 // Copy requested BUFFER part
3692 std::memcpy( substr_buffer , buffer + index , byte_count );
3693 substr_buffer[byte_count] = '\0'; // Add trailing '\0'
3694
3695 if( substr_lut_width )
3696 {
3697 // Set new lut size and mode
3698 basic_string::set_lut_indiciator( substr_lut_base_ptr , true , substr_mbs );
3699
3700 // Reuse old indices?
3701 if( lut_active )
3702 {
3703 // Can we do a plain copy of indices?
// Only possible if no offset has to be subtracted from the indices ('index == 0') and the entry width is unchanged.
3704 if( index == 0 && substr_lut_width == lut_width ) // [5]: lut_width is initialized, as soon as 'lut_active' is true
3705 std::memcpy(
3706 substr_lut_base_ptr - substr_mbs * lut_width
3707 , lut_base_ptr - ( mb_index + substr_mbs ) * lut_width // mb_index is initialized, as soon as 'lut_active' is true
3708 , substr_mbs * lut_width
3709 );
3710 else
3711 for( const data_type* lut_iter = lut_base_ptr - mb_index * lut_width; substr_mbs-- > 0 ; )
// NOTE(review): listing line 3712 is missing here; the orphaned argument list below presumably belongs to a
// 'basic_string::set_lut(' call that stores each old index, shifted by '- index' - confirm against the original header.
3713 substr_lut_base_ptr -= substr_lut_width
3714 , substr_lut_width
3715 , basic_string::get_lut( lut_iter -= lut_width , lut_width ) - index
3716 );
3717 }
3718 else // Fill the lut by iterating over the substrings data
3719 for( size_type substr_iter = 0 ; substr_iter < byte_count ; ){
3720 width_type bytes = get_codepoint_bytes( substr_buffer[substr_iter] , byte_count - substr_iter );
3721 if( bytes > 1 )
3722 basic_string::set_lut( substr_lut_base_ptr -= substr_lut_width , substr_lut_width , substr_iter );
3723 substr_iter += bytes;
3724 }
3725 }
3726 else // Set substring lut mode
3727 basic_string::set_lut_indiciator( substr_lut_base_ptr , substr_mbs == 0 , 0 );
3728
3729 // Prepare result
3730 basic_string result;
3731 result.t_non_sso.data = substr_buffer;
3732 result.t_non_sso.data_len = byte_count;
3733 result.t_non_sso.buffer_size = substr_buffer_size;
3734 result.set_non_sso_string_len( substr_cps );
3735
3736 return result;
3737 }
3738
// NOTE(review): the declarator line (listing line 3740) is missing from this extract; the body appends the
// contents of the parameter 'app' to this string and returns *this - confirm name and signature upstream.
// Outline of the visible logic:
//  1. Nothing to append -> return unchanged.
//  2. Result fits into the SSO capacity -> append in place inside the SSO buffer.
//  3. Otherwise gather, for 'app' and for '*this', the buffer, its size, the codepoint count and the
//     number of multibyte codepoints (taken from the LUT if active, else counted by decoding the data).
//  4. Decide whether the result should carry a LUT and compute the required main buffer size.
//  5. Either reuse the current heap buffer (append data and LUT entries in place) or allocate a buffer of
//     twice the required size, rebuild data and LUT in it and release the old heap buffer.
//  6. Store the new data length and codepoint count.
// NOTE(review): listing lines 3879, 3934 and 3966 are absent from this extract; see the note at each gap.
3739 template<typename V, typename D, typename A>
3741 {
3742 // Will add nothing?
3743 bool app_sso_inactive = app.sso_inactive();
3744 size_type app_data_len = app_sso_inactive ? app.t_non_sso.data_len : app.get_sso_data_len();
3745 if( app_data_len == 0 )
3746 return *this;
3747
3748 // Compute some metrics
3749 size_type old_data_len = size();
3750 size_type new_data_len = old_data_len + app_data_len;
3751
3752 // Will be sso string?
3753 if( new_data_len <= basic_string::get_sso_capacity() ){
3754 std::memcpy( t_sso.data + old_data_len , app.t_sso.data , app_data_len ); // Copy APPENDIX (must have sso active as well)
3755 t_sso.data[new_data_len] = '\0'; // Trailing '\0'
3756 set_sso_data_len( (unsigned char)new_data_len ); // Adjust size
3757 return *this;
3758 }
3759
3761
3762
3763 // Count codepoints and multibytes of the appendix
3764 bool app_lut_active;
3765 const data_type* app_buffer;
3766 const data_type* app_lut_base_ptr;
3767 size_type app_buffer_size;
3768 size_type app_string_len;
3769 size_type app_lut_len;
3770 if( app.sso_inactive() )
3771 {
3772 app_buffer_size = app.t_non_sso.buffer_size;
3773 app_buffer = app.t_non_sso.data;
3774 app_string_len = app.get_non_sso_string_len();
3775
3776 // Compute the number of multibytes
3777 app_lut_base_ptr = basic_string::get_lut_base_ptr( app_buffer , app_buffer_size );
3778 app_lut_active = basic_string::is_lut_active( app_lut_base_ptr );
3779 if( app_lut_active )
3780 app_lut_len = basic_string::get_lut_len( app_lut_base_ptr );
3781 else{
3782 app_lut_len = 0;
3783 for( size_type iter = 0 ; iter < app_data_len ; ){
3784 width_type bytes = get_codepoint_bytes( app_buffer[iter] , app_data_len - iter );
3785 app_lut_len += bytes > 1; iter += bytes;
3786 }
3787 }
3788 }
3789 else
3790 {
// SSO appendix: it has neither a stored codepoint count nor a LUT, so both numbers are counted by decoding its data.
3791 app_lut_active = false;
3792 app_string_len = 0;
3793 app_buffer = app.t_sso.data;
3794 app_buffer_size = basic_string::get_sso_capacity();
3795 app_lut_len = 0;
3796 for( size_type iter = 0 ; iter < app_data_len ; ){
3797 width_type bytes = get_codepoint_bytes( app_buffer[iter] , app_data_len - iter );
3798 app_lut_len += bytes > 1; iter += bytes; ++app_string_len;
3799 }
3800 }
3801
3802 // Count codepoints and multibytes of this string
3803 data_type* old_buffer;
3804 data_type* old_lut_base_ptr; // Ignore uninitialized warning, see [3]
3805 size_type old_buffer_size;
3806 size_type old_string_len;
3807 bool old_lut_active;
3808 size_type old_lut_len;
3809 bool old_sso_inactive = sso_inactive();
3810 if( old_sso_inactive )
3811 {
3812 old_buffer_size = t_non_sso.buffer_size;
3813 old_buffer = t_non_sso.data;
3814 old_string_len = get_non_sso_string_len();
3815
3816 // Count TOTAL multibytes
3817 old_lut_base_ptr = basic_string::get_lut_base_ptr( old_buffer , old_buffer_size );
3818 old_lut_active = basic_string::is_lut_active( old_lut_base_ptr );
3819 if( old_lut_active )
3820 old_lut_len = basic_string::get_lut_len( old_lut_base_ptr );
3821 else{
3822 old_lut_len = 0;
3823 for( size_type iter = 0 ; iter < old_data_len ; ){
3824 width_type bytes = get_codepoint_bytes( old_buffer[iter] , old_data_len - iter );
3825 old_lut_len += bytes > 1; iter += bytes;
3826 }
3827 }
3828 }
3829 else
3830 {
3831 old_buffer = t_sso.data;
3832 old_buffer_size = basic_string::get_sso_capacity();
3833 old_string_len = 0;
3834 old_lut_len = 0;
3835 size_type iter = 0;
3836 old_lut_active = false;
3837 while( iter < old_data_len ){ // Count multibytes and codepoints
3838 width_type bytes = get_codepoint_bytes( old_buffer[iter] , old_data_len - iter );
3839 old_lut_len += bytes > 1; iter += bytes; ++old_string_len;
3840 }
3841 }
3842
3843
3844 // Compute updated metrics
3845 size_type new_lut_len = old_lut_len + app_lut_len;
3846 size_type new_string_len = old_string_len + app_string_len;
3847 size_type new_buffer_size;
3848 width_type new_lut_width; // [2] ; 0 signals that we don't need a lut
3849
3850 // Indices Table worth the memory loss?
3851 // If the ratio of indices/codepoints is lower 5/8 and we have a LUT -> keep it
3852 // If we don't have a LUT, it has to drop below 3/8 for us to start one
3853 if( basic_string::is_lut_worth( new_lut_len , new_string_len , old_lut_active , old_sso_inactive ) )
3854 new_buffer_size = determine_main_buffer_size( new_data_len , new_lut_len , &new_lut_width );
3855 else{
3856 new_lut_width = 0;
3857 new_buffer_size = determine_main_buffer_size( new_data_len );
3858 }
3859
3860 // Can we reuse the old buffer?
3861 if( new_buffer_size <= old_buffer_size )
3862 {
3863 // [3] At this point, 'old_sso_inactive' MUST be true, because else,
3864 // the resulting string would have sso active (which is handled way above)
3865
3866 // Need to fill the lut? (see [2])
3867 if( new_lut_width )
3868 {
3869 // Make sure, the lut width stays the same, because we still have the same buffer size
3870 new_lut_width = basic_string::get_lut_width( old_buffer_size );
3871
3872 // Append new INDICES
3873 data_type* lut_dest_iter = old_lut_base_ptr - old_lut_len * new_lut_width; // 'old_lut_base_ptr' is initialized as 'old_sso_inactive' is true (see [3])
3874 if( app_lut_active )
3875 {
3876 width_type app_lut_width = basic_string::get_lut_width( app_buffer_size );
3877 const data_type* app_lut_iter = app_lut_base_ptr; // 'app_lut_base_ptr' is initialized as soon as 'app_lut_active' is set to true
3878 while( app_lut_len-- > 0 )
// NOTE(review): listing line 3879 is missing here; the orphaned argument list below presumably belongs to a
// 'basic_string::set_lut(' call storing each appendix index shifted by 'old_data_len' - confirm against the original header.
3880 lut_dest_iter -= new_lut_width
3881 , new_lut_width
3882 , basic_string::get_lut( app_lut_iter -= app_lut_width , app_lut_width ) + old_data_len
3883 );
3884 }
3885 else{
3886 size_type iter = 0;
3887 while( iter < app_data_len ){
3888 width_type bytes = get_codepoint_bytes( app_buffer[iter] , app_data_len - iter );
3889 if( bytes > 1 )
3890 basic_string::set_lut( lut_dest_iter -= new_lut_width , new_lut_width , iter + old_data_len );
3891 iter += bytes;
3892 }
3893 }
3894
3895 // Set new lut mode
3896 basic_string::set_lut_indiciator( old_lut_base_ptr , true , new_lut_len );
3897 }
3898 else // Set new lut mode
3899 basic_string::set_lut_indiciator( old_lut_base_ptr , new_lut_len == 0 , 0 );
3900
3901 // Update buffer and data_len
3902 std::memcpy( old_buffer + old_data_len , app_buffer , app_data_len ); // Copy BUFFER of the appendix
3903 old_buffer[new_data_len] = '\0'; // Trailing '\0'
3904 }
3905 else // No, apparently we have to allocate a new buffer...
3906 {
3907 new_buffer_size <<= 1; // Allocate twice as much, in order to amortize allocations (keeping in mind alignment)
3908 data_type* new_buffer = this->allocate( determine_total_buffer_size( new_buffer_size ) );
3909 data_type* new_lut_base_ptr = basic_string::get_lut_base_ptr( new_buffer , new_buffer_size );
3910
3911 // Write NEW BUFFER
3912 std::memcpy( new_buffer , old_buffer , old_data_len ); // Copy current BUFFER
3913 std::memcpy( new_buffer + old_data_len , app_buffer , app_data_len ); // Copy BUFFER of appendix
3914 new_buffer[new_data_len] = '\0'; // Trailing '\0'
3915
3916 // Need to fill the lut? (see [2])
3917 if( new_lut_width )
3918 {
3919 // Update the lut width, since we doubled the buffer size a couple of lines above
3920 new_lut_width = basic_string::get_lut_width( new_buffer_size );
3921
3922 // Reuse indices from old lut?
3923 if( old_lut_active )
3924 {
3925 width_type old_lut_width = basic_string::get_lut_width( old_buffer_size );
3926
3927 // Copy all old INDICES
3928 if( new_lut_width != old_lut_width )
3929 {
3930 data_type* lut_iter = old_lut_base_ptr;
3931 data_type* new_lut_iter = new_lut_base_ptr;
3932 size_type num_indices = old_lut_len;
3933 while( num_indices-- > 0 )
// NOTE(review): listing line 3934 is missing here; the orphaned argument list below presumably belongs to a
// 'basic_string::set_lut(' call converting each old index to the new entry width - confirm against the original header.
3935 new_lut_iter -= new_lut_width
3936 , new_lut_width
3937 , basic_string::get_lut( lut_iter -= old_lut_width , old_lut_width )
3938 );
3939 }
3940 else // Plain copy of them
3941 std::memcpy(
3942 new_lut_base_ptr - old_lut_len * new_lut_width
3943 , old_lut_base_ptr - old_lut_len * old_lut_width
3944 , old_lut_len * old_lut_width
3945 );
3946 }
3947 else // We need to fill these indices manually...
3948 {
3949 data_type* new_lut_iter = new_lut_base_ptr;
3950 size_type iter = 0;
3951 while( iter < old_data_len ){ // Fill lut with indices of the data BEFORE the appendix
3952 width_type bytes = get_codepoint_bytes( old_buffer[iter] , old_data_len - iter );
3953 if( bytes > 1 )
3954 basic_string::set_lut( new_lut_iter -= new_lut_width , new_lut_width , iter );
3955 iter += bytes;
3956 }
3957 }
3958
3959 // Copy INDICES of the appendix
// They are written right below the 'old_lut_len' entries that were placed into the new LUT above.
3960 data_type* lut_dest_iter = new_lut_base_ptr - old_lut_len * new_lut_width;
3961 if( app_lut_active )
3962 {
3963 width_type app_lut_width = basic_string::get_lut_width( app_buffer_size );
3964 const data_type* app_lut_iter = app_lut_base_ptr;
3965 while( app_lut_len-- > 0 )
// NOTE(review): listing line 3966 is missing here; the orphaned argument list below presumably belongs to a
// 'basic_string::set_lut(' call storing each appendix index shifted by 'old_data_len' - confirm against the original header.
3967 lut_dest_iter -= new_lut_width
3968 , new_lut_width
3969 , basic_string::get_lut( app_lut_iter -= app_lut_width , app_lut_width ) + old_data_len
3970 );
3971 }
3972 else{
3973 size_type app_iter = 0;
3974 while( app_iter < app_data_len ){
3975 width_type bytes = get_codepoint_bytes( app_buffer[app_iter] , app_data_len - app_iter );
3976 if( bytes > 1 )
3977 basic_string::set_lut( lut_dest_iter -= new_lut_width , new_lut_width , app_iter + old_data_len );
3978 app_iter += bytes;
3979 }
3980 }
3981
3982 basic_string::set_lut_indiciator( new_lut_base_ptr , true , new_lut_len ); // Set new lut mode and len
3983 }
3984 else // Set new lut mode
3985 basic_string::set_lut_indiciator( new_lut_base_ptr , new_lut_len == 0 , 0 );
3986
3987 // Delete the old buffer?
// Only a heap buffer must be released; with SSO active, 'old_buffer' points into this object itself.
3988 if( old_sso_inactive )
3989 this->deallocate( old_buffer , old_buffer_size );
3990
3991 // Set new Attributes
3992 t_non_sso.data = new_buffer;
3993 t_non_sso.buffer_size = new_buffer_size;
3994 }
3995
3996 // Adjust Attributes
3997 t_non_sso.data_len = new_data_len;
3998 set_non_sso_string_len( new_string_len );
3999
4000 return *this;
4001 }
4002
4003 template<typename V, typename D, typename A>
4005 {
4006 // Bound checks...
4007 size_type old_data_len = size();
4008 if( index > old_data_len ){
4009 TINY_UTF8_THROW( "tiny_utf8::basic_string::(raw_)insert" , index > old_data_len );
4010 return *this;
4011 }
4012
4013 // Compute the updated metrics
4014 size_type str_data_len = str.size();
4015 size_type new_data_len = old_data_len + str_data_len;
4016
4017 // Will be empty?
4018 if( str_data_len == 0 )
4019 return *this;
4020
4021 // Will be sso string?
4022 if( new_data_len <= basic_string::get_sso_capacity() )
4023 {
4024 // Copy AFTER inserted part, if it has moved in position
4025 std::memmove( t_sso.data + index + str_data_len , t_sso.data + index , old_data_len - index );
4026
4027 // Copy INSERTION (Note: Since the resulting string is small, the insertion must be small as well!)
4028 std::memcpy( t_sso.data + index , str.t_sso.data , str_data_len );
4029
4030 // Finish the new string object
4031 t_sso.data[new_data_len] = '\0'; // Trailing '\0'
4032 set_sso_data_len( (unsigned char)new_data_len );
4033
4034 return *this;
4035 }
4036
4038
4039
4040 // Count codepoints and multibytes of insertion
4041 bool str_lut_active;
4042 const data_type* str_buffer;
4043 const data_type* str_lut_base_ptr;
4044 size_type str_buffer_size;
4045 size_type str_string_len;
4046 size_type str_lut_len;
4047 if( str.sso_inactive() )
4048 {
4049 str_buffer_size = str.t_non_sso.buffer_size;
4050 str_buffer = str.t_non_sso.data;
4051 str_string_len = str.get_non_sso_string_len();
4052
4053 // Compute the number of multibytes
4054 str_lut_base_ptr = basic_string::get_lut_base_ptr( str_buffer , str_buffer_size );
4055 str_lut_active = basic_string::is_lut_active( str_lut_base_ptr );
4056 if( str_lut_active )
4057 str_lut_len = basic_string::get_lut_len( str_lut_base_ptr );
4058 else{
4059 str_lut_len = 0;
4060 for( size_type iter = 0 ; iter < str_data_len ; ){
4061 width_type bytes = get_codepoint_bytes( str_buffer[iter] , str_data_len - iter );
4062 str_lut_len += bytes > 1; iter += bytes;
4063 }
4064 }
4065 }
4066 else
4067 {
4068 str_lut_active = false;
4069 str_string_len = 0;
4070 str_buffer = str.t_sso.data;
4071 str_buffer_size = basic_string::get_sso_capacity();
4072 str_lut_len = 0;
4073 for( size_type iter = 0 ; iter < str_data_len ; ){
4074 width_type bytes = get_codepoint_bytes( str_buffer[iter] , str_data_len - iter );
4075 str_lut_len += bytes > 1; iter += bytes; ++str_string_len;
4076 }
4077 }
4078
4079 // Count codepoints and multibytes of this string
4080 data_type* old_buffer;
4081 data_type* old_lut_base_ptr; // Ignore uninitialized warning, see [3]
4082 size_type old_buffer_size;
4083 size_type old_string_len;
4084 bool old_lut_active;
4085 size_type mb_index = 0;
4086 size_type old_lut_len;
4087 bool old_sso_inactive = sso_inactive();
4088 if( old_sso_inactive )
4089 {
4090 old_buffer_size = t_non_sso.buffer_size;
4091 old_buffer = t_non_sso.data;
4092 old_string_len = get_non_sso_string_len();
4093 size_type iter = 0;
4094 while( iter < index ){ // Count multibytes and codepoints BEFORE insertion
4095 width_type bytes = get_codepoint_bytes( old_buffer[iter] , old_data_len - iter );
4096 mb_index += bytes > 1; iter += bytes;
4097 }
4098 // Count TOTAL multibytes
4099 old_lut_base_ptr = basic_string::get_lut_base_ptr( old_buffer , old_buffer_size );
4100 old_lut_active = basic_string::is_lut_active( old_lut_base_ptr );
4101 if( old_lut_active )
4102 old_lut_len = basic_string::get_lut_len( old_lut_base_ptr );
4103 else{
4104 old_lut_len = mb_index;
4105 while( iter < old_data_len ){
4106 width_type bytes = get_codepoint_bytes( old_buffer[iter] , old_data_len - iter );
4107 old_lut_len += bytes > 1; iter += bytes;
4108 }
4109 }
4110 }
4111 else
4112 {
4113 old_buffer = t_sso.data;
4114 old_buffer_size = basic_string::get_sso_capacity();
4115 old_string_len = 0;
4116 old_lut_len = 0;
4117 size_type iter = 0;
4118 old_lut_active = false;
4119 while( iter < index ){ // Count multibytes and codepoints BEFORE insertion
4120 width_type bytes = get_codepoint_bytes( old_buffer[iter] , old_data_len - iter );
4121 mb_index += bytes > 1; iter += bytes; ++old_string_len;
4122 }
4123 old_lut_len = mb_index;
4124 while( iter < old_data_len ){ // Count multibytes and codepoints AFTER insertion
4125 width_type bytes = get_codepoint_bytes( old_buffer[iter] , old_data_len - iter );
4126 old_lut_len += bytes > 1; iter += bytes; ++old_string_len;
4127 }
4128 }
4129
4130
4131 // Compute updated metrics
4132 size_type new_lut_len = old_lut_len + str_lut_len;
4133 size_type new_string_len = old_string_len + str_string_len;
4134 size_type new_buffer_size;
4135 width_type new_lut_width; // [2] ; 0 signalizes, that we don't need a lut
4136
4137 // Indices Table worth the memory loss?
4138 if( basic_string::is_lut_worth( new_lut_len , new_string_len , old_lut_active , old_sso_inactive ) )
4139 new_buffer_size = determine_main_buffer_size( new_data_len , new_lut_len , &new_lut_width );
4140 else{
4141 new_lut_width = 0;
4142 new_buffer_size = determine_main_buffer_size( new_data_len );
4143 }
4144
4145 // Can we reuse the old buffer?
4146 if( new_buffer_size <= old_buffer_size )
4147 {
4148 // [3] At this point, 'old_sso_inactive' MUST be true, because else,
4149 // the resulting string would have sso active (which is handled way above)
4150
4151 // Need to fill the lut? (see [2])
4152 if( new_lut_width )
4153 {
4154 // Make sure, the lut width stays the same, because we still have the same buffer size
4155 new_lut_width = basic_string::get_lut_width( old_buffer_size );
4156
4157 // Reuse indices from old lut?
4158 if( old_lut_active )
4159 {
4160 // Offset all indices
4161 data_type* lut_iter = old_lut_base_ptr - mb_index * new_lut_width; // 'old_lut_base_ptr' is initialized as soon as 'old_lut_active' is set to true
4162 size_type num_indices = old_lut_len - mb_index;
4163 while( num_indices-- > 0 ){
4164 lut_iter -= new_lut_width;
4165 basic_string::set_lut( lut_iter , new_lut_width , basic_string::get_lut( lut_iter , new_lut_width ) + str_data_len );
4166 }
4167
4168 // Copy INDICES from AFTER insertion
4169 // We only need to copy them, if the number of multibytes in the inserted part has changed
4170 if( str_lut_len )
4171 {
4172 // Move the indices!
4173 std::memmove(
4174 old_lut_base_ptr - new_lut_len * new_lut_width
4175 , old_lut_base_ptr - old_lut_len * new_lut_width
4176 , ( old_lut_len - mb_index ) * new_lut_width
4177 );
4178 basic_string::set_lut_indiciator( old_lut_base_ptr , true , new_lut_len ); // Set new lut size
4179 }
4180 }
4181 else // We need to fill the lut manually...
4182 {
4183 // Fill INDICES BEFORE insertion
4184 size_type iter = 0;
4185 data_type* lut_iter = old_lut_base_ptr;
4186 while( iter < index ){ // Fill lut with indices BEFORE insertion
4187 width_type bytes = get_codepoint_bytes( old_buffer[iter] , old_data_len - iter );
4188 if( bytes > 1 )
4189 basic_string::set_lut( lut_iter -= new_lut_width , new_lut_width , iter );
4190 iter += bytes;
4191 }
4192
4193 // Fill INDICES AFTER insertion
4194 lut_iter -= str_lut_len * new_lut_width;
4195 while( iter < old_data_len ){
4196 width_type bytes = get_codepoint_bytes( old_buffer[iter] , old_data_len - iter );
4197 if( bytes > 1 )
4198 basic_string::set_lut( lut_iter -= new_lut_width , new_lut_width , iter + str_data_len );
4199 iter += bytes;
4200 }
4201
4202 basic_string::set_lut_indiciator( old_lut_base_ptr , true , new_lut_len ); // Set lut size
4203 }
4204
4205 // Copy INDICES of the insertion
4206 data_type* lut_dest_iter = old_lut_base_ptr - mb_index * new_lut_width;
4207 if( str_lut_active )
4208 {
4209 width_type str_lut_width = basic_string::get_lut_width( str_buffer_size );
4210 const data_type* str_lut_iter = str_lut_base_ptr; // 'str_lut_base_ptr' is initialized as soon as 'str_lut_active' is set to true
4211 while( str_lut_len-- > 0 )
4213 lut_dest_iter -= new_lut_width
4214 , new_lut_width
4215 , basic_string::get_lut( str_lut_iter -= str_lut_width , str_lut_width ) + index
4216 );
4217 }
4218 else{
4219 size_type iter = 0;
4220 while( iter < str_data_len ){
4221 width_type bytes = get_codepoint_bytes( str_buffer[iter] , str_data_len - iter );
4222 if( bytes > 1 )
4223 basic_string::set_lut( lut_dest_iter -= new_lut_width , new_lut_width , iter + index );
4224 iter += bytes;
4225 }
4226 }
4227 }
4228 else // Set new lut mode
4229 basic_string::set_lut_indiciator( old_lut_base_ptr , new_lut_len == 0 , 0 );
4230
4231 // Move BUFFER from AFTER the insertion (Note: We don't need to copy before it, because the buffer hasn't changed)
4232 std::memmove( old_buffer + index + str_data_len , old_buffer + index , old_data_len - index );
4233 old_buffer[new_data_len] = '\0'; // Trailing '\0'
4234
4235 // Copy BUFFER of the insertion
4236 std::memcpy( old_buffer + index , str_buffer , str_data_len );
4237 }
4238 else // No, apparently we have to allocate a new buffer...
4239 {
4240 new_buffer_size <<= 1; // Allocate twice as much, in order to amortize allocations (keeping in mind alignment)
4241 data_type* new_buffer = this->allocate( determine_total_buffer_size( new_buffer_size ) );
4242 data_type* new_lut_base_ptr = basic_string::get_lut_base_ptr( new_buffer , new_buffer_size );
4243
4244 // Copy BUFFER from BEFORE insertion
4245 std::memcpy( new_buffer , old_buffer , index );
4246
4247 // Copy BUFFER of insertion
4248 std::memcpy( new_buffer + index , str_buffer , str_data_len );
4249
4250 // Copy BUFFER from AFTER the insertion
4251 std::memcpy( new_buffer + index + str_data_len , old_buffer + index , old_data_len - index );
4252 new_buffer[new_data_len] = '\0'; // Trailing '\0'
4253
4254 // Need to fill the lut? (see [2])
4255 if( new_lut_width )
4256 {
4257 // Update the lut width, since we doubled the buffer size a couple of lines above
4258 new_lut_width = basic_string::get_lut_width( new_buffer_size );
4259
4260 // Reuse indices from old lut?
4261 if( old_lut_active )
4262 {
4263 width_type old_lut_width = basic_string::get_lut_width( old_buffer_size );
4264
4265 // Copy all INDICES BEFORE the insertion
4266 if( new_lut_width != old_lut_width )
4267 {
4268 data_type* lut_iter = old_lut_base_ptr;
4269 data_type* new_lut_iter = new_lut_base_ptr;
4270 size_type num_indices = mb_index;
4271 while( num_indices-- > 0 )
4273 new_lut_iter -= new_lut_width
4274 , new_lut_width
4275 , basic_string::get_lut( lut_iter -= old_lut_width , old_lut_width )
4276 );
4277 }
4278 else // Plain copy of them
4279 std::memcpy(
4280 new_lut_base_ptr - mb_index * new_lut_width
4281 , old_lut_base_ptr - mb_index * old_lut_width
4282 , mb_index * old_lut_width
4283 );
4284
4285 // Copy all INDICES AFTER the insertion
4286 // Need to offset all indices or translate the lut width? (This can be, if the insertion data has different size as the inserted data)
4287 data_type* lut_iter = old_lut_base_ptr - mb_index * old_lut_width;
4288 data_type* new_lut_iter = new_lut_base_ptr - ( mb_index + str_lut_len ) * new_lut_width;
4289 size_type num_indices = old_lut_len - mb_index;
4290 while( num_indices-- > 0 )
4292 new_lut_iter -= new_lut_width
4293 , new_lut_width
4294 , basic_string::get_lut( lut_iter -= old_lut_width , old_lut_width ) + str_data_len
4295 );
4296 }
4297 else // We need to fill the lut manually...
4298 {
4299 // Fill INDICES BEFORE insertion
4300 size_type iter = 0;
4301 data_type* lut_iter = new_lut_base_ptr;
4302 while( iter < index ){ // Fill lut with indices BEFORE insertion
4303 width_type bytes = get_codepoint_bytes( old_buffer[iter] , old_data_len - iter );
4304 if( bytes > 1 )
4305 basic_string::set_lut( lut_iter -= new_lut_width , new_lut_width , iter );
4306 iter += bytes;
4307 }
4308
4309 // Fill INDICES AFTER insertion
4310 lut_iter -= str_lut_len * new_lut_width;
4311 while( iter < old_data_len ){
4312 width_type bytes = get_codepoint_bytes( old_buffer[iter] , old_data_len - iter );
4313 if( bytes > 1 )
4314 basic_string::set_lut( lut_iter -= new_lut_width , new_lut_width , iter + str_data_len );
4315 iter += bytes;
4316 }
4317 }
4318
4319 // Copy INDICES of the insertion
4320 data_type* lut_dest_iter = new_lut_base_ptr - mb_index * new_lut_width;
4321 if( str_lut_active )
4322 {
4323 width_type str_lut_width = basic_string::get_lut_width( str_buffer_size );
4324 const data_type* str_lut_iter = str_lut_base_ptr;
4325 while( str_lut_len-- > 0 )
4327 lut_dest_iter -= new_lut_width
4328 , new_lut_width
4329 , basic_string::get_lut( str_lut_iter -= str_lut_width , str_lut_width ) + index
4330 );
4331 }
4332 else{
4333 size_type str_iter = 0;
4334 while( str_iter < str_data_len ){
4335 width_type bytes = get_codepoint_bytes( str_buffer[str_iter] , str_data_len - str_iter );
4336 if( bytes > 1 )
4337 basic_string::set_lut( lut_dest_iter -= new_lut_width , new_lut_width , str_iter + index );
4338 str_iter += bytes;
4339 }
4340 }
4341
4342 basic_string::set_lut_indiciator( new_lut_base_ptr , true , new_lut_len ); // Set new lut mode and len
4343 }
4344 else // Set new lut mode
4345 basic_string::set_lut_indiciator( new_lut_base_ptr , new_lut_len == 0 , 0 );
4346
4347 // Delete the old buffer?
4348 if( old_sso_inactive )
4349 this->deallocate( old_buffer , old_buffer_size );
4350
4351 // Set new Attributes
4352 t_non_sso.data = new_buffer;
4353 t_non_sso.buffer_size = new_buffer_size;
4354 }
4355
4356 // Adjust Attributes
4357 t_non_sso.data_len = new_data_len;
4358 set_non_sso_string_len( new_string_len );
4359
4360 return *this;
4361 }
4362
4363 template<typename V, typename D, typename A>
4365 {
4366 // Bound checks...
4367 size_type old_data_len = size();
4368 if( index > old_data_len ){
4369 TINY_UTF8_THROW( "tiny_utf8::basic_string::(raw_)replace" , index > old_data_len );
4370 return *this;
4371 }
4372 size_type end_index = index + replaced_len;
4373 if( end_index > old_data_len || end_index < index ){ // 'end_index < index' is needed because of potential integer overflow in sum
4374 end_index = old_data_len;
4375 replaced_len = end_index - index;
4376 }
4377
4378 // Compute the updated metrics
4379 size_type repl_data_len = repl.size();
4380 difference_type delta_len = repl_data_len - replaced_len;
4381 size_type new_data_len = old_data_len + delta_len;
4382
4383 // Will be empty?
4384 if( !new_data_len ){
4385 clear();
4386 return *this;
4387 }
4388 else if( replaced_len == 0 && repl_data_len == 0 )
4389 return *this;
4390
4391 // Will be sso string?
4392 bool old_sso_inactive = sso_inactive();
4393 if( new_data_len <= basic_string::get_sso_capacity() )
4394 {
4395 // Was the buffer on the heap and has to be moved now?
4396 if( old_sso_inactive )
4397 {
4398 data_type* old_buffer = t_non_sso.data; // Backup old buffer, since we override the pointer to it (see [1])
4399 size_type old_buffer_size = t_non_sso.buffer_size; // Backup old buffer size, since we override the pointer to it (see [1])
4400
4401 // Copy BEFORE replaced part
4402 std::memcpy( t_sso.data , old_buffer , index ); // [1]
4403
4404 // Copy AFTER replaced part
4405 std::memcpy( t_sso.data + index + repl_data_len , old_buffer + end_index , old_data_len - end_index );
4406
4407 this->deallocate( old_buffer , old_buffer_size ); // Delete the old buffer
4408 }
4409 // Copy AFTER replaced part, if it has moved in position
4410 else if( new_data_len != old_data_len )
4411 std::memmove( t_sso.data + index + repl_data_len , t_sso.data + index + replaced_len , old_data_len - index );
4412
4413 // Copy REPLACEMENT (Note: Since the resulting string is small, the replacement must be small as well!)
4414 std::memcpy( t_sso.data + index , repl.t_sso.data , repl_data_len );
4415
4416 // Finish the new string object
4417 t_sso.data[new_data_len] = '\0'; // Trailing '\0'
4418 set_sso_data_len( (unsigned char)new_data_len );
4419
4420 return *this;
4421 }
4422
4424
4425 // Count codepoints and multibytes of replacement
4426 bool repl_lut_active;
4427 const data_type* repl_buffer;
4428 const data_type* repl_lut_base_ptr;
4429 size_type repl_buffer_size;
4430 size_type repl_string_len;
4431 size_type repl_lut_len;
4432 if( repl.sso_inactive() )
4433 {
4434 repl_buffer_size = repl.t_non_sso.buffer_size;
4435 repl_buffer = repl.t_non_sso.data;
4436 repl_string_len = repl.get_non_sso_string_len();
4437
4438 // Compute the number of multibytes
4439 repl_lut_base_ptr = basic_string::get_lut_base_ptr( repl_buffer , repl_buffer_size );
4440 repl_lut_active = basic_string::is_lut_active( repl_lut_base_ptr );
4441 if( repl_lut_active )
4442 repl_lut_len = basic_string::get_lut_len( repl_lut_base_ptr );
4443 else{
4444 repl_lut_len = 0;
4445 for( size_type iter = 0 ; iter < repl_data_len ; ){
4446 width_type bytes = get_codepoint_bytes( repl_buffer[iter] , repl_data_len - iter );
4447 repl_lut_len += bytes > 1; iter += bytes;
4448 }
4449 }
4450 }
4451 else
4452 {
4453 repl_lut_active = false;
4454 repl_string_len = 0;
4455 repl_buffer = repl.t_sso.data;
4456 repl_buffer_size = basic_string::get_sso_capacity();
4457 repl_lut_len = 0;
4458 for( size_type iter = 0 ; iter < repl_data_len ; ){
4459 width_type bytes = get_codepoint_bytes( repl_buffer[iter] , repl_data_len - iter );
4460 repl_lut_len += bytes > 1; iter += bytes; ++repl_string_len;
4461 }
4462 }
4463
4464 // Count codepoints and multibytes of this string
4465 data_type* old_buffer;
4466 data_type* old_lut_base_ptr; // Ignore uninitialized warning, see [3]
4467 size_type old_buffer_size;
4468 size_type old_string_len;
4469 bool old_lut_active;
4470 size_type mb_index = 0;
4471 size_type replaced_mbs = 0;
4472 size_type replaced_cps = 0;
4473 size_type old_lut_len;
4474 if( old_sso_inactive )
4475 {
4476 old_buffer_size = t_non_sso.buffer_size;
4477 old_buffer = t_non_sso.data;
4478 old_string_len = get_non_sso_string_len();
4479 size_type iter = 0;
4480 while( iter < index ){ // Count multibytes and codepoints BEFORE replacement
4481 width_type bytes = get_codepoint_bytes( old_buffer[iter] , old_data_len - iter );
4482 mb_index += bytes > 1; iter += bytes;
4483 }
4484 while( iter < end_index ){ // Count REPLACED multibytes and codepoints
4485 width_type bytes = get_codepoint_bytes( old_buffer[iter] , old_data_len - iter );
4486 replaced_mbs += bytes > 1; iter += bytes; ++replaced_cps;
4487 }
4488 // Count TOTAL multibytes
4489 old_lut_base_ptr = basic_string::get_lut_base_ptr( old_buffer , old_buffer_size );
4490 old_lut_active = basic_string::is_lut_active( old_lut_base_ptr );
4491 if( old_lut_active )
4492 old_lut_len = basic_string::get_lut_len( old_lut_base_ptr );
4493 else{
4494 old_lut_len = mb_index + replaced_mbs;
4495 while( iter < old_data_len ){
4496 width_type bytes = get_codepoint_bytes( old_buffer[iter] , old_data_len - iter );
4497 old_lut_len += bytes > 1; iter += bytes;
4498 }
4499 }
4500 }
4501 else
4502 {
4503 old_buffer = t_sso.data;
4504 old_buffer_size = basic_string::get_sso_capacity();
4505 old_string_len = 0;
4506 old_lut_len = 0;
4507 size_type iter = 0;
4508 old_lut_active = false;
4509 while( iter < index ){ // Count multibytes and codepoints BEFORE replacement
4510 width_type bytes = get_codepoint_bytes( old_buffer[iter] , old_data_len - iter );
4511 mb_index += bytes > 1; iter += bytes; ++old_string_len;
4512 }
4513 while( iter < end_index ){ // Count REPLACED multibytes and codepoints
4514 width_type bytes = get_codepoint_bytes( old_buffer[iter] , old_data_len - iter );
4515 replaced_mbs += bytes > 1; iter += bytes; ++replaced_cps;
4516 }
4517 old_lut_len = mb_index + replaced_mbs;
4518 while( iter < old_data_len ){ // Count multibytes and codepoints AFTER replacement
4519 width_type bytes = get_codepoint_bytes( old_buffer[iter] , old_data_len - iter );
4520 old_lut_len += bytes > 1; iter += bytes; ++old_string_len;
4521 }
4522 old_string_len += replaced_cps;
4523 }
4524
4525
4526 // Compute updated metrics
4527 size_type new_lut_len = old_lut_len - replaced_mbs + repl_lut_len;
4528 size_type new_string_len = old_string_len - replaced_cps + repl_string_len;
4529 size_type new_buffer_size;
4530 width_type new_lut_width; // [2] ; 0 signalizes, that we don't need a lut
4531
4532
4533 // Indices Table worth the memory loss?
4534 if( basic_string::is_lut_worth( new_lut_len , new_string_len , old_lut_active , old_sso_inactive ) )
4535 new_buffer_size = determine_main_buffer_size( new_data_len , new_lut_len , &new_lut_width );
4536 else{
4537 new_lut_width = 0;
4538 new_buffer_size = determine_main_buffer_size( new_data_len );
4539 }
4540
4541 // Can we reuse the old buffer?
4542 if( new_buffer_size <= old_buffer_size )
4543 {
4544 // [3] At this point, 'old_sso_inactive' MUST be true, because else,
4545 // the resulting string would have sso active (which is handled way above)
4546
4547 // Need to fill the lut? (see [2])
4548 if( new_lut_width )
4549 {
4550 // Make sure, the lut width stays the same, because we still have the same buffer size
4551 new_lut_width = basic_string::get_lut_width( old_buffer_size );
4552
4553 // Reuse indices from old lut?
4554 if( old_lut_active )
4555 {
4556 size_type mb_end_index = mb_index + replaced_mbs;
4557
4558 // Need to offset all indices? (This can be, if the replacement data has different size as the replaced data)
4559 if( delta_len ){
4560 data_type* lut_iter = old_lut_base_ptr - mb_end_index * new_lut_width; // 'old_lut_base_ptr' is initialized as soon as 'old_lut_active' is set to true
4561 size_type num_indices = old_lut_len - mb_end_index;
4562 while( num_indices-- > 0 ){
4563 lut_iter -= new_lut_width;
4564 basic_string::set_lut( lut_iter , new_lut_width , basic_string::get_lut( lut_iter , new_lut_width ) + delta_len );
4565 }
4566 }
4567
4568 // Copy INDICES from AFTER replacement
4569 // We only need to copy them, if the number of multibytes in the replaced part has changed
4570 if( replaced_mbs != repl_lut_len )
4571 {
4572 // Move the indices!
4573 std::memmove(
4574 old_lut_base_ptr - new_lut_len * new_lut_width
4575 , old_lut_base_ptr - old_lut_len * new_lut_width
4576 , ( old_lut_len - mb_end_index ) * new_lut_width
4577 );
4578
4579 basic_string::set_lut_indiciator( old_lut_base_ptr , true , new_lut_len ); // Set new lut size
4580 }
4581 }
4582 else // We need to fill the lut manually...
4583 {
4584 // Fill INDICES BEFORE replacement
4585 size_type iter = 0;
4586 data_type* lut_iter = old_lut_base_ptr;
4587 while( iter < index ){ // Fill lut with indices BEFORE replacement
4588 width_type bytes = get_codepoint_bytes( old_buffer[iter] , old_data_len - iter );
4589 if( bytes > 1 )
4590 basic_string::set_lut( lut_iter -= new_lut_width , new_lut_width , iter );
4591 iter += bytes;
4592 }
4593
4594 // Fill INDICES AFTER replacement
4595 iter += replaced_len;
4596 lut_iter -= repl_lut_len * new_lut_width;
4597 while( iter < old_data_len ){
4598 width_type bytes = get_codepoint_bytes( old_buffer[iter] , old_data_len - iter );
4599 if( bytes > 1 )
4600 basic_string::set_lut( lut_iter -= new_lut_width , new_lut_width , iter + delta_len );
4601 iter += bytes;
4602 }
4603
4604 basic_string::set_lut_indiciator( old_lut_base_ptr , true , new_lut_len ); // Set lut size
4605 }
4606
4607 // Copy INDICES of the replacement
4608 data_type* lut_dest_iter = old_lut_base_ptr - mb_index * new_lut_width;
4609 if( repl_lut_active )
4610 {
4611 width_type repl_lut_width = basic_string::get_lut_width( repl_buffer_size );
4612 const data_type* repl_lut_iter = repl_lut_base_ptr; // 'repl_lut_base_ptr' is initialized as soon as repl_lut_active' is set to true
4613 while( repl_lut_len-- > 0 )
4615 lut_dest_iter -= new_lut_width
4616 , new_lut_width
4617 , basic_string::get_lut( repl_lut_iter -= repl_lut_width , repl_lut_width ) + index
4618 );
4619 }
4620 else{
4621 size_type iter = 0;
4622 while( iter < repl_data_len ){
4623 width_type bytes = get_codepoint_bytes( repl_buffer[iter] , repl_data_len - iter );
4624 if( bytes > 1 )
4625 basic_string::set_lut( lut_dest_iter -= new_lut_width , new_lut_width , iter + index );
4626 iter += bytes;
4627 }
4628 }
4629 }
4630 else // Set new lut mode
4631 basic_string::set_lut_indiciator( old_lut_base_ptr , new_lut_len == 0 , 0 );
4632
4633 // Move BUFFER from AFTER the replacement (Note: We don't need to copy before it, because the buffer hasn't changed)
4634 if( new_data_len != old_data_len ){
4635 std::memmove( old_buffer + index + repl_data_len , old_buffer + end_index , old_data_len - end_index );
4636 t_non_sso.data_len = new_data_len;
4637 old_buffer[new_data_len] = '\0'; // Trailing '\0'
4638 }
4639
4640 // Copy BUFFER of the replacement
4641 std::memcpy( old_buffer + index , repl_buffer , repl_data_len );
4642 }
4643 else // No, apparently we have to allocate a new buffer...
4644 {
4645 new_buffer_size <<= 1; // Allocate twice as much, in order to amortize allocations (keeping in mind alignment)
4646 data_type* new_buffer = this->allocate( determine_total_buffer_size( new_buffer_size ) );
4647 data_type* new_lut_base_ptr = basic_string::get_lut_base_ptr( new_buffer , new_buffer_size );
4648
4649 // Copy BUFFER from BEFORE replacement
4650 std::memcpy( new_buffer , old_buffer , index );
4651
4652 // Copy BUFFER of replacement
4653 std::memcpy( new_buffer + index , repl_buffer , repl_data_len );
4654
4655 // Copy BUFFER from AFTER the replacement
4656 std::memcpy( new_buffer + index + repl_data_len , old_buffer + end_index , old_data_len - end_index );
4657 new_buffer[new_data_len] = '\0'; // Trailing '\0'
4658
4659 // Need to fill the lut? (see [2])
4660 if( new_lut_width )
4661 {
4662 // Update the lut width, since we doubled the buffer size a couple of lines above
4663 new_lut_width = basic_string::get_lut_width( new_buffer_size );
4664
4665 // Reuse indices from old lut?
4666 if( old_lut_active )
4667 {
4668 size_type mb_end_index = mb_index + replaced_mbs;
4669 width_type old_lut_width = basic_string::get_lut_width( old_buffer_size );
4670
4671 // Copy all INDICES BEFORE the replacement
4672 if( new_lut_width != old_lut_width )
4673 {
4674 data_type* lut_iter = old_lut_base_ptr;
4675 data_type* new_lut_iter = new_lut_base_ptr;
4676 size_type num_indices = mb_index;
4677 while( num_indices-- > 0 )
4679 new_lut_iter -= new_lut_width
4680 , new_lut_width
4681 , basic_string::get_lut( lut_iter -= old_lut_width , old_lut_width )
4682 );
4683 }
4684 else // Plain copy of them
4685 std::memcpy(
4686 new_lut_base_ptr - mb_index * new_lut_width
4687 , old_lut_base_ptr - mb_index * old_lut_width
4688 , mb_index * old_lut_width
4689 );
4690
4691 // Copy all INDICES AFTER the replacement
4692 // Need to offset all indices or translate the lut width? (This can be, if the replacement data has different size as the replaced data)
4693 if( delta_len || new_lut_width != old_lut_width ){ // [Optimization possible here]
4694 data_type* lut_iter = old_lut_base_ptr - mb_end_index * old_lut_width;
4695 data_type* new_lut_iter = new_lut_base_ptr - ( mb_index + repl_lut_len ) * new_lut_width;
4696 size_type num_indices = old_lut_len - mb_end_index;
4697 while( num_indices-- > 0 )
4699 new_lut_iter -= new_lut_width
4700 , new_lut_width
4701 , basic_string::get_lut( lut_iter -= old_lut_width , old_lut_width ) + delta_len
4702 );
4703 }
4704 else // Plain copy of them
4705 std::memcpy(
4706 new_lut_base_ptr - new_lut_len * new_lut_width
4707 , old_lut_base_ptr - old_lut_len * old_lut_width
4708 , ( old_lut_len - mb_end_index ) * old_lut_width
4709 );
4710 }
4711 else // We need to fill the lut manually...
4712 {
4713 // Fill INDICES BEFORE replacement
4714 size_type iter = 0;
4715 data_type* lut_iter = new_lut_base_ptr;
4716 while( iter < index ){ // Fill lut with indices BEFORE replacement
4717 width_type bytes = get_codepoint_bytes( old_buffer[iter] , old_data_len - iter );
4718 if( bytes > 1 )
4719 basic_string::set_lut( lut_iter -= new_lut_width , new_lut_width , iter );
4720 iter += bytes;
4721 }
4722
4723 // Fill INDICES AFTER replacement
4724 iter += replaced_len;
4725 lut_iter -= repl_lut_len * new_lut_width;
4726 while( iter < old_data_len ){
4727 width_type bytes = get_codepoint_bytes( old_buffer[iter] , old_data_len - iter );
4728 if( bytes > 1 )
4729 basic_string::set_lut( lut_iter -= new_lut_width , new_lut_width , iter + delta_len );
4730 iter += bytes;
4731 }
4732 }
4733
4734 // Copy INDICES of the replacement
4735 data_type* lut_dest_iter = new_lut_base_ptr - mb_index * new_lut_width;
4736 if( repl_lut_active )
4737 {
4738 width_type repl_lut_width = basic_string::get_lut_width( repl_buffer_size );
4739 const data_type* repl_lut_iter = repl_lut_base_ptr;
4740 while( repl_lut_len-- > 0 )
4742 lut_dest_iter -= new_lut_width
4743 , new_lut_width
4744 , basic_string::get_lut( repl_lut_iter -= repl_lut_width , repl_lut_width ) + index
4745 );
4746 }
4747 else{
4748 size_type repl_iter = 0; // Todo
4749 while( repl_iter < repl_data_len ){
4750 width_type bytes = get_codepoint_bytes( repl_buffer[repl_iter] , repl_data_len - repl_iter );
4751 if( bytes > 1 )
4752 basic_string::set_lut( lut_dest_iter -= new_lut_width , new_lut_width , repl_iter + index );
4753 repl_iter += bytes;
4754 }
4755 }
4756
4757 basic_string::set_lut_indiciator( new_lut_base_ptr , true , new_lut_len ); // Set new lut mode and len
4758 }
4759 else // Set new lut mode
4760 basic_string::set_lut_indiciator( new_lut_base_ptr , new_lut_len == 0 , 0 );
4761
4762 // Delete the old buffer?
4763 if( old_sso_inactive )
4764 this->deallocate( old_buffer , old_buffer_size );
4765
4766 // Set new Attributes
4767 t_non_sso.data = new_buffer;
4768 t_non_sso.data_len = new_data_len;
4769 t_non_sso.buffer_size = new_buffer_size;
4770 }
4771
4772 // Adjust string length
4773 set_non_sso_string_len( new_string_len );
4774
4775 return *this;
4776 }
4777
4778 template<typename V, typename D, typename A>
4780 {
4781 // Bound checks...
4782 size_type old_data_len = size();
4783 if( index > old_data_len ){
4784 TINY_UTF8_THROW( "tiny_utf8::basic_string::(raw_)erase" , index > old_data_len );
4785 return *this;
4786 }
4787 if( !len )
4788 return *this;
4789 size_type end_index = index + len;
4790 if( end_index > old_data_len || end_index < index ){ // 'end_index < index' is needed because of potential integer overflow in sum
4791 end_index = old_data_len;
4792 len = end_index - index;
4793 }
4794
4795 // Compute the updated metrics
4796 size_type new_data_len = old_data_len - len;
4797
4798 // Will be empty?
4799 if( !new_data_len ){
4800 clear();
4801 return *this;
4802 }
4803
4804 // Will be sso string?
4805 bool old_sso_inactive = sso_inactive();
4806 if( new_data_len <= basic_string::get_sso_capacity() )
4807 {
4808 // Was the buffer on the heap and has to be moved now?
4809 if( old_sso_inactive )
4810 {
4811 data_type* old_buffer = t_non_sso.data; // Backup old buffer, since we override the pointer to it (see [1])
4812 size_type old_buffer_size = t_non_sso.buffer_size; // Backup old buffer size, since we override the pointer to it (see [1])
4813
4814 // Copy BEFORE replaced part
4815 std::memcpy( t_sso.data , old_buffer , index ); // [1]
4816
4817 // Copy AFTER replaced part
4818 std::memcpy( t_sso.data + index , old_buffer + end_index , old_data_len - end_index );
4819
4820 this->deallocate( old_buffer , old_buffer_size ); // Delete the old buffer
4821 }
4822 // Copy AFTER replaced part, if it has moved in position
4823 else if( new_data_len != old_data_len )
4824 std::memmove( t_sso.data + index , t_sso.data + index + len , old_data_len - index );
4825
4826 // Finish the new string object
4827 t_sso.data[new_data_len] = '\0'; // Trailing '\0'
4828 set_sso_data_len( (unsigned char)new_data_len );
4829
4830 return *this;
4831 }
4832
4835
4836 // Count codepoints and multibytes of this string
4837 data_type* old_buffer = t_non_sso.data;
4838 size_type old_buffer_size = t_non_sso.buffer_size;
4839 data_type* old_lut_base_ptr = basic_string::get_lut_base_ptr( old_buffer , old_buffer_size );
4840 bool old_lut_active = basic_string::is_lut_active( old_lut_base_ptr );
4841 size_type replaced_cps = 0;
4842
4843 // Adjust data length
4844 t_non_sso.data_len -= len;
4845
4846 // Was the lut active? => Keep it active and therefore update the data, the string length and the lut
4847 if( old_lut_active )
4848 {
4849 size_type old_lut_len = basic_string::get_lut_len( old_lut_base_ptr );
4850 width_type old_lut_width = basic_string::get_lut_width( old_buffer_size );
4851 size_type mb_end_index = 0;
4852 size_type replaced_mbs = 0;
4853 size_type iter = 0;
4854 while( iter < index ){ // Count multibytes and codepoints BEFORE erased part
4855 width_type bytes = get_codepoint_bytes( old_buffer[iter] , old_data_len - iter );
4856 mb_end_index += bytes > 1; iter += bytes;
4857 }
4858 while( iter < end_index ){ // Count REPLACED multibytes and codepoints
4859 width_type bytes = get_codepoint_bytes( old_buffer[iter] , old_data_len - iter );
4860 replaced_mbs += bytes > 1; iter += bytes; ++replaced_cps;
4861 }
4862 mb_end_index += replaced_mbs;
4863
4864 // Offset all indices
4865 data_type* lut_iter = old_lut_base_ptr - mb_end_index * old_lut_width;
4866 size_type num_indices = old_lut_len - mb_end_index;
4867 while( num_indices-- > 0 ){
4868 lut_iter -= old_lut_width;
4869 basic_string::set_lut( lut_iter , old_lut_width , basic_string::get_lut( lut_iter , old_lut_width ) - len );
4870 }
4871
4872 // Copy INDICES AFTER erased part
4873 // We only need to move them, if the number of multibytes in the replaced part has changed
4874 if( replaced_mbs )
4875 {
4876 size_type new_lut_len = old_lut_len - replaced_mbs;
4877 // Move the indices!
4878 std::memmove(
4879 old_lut_base_ptr - new_lut_len * old_lut_width
4880 , old_lut_base_ptr - old_lut_len * old_lut_width
4881 , ( old_lut_len - mb_end_index ) * old_lut_width
4882 );
4883
4884 basic_string::set_lut_indiciator( old_lut_base_ptr , true , new_lut_len ); // Set new lut size
4885 }
4886 }
4887 // The lut was inactive => only update the string length
4888 else{
4889 size_type iter = 0;
4890 while( iter < index )
4891 iter += get_codepoint_bytes( old_buffer[iter] , old_data_len - iter );
4892 while( iter < end_index ){
4893 iter += get_codepoint_bytes( old_buffer[iter] , old_data_len - iter );
4894 ++replaced_cps;
4895 }
4896 }
4897
4898 // Move BUFFER AFTER the erased part forward
4899 std::memmove( old_buffer + index , old_buffer + end_index , old_data_len - end_index + 1 ); // +1 for the trailing '\0'
4900
4901 // Adjust string length
4902 set_non_sso_string_len( get_non_sso_string_len() - replaced_cps );
4903
4904 return *this;
4905 }
4906
4907 template<typename V, typename D, typename A>
4909 if( index >= size() )
4910 index = raw_back_index();
4911 for( difference_type it = index ; it >= 0 ; it -= get_index_pre_bytes( it ) )
4912 if( raw_at(it) == cp )
4913 return it;
4914 return basic_string::npos;
4915 }
4916
4917 template<typename V, typename D, typename A>
4919 {
4920 if( start_pos >= length() )
4921 return basic_string::npos;
4922
4923 for( const_iterator it = get( start_pos ), end = cend() ; it < end ; ++it, ++start_pos )
4924 {
4925 const value_type* tmp = str;
4926 value_type cur = *it;
4927 do{
4928 if( cur == *tmp )
4929 return start_pos;
4930 }while( *++tmp );
4931 }
4932
4933 return basic_string::npos;
4934 }
4935
4936 template<typename V, typename D, typename A>
4938 {
4939 if( index >= size() )
4940 return basic_string::npos;
4941
4942 for( const_iterator it = raw_get(index), end = cend() ; it < end ; ++it )
4943 {
4944 const value_type* tmp = str;
4945 value_type cur = *it;
4946 do{
4947 if( cur == *tmp )
4948 return it.get_index();
4949 }while( *++tmp );
4950 }
4951
4952 return basic_string::npos;
4953 }
4954
4955 template<typename V, typename D, typename A>
4957 {
4959 size_type string_len = length();
4960 if( start_pos >= string_len ){
4961 it = crbegin();
4962 start_pos = string_len - 1;
4963 }
4964 else
4965 it = rget( start_pos );
4966
4967 for( const_reverse_iterator rend = crend() ; it != rend ; ++it, --start_pos ){
4968 const value_type* tmp = str;
4969 value_type cur = *it;
4970 do{
4971 if( cur == *tmp )
4972 return start_pos;
4973 }while( *++tmp );
4974 }
4975
4976 return basic_string::npos;
4977 }
4978
4979 template<typename V, typename D, typename A>
4981 {
4982 if( empty() )
4983 return basic_string::npos;
4984
4985 if( index >= size() )
4986 index = raw_back_index();
4987
4988 for( difference_type it = index ; it >= 0 ; it -= get_index_pre_bytes( it ) ){
4989 const value_type* tmp = str;
4990 value_type cur = raw_at(it);
4991 do{
4992 if( cur == *tmp )
4993 return it;
4994 }while( *++tmp );
4995 }
4996
4997 return basic_string::npos;
4998 }
4999
5000 template<typename V, typename D, typename A>
5002 {
5003 if( start_pos >= length() )
5004 return basic_string::npos;
5005
5006 for( const_iterator it = get(start_pos) , end = cend() ; it != end ; ++it, ++start_pos ){
5007 const value_type* tmp = str;
5008 value_type cur = *it;
5009 do{
5010 if( cur == *tmp )
5011 goto continue2;
5012 }while( *++tmp );
5013 return start_pos;
5014 continue2:;
5015 }
5016
5017 return basic_string::npos;
5018 }
5019
5020 template<typename V, typename D, typename A>
5022 {
5023 if( index >= size() )
5024 return basic_string::npos;
5025
5026 for( const_iterator it = raw_get(index), end = cend() ; it < end ; ++it )
5027 {
5028 const value_type* tmp = str;
5029 value_type cur = *it;
5030 do{
5031 if( cur == *tmp )
5032 goto continue2;
5033 }while( *++tmp );
5034 return it.get_index();
5035 continue2:;
5036 }
5037
5038 return basic_string::npos;
5039 }
5040
5041 template<typename V, typename D, typename A>
5043 {
5044 if( empty() )
5045 return basic_string::npos;
5046
5047 const_reverse_iterator end = rend(), it;
5048 size_type string_len = length();
5049 if( start_pos >= string_len ){
5050 it = crbegin();
5051 start_pos = string_len - 1;
5052 }
5053 else
5054 it = rget( start_pos );
5055
5056 for( ; it < end ; ++it, --start_pos ){
5057 const value_type* tmp = str;
5058 value_type cur = *it;
5059 do{
5060 if( cur == *tmp )
5061 goto continue2;
5062 }while( *++tmp );
5063 return start_pos;
5064 continue2:;
5065 }
5066
5067 return basic_string::npos;
5068 }
5069
5070 template<typename V, typename D, typename A>
5072 {
5073 if( empty() )
5074 return basic_string::npos;
5075
5076 if( index >= size() )
5077 index = raw_back_index();
5078
5079 for( difference_type it = index ; it >= 0 ; it -= get_index_pre_bytes( it ) )
5080 {
5081 const value_type* tmp = str;
5082 value_type cur = raw_at(it);
5083
5084 do{
5085 if( cur == *tmp )
5086 goto continue2;
5087 }while( *++tmp );
5088
5089 return it; // It will be either non-negative or -1, which equals basic_string::npos!
5090
5091 continue2:;
5092 }
5093
5094 return basic_string::npos;
5095 }
5096
5097} // Namespace 'tiny_utf8'
5098
5099#if defined (__clang__)
5100#pragma clang diagnostic pop
5101#elif defined (__GNUC__)
5102#pragma GCC diagnostic pop
5103#elif defined (_MSC_VER)
5104#pragma warning(pop)
5105#endif
5106
5107#endif // _TINY_UTF8_H_
Definition tinyutf8.h:703
raw_const_iterator raw_end() const noexcept
Definition tinyutf8.h:1646
size_type get_num_bytes(size_type byte_start, size_type cp_count) const noexcept
Definition tinyutf8.h:3531
void set_sso_data_len(unsigned char data_len=0) noexcept
Set the data length (also enables SSO)
Definition tinyutf8.h:1026
basic_string(std::basic_string< data_type, C, A > str, const allocator_type &alloc=allocator_type()) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1208
basic_string & erase(iterator first, iterator last) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:2070
const_iterator cend() const noexcept
Definition tinyutf8.h:1684
basic_string & assign(const data_type *str, size_type len) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1922
basic_string(T &&str, const allocator_type &alloc=allocator_type(), enable_if_ptr< T, data_type > *={}) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1117
static constexpr T get_msb_mask(width_type bytes=sizeof(T)) noexcept
Definition tinyutf8.h:1041
bool operator!=(const basic_string &str) const noexcept
Definition tinyutf8.h:2569
basic_string(value_type cp, const allocator_type &alloc=allocator_type()) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1350
static size_type determine_main_buffer_size(size_type data_len, size_type lut_len, width_type lut_width) noexcept
Determine the needed buffer size if the lut width is known (excluding the trailing LUT indicator)
Definition tinyutf8.h:845
basic_string operator+(basic_string summand) const &noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1853
basic_string substr(iterator first, iterator last) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:2107
basic_string(basic_string &&str) noexcept(TINY_UTF8_NOEXCEPT &&std::is_nothrow_move_constructible< Allocator >())
Definition tinyutf8.h:1375
reverse_iterator rbegin() noexcept
Definition tinyutf8.h:1654
const data_type * c_str() const noexcept
Definition tinyutf8.h:1592
static bool is_lut_active(const data_type *lut_base_ptr) noexcept
Check, if the lut is active using the lut base ptr.
Definition tinyutf8.h:797
static size_type determine_total_buffer_size(size_type main_buffer_size) noexcept
Same as above but this time including the LUT indicator.
Definition tinyutf8.h:854
size_type raw_find_last_of(const value_type *str, size_type start_byte=basic_string::npos) const noexcept
Definition tinyutf8.h:4980
reverse_iterator rget(size_type n) noexcept
Definition tinyutf8.h:1552
basic_string & replace(raw_iterator first, raw_iterator last, value_type repl) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1766
basic_string & replace(raw_iterator first, raw_iterator last, const basic_string &repl) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1783
const_iterator begin() const noexcept
Definition tinyutf8.h:1635
basic_string & replace(raw_iterator first, raw_iterator last, value_type repl, size_type n) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1763
const_reverse_iterator rbegin() const noexcept
Definition tinyutf8.h:1655
tiny_utf8::iterator< basic_string, true > raw_iterator
Definition tinyutf8.h:720
bool operator!=(const std::string &str) const noexcept
Definition tinyutf8.h:2571
size_type raw_find(value_type cp, size_type start_byte=0) const noexcept
Definition tinyutf8.h:2190
int compare(T str, enable_if_ptr< T, data_type > *={}) const noexcept
Definition tinyutf8.h:2493
size_type raw_find_first_of(const value_type *str, size_type start_byte=0) const noexcept
Definition tinyutf8.h:4937
size_type find_last_not_of(const value_type *str, size_type start_codepoint=basic_string::npos) const noexcept
Definition tinyutf8.h:5042
bool operator==(const basic_string &str) const noexcept
Equality Comparison Operators.
Definition tinyutf8.h:2568
basic_string & replace(iterator first, raw_iterator last, const basic_string &repl) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1787
basic_string substr(raw_iterator first, raw_iterator last) const noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:2101
bool operator>=(const value_type(&str)[LITLEN]) const noexcept
Definition tinyutf8.h:2603
int compare(T str, enable_if_ptr< T, value_type > *={}) const noexcept
Definition tinyutf8.h:2536
basic_string & erase(iterator first, raw_iterator last) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:2069
basic_string(InputIt first, InputIt last, const allocator_type &alloc=allocator_type()) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1269
basic_string(const data_type *str, size_type len, const allocator_type &alloc=allocator_type()) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1129
tiny_utf8::const_reverse_iterator< basic_string, false > const_reverse_iterator
Definition tinyutf8.h:719
@ npos
Definition tinyutf8.h:726
basic_string & assign(size_type count, value_type cp) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1880
size_type get_buffer_size() const noexcept
Get buffer size (excluding the trailing LUT indicator)
Definition tinyutf8.h:1051
tiny_utf8::reverse_iterator< basic_string, true > raw_reverse_iterator
Definition tinyutf8.h:722
typename std::enable_if<(L<=SSO::size), bool >::type enable_if_small_string
SFINAE helpers for constructors.
Definition tinyutf8.h:776
tiny_utf8::reverse_iterator< basic_string, false > reverse_iterator
Definition tinyutf8.h:718
void shrink_to_fit() noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:3297
data_type * allocate(size_type total_buffer_size) const noexcept
Allocates size_type-aligned storage (make sure, total_buffer_size is a multiple of sizeof(size_type)!...
Definition tinyutf8.h:1059
enable_if_ptr< T, value_type > operator!=(T &&str) const noexcept
Definition tinyutf8.h:2575
bool starts_with(const value_type(&str)[LITLEN]) const noexcept
Definition tinyutf8.h:2347
raw_const_reverse_iterator raw_crbegin() const noexcept
Definition tinyutf8.h:1695
raw_reference back() noexcept
Definition tinyutf8.h:1718
bool operator<=(const std::string &str) const noexcept
Definition tinyutf8.h:2589
static data_type * get_lut_base_ptr(data_type *buffer, size_type buffer_size) noexcept
Get the LUT base pointer from buffer and buffer size.
Definition tinyutf8.h:805
basic_string & replace(iterator first, raw_iterator last, value_type repl) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1773
bool operator>=(const basic_string &str) const noexcept
Definition tinyutf8.h:2583
bool starts_with(const data_type(&str)[LITLEN]) const noexcept
Definition tinyutf8.h:2325
raw_reference front() noexcept
Definition tinyutf8.h:1711
const_reverse_iterator rget(size_type n) const noexcept
Definition tinyutf8.h:1553
static width_type get_num_bytes_of_utf8_char_before(const data_type *data_start, size_type index) noexcept
Returns the number of bytes to expect before this one (including this one) that belong to this utf8 c...
Definition tinyutf8.h:3142
static constexpr size_type get_sso_capacity() noexcept
Static helper methods.
Definition tinyutf8.h:772
ValueType value_type
Definition tinyutf8.h:709
bool starts_with(const basic_string &str) const noexcept
Definition tinyutf8.h:2281
bool operator<=(const data_type(&str)[LITLEN]) const noexcept
Definition tinyutf8.h:2601
bool starts_with(T str, enable_if_ptr< T, data_type > *={}) const noexcept
Definition tinyutf8.h:2311
basic_string & replace(size_type index, size_type len, value_type repl) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1751
basic_string & replace(iterator first, iterator last, const basic_string &repl) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1788
basic_string & assign(std::initializer_list< value_type > ilist) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1973
basic_string & erase(size_type pos, size_type len=1) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:2078
void clear() noexcept
Definition tinyutf8.h:1438
bool empty() const noexcept
Definition tinyutf8.h:1623
width_type get_index_bytes(size_type byte_index) const noexcept
Get the number of bytes of codepoint in basic_string.
Definition tinyutf8.h:2609
basic_string(const value_type(&str)[LITLEN], const allocator_type &alloc=allocator_type()) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1330
static void set_lut(data_type *iter, width_type lut_width, size_type value) noexcept
Definition tinyutf8.h:867
bool requires_unicode() const noexcept
tinyutf8-specific features
Definition tinyutf8.h:2654
raw_const_reverse_iterator raw_rbegin() const noexcept
Definition tinyutf8.h:1657
size_type find_first_not_of(const value_type *str, size_type start_codepoint=0) const noexcept
Find absence of characters in string.
Definition tinyutf8.h:5001
bool operator==(const value_type(&str)[LITLEN]) const noexcept
Definition tinyutf8.h:2578
friend enable_if_ptr< T, value_type, basic_string > operator+(T &&lhs, basic_string rhs) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1868
bool starts_with(T str, enable_if_ptr< T, value_type > *={}) const noexcept
Definition tinyutf8.h:2336
basic_string & insert(size_type pos, value_type cp) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1985
value_type at(size_type n, std::nothrow_t) const noexcept
Definition tinyutf8.h:1496
bool sso_inactive() const noexcept
Check, if sso is inactive (this operation doesn't require a negation and is faster)
Definition tinyutf8.h:1037
typename std::enable_if<(L > SSO::size), bool >::type enable_if_not_small_string
Definition tinyutf8.h:778
static void copy_lut_indicator(data_type *dest, const data_type *source) noexcept
Copy lut indicator.
Definition tinyutf8.h:813
friend enable_if_ptr< T, data_type, basic_string > operator+(T &&lhs, basic_string rhs) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1867
bool starts_with(value_type cp) const noexcept
Definition tinyutf8.h:2301
basic_string & assign(basic_string &&str) noexcept(TINY_UTF8_NOEXCEPT &&std::is_nothrow_move_assignable< Allocator >())
Definition tinyutf8.h:1909
size_type length() const noexcept
Definition tinyutf8.h:1604
bool ends_with(const basic_string &str) const noexcept
Definition tinyutf8.h:2365
bool operator<(const data_type(&str)[LITLEN]) const noexcept
Definition tinyutf8.h:2600
bool operator<(const basic_string &str) const noexcept
Definition tinyutf8.h:2584
basic_string(size_type n, value_type cp, const allocator_type &alloc=allocator_type()) noexcept(TINY_UTF8_NOEXCEPT)
static width_type get_lut_width(size_type buffer_size) noexcept
Definition tinyutf8.h:819
allocator_type get_allocator() const noexcept(TINY_UTF8_NOEXCEPT &&std::is_nothrow_copy_constructible< Allocator >())
Definition tinyutf8.h:1451
bool ends_with(const data_type(&str)[LITLEN]) const noexcept
Definition tinyutf8.h:2406
basic_string(std::initializer_list< value_type > ilist, const allocator_type &alloc=allocator_type()) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1340
value_type operator[](size_type n) const noexcept
Definition tinyutf8.h:1573
bool ends_with(value_type cp) const noexcept
Definition tinyutf8.h:2385
friend basic_string operator+(basic_string lhs, const value_type(&rhs)[LITLEN]) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1862
friend basic_string operator+(const data_type(&lhs)[LITLEN], basic_string rhs) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1869
friend basic_string operator+(const value_type(&lhs)[LITLEN], basic_string rhs) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1870
reference operator[](size_type n) noexcept
Definition tinyutf8.h:1572
raw_const_iterator raw_get(size_type n) const noexcept
Definition tinyutf8.h:1543
std::uint_fast8_t width_type
Definition tinyutf8.h:715
size_type get_num_codepoints(size_type byte_start, size_type byte_count) const noexcept
Definition tinyutf8.h:3423
friend basic_string operator+(value_type lhs, basic_string rhs) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1866
basic_string & replace(size_type index, size_type count, const basic_string &repl) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1797
typename std::enable_if< std::is_pointer< typename std::remove_reference< T >::type >::value && std::is_same< CharType, typename std::remove_cv< typename std::remove_pointer< typename std::remove_reference< T >::type >::type >::type >::value, _DataType >::type enable_if_ptr
Definition tinyutf8.h:782
basic_string & assign(const value_type *str, size_type len) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1944
basic_string raw_substr(size_type start_byte, size_type byte_count) const noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:3601
raw_iterator raw_begin() noexcept
Definition tinyutf8.h:1636
raw_codepoint_reference< basic_string, true > raw_checked_reference
Definition tinyutf8.h:713
static void set_lut_indiciator(data_type *lut_base_ptr, bool active, size_type lut_len=0) noexcept
Construct the lut mode indicator.
Definition tinyutf8.h:809
size_type find_first_of(const value_type *str, size_type start_codepoint=0) const noexcept
Find characters in string.
Definition tinyutf8.h:4918
friend basic_string operator+(basic_string lhs, const data_type(&rhs)[LITLEN]) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1861
size_type rfind(value_type cp, size_type start_codepoint=basic_string::npos) const noexcept
Definition tinyutf8.h:2240
void to_wide_literal(value_type *dest) const noexcept
Definition tinyutf8.h:2681
basic_string & operator=(basic_string &&str) noexcept(TINY_UTF8_NOEXCEPT &&std::is_nothrow_move_assignable< Allocator >())
Definition tinyutf8.h:1422
basic_string & assign(const basic_string &str) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1889
enable_if_ptr< T, data_type > operator>(T &&str) const noexcept
Definition tinyutf8.h:2590
void swap(basic_string &str) noexcept(TINY_UTF8_NOEXCEPT &&std::is_nothrow_move_assignable< Allocator >())
Definition tinyutf8.h:1466
basic_string & operator+=(value_type cp) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1843
int compare(const basic_string &str) const noexcept
Definition tinyutf8.h:2456
raw_const_reverse_iterator raw_crend() const noexcept
Definition tinyutf8.h:1703
basic_string & raw_insert(size_type pos, value_type cp) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:2037
basic_string & raw_replace(size_type start_byte, size_type byte_count, const basic_string &repl) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:4364
int compare(const std::string &str) const noexcept
Definition tinyutf8.h:2473
basic_string & replace(raw_iterator first, iterator last, const basic_string &repl) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1786
DataType data_type
Definition tinyutf8.h:706
bool requires_unicode_sso() const noexcept
Check, whether the string contains codepoints > 127.
Definition tinyutf8.h:3384
raw_checked_reference raw_at(size_type byte_index) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1509
friend enable_if_ptr< T, value_type, basic_string > operator+(basic_string lhs, T &&rhs) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1860
raw_reverse_iterator raw_rbegin() noexcept
Definition tinyutf8.h:1656
basic_string & assign(T &&str, enable_if_ptr< T, value_type > *={}) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1941
static void encode_utf8(value_type cp, data_type *dest, width_type cp_bytes) noexcept
Definition tinyutf8.h:963
codepoint_reference< basic_string, false > reference
Definition tinyutf8.h:710
size_type raw_rfind(value_type cp, size_type start_byte=basic_string::npos) const noexcept
Definition tinyutf8.h:4908
size_type find_last_of(const value_type *str, size_type start_codepoint=basic_string::npos) const noexcept
Definition tinyutf8.h:4956
friend enable_if_ptr< T, data_type, basic_string > operator+(basic_string lhs, T &&rhs) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1859
raw_const_reverse_iterator raw_rend() const noexcept
Definition tinyutf8.h:1667
size_type get_sso_data_len() const noexcept
Get the data length (when SSO is active)
Definition tinyutf8.h:1031
basic_string & replace(size_type index, size_type len, value_type repl, size_type n) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1748
basic_string substr(iterator first, raw_iterator last) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:2106
basic_string & erase(raw_iterator first, raw_iterator last) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:2065
bool operator>=(const std::string &str) const noexcept
Definition tinyutf8.h:2587
enable_if_ptr< T, value_type > operator==(T &&str) const noexcept
Definition tinyutf8.h:2574
basic_string(data_type ch, const allocator_type &alloc=allocator_type()) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1363
basic_string & replace(raw_iterator first, iterator last, value_type repl) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1772
~basic_string() noexcept
Definition tinyutf8.h:1403
friend basic_string operator+(basic_string lhs, data_type rhs) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1857
static value_type decode_utf8(const data_type *data, width_type num_bytes) noexcept
Decodes a given input of rle utf8 data to a unicode codepoint, given the number of bytes it's made of...
Definition tinyutf8.h:938
bool operator<(const value_type(&str)[LITLEN]) const noexcept
Definition tinyutf8.h:2604
checked_reference at(size_type n) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1499
tiny_utf8::iterator< basic_string, false > iterator
Definition tinyutf8.h:716
bool operator>=(const data_type(&str)[LITLEN]) const noexcept
Definition tinyutf8.h:2599
width_type get_index_pre_bytes(size_type byte_index) const noexcept
Get the number of bytes before a codepoint, that build up a new codepoint.
Definition tinyutf8.h:2618
basic_string & assign(const basic_string &str, size_type pos, size_type count) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1900
basic_string(std::basic_string< data_type, C, A > str, size_type len, const allocator_type &alloc=allocator_type()) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1221
basic_string(const data_type *str, size_type count, const allocator_type &alloc, tiny_utf8_detail::read_bytes_tag) noexcept(TINY_UTF8_NOEXCEPT)
basic_string & raw_insert(size_type pos, const basic_string &str) noexcept(TINY_UTF8_NOEXCEPT)
basic_string & append(const basic_string &appendix) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:3740
tiny_utf8::const_iterator< basic_string, true > raw_const_iterator
Definition tinyutf8.h:721
size_type find(const basic_string &pattern, size_type start_codepoint=0) const noexcept
Definition tinyutf8.h:2156
raw_reverse_iterator raw_rend() noexcept
Definition tinyutf8.h:1666
static width_type get_codepoint_bytes(value_type cp) noexcept
Definition tinyutf8.h:906
raw_const_iterator raw_begin() const noexcept
Definition tinyutf8.h:1637
const data_type * get_buffer() const noexcept
Get buffer.
Definition tinyutf8.h:1047
bool operator==(const data_type(&str)[LITLEN]) const noexcept
Definition tinyutf8.h:2576
value_type back() const noexcept
Definition tinyutf8.h:1719
raw_codepoint_reference< basic_string, false > raw_reference
Definition tinyutf8.h:712
size_type get_non_sso_capacity() const noexcept
Return a good guess of how many codepoints the currently allocated buffer can hold.
Definition tinyutf8.h:3362
data_type * data() noexcept
Definition tinyutf8.h:1594
bool ends_with(T str, enable_if_ptr< T, value_type > *={}) const noexcept
Definition tinyutf8.h:2417
const_iterator cbegin() const noexcept
Definition tinyutf8.h:1676
bool lut_active() const noexcept
Definition tinyutf8.h:2672
bool operator>(const std::string &str) const noexcept
Definition tinyutf8.h:2586
raw_const_reverse_iterator raw_rget(size_type n) const noexcept
Definition tinyutf8.h:1563
value_type operator()(size_type n) const noexcept
Definition tinyutf8.h:1583
size_type raw_find_last_not_of(const value_type *str, size_type start_byte=basic_string::npos) const noexcept
Definition tinyutf8.h:5071
static width_type get_codepoint_bytes(data_type first_byte, size_type data_left) noexcept
basic_string & replace(iterator first, iterator last, value_type repl) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1774
basic_string & erase(raw_iterator first, iterator last) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:2068
size_type get_num_bytes_from_start(size_type cp_count) const noexcept
Definition tinyutf8.h:3488
basic_string(std::basic_string< data_type, C, A > str, size_type pos, size_type len, const allocator_type &alloc=allocator_type()) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1226
std::allocator_traits< Allocator >::size_type size_type
Definition tinyutf8.h:707
basic_string & raw_erase(size_type pos, size_type len) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:4779
iterator begin() noexcept
Definition tinyutf8.h:1634
basic_string & assign(T &&str, enable_if_ptr< T, data_type > *={}) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1919
basic_string(const basic_string &str) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1282
const_reverse_iterator crend() const noexcept
Definition tinyutf8.h:1702
basic_string & replace(size_type index, value_type repl, size_type n=1) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1736
NON_SSO t_non_sso
Definition tinyutf8.h:766
static size_type determine_main_buffer_size(size_type data_len, size_type lut_len, width_type *lut_width) noexcept
Determine the needed buffer size and the needed lut width (excluding the trailing LUT indicator)
Definition tinyutf8.h:838
basic_string operator+(const basic_string &summand) &&noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1854
basic_string(const data_type(&str)[LITLEN], const allocator_type &alloc=allocator_type(), enable_if_small_string< LITLEN >={}) noexcept(TINY_UTF8_NOEXCEPT &&std::is_nothrow_copy_constructible< Allocator >())
Definition tinyutf8.h:1156
bool operator<=(const value_type(&str)[LITLEN]) const noexcept
Definition tinyutf8.h:2605
static width_type encode_utf8(value_type cp, data_type *dest) noexcept
Definition tinyutf8.h:983
basic_string substr(size_type pos, size_type len=basic_string::npos) const noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:2115
static size_type get_lut_len(const data_type *lut_base_ptr) noexcept
Get the LUT size (given the lut is active!)
Definition tinyutf8.h:877
raw_const_iterator raw_cbegin() const noexcept
Definition tinyutf8.h:1677
basic_string substr(raw_iterator first, iterator last) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:2105
basic_string & assign(const value_type(&str)[LITLEN]) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1953
const data_type * data() const noexcept
Definition tinyutf8.h:1593
basic_string(const data_type *str, size_type pos, size_type len, const allocator_type &alloc=allocator_type()) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1142
size_type find(const data_type *pattern, size_type start_codepoint=0) const noexcept
Definition tinyutf8.h:2173
basic_string & assign(InputIt first, InputIt last) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1964
basic_string() noexcept(TINY_UTF8_NOEXCEPT &&std::is_nothrow_default_constructible< Allocator >())
Definition tinyutf8.h:1092
basic_string & pop_back() noexcept(TINY_UTF8_NOEXCEPT)
Removes the last codepoint in the basic_string.
Definition tinyutf8.h:2043
size_type find(value_type cp, size_type start_codepoint=0) const noexcept
Definition tinyutf8.h:2141
bool operator<(const std::string &str) const noexcept
Definition tinyutf8.h:2588
raw_reference raw_at(size_type byte_index, std::nothrow_t) noexcept
Definition tinyutf8.h:1510
friend basic_string operator+(data_type lhs, basic_string rhs) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1865
static bool is_lut_worth(size_type pot_lut_len, size_type string_len, bool lut_present, bool biased=true) noexcept
Determine, whether or not a LUT is worth to set up. General case: worth below 25%....
Definition tinyutf8.h:831
bool ends_with(const std::string &str) const noexcept
Definition tinyutf8.h:2375
const value_type & const_reference
Definition tinyutf8.h:714
raw_iterator raw_end() noexcept
Definition tinyutf8.h:1645
size_type raw_find(const data_type *pattern, size_type start_byte=0) const noexcept
Definition tinyutf8.h:2222
raw_reference operator()(size_type n) noexcept
Definition tinyutf8.h:1582
int compare(const value_type(&str)[LITLEN]) const noexcept
Definition tinyutf8.h:2556
basic_string & insert(raw_iterator it, const basic_string &str) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:2015
size_type get_non_sso_string_len() const noexcept
Get buffer size, if SSO is disabled.
Definition tinyutf8.h:1011
basic_string(const data_type(&str)[LITLEN], size_type pos, size_type len, const allocator_type &alloc=allocator_type()) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1196
static size_type get_lut(const data_type *iter, width_type lut_width) noexcept
Get the nth index within a multibyte index table.
Definition tinyutf8.h:859
void set_non_sso_string_len(size_type string_len) noexcept
Non-static helper methods.
Definition tinyutf8.h:992
value_type raw_at(size_type byte_index) const noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1511
bool operator>(const data_type(&str)[LITLEN]) const noexcept
Definition tinyutf8.h:2598
value_type front() const noexcept
Definition tinyutf8.h:1712
basic_string(const value_type *str, size_type len, const allocator_type &alloc=allocator_type()) noexcept(TINY_UTF8_NOEXCEPT)
enable_if_ptr< T, data_type > operator==(T &&str) const noexcept
Definition tinyutf8.h:2572
basic_string & assign(const data_type(&str)[LITLEN]) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1931
iterator end() noexcept
Definition tinyutf8.h:1643
iterator get(size_type n) noexcept
Definition tinyutf8.h:1532
basic_string & erase(raw_iterator pos) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:2055
const_reverse_iterator rend() const noexcept
Definition tinyutf8.h:1665
enable_if_ptr< T, value_type > operator>(T &&str) const noexcept
Definition tinyutf8.h:2594
size_type raw_find(const basic_string &pattern, size_type start_byte=0) const noexcept
Definition tinyutf8.h:2206
static size_type round_up_to_align(size_type val) noexcept
Rounds the supplied value to a multiple of sizeof(size_type)
Definition tinyutf8.h:800
codepoint_reference< basic_string, true > checked_reference
Definition tinyutf8.h:711
size_type indicator_type
Definition tinyutf8.h:725
bool operator!=(const value_type(&str)[LITLEN]) const noexcept
Definition tinyutf8.h:2579
width_type get_codepoint_bytes(size_type codepoint_index) const noexcept
Definition tinyutf8.h:2612
SSO t_sso
Definition tinyutf8.h:765
size_type raw_back_index() const noexcept
Get the byte index of the last codepoint.
Definition tinyutf8.h:2628
raw_reverse_iterator raw_rget(size_type n) noexcept
Definition tinyutf8.h:1562
const_reverse_iterator crbegin() const noexcept
Definition tinyutf8.h:1694
width_type get_codepoint_pre_bytes(size_type codepoint_index) const noexcept
Definition tinyutf8.h:2622
bool sso_active() const noexcept
Definition tinyutf8.h:2664
reverse_iterator rend() noexcept
Definition tinyutf8.h:1664
basic_string(const basic_string &str, const allocator_type &alloc) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1302
size_type raw_find_first_not_of(const value_type *str, size_type start_byte=0) const noexcept
Definition tinyutf8.h:5021
int compare(const data_type(&str)[LITLEN]) const noexcept
Definition tinyutf8.h:2513
static size_type determine_main_buffer_size(size_type data_len) noexcept
Determine the needed buffer size if the lut is empty (excluding the trailing LUT indicator)
Definition tinyutf8.h:849
bool operator==(const std::string &str) const noexcept
Definition tinyutf8.h:2570
const_iterator get(size_type n) const noexcept
Definition tinyutf8.h:1533
static width_type decode_utf8_and_len(const data_type *data, value_type &dest, size_type data_left) noexcept
Definition tinyutf8.h:952
basic_string & operator+=(const basic_string &appendix) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1833
std::basic_string< data_type > cpp_str(bool prepend_bom=false) const noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:2694
bool ends_with(T str, enable_if_ptr< T, data_type > *={}) const noexcept
Definition tinyutf8.h:2395
size_type capacity() const noexcept
Definition tinyutf8.h:1482
bool ends_with(const value_type(&str)[LITLEN]) const noexcept
Definition tinyutf8.h:2434
bool operator!=(const data_type(&str)[LITLEN]) const noexcept
Definition tinyutf8.h:2577
basic_string(const data_type(&str)[LITLEN], const allocator_type &alloc=allocator_type(), enable_if_not_small_string< LITLEN >={}) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1169
bool starts_with(const std::string &str) const noexcept
Definition tinyutf8.h:2291
void deallocate(data_type *buffer, size_type buffer_size) const noexcept
Deallocates size_type-aligned storage (make sure, buffer_size is a multiple of sizeof(size_type)!...
Definition tinyutf8.h:1071
basic_string & insert(raw_iterator it, value_type cp) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:2005
const_iterator end() const noexcept
Definition tinyutf8.h:1644
data_type * get_buffer() noexcept
Definition tinyutf8.h:1048
basic_string & replace(raw_iterator first, iterator last, value_type repl, size_type n) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1769
basic_string & prepend(const basic_string &prependix) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1824
basic_string(basic_string &&str, const allocator_type &alloc) noexcept(TINY_UTF8_NOEXCEPT &&std::is_nothrow_copy_constructible< Allocator >())
Definition tinyutf8.h:1389
basic_string & operator=(const basic_string &str) noexcept(TINY_UTF8_NOEXCEPT)
basic_string & insert(size_type pos, const basic_string &str) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1995
std::allocator_traits< Allocator >::difference_type difference_type
Definition tinyutf8.h:708
basic_string(const data_type(&str)[LITLEN], size_type len, const allocator_type &alloc=allocator_type()) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1182
bool operator>(const value_type(&str)[LITLEN]) const noexcept
Definition tinyutf8.h:2602
raw_iterator raw_get(size_type n) noexcept
Definition tinyutf8.h:1542
value_type raw_at(size_type byte_index, std::nothrow_t) const noexcept
Definition tinyutf8.h:1520
std::basic_string< data_type > cpp_str_bom() const noexcept
Returns an std::string with the UTF-8 BOM prepended.
Definition tinyutf8.h:3405
basic_string & replace(iterator first, iterator last, value_type repl, size_type n) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1771
reference at(size_type n, std::nothrow_t) noexcept
Definition tinyutf8.h:1500
size_type size() const noexcept
Definition tinyutf8.h:1614
Allocator allocator_type
Definition tinyutf8.h:724
tiny_utf8::const_iterator< basic_string, false > const_iterator
Definition tinyutf8.h:717
bool operator>(const basic_string &str) const noexcept
Lexicographical comparison Operators.
Definition tinyutf8.h:2582
basic_string & replace(iterator first, raw_iterator last, value_type repl, size_type n) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1770
friend basic_string operator+(basic_string lhs, value_type rhs) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1858
enable_if_ptr< T, data_type > operator!=(T &&str) const noexcept
Definition tinyutf8.h:2573
tiny_utf8::const_reverse_iterator< basic_string, true > raw_const_reverse_iterator
Definition tinyutf8.h:723
raw_const_iterator raw_cend() const noexcept
Definition tinyutf8.h:1685
bool operator<=(const basic_string &str) const noexcept
Definition tinyutf8.h:2585
static const data_type * get_lut_base_ptr(const data_type *buffer, size_type buffer_size) noexcept
Definition tinyutf8.h:806
basic_string(const data_type *str, size_type pos, size_type count, size_type data_left, const allocator_type &alloc, tiny_utf8_detail::read_codepoints_tag) noexcept(TINY_UTF8_NOEXCEPT)
Constructs a basic_string from a character literal.
basic_string(const allocator_type &alloc) noexcept(TINY_UTF8_NOEXCEPT &&std::is_nothrow_copy_constructible< Allocator >())
Definition tinyutf8.h:1102
value_type at(size_type n) const noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1493
basic_string & push_back(value_type cp) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:1842
Helper to detect little endian.
Definition tinyutf8.h:173
static constexpr bool value
Definition tinyutf8.h:177
STL namespace.
std::size_t strlen< char >(const char *str)
Definition tinyutf8.h:202
std::size_t strlen(const T *str)
strlen for different character types
Definition tinyutf8.h:201
Want global declarations?
Definition tinyutf8.h:113
iterator< Container, true >::difference_type operator-(const iterator< Container, false > &lhs, const iterator< Container, false > &rhs) noexcept
Definition tinyutf8.h:672
basic_string< char32_t, char > utf8_string
Definition tinyutf8.h:124
std::size_t operator()(const tiny_utf8::basic_string< V, D, A > &string) const noexcept
Definition tinyutf8.h:2705
Layout specifications.
Definition tinyutf8.h:738
size_type data_len
Definition tinyutf8.h:740
size_type buffer_size
Definition tinyutf8.h:741
size_type string_len
Definition tinyutf8.h:742
data_type * data
Definition tinyutf8.h:739
Definition tinyutf8.h:747
SSO() noexcept
Definition tinyutf8.h:756
SSO(data_type value) noexcept
Definition tinyutf8.h:752
data_type data[size]
Definition tinyutf8.h:749
unsigned char data_len
Definition tinyutf8.h:750
@ size
Definition tinyutf8.h:748
Definition tinyutf8.h:208
Container::size_type t_index
Definition tinyutf8.h:209
codepoint_reference(typename Container::size_type index, Container *instance) noexcept
Ctor.
Definition tinyutf8.h:215
codepoint_reference & operator=(const codepoint_reference &ref) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:236
codepoint_reference & operator*() const noexcept
Dereference operator to act as pointer type.
Definition tinyutf8.h:229
Container * t_instance
Definition tinyutf8.h:210
codepoint_reference & operator=(typename Container::value_type cp) noexcept(TINY_UTF8_NOEXCEPT)
Assignment operator.
Definition tinyutf8.h:232
Definition tinyutf8.h:495
const_iterator(typename iterator_base< Container, Raw >::difference_type index, const Container *instance) noexcept
Ctor.
Definition tinyutf8.h:497
const_iterator(const iterator< Container, Raw > &other) noexcept
Ctor from non const.
Definition tinyutf8.h:502
const_iterator(const iterator< Container, !Raw > &other) noexcept
Definition tinyutf8.h:505
const_iterator() noexcept=default
Default Functions.
Definition tinyutf8.h:596
const_reverse_iterator(const reverse_iterator< Container, Raw > &other) noexcept
Ctor from non const.
Definition tinyutf8.h:603
const_reverse_iterator(typename iterator_base< Container, Raw >::difference_type index, const Container *instance) noexcept
Ctor.
Definition tinyutf8.h:598
const_reverse_iterator(const const_iterator< Container, Raw > &other) noexcept
Ctor from normal iterator.
Definition tinyutf8.h:611
const_reverse_iterator(const const_iterator< Container, !Raw > &other) noexcept
Definition tinyutf8.h:614
const_reverse_iterator(const reverse_iterator< Container, !Raw > &other) noexcept
Definition tinyutf8.h:606
const_iterator< Container, Raw > base() const noexcept
Get the underlying iterator instance.
Definition tinyutf8.h:627
const_reverse_iterator() noexcept=default
Default Functions.
iterator_base(iterator_base< Container, false > other) noexcept
Constructor from non-raw iterator.
Definition tinyutf8.h:375
iterator_base(difference_type index, Container *instance) noexcept
Ctor.
Definition tinyutf8.h:361
bool operator!=(const iterator_base &it) const noexcept
Definition tinyutf8.h:358
void * pointer
Definition tinyutf8.h:354
iterator_base() noexcept=default
Default function.
raw_codepoint_reference< Container, false > reference
Definition tinyutf8.h:353
Container::value_type value_type
Definition tinyutf8.h:351
difference_type t_index
Definition tinyutf8.h:397
void advance(difference_type n) noexcept
Advance the iterator n times (negative values allowed!)
Definition tinyutf8.h:403
value_type get_value() const noexcept
Get the value that the iterator points to.
Definition tinyutf8.h:393
void increment() noexcept
Move the iterator one codepoint ahead.
Definition tinyutf8.h:414
difference_type get_index() const noexcept
Definition tinyutf8.h:384
void decrement() noexcept
Move the iterator one codepoint backwards.
Definition tinyutf8.h:417
bool operator==(const iterator_base &it) const noexcept
Definition tinyutf8.h:357
difference_type get_raw_index() const noexcept
Get the index of the codepoint the iterator points to.
Definition tinyutf8.h:387
std::bidirectional_iterator_tag iterator_category
Definition tinyutf8.h:355
Container::difference_type difference_type
Definition tinyutf8.h:352
reference get_reference() const noexcept
Get a reference to the codepoint the iterator points to.
Definition tinyutf8.h:390
Definition tinyutf8.h:284
iterator_base(difference_type index, Container *instance) noexcept
Ctor.
Definition tinyutf8.h:300
difference_type get_raw_index() const noexcept
Get the index of the codepoint the iterator points to.
Definition tinyutf8.h:317
difference_type get_index() const noexcept
Definition tinyutf8.h:314
difference_type t_index
Definition tinyutf8.h:327
Container::difference_type difference_type
Definition tinyutf8.h:291
void increment() noexcept
Move the iterator one codepoint ahead.
Definition tinyutf8.h:336
Container * t_instance
Definition tinyutf8.h:328
std::random_access_iterator_tag iterator_category
Definition tinyutf8.h:294
iterator_base() noexcept=default
Default function.
Container * get_instance() const noexcept
Getter for the instance.
Definition tinyutf8.h:311
void * pointer
Definition tinyutf8.h:293
codepoint_reference< Container, false > reference
Definition tinyutf8.h:292
Container::value_type value_type
Definition tinyutf8.h:290
bool operator==(const iterator_base &it) const noexcept
Definition tinyutf8.h:296
bool operator!=(const iterator_base &it) const noexcept
Definition tinyutf8.h:297
value_type get_value() const noexcept
Get the value that the iterator points to.
Definition tinyutf8.h:323
reference get_reference() const noexcept
Get a reference to the codepoint the iterator points to.
Definition tinyutf8.h:320
void advance(difference_type n) noexcept
Advance the iterator n times (negative values allowed!)
Definition tinyutf8.h:333
void decrement() noexcept
Move the iterator one codepoint backwards.
Definition tinyutf8.h:339
Definition tinyutf8.h:427
iterator operator+(typename iterator_base< Container, Raw >::difference_type n) const noexcept
Increase the Iterator n times.
Definition tinyutf8.h:468
iterator() noexcept=default
Default Functions.
iterator::reference operator*() const noexcept
Returns the value of the codepoint behind the iterator.
Definition tinyutf8.h:490
iterator & operator--() noexcept
Decrease the iterator by one.
Definition tinyutf8.h:457
iterator & operator-=(typename iterator_base< Container, Raw >::difference_type n) noexcept
Definition tinyutf8.h:484
iterator operator--(int) noexcept
Definition tinyutf8.h:461
iterator & operator+=(typename iterator_base< Container, Raw >::difference_type n) noexcept
Definition tinyutf8.h:473
iterator operator-(typename iterator_base< Container, Raw >::difference_type n) const noexcept
Decrease the Iterator n times.
Definition tinyutf8.h:479
iterator(const iterator< Container, !Raw > &other) noexcept
Definition tinyutf8.h:432
iterator(typename iterator_base< Container, Raw >::difference_type index, Container *instance) noexcept
Ctor.
Definition tinyutf8.h:429
iterator operator++(int) noexcept
Definition tinyutf8.h:450
Definition tinyutf8.h:241
raw_codepoint_reference(const codepoint_reference< Container, RC > &reference) noexcept
Definition tinyutf8.h:253
raw_codepoint_reference & operator=(typename Container::value_type cp) noexcept(TINY_UTF8_NOEXCEPT)
Assignment operator.
Definition tinyutf8.h:274
Container::size_type t_index
Definition tinyutf8.h:242
raw_codepoint_reference & operator*() const noexcept
Dereference operator to act as pointer type.
Definition tinyutf8.h:267
Container * t_instance
Definition tinyutf8.h:243
raw_codepoint_reference(typename Container::size_type raw_index, Container *instance) noexcept
Ctors.
Definition tinyutf8.h:248
raw_codepoint_reference & operator=(const raw_codepoint_reference &ref) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:278
Definition tinyutf8.h:520
reverse_iterator & operator+=(typename iterator_base< Container, Raw >::difference_type n) noexcept
Definition tinyutf8.h:571
reverse_iterator operator++(int) noexcept
Definition tinyutf8.h:548
reverse_iterator operator--(int) noexcept
Definition tinyutf8.h:559
reverse_iterator(const iterator< Container, !Raw > &other) noexcept
Definition tinyutf8.h:530
reverse_iterator & operator-=(typename iterator_base< Container, Raw >::difference_type n) noexcept
Definition tinyutf8.h:582
reverse_iterator(typename iterator_base< Container, Raw >::difference_type index, Container *instance) noexcept
Ctor.
Definition tinyutf8.h:522
reverse_iterator(const iterator< Container, Raw > &other) noexcept
Ctor from normal iterator.
Definition tinyutf8.h:527
reverse_iterator() noexcept=default
Default Functions.
reverse_iterator & operator--() noexcept
Decrease the Iterator by one.
Definition tinyutf8.h:555
reverse_iterator operator+(typename iterator_base< Container, Raw >::difference_type n) const noexcept
Increase the Iterator n times.
Definition tinyutf8.h:566
iterator< Container, Raw > base() const noexcept
Get the underlying iterator instance.
Definition tinyutf8.h:591
iterator< Container, Raw >::reference operator*() const noexcept
Returns the value of the codepoint behind the iterator.
Definition tinyutf8.h:588
reverse_iterator operator-(typename iterator_base< Container, Raw >::difference_type n) const noexcept
Decrease the Iterator n times.
Definition tinyutf8.h:577
#define TINY_UTF8_FALLTHROUGH
Determine the way to inform about fallthrough behavior.
Definition tinyutf8.h:73
#define TINY_UTF8_CPP17(...)
Remove Warnings, since it is wrong for all cases in this file.
Definition tinyutf8.h:94
std::istream & operator>>(std::istream &stream, tiny_utf8::basic_string< V, D, A > &str) noexcept(TINY_UTF8_NOEXCEPT)
Definition tinyutf8.h:2724
#define TINY_UTF8_NOEXCEPT
Determine noexcept specifications.
Definition tinyutf8.h:102
std::ostream & operator<<(std::ostream &stream, const tiny_utf8::basic_string< V, D, A > &str) noexcept(TINY_UTF8_NOEXCEPT)
Stream Operations.
Definition tinyutf8.h:2720
#define TINY_UTF8_THROW(...)
Determine the mode of error handling.
Definition tinyutf8.h:53
Helper to modify the last (address-wise) byte of a little endian value of type 'T'.
Definition tinyutf8.h:183
struct tiny_utf8::tiny_utf8_detail::last_byte::@0 bytes
T number
Definition tinyutf8.h:184
char last
Definition tinyutf8.h:187
char dummy[sizeof(T) -1]
Definition tinyutf8.h:186