libzypp 17.37.17
Utf8.h
Go to the documentation of this file.
1/*---------------------------------------------------------------------\
2| ____ _ __ __ ___ |
3| |__ / \ / / . \ . \ |
4| / / \ V /| _/ _/ |
5| / /__ | | | | | | |
6| /_____||_| |_| |_| |
7| |
8----------------------------------------------------------------------/
9*
10* This file contains private API, this might break at any time between releases.
11* Strictly for internal use!
12*/
13
14#ifndef ZYPP_TUI_OUTPUT_UTF8_H_
15#define ZYPP_TUI_OUTPUT_UTF8_H_
16
17#include <cstdlib>
18#include <cstring>
19#include <wchar.h>
20
21#include <iostream>
22#include <string>
23#include <utility>
24
25namespace ztui {
26
28namespace utf8
29{
31 class string
32 {
33 public:
34 using size_type = std::string::size_type;
35 static const size_type npos = std::string::npos;
36
37 public:
38 string() {}
39 string( const char * rhs ) : _str( rhs ) {}
40 string( std::string rhs ) : _str(std::move( rhs )) {}
41
42 public:
43 const char * c_str() const { return _str.c_str(); }
44 const std::string & str() const { return _str; }
45 std::string & str() { return _str; }
46
47 public:
50 {
51 // test for locales using dual width fonts:
52 static bool isCJK = []()->bool {
53 const char * lang = ::getenv( "LANG" );
54 return ( lang && ( !strncmp( lang, "zh", 2 )
55 || !strncmp( lang, "ko", 2 )
56 || !strncmp( lang, "ja", 2 ) ) );
57 }();
58
59 if ( isCJK )
60 {
61 // this should actually be correct for ALL locales:
62 size_type len = 0;
63 const char *s = _str.c_str();
64 for ( size_type slen = _str.size(); slen > 0; )
65 {
66 if ( *s == '\033' && *(s+1) == '[' ) // skip ansi SGR
67 {
68 slen -= 2; s += 2;
69 while ( slen > 0 && *s != 'm' )
70 { --slen; ++s; }
71 if ( slen > 0 )
72 { --slen; ++s; }
73 continue;
74 }
75
76 wchar_t wc = 0;
77 size_t bytes = mbrtowc( &wc, s, slen, NULL );
78 if ( bytes <= 0 )
79 break;
80 len += wcwidth( wc );
81 slen -= bytes;
82 s += bytes;
83 }
84 return len;
85 }
86
87 // NON CJK: faster and hopefully accurate enough:
88 // simply do not count continuation bytes '10xxxxxx'
89 size_type ret = _str.size();
90 size_type ansi = 0;
91 for ( auto ch : _str )
92 {
93 if ( ansi )
94 {
95 if ( ansi == 1 && ch == '[' )
96 {
97 ansi = 2;
98 continue;
99 }
100 else if ( ansi >= 2 ) // not testing for in [0-9;m]
101 {
102 ++ansi;
103 if ( ch == 'm' ) // SGR end
104 { ret -= ansi; ansi = 0; }
105 continue;
106 }
107 }
108
109 if ( isContinuationByte( ch ) )
110 --ret;
111 else if ( ch == '\033' )
112 ansi = 1;
113 }
114 return ret;
115 }
116
119 { return size(); }
120
122 string substr( size_type pos_r = 0, size_type len_r = npos ) const
123 {
124 size_type p = upos( pos_r );
125 size_type l = upos( len_r, p );
126 return string( _str.substr( p, ( l == npos ? npos : l-p ) ) );
127 }
128
129 private:
131 bool isContinuationByte( char ch ) const
132 { return( (ch & 0xC0) == 0x80 ); }
133
135 size_type upos( size_type pos_r, size_type start_r = 0 ) const
136 {
137 if ( pos_r == npos || start_r > _str.size() )
138 return npos;
139
140 size_type upos = start_r;
141 for ( const char * chp = _str.c_str() + upos; *chp; ++chp, ++upos )
142 {
143 if ( ! isContinuationByte( *chp ) )
144 {
145 if ( pos_r )
146 --pos_r;
147 else
148 return upos;
149
150 while ( *chp == '\033' && *(chp+1) == '[' ) // skip any ansi SGR
151 {
152 chp += 2;
153 upos += 2;
154 while ( *chp && *chp != 'm' )
155 { ++chp; ++upos; }
156 if ( *chp )
157 { ++chp; ++upos; }
158 else
159 break; // incomplete ansi SGR
160 }
161 if ( ! *chp )
162 break; // incomplete ansi SGR
163 }
164 }
165 return( pos_r ? npos : upos );
166 }
167
168 private:
169 std::string _str;
170 };
171
173 inline string operator+( const string & lhs, const string & rhs )
174 { return string( lhs.str() + rhs.str() ); }
175
176 inline string operator+( const string & lhs, const std::string & rhs )
177 { return string( lhs.str() + rhs ); }
178
179 inline string operator+( const std::string & lhs, const string & rhs )
180 { return string( lhs + rhs.str() ); }
181
182 inline string operator+( const string & lhs, const char * rhs )
183 { return string( lhs.str() + rhs ); }
184
185 inline string operator+( const char * lhs, const string & rhs )
186 { return string( lhs + rhs.str() ); }
187
189 inline std::ostream & operator<<( std::ostream & str, const string & obj )
190 { return str << obj.str(); }
191
192} // namespace utf8
193
194
195}
196
197#endif // ZYPP_TUI_OUTPUT_UTF8_H_
Simple utf8 string.
Definition Utf8.h:32
std::ostream & operator<<(std::ostream &str, const string &obj)
Stream output.
Definition Utf8.h:189
size_type upos(size_type pos_r, size_type start_r=0) const
Return start of codepoint pos_r starting at position start_r.
Definition Utf8.h:135
std::string & str()
Definition Utf8.h:45
bool isContinuationByte(char ch) const
Test for continuation byte '10xxxxxx'.
Definition Utf8.h:131
const char * c_str() const
Definition Utf8.h:43
static const size_type npos
Definition Utf8.h:35
string(const char *rhs)
Definition Utf8.h:39
string substr(size_type pos_r=0, size_type len_r=npos) const
utf8 substring
Definition Utf8.h:122
size_type length() const
Definition Utf8.h:118
const std::string & str() const
Definition Utf8.h:44
size_type size() const
utf8 size
Definition Utf8.h:49
string(std::string rhs)
Definition Utf8.h:40
std::string _str
Definition Utf8.h:169
std::string::size_type size_type
Definition Utf8.h:34
string operator+(const string &lhs, const string &rhs)
concatenation
Definition Utf8.h:173
Definition Arch.h:364
String related utilities and Regular expression matching.
string operator+(const string &lhs, const std::string &rhs)
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
Definition Utf8.h:176