libzypp/Utf8_8h_source.html

/*---------------------------------------------------------------------\

|                          ____ _   __ __ ___                          |

|                         |__  / \ / / . \ . \                         |

|                           / / \ V /|  _/  _/                         |

|                          / /__ | | | | | |                           |

|                         /_____||_| |_| |_|                           |

|                                                                      |

----------------------------------------------------------------------/

*

* This file contains private API, this might break at any time between releases.

* Strictly for internal use!

*/


#ifndef ZYPP_TUI_OUTPUT_UTF8_H_

#define ZYPP_TUI_OUTPUT_UTF8_H_


#include <cstdlib>

#include <cstring>

#include <wchar.h>


#include <iostream>

#include <string>

#include <utility>


namespace ztui {


namespace utf8

{


  /**
   * Simple utf8 string.
   *
   * Thin wrapper around a std::string holding UTF-8 encoded text. \ref size
   * and \ref substr work on characters rather than bytes and do not count
   * ANSI SGR escape sequences ("ESC [ ... m").
   */
  class string
  {
  public:
    using size_type = std::string::size_type;
    static const size_type npos = std::string::npos;

  public:
    /** Empty string. */
    string() {}
    /** Implicit conversion from a NUL terminated C string (must not be a null pointer). */
    string( const char * rhs )            : _str( rhs ) {}
    /** Implicit conversion from a std::string (taken by value and moved in). */
    string( std::string  rhs )    : _str(std::move( rhs )) {}

  public:
    /** The raw bytes as NUL terminated C string. */
    const char * c_str() const                     { return _str.c_str(); }
    /** Read access to the underlying byte string. */
    const std::string & str() const          { return _str; }
    /** Write access to the underlying byte string. */
    std::string & str()                              { return _str; }

  public:

    /**
     * utf8 size
     *
     * If \c LANG starts with "zh", "ko" or "ja" (evaluated once, on the first
     * call) the result is the sum of the \c wcwidth of all characters.
     * Otherwise it is the number of bytes that are not UTF-8 continuation
     * bytes. ANSI SGR sequences are not counted in either case.
     */
    size_type size() const
    {
      // test for locales using dual width fonts:
      static const bool isCJK = []()->bool {
        const char * lang = ::getenv( "LANG" );
        return ( lang && ( !strncmp( lang, "zh", 2 )
                        || !strncmp( lang, "ko", 2 )
                        || !strncmp( lang, "ja", 2 ) ) );
      }();

      return isCJK ? columnWidth() : codepointCount();
    }

    /** Same as \ref size. */
    size_type length() const
    { return size(); }

    /**
     * utf8 substring
     *
     * \param pos_r  Index of the first character to include.
     * \param len_r  Number of characters to include; \ref npos means up to the end.
     * \return The selected characters. ANSI SGR sequences in front of a
     *         selected character are part of the result.
     * \note If \a pos_r is beyond the end of the string, \ref upos yields
     *       \ref npos and std::string::substr throws std::out_of_range.
     */
    string substr( size_type pos_r = 0, size_type len_r = npos ) const
    {
      size_type p = upos( pos_r );
      size_type l = upos( len_r, p );
      return string( _str.substr( p, ( l == npos ? npos : l-p ) ) );
    }

  private:

    /** Test for continuation byte '10xxxxxx'. */
    bool isContinuationByte( char ch ) const
    { return( (ch & 0xC0) == 0x80 ); }

    /**
     * Width computation used for CJK locales: decode every multibyte
     * character with \c mbrtowc and add up its \c wcwidth.
     *
     * Counting stops at an embedded NUL and at the first invalid or
     * truncated multibyte sequence.
     */
    size_type columnWidth() const
    {
      size_type len = 0;
      const char * s = _str.c_str();
      // Local conversion state instead of mbrtowc's shared internal one.
      mbstate_t state = mbstate_t();

      for ( size_type slen = _str.size(); slen > 0; )
      {
        // s+1 is always readable: at worst it is the terminating NUL.
        if ( *s == '\033' && *(s+1) == '[' )  // skip ansi SGR
        {
          slen -= 2; s += 2;
          while ( slen > 0 && *s != 'm' )
          { --slen; ++s; }
          if ( slen > 0 )       // consume the closing 'm'
          { --slen; ++s; }
          continue;
        }

        wchar_t wc = 0;
        const size_t bytes = ::mbrtowc( &wc, s, slen, &state );
        // mbrtowc reports errors as (size_t)-1 / (size_t)-2. The result is
        // unsigned, so those must be tested explicitly; advancing by them
        // would move the pointer backwards.
        if ( bytes == 0
          || bytes == static_cast<size_t>(-1)
          || bytes == static_cast<size_t>(-2) )
          break;

        // wcwidth yields -1 for non-printable characters; never subtract.
        const int width = ::wcwidth( wc );
        if ( width > 0 )
          len += static_cast<size_type>( width );

        slen -= bytes;
        s += bytes;
      }
      return len;
    }

    /**
     * Fast computation used for all other locales: the number of bytes that
     * are neither UTF-8 continuation bytes nor part of a complete ANSI SGR
     * sequence.
     */
    size_type codepointCount() const
    {
      size_type ret = _str.size();
      // 0: plain text, 1: ESC seen, >=2: number of SGR bytes consumed so far
      size_type ansi = 0;
      for ( char ch : _str )
      {
        if ( ansi == 1 )
        {
          if ( ch == '[' )
          {
            ansi = 2;
            continue;
          }
          // ESC not followed by '[' does not start an SGR sequence: drop the
          // pending state and treat ch like any other byte.
          ansi = 0;
        }
        else if ( ansi >= 2 ) // not testing for in [0-9;m]
        {
          ++ansi;
          if ( ch == 'm' ) // SGR end: uncount the whole sequence
          { ret -= ansi; ansi = 0; }
          continue;
        }

        if ( isContinuationByte( ch ) )
          --ret;
        else if ( ch == '\033' )
          ansi = 1;
      }
      return ret;
    }

    /**
     * Return start of codepoint pos_r starting at position start_r.
     *
     * \param pos_r    Number of characters to skip, counted from \a start_r.
     * \param start_r  Byte offset at which counting starts.
     * \return Byte offset of the requested character, or \ref npos if
     *         \a pos_r is \ref npos, \a start_r is behind the end of the
     *         string, or the string ends before \a pos_r characters were
     *         skipped.
     * \note ANSI SGR sequences in front of a character are skipped together
     *       with that character. The scan stops at the first NUL byte.
     */
    size_type upos( size_type pos_r, size_type start_r = 0 ) const
    {
      if ( pos_r == npos || start_r > _str.size() )
        return npos;

      size_type upos = start_r;
      for ( const char * chp = _str.c_str() + upos; *chp; ++chp, ++upos )
      {
        if ( ! isContinuationByte( *chp ) )
        {
           // Start of a character (or of the SGR sequences in front of it).
           if ( pos_r )
             --pos_r;
           else
             return upos;

           while ( *chp == '\033' && *(chp+1) == '[' )  // skip any ansi SGR
           {
             chp += 2;
             upos += 2;
             while ( *chp && *chp != 'm' )
             { ++chp; ++upos; }
             if ( *chp )
             { ++chp; ++upos; }
             else
               break;   // incomplete ansi SGR
           }
           if ( ! *chp )
             break;     // incomplete ansi SGR
           // chp now points at the character just counted; the loop
           // increment steps over its first byte.
        }
      }
      return( pos_r ? npos : upos );
    }

  private:
    std::string _str;   ///< The UTF-8 encoded bytes.
  };


  /** Concatenation: the bytes of \a lhs followed by the bytes of \a rhs. */
  inline string operator+( const string & lhs, const string & rhs )
  {
    std::string joined( lhs.str() );
    joined += rhs.str();
    return string( std::move( joined ) );
  }


  /** Concatenation of a utf8 string and a std::string, in that order. */
  inline string operator+( const string & lhs, const std::string & rhs )
  {
    std::string joined( lhs.str() );
    joined += rhs;
    return string( std::move( joined ) );
  }


  /** Concatenation of a std::string and a utf8 string, in that order. */
  inline string operator+( const std::string & lhs, const string & rhs )
  {
    std::string joined( lhs );
    joined += rhs.str();
    return string( std::move( joined ) );
  }


  /** Concatenation of a utf8 string and a NUL terminated C string, in that order. */
  inline string operator+( const string & lhs, const char * rhs )
  {
    std::string joined( lhs.str() );
    joined += rhs;
    return string( std::move( joined ) );
  }


  /** Concatenation of a NUL terminated C string and a utf8 string, in that order. */
  inline string operator+( const char * lhs, const string & rhs )
  {
    std::string joined( lhs );
    joined += rhs.str();
    return string( std::move( joined ) );
  }


  /** Stream output: writes the underlying byte string to \a str. */
  inline std::ostream & operator<<( std::ostream & str, const string & obj )
  {
    str << obj.str();
    return str;
  }


} // namespace utf8


}


#endif // ZYPP_TUI_OUTPUT_UTF8_H_

ztui::utf8::string
Simple utf8 string.
Definition Utf8.h:32

ztui::utf8::string::operator<<
std::ostream & operator<<(std::ostream &str, const string &obj)
Stream output.
Definition Utf8.h:189

ztui::utf8::string::upos
size_type upos(size_type pos_r, size_type start_r=0) const
Return start of codepoint pos_r starting at position start_r.
Definition Utf8.h:135

ztui::utf8::string::str
std::string & str()
Definition Utf8.h:45

ztui::utf8::string::isContinuationByte
bool isContinuationByte(char ch) const
Test for continuation byte '10xxxxxx'.
Definition Utf8.h:131

ztui::utf8::string::c_str
const char * c_str() const
Definition Utf8.h:43

ztui::utf8::string::npos
static const size_type npos
Definition Utf8.h:35

ztui::utf8::string::string
string(const char *rhs)
Definition Utf8.h:39

ztui::utf8::string::substr
string substr(size_type pos_r=0, size_type len_r=npos) const
utf8 substring
Definition Utf8.h:122

ztui::utf8::string::length
size_type length() const
Definition Utf8.h:118

ztui::utf8::string::str
const std::string & str() const
Definition Utf8.h:44

ztui::utf8::string::size
size_type size() const
utf8 size
Definition Utf8.h:49

ztui::utf8::string::string
string(std::string rhs)
Definition Utf8.h:40

ztui::utf8::string::_str
std::string _str
Definition Utf8.h:169

ztui::utf8::string::size_type
std::string::size_type size_type
Definition Utf8.h:34

ztui::utf8::string::string
string()
Definition Utf8.h:38

ztui::utf8::string::operator+
string operator+(const string &lhs, const string &rhs)
concatenation
Definition Utf8.h:173

std
Definition ansi.h:855

str
String related utilities and Regular expression matching.

ztui::ansi
Definition ansi.h:46

ztui::utf8
Definition Utf8.h:29

ztui::utf8::operator+
string operator+(const string &lhs, const std::string &rhs)
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
Definition Utf8.h:176

ztui
Definition application.cc:13