cprover
unicode.cpp File Reference
#include "unicode.h"
#include <cstring>
#include <locale>
#include <iomanip>
#include <sstream>
#include <cstdint>
Include dependency graph for unicode.cpp:

Go to the source code of this file.

Macros

#define BUFSIZE   100
 

Functions

bool is_little_endian_arch ()
 Determine endianness of the architecture. More...
 
std::string narrow (const wchar_t *s)
 
std::wstring widen (const char *s)
 
std::string narrow (const std::wstring &s)
 
std::wstring widen (const std::string &s)
 
static void utf8_append_code (unsigned int c, std::string &result)
 Appends a unicode character to a utf8-encoded string. More...
 
std::string utf32_to_utf8 (const std::basic_string< unsigned int > &s)
 
std::vector< std::string > narrow_argv (int argc, const wchar_t **argv_wide)
 
uint16_t do_swap_bytes (uint16_t x)
 A helper function for dealing with different UTF16 endians. More...
 
void utf16_append_code (unsigned int code, bool swap_bytes, std::wstring &result)
 
std::wstring utf8_to_utf16 (const std::string &in, bool swap_bytes)
 
std::wstring utf8_to_utf16_big_endian (const std::string &in)
 
std::wstring utf8_to_utf16_little_endian (const std::string &in)
 
static void utf16_little_endian_to_java (const wchar_t ch, std::ostringstream &result, const std::locale &loc)
 
std::string utf16_little_endian_to_java (const wchar_t ch)
 
std::string utf16_little_endian_to_java (const std::wstring &in)
 

Macro Definition Documentation

◆ BUFSIZE

#define BUFSIZE   100

Definition at line 29 of file unicode.cpp.

Function Documentation

◆ do_swap_bytes()

uint16_t do_swap_bytes ( uint16_t  x)

A helper function for dealing with different UTF16 endians.

Parameters
x: the 16-bit integer whose bytes are to be swapped
Returns
A 16-bit integer with bytes swapped

Definition at line 171 of file unicode.cpp.

Referenced by utf16_append_code().

◆ is_little_endian_arch()

bool is_little_endian_arch ( )

Determine endianness of the architecture.

Returns
True if the architecture is little-endian

Definition at line 23 of file unicode.cpp.

Referenced by utf8_to_utf16_big_endian(), and utf8_to_utf16_little_endian().

◆ narrow() [1/2]

std::string narrow ( const wchar_t *  s)

Definition at line 31 of file unicode.cpp.

References r.

Referenced by main(), narrow_argv(), ms_cl_cmdlinet::parse_env(), and ms_cl_cmdlinet::process_response_file().

◆ narrow() [2/2]

std::string narrow ( const std::wstring &  s)

Definition at line 81 of file unicode.cpp.

References r.

◆ narrow_argv()

std::vector<std::string> narrow_argv ( int  argc,
const wchar_t **  argv_wide 
)

Definition at line 155 of file unicode.cpp.

References narrow().

Referenced by main().

◆ utf16_append_code()

void utf16_append_code ( unsigned int  code,
bool  swap_bytes,
std::wstring &  result 
)

Definition at line 179 of file unicode.cpp.

References do_swap_bytes().

Referenced by utf8_to_utf16().

◆ utf16_little_endian_to_java() [1/3]

static void utf16_little_endian_to_java ( const wchar_t  ch,
std::ostringstream &  result,
const std::locale &  loc 
)
static
Parameters
ch: UTF-16LE character
result: stream to receive string in US-ASCII format, with \uxxxx escapes for other characters
loc: locale to check for printable characters

Definition at line 291 of file unicode.cpp.

References loc.

Referenced by expr2javat::convert_constant(), utf16_constant_array_to_java(), and utf16_little_endian_to_java().

◆ utf16_little_endian_to_java() [2/3]

std::string utf16_little_endian_to_java ( const wchar_t  ch)
Parameters
ch: UTF-16LE character
Returns
String in US-ASCII format, with \uxxxx escapes for other characters

Definition at line 330 of file unicode.cpp.

References loc, and utf16_little_endian_to_java().

◆ utf16_little_endian_to_java() [3/3]

std::string utf16_little_endian_to_java ( const std::wstring &  in)
Parameters
in: String in UTF-16LE format
Returns
String in US-ASCII format, with \uxxxx escapes for other characters

Definition at line 340 of file unicode.cpp.

References loc, and utf16_little_endian_to_java().

◆ utf32_to_utf8()

std::string utf32_to_utf8 ( const std::basic_string< unsigned int > &  s)
Parameters
s: UTF-32 encoded wide string
Returns
utf8-encoded string with the same unicode characters as the input.

Definition at line 143 of file unicode.cpp.

References utf8_append_code().

Referenced by append_universal_char(), convert_one_string_literal(), and make_identifier().

◆ utf8_append_code()

static void utf8_append_code ( unsigned int  c,
std::string &  result 
)
static

Appends a unicode character to a utf8-encoded string.

Parameters
c: character to append
result: string to append to

Definition at line 117 of file unicode.cpp.

Referenced by utf32_to_utf8().

◆ utf8_to_utf16()

std::wstring utf8_to_utf16 ( const std::string &  in,
bool  swap_bytes 
)
Parameters
in: String in UTF-8 format
swap_bytes: bool value indicating whether the endianness should be different from the architecture one
Returns
String in UTF-16 format. The encoding follows the endianness of the architecture iff swap_bytes is false.

Definition at line 212 of file unicode.cpp.

References size_type(), and utf16_append_code().

Referenced by utf8_to_utf16_big_endian(), and utf8_to_utf16_little_endian().

◆ utf8_to_utf16_big_endian()

std::wstring utf8_to_utf16_big_endian ( const std::string &  in)
Parameters
in: String in UTF-8 format
Returns
String in UTF-16BE format

Definition at line 273 of file unicode.cpp.

References is_little_endian_arch(), and utf8_to_utf16().

◆ utf8_to_utf16_little_endian()

std::wstring utf8_to_utf16_little_endian ( const std::string &  in)
Parameters
in: String in UTF-8 format
Returns
String in UTF-16LE format

Definition at line 281 of file unicode.cpp.

References is_little_endian_arch(), and utf8_to_utf16().

Referenced by string_constraint_generatort::add_axioms_for_constant(), delete_directory(), and get_or_create_string_literal_symbol().

◆ widen() [1/2]

std::wstring widen ( const wchar_t... 

◆ widen() [2/2]

std::wstring widen ( const std::string &  s)

Definition at line 98 of file unicode.cpp.

References r.