Skip to content

Commit

Permalink
tests: use uint16_t for UTF-16 code points in charset test
Browse files Browse the repository at this point in the history
Charset-related APIs use 16-bit uint16_t values when referring to UTF-16,
and those values are not obligated to have any particular byte layout in
memory. The layout differs between little- and big-endian machines, which
caused the test to fail when built on mips and s390x.
  • Loading branch information
i-rinat committed Dec 25, 2018
1 parent 4343e4e commit 58596f4
Showing 1 changed file with 18 additions and 22 deletions.
40 changes: 18 additions & 22 deletions tests/test_ppb_char_set.c
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,8 @@ TEST(ppb_char_set, extract_relevant_part_from_locale_name)
TEST(ppb_char_set, to_utf16_all_ASCII)
{
const char *in = "Hello, world!";
const uint8_t out[] = {'H', 0, 'e', 0, 'l', 0, 'l', 0, 'o', 0, ',', 0, ' ', 0, 'w', 0,
'o', 0, 'r', 0, 'l', 0, 'd', 0, '!', 0};
const uint16_t out[] = {'H', 'e', 'l', 'l', 'o', ',', ' ',
'w', 'o', 'r', 'l', 'd', '!'};
uint32_t res_len = 7777;
uint16_t *res = ppb_char_set_char_set_to_utf16(0, in, strlen(in), "UTF-8",
PP_CHARSET_CONVERSIONERROR_FAIL, &res_len);
Expand All @@ -56,9 +56,8 @@ TEST(ppb_char_set, to_utf16_all_ASCII)
TEST(ppb_char_set, to_utf16_basic_UTF_8)
{
const char *in = "Привет, мир!";
const uint8_t out[] = {0x1f, 0x04, 0x40, 0x04, 0x38, 0x04, 0x32, 0x04, 0x35, 0x04,
0x42, 0x04, 0x2c, 0x00, 0x20, 0x00, 0x3c, 0x04, 0x38, 0x04,
0x40, 0x04, 0x21, 0x00};
const uint16_t out[] = {0x41f, 0x440, 0x438, 0x432, 0x435, 0x442,
0x2c, 0x20, 0x43c, 0x438, 0x440, 0x21};
uint32_t res_len = 7777;
uint16_t *res = ppb_char_set_char_set_to_utf16(0, in, strlen(in), "UTF-8",
PP_CHARSET_CONVERSIONERROR_FAIL, &res_len);
Expand All @@ -83,8 +82,7 @@ TEST(ppb_char_set, to_utf16_wrong_UTF_8_with_error)

TEST(ppb_char_set, from_utf16_all_ASCII)
{
const uint8_t in[] = {'H', 0, 'e', 0, 'l', 0, 'l', 0, 'o', 0, ',', 0, ' ', 0, 'w', 0,
'o', 0, 'r', 0, 'l', 0, 'd', 0, '!', 0};
const uint16_t in[] = {'H', 'e', 'l', 'l', 'o', ',', ' ', 'w', 'o', 'r', 'l', 'd', '!'};
const char *out = "Hello, world!";
uint32_t res_len = 7777;
char *res = ppb_char_set_utf16_to_char_set(0, (const uint16_t *)in,
Expand All @@ -97,9 +95,8 @@ TEST(ppb_char_set, from_utf16_all_ASCII)

TEST(ppb_char_set, to_utf16_non_ASCII_all_correct)
{
const uint8_t in[] = {0x1f, 0x04, 0x40, 0x04, 0x38, 0x04, 0x32, 0x04, 0x35, 0x04,
0x42, 0x04, 0x2c, 0x00, 0x20, 0x00, 0x3c, 0x04, 0x38, 0x04,
0x40, 0x04, 0x21, 0x00}; // "Привет, мир!"
const uint16_t in[] = {0x41f, 0x440, 0x438, 0x432, 0x435, 0x442, 0x2c,
0x20, 0x43c, 0x438, 0x440, 0x21}; // "Привет, мир!"
const char *out = "\xcf\xf0\xe8\xe2\xe5\xf2\x2c\x20\xec\xe8\xf0\x21"; // "Привет, мир!"
uint32_t res_len = 7777;
char *res = ppb_char_set_utf16_to_char_set(0, (const uint16_t *)in,
Expand All @@ -112,9 +109,9 @@ TEST(ppb_char_set, to_utf16_non_ASCII_all_correct)

TEST(ppb_char_set, to_utf16_non_ASCII_PP_CHARSET_CONVERSIONERROR_FAIL)
{
const uint8_t in[] = {0x1f, 0x04, 0x40, 0x04, 0x38, 0x04, 0x32, 0x04, 0x35, 0x04,
0x42, 0x04, 0x2c, 0x00, 0x20, 0x00, 0x6b, 0x26, 0x3c, 0x04,
0x38, 0x04, 0x40, 0x04, 0x21, 0x00}; // "Привет, ♫мир!"
const uint16_t in[] = {0x41f, 0x440, 0x438, 0x432, 0x435, 0x442, 0x2c,
0x20, 0x266b, 0x43c, 0x438, 0x440, 0x21};
// "♫" in "Привет, ♫мир!" cannot be represented in cp1251.
// const char *out = "\xcf\xf0\xe8\xe2\xe5\xf2\x2c\x20\xec\xe8\xf0\x21"; // "Привет, мир!"
uint32_t res_len = 7777;
char *res = ppb_char_set_utf16_to_char_set(0, (const uint16_t *)in,
Expand All @@ -127,9 +124,9 @@ TEST(ppb_char_set, to_utf16_non_ASCII_PP_CHARSET_CONVERSIONERROR_FAIL)

TEST(ppb_char_set, to_utf16_non_ASCII_PP_CHARSET_CONVERSIONERROR_SKIP)
{
const uint8_t in[] = {0x1f, 0x04, 0x40, 0x04, 0x38, 0x04, 0x32, 0x04, 0x35, 0x04,
0x42, 0x04, 0x2c, 0x00, 0x20, 0x00, 0x6b, 0x26, 0x3c, 0x04,
0x38, 0x04, 0x40, 0x04, 0x21, 0x00}; // "Привет, ♫мир!"
const uint16_t in[] = {
0x41f, 0x440, 0x438, 0x432, 0x435, 0x442, 0x2c,
0x20, 0x266b, 0x43c, 0x438, 0x440, 0x21}; // "Привет, ♫мир!"
const char *out = "\xcf\xf0\xe8\xe2\xe5\xf2\x2c\x20\xec\xe8\xf0\x21"; // "Привет, мир!"
uint32_t res_len = 7777;
char *res = ppb_char_set_utf16_to_char_set(0, (const uint16_t *)in,
Expand All @@ -142,9 +139,9 @@ TEST(ppb_char_set, to_utf16_non_ASCII_PP_CHARSET_CONVERSIONERROR_SKIP)

TEST(ppb_char_set, to_utf16_non_ASCII_PP_CHARSET_CONVERSIONERROR_SUBSTITUTE)
{
const uint8_t in[] = {0x1f, 0x04, 0x40, 0x04, 0x38, 0x04, 0x32, 0x04, 0x35, 0x04,
0x42, 0x04, 0x2c, 0x00, 0x20, 0x00, 0x6b, 0x26, 0x3c, 0x04,
0x38, 0x04, 0x40, 0x04, 0x21, 0x00}; // "Привет, ♫мир!"
const uint16_t in[] = {
0x41f, 0x440, 0x438, 0x432, 0x435, 0x442, 0x2c,
0x20, 0x266b, 0x43c, 0x438, 0x440, 0x21}; // "Привет, ♫мир!"
const char *out = "\xcf\xf0\xe8\xe2\xe5\xf2\x2c\x20\x3f\xec\xe8\xf0\x21";// "Привет, ?мир!"
uint32_t res_len = 7777;
char *res = ppb_char_set_utf16_to_char_set(0, (const uint16_t *)in,
Expand All @@ -164,9 +161,8 @@ TEST(ppb_char_set, gb2312_ASCII_to_utf16)
0x6c, 0x64, 0x21};

// "Hello, world!" as UTF-16 code units (host byte order)
const uint8_t out[] = {0x48, 0x00, 0x65, 0x00, 0x6c, 0x00, 0x6c, 0x00, 0x6f, 0x00,
0x2c, 0x00, 0x20, 0x00, 0x77, 0x00, 0x6f, 0x00, 0x72, 0x00,
0x6c, 0x00, 0x64, 0x00, 0x21, 0x00};
const uint16_t out[] = {0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x2c, 0x20,
0x77, 0x6f, 0x72, 0x6c, 0x64, 0x21};

uint32_t res_len = 7777;
uint16_t *res = ppb_char_set_char_set_to_utf16(0, (const char *)in, sizeof(in), "gb2312",
Expand Down

0 comments on commit 58596f4

Please sign in to comment.