tests: use uint16_t for UTF-16 code points in charset test

Charset-related APIs use 16-bit uint16_t values when referring to UTF-16, and are not obligated to use any particular byte layout in memory. The in-memory byte layout differs between little- and big-endian machines, which caused the tests to fail when built on mips and s390x.
i-rinat · Dec 25, 2018 · 58596f4 · 58596f4
1 parent 4343e4e
commit 58596f4
Showing 1 changed file with 18 additions and 22 deletions.
diff --git a/tests/test_ppb_char_set.c b/tests/test_ppb_char_set.c
@@ -43,8 +43,8 @@ TEST(ppb_char_set, extract_relevant_part_from_locale_name)
 TEST(ppb_char_set, to_utf16_all_ASCII)
 {
     const char *in = "Hello, world!";
-    const uint8_t out[] = {'H', 0, 'e', 0, 'l', 0, 'l', 0, 'o', 0, ',', 0, ' ', 0, 'w', 0,
-                           'o', 0, 'r', 0, 'l', 0, 'd', 0, '!', 0};
+    const uint16_t out[] = {'H', 'e', 'l', 'l', 'o', ',', ' ',
+                            'w', 'o', 'r', 'l', 'd', '!'};
     uint32_t res_len = 7777;
     uint16_t *res = ppb_char_set_char_set_to_utf16(0, in, strlen(in), "UTF-8",
                                                    PP_CHARSET_CONVERSIONERROR_FAIL, &res_len);
@@ -56,9 +56,8 @@ TEST(ppb_char_set, to_utf16_all_ASCII)
 TEST(ppb_char_set, to_utf16_basic_UTF_8)
 {
     const char *in = "Привет, мир!";
-    const uint8_t out[] = {0x1f, 0x04, 0x40, 0x04, 0x38, 0x04, 0x32, 0x04, 0x35, 0x04,
-                           0x42, 0x04, 0x2c, 0x00, 0x20, 0x00, 0x3c, 0x04, 0x38, 0x04,
-                           0x40, 0x04, 0x21, 0x00};
+    const uint16_t out[] = {0x41f, 0x440, 0x438, 0x432, 0x435, 0x442,
+                            0x2c,  0x20,  0x43c, 0x438, 0x440, 0x21};
     uint32_t res_len = 7777;
     uint16_t *res = ppb_char_set_char_set_to_utf16(0, in, strlen(in), "UTF-8",
                                                    PP_CHARSET_CONVERSIONERROR_FAIL, &res_len);
@@ -83,8 +82,7 @@ TEST(ppb_char_set, to_utf16_wrong_UTF_8_with_error)
 
 TEST(ppb_char_set, from_utf16_all_ASCII)
 {
-    const uint8_t in[] = {'H', 0, 'e', 0, 'l', 0, 'l', 0, 'o', 0, ',', 0, ' ', 0, 'w', 0,
-                          'o', 0, 'r', 0, 'l', 0, 'd', 0, '!', 0};
+    const uint16_t in[] = {'H', 'e', 'l', 'l', 'o', ',', ' ', 'w', 'o', 'r', 'l', 'd', '!'};
     const char *out = "Hello, world!";
     uint32_t res_len = 7777;
     char *res = ppb_char_set_utf16_to_char_set(0, (const uint16_t *)in,
@@ -97,9 +95,8 @@ TEST(ppb_char_set, from_utf16_all_ASCII)
 
 TEST(ppb_char_set, to_utf16_non_ASCII_all_correct)
 {
-    const uint8_t in[] = {0x1f, 0x04, 0x40, 0x04, 0x38, 0x04, 0x32, 0x04, 0x35, 0x04,
-                          0x42, 0x04, 0x2c, 0x00, 0x20, 0x00, 0x3c, 0x04, 0x38, 0x04,
-                          0x40, 0x04, 0x21, 0x00}; // "Привет, мир!"
+    const uint16_t in[] = {0x41f, 0x440, 0x438, 0x432, 0x435, 0x442, 0x2c,
+                           0x20,  0x43c, 0x438, 0x440, 0x21};  // "Привет, мир!"
     const char *out = "\xcf\xf0\xe8\xe2\xe5\xf2\x2c\x20\xec\xe8\xf0\x21"; // "Привет, мир!"
     uint32_t res_len = 7777;
     char *res = ppb_char_set_utf16_to_char_set(0, (const uint16_t *)in,
@@ -112,9 +109,9 @@ TEST(ppb_char_set, to_utf16_non_ASCII_all_correct)
 
 TEST(ppb_char_set, to_utf16_non_ASCII_PP_CHARSET_CONVERSIONERROR_FAIL)
 {
-    const uint8_t in[] = {0x1f, 0x04, 0x40, 0x04, 0x38, 0x04, 0x32, 0x04, 0x35, 0x04,
-                          0x42, 0x04, 0x2c, 0x00, 0x20, 0x00, 0x6b, 0x26, 0x3c, 0x04,
-                          0x38, 0x04, 0x40, 0x04, 0x21, 0x00}; // "Привет, ♫мир!"
+    const uint16_t in[] = {0x41f, 0x440,  0x438, 0x432, 0x435, 0x442, 0x2c,
+                           0x20,  0x266b, 0x43c, 0x438, 0x440, 0x21};
+    // "♫" in "Привет, ♫мир!" cannot be represented in cp1251.
     // const char *out = "\xcf\xf0\xe8\xe2\xe5\xf2\x2c\x20\xec\xe8\xf0\x21"; // "Привет, мир!"
     uint32_t res_len = 7777;
     char *res = ppb_char_set_utf16_to_char_set(0, (const uint16_t *)in,
@@ -127,9 +124,9 @@ TEST(ppb_char_set, to_utf16_non_ASCII_PP_CHARSET_CONVERSIONERROR_FAIL)
 
 TEST(ppb_char_set, to_utf16_non_ASCII_PP_CHARSET_CONVERSIONERROR_SKIP)
 {
-    const uint8_t in[] = {0x1f, 0x04, 0x40, 0x04, 0x38, 0x04, 0x32, 0x04, 0x35, 0x04,
-                          0x42, 0x04, 0x2c, 0x00, 0x20, 0x00, 0x6b, 0x26, 0x3c, 0x04,
-                          0x38, 0x04, 0x40, 0x04, 0x21, 0x00}; // "Привет, ♫мир!"
+    const uint16_t in[] = {
+        0x41f, 0x440,  0x438, 0x432, 0x435, 0x442, 0x2c,
+        0x20,  0x266b, 0x43c, 0x438, 0x440, 0x21};  // "Привет, ♫мир!"
     const char *out = "\xcf\xf0\xe8\xe2\xe5\xf2\x2c\x20\xec\xe8\xf0\x21"; // "Привет, мир!"
     uint32_t res_len = 7777;
     char *res = ppb_char_set_utf16_to_char_set(0, (const uint16_t *)in,
@@ -142,9 +139,9 @@ TEST(ppb_char_set, to_utf16_non_ASCII_PP_CHARSET_CONVERSIONERROR_SKIP)
 
 TEST(ppb_char_set, to_utf16_non_ASCII_PP_CHARSET_CONVERSIONERROR_SUBSTITUTE)
 {
-    const uint8_t in[] = {0x1f, 0x04, 0x40, 0x04, 0x38, 0x04, 0x32, 0x04, 0x35, 0x04,
-                          0x42, 0x04, 0x2c, 0x00, 0x20, 0x00, 0x6b, 0x26, 0x3c, 0x04,
-                          0x38, 0x04, 0x40, 0x04, 0x21, 0x00}; // "Привет, ♫мир!"
+    const uint16_t in[] = {
+        0x41f, 0x440,  0x438, 0x432, 0x435, 0x442, 0x2c,
+        0x20,  0x266b, 0x43c, 0x438, 0x440, 0x21};  // "Привет, ♫мир!"
     const char *out = "\xcf\xf0\xe8\xe2\xe5\xf2\x2c\x20\x3f\xec\xe8\xf0\x21";// "Привет, ?мир!"
     uint32_t res_len = 7777;
     char *res = ppb_char_set_utf16_to_char_set(0, (const uint16_t *)in,
@@ -164,9 +161,8 @@ TEST(ppb_char_set, gb2312_ASCII_to_utf16)
                           0x6c, 0x64, 0x21};
 
     // "Hello, world!" in UTF16-LE
-    const uint8_t out[] = {0x48, 0x00, 0x65, 0x00, 0x6c, 0x00, 0x6c, 0x00, 0x6f, 0x00,
-                           0x2c, 0x00, 0x20, 0x00, 0x77, 0x00, 0x6f, 0x00, 0x72, 0x00,
-                           0x6c, 0x00, 0x64, 0x00, 0x21, 0x00};
+    const uint16_t out[] = {0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x2c, 0x20,
+                            0x77, 0x6f, 0x72, 0x6c, 0x64, 0x21};
 
     uint32_t res_len = 7777;
     uint16_t *res = ppb_char_set_char_set_to_utf16(0, (const char *)in, sizeof(in), "gb2312",