
Provide unit tests for Unicode functions.
Signed-off-by: Heinrich Schuchardt <xypron.glpk@gmx.de> --- MAINTAINERS | 1 + include/test/suites.h | 3 +- test/Kconfig | 8 + test/Makefile | 1 + test/cmd_ut.c | 14 +- test/unicode_ut.c | 470 ++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 493 insertions(+), 4 deletions(-) create mode 100644 test/unicode_ut.c
diff --git a/MAINTAINERS b/MAINTAINERS index 51a1472cf8..a324139471 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -374,6 +374,7 @@ F: include/asm-generic/pe.h F: lib/charset.c F: lib/efi*/ F: test/py/tests/test_efi* +F: test/unicode_ut.c F: cmd/bootefi.c F: tools/file2include.c
diff --git a/include/test/suites.h b/include/test/suites.h index b5019a7cd2..8e4eac60eb 100644 --- a/include/test/suites.h +++ b/include/test/suites.h @@ -23,10 +23,11 @@ struct unit_test; int cmd_ut_category(const char *name, struct unit_test *tests, int n_ents, int argc, char * const argv[]);
+int do_ut_compression(cmd_tbl_t *cmdtp, int flag, int argc, char *const argv[]); int do_ut_dm(cmd_tbl_t *cmdtp, int flag, int argc, char * const argv[]); int do_ut_env(cmd_tbl_t *cmdtp, int flag, int argc, char * const argv[]); int do_ut_overlay(cmd_tbl_t *cmdtp, int flag, int argc, char * const argv[]); int do_ut_time(cmd_tbl_t *cmdtp, int flag, int argc, char * const argv[]); -int do_ut_compression(cmd_tbl_t *cmdtp, int flag, int argc, char *const argv[]); +int do_ut_unicode(cmd_tbl_t *cmdtp, int flag, int argc, char * const argv[]);
#endif /* __TEST_SUITES_H__ */ diff --git a/test/Kconfig b/test/Kconfig index 3643761bc6..de16d179d0 100644 --- a/test/Kconfig +++ b/test/Kconfig @@ -15,6 +15,14 @@ config UT_TIME problems. But if you are having problems with udelay() and the like, this is a good place to start.
+config UT_UNICODE + bool "Unit tests for Unicode functions" + depends on UNIT_TEST + default y + help + Enables the 'ut unicode' command which tests that the functions for + manipulating Unicode strings work correctly. + source "test/dm/Kconfig" source "test/env/Kconfig" source "test/overlay/Kconfig" diff --git a/test/Makefile b/test/Makefile index 1092011fdb..a5f52fd5ad 100644 --- a/test/Makefile +++ b/test/Makefile @@ -8,4 +8,5 @@ obj-$(CONFIG_SANDBOX) += command_ut.o obj-$(CONFIG_SANDBOX) += compression.o obj-$(CONFIG_SANDBOX) += print_ut.o obj-$(CONFIG_UT_TIME) += time_ut.o +obj-$(CONFIG_UT_UNICODE) += unicode_ut.o obj-$(CONFIG_$(SPL_)LOG) += log/ diff --git a/test/cmd_ut.c b/test/cmd_ut.c index 934a5a931b..d6a2593850 100644 --- a/test/cmd_ut.c +++ b/test/cmd_ut.c @@ -49,6 +49,10 @@ static cmd_tbl_t cmd_ut_sub[] = { #ifdef CONFIG_UT_TIME U_BOOT_CMD_MKENT(time, CONFIG_SYS_MAXARGS, 1, do_ut_time, "", ""), #endif +#if defined(CONFIG_UT_UNICODE) && \ + !defined(CONFIG_SPL_BUILD) && !defined(API_BUILD) + U_BOOT_CMD_MKENT(unicode, CONFIG_SYS_MAXARGS, 1, do_ut_unicode, "", ""), +#endif #ifdef CONFIG_SANDBOX U_BOOT_CMD_MKENT(compression, CONFIG_SYS_MAXARGS, 1, do_ut_compression, "", ""), @@ -93,6 +97,9 @@ static int do_ut(cmd_tbl_t *cmdtp, int flag, int argc, char * const argv[]) #ifdef CONFIG_SYS_LONGHELP static char ut_help_text[] = "all - execute all enabled tests\n" +#ifdef CONFIG_SANDBOX + "ut compression - Test compressors and bootm decompression\n" +#endif #ifdef CONFIG_UT_DM "ut dm [test-name]\n" #endif @@ -105,11 +112,12 @@ static char ut_help_text[] = #ifdef CONFIG_UT_TIME "ut time - Very basic test of time functions\n" #endif -#ifdef CONFIG_SANDBOX - "ut compression - Test compressors and bootm decompression\n" +#if defined(CONFIG_UT_UNICODE) && \ + !defined(CONFIG_SPL_BUILD) && !defined(API_BUILD) + "ut unicode - test Unicode functions\n" #endif ; -#endif +#endif /* CONFIG_SYS_LONGHELP */
U_BOOT_CMD( ut, CONFIG_SYS_MAXARGS, 1, do_ut, diff --git a/test/unicode_ut.c b/test/unicode_ut.c new file mode 100644 index 0000000000..29316606c4 --- /dev/null +++ b/test/unicode_ut.c @@ -0,0 +1,470 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Unit tests for Unicode functions + * + * Copyright (c) 2018 Heinrich Schuchardt <xypron.glpk@gmx.de> + */ + +#include <common.h> +#include <charset.h> +#include <command.h> +#include <errno.h> + +/* Constants c1-c4 (utf-16) and d1-d4 (utf-8) encode the same letters */ + +/* Six characters translating to one utf-8 byte each. */ +static const u16 c1[] = {0x55, 0x2d, 0x42, 0x6f, 0x6f, 0x74, 0x00}; +/* Eight characters, one of them (0xe1) translating to two utf-8 bytes */ +static const u16 c2[] = {0x6b, 0x61, 0x66, 0x62, 0xe1, 0x74, 0x75, 0x72, 0x00}; +/* Three characters translating to three utf-8 bytes each */ +static const u16 c3[] = {0x6f5c, 0x6c34, 0x8266, 0x00}; +/* Three letters translating to four utf-8 bytes each */ +static const u16 c4[] = {0xd801, 0xdc8d, 0xd801, 0xdc96, 0xd801, 0xdc87, + 0x0000}; + +/* Six characters translating to one utf-16 word each. 
*/ +static const char d1[] = {0x55, 0x2d, 0x42, 0x6f, 0x6f, 0x74, 0x00}; +/* Eight characters translating to one utf-16 word each */ +static const char d2[] = {0x6b, 0x61, 0x66, 0x62, 0xc3, 0xa1, 0x74, 0x75, + 0x72, 0x00}; +/* Three characters translating to one utf-16 word each */ +static const char d3[] = {0xe6, 0xbd, 0x9c, 0xe6, 0xb0, 0xb4, 0xe8, 0x89, + 0xa6, 0x00}; +/* Three letters translating to two utf-16 words each */ +static const char d4[] = {0xf0, 0x90, 0x92, 0x8d, 0xf0, 0x90, 0x92, 0x96, + 0xf0, 0x90, 0x92, 0x87, 0x00}; +/* Test utf8_get() on d2, d3 and the first letter of d4; returns 0 if all checks pass, -1 otherwise */ +static int ut_utf8_get(void) +{ + const char *s; + s32 code; + int i; + + /* Check characters less than 0x800 */ + s = d2; + for (i = 0; i < 8; ++i) { + code = utf8_get((const char **)&s); + /* d2 is the utf-8 encoding of c2 */ + if (code != c2[i]) + return -1; + if (!code) + break; + } + if (s != d2 + 9) + return -1; + + /* Check characters less than 0x10000 */ + s = d3; + for (i = 0; i < 4; ++i) { + code = utf8_get((const char **)&s); + /* d3 is the utf-8 encoding of c3 */ + if (code != c3[i]) + return -1; + if (!code) + break; + } + if (s != d3 + 9) + return -1; + + /* Check character greater than 0xffff */ + s = d4; + code = utf8_get((const char **)&s); + if (code != 0x0001048d) + return -1; + if (s != d4 + 4) + return -1; + + return 0; +} +/* Test utf8_put() for codes needing one to four bytes and for one illegal code; returns 0 or -1 */ +static int ut_utf8_put(void) +{ + char buffer[8] = { 0, }; + char *pos; + + /* Commercial at, translates to one character */ + pos = buffer; + if (utf8_put('@', &pos)) + return -1; + if (pos - buffer != 1) + return -1; + if (buffer[0] != '@' || buffer[1]) + return -1; + + /* Latin letter G with acute, translates to two characters */ + pos = buffer; + if (utf8_put(0x1f4, &pos)) + return -1; + if (pos - buffer != 2) + return -1; + if (buffer[0] != (char)0xc7 || buffer[1] != (char)0xb4 || buffer[2]) + return -1; + + /* Tagalog letter i, translates to three characters */ + pos = buffer; + if (utf8_put(0x1701, &pos)) + return -1; + if (pos - buffer != 3) + return -1; + if (buffer[0] != (char)0xe1 || 
buffer[1] != (char)0x9c || + buffer[2] != (char)0x81 || buffer[3]) + return -1; + + /* Hamster face, translates to four characters */ + pos = buffer; + if (utf8_put(0x1f439, &pos)) + return -1; + if (pos - buffer != 4) + return -1; + if (buffer[0] != (char)0xf0 || buffer[1] != (char)0x9f || + buffer[2] != (char)0x90 || buffer[3] != (char)0xb9 || buffer[4]) + return -1; + + /* Illegal code (0xd888 lies in the surrogate range), utf8_put() has to return -1 */ + pos = buffer; + if (utf8_put(0xd888, &pos) != -1) + return -1; + + return 0; +} +/* Check that utf8_utf16_strlen() reports the number of utf-16 words for d1-d4; returns 0 or 1 */ +int ut_utf8_utf16_strlen(void) +{ + if (utf8_utf16_strlen(d1) != 6) + return 1; + if (utf8_utf16_strlen(d2) != 8) + return 1; + if (utf8_utf16_strlen(d3) != 3) + return 1; + if (utf8_utf16_strlen(d4) != 6) + return 1; + return 0; +} +/* Check utf8_utf16_strnlen(); per the expected values the limit is in characters and the result in utf-16 words; returns 0 or 1 */ +int ut_utf8_utf16_strnlen(void) +{ + if (utf8_utf16_strnlen(d1, 3) != 3) + return 1; + if (utf8_utf16_strnlen(d1, 13) != 6) + return 1; + if (utf8_utf16_strnlen(d2, 6) != 6) + return 1; + if (utf8_utf16_strnlen(d3, 2) != 2) + return 1; + if (utf8_utf16_strnlen(d4, 2) != 4) + return 1; + if (utf8_utf16_strnlen(d4, 3) != 6) + return 1; + return 0; +} +/* Compare up to count u16 words of a1 and a2, stopping early once both are zero; returns -1, 0 or 1 */ +int ut_u16_strcmp(const u16 *a1, const u16 *a2, size_t count) +{ + for (; (*a1 || *a2) && count; ++a1, ++a2, --count) { + if (*a1 < *a2) + return -1; + if (*a1 > *a2) + return 1; + } + return 0; +} +/* Convert d1-d4 with utf8_utf16_strcpy() and compare the results with c1-c4; returns 0 or 1 */ +int ut_utf8_utf16_strcpy(void) +{ + u16 buf[16]; + u16 *pos; + + pos = buf; + utf8_utf16_strcpy(&pos, d1); + if (pos - buf != 6) + return 1; + if (ut_u16_strcmp(buf, c1, 16)) + return 1; + + pos = buf; + utf8_utf16_strcpy(&pos, d2); + if (pos - buf != 8) + return 1; + if (ut_u16_strcmp(buf, c2, 16)) + return 1; + + pos = buf; + utf8_utf16_strcpy(&pos, d3); + if (pos - buf != 3) + return 1; + if (ut_u16_strcmp(buf, c3, 16)) + return 1; + + pos = buf; + utf8_utf16_strcpy(&pos, d4); + if (pos - buf != 6) + return 1; + if (ut_u16_strcmp(buf, c4, 16)) + return 1; + + return 0; +} +/* Check utf8_utf16_strncpy() with limits below and above the string length, using a zeroed buffer; returns 0 or 1 */ +int ut_utf8_utf16_strncpy(void) +{ + u16 buf[16]; + u16 *pos; + + pos = buf; + memset(buf, 0, sizeof(buf)); + utf8_utf16_strncpy(&pos, d1, 4); 
+ if (pos - buf != 4) + return 1; + if (buf[4]) + return 1; + if (ut_u16_strcmp(buf, c1, 4)) + return 1; + + pos = buf; + memset(buf, 0, sizeof(buf)); + utf8_utf16_strncpy(&pos, d2, 10); + if (pos - buf != 8) + return 1; + if (!buf[4]) + return 1; + if (ut_u16_strcmp(buf, c2, SIZE_MAX)) + return 1; + + pos = buf; + memset(buf, 0, sizeof(buf)); + utf8_utf16_strncpy(&pos, d3, 2); + if (pos - buf != 2) + return 1; + if (buf[2]) + return 1; + if (ut_u16_strcmp(buf, c3, 2)) + return 1; + + pos = buf; + memset(buf, 0, sizeof(buf)); + utf8_utf16_strncpy(&pos, d4, 2); + if (pos - buf != 4) + return 1; + if (buf[4]) + return 1; + if (ut_u16_strcmp(buf, c4, 4)) + return 1; + + pos = buf; + memset(buf, 0, sizeof(buf)); + utf8_utf16_strncpy(&pos, d4, 10); + if (pos - buf != 6) + return 1; + if (!buf[5]) + return 1; + if (ut_u16_strcmp(buf, c4, SIZE_MAX)) + return 1; + + return 0; +} +/* Test utf16_get() on c2 and the first letter of c4; returns 0 if all checks pass, -1 otherwise */ +static int ut_utf16_get(void) +{ + const u16 *s; + s32 code; + int i; + + /* Check characters less than 0x10000 */ + s = c2; + for (i = 0; i < 9; ++i) { + code = utf16_get((const u16 **)&s); + if (code != c2[i]) + return -1; + if (!code) + break; + } + if (s != c2 + 8) + return -1; + + /* Check character greater than 0xffff */ + s = c4; + code = utf16_get((const u16 **)&s); + if (code != 0x0001048d) + return -1; + if (s != c4 + 2) + return -1; + + return 0; +} +/* Test utf16_put() for codes needing one and two words and for one illegal code; returns 0 or -1 */ +static int ut_utf16_put(void) +{ + u16 buffer[4] = { 0, }; + u16 *pos; + + /* Commercial at, translates to one word */ + pos = buffer; + if (utf16_put('@', &pos)) + return -1; + if (pos - buffer != 1) + return -1; + if (buffer[0] != (u16)'@' || buffer[1]) + return -1; + + /* Hamster face, translates to two words */ + pos = buffer; + if (utf16_put(0x1f439, &pos)) + return -1; + if (pos - buffer != 2) + return -1; + if (buffer[0] != (u16)0xd83d || buffer[1] != (u16)0xdc39 || buffer[2]) + return -1; + + /* Illegal code (0xd888 lies in the surrogate range), utf16_put() has to return -1 */ + pos = buffer; + if (utf16_put(0xd888, &pos) != -1) + return -1; + + return 0; +} +/* Check that utf16_utf8_strlen() reports the number of utf-8 bytes for c1-c4; returns 0 or 1 */ +int ut_utf16_utf8_strlen(void) +{ + if 
(utf16_utf8_strlen(c1) != 6) + return 1; + if (utf16_utf8_strlen(c2) != 9) + return 1; + if (utf16_utf8_strlen(c3) != 9) + return 1; + if (utf16_utf8_strlen(c4) != 12) + return 1; + return 0; +} +/* Check utf16_utf8_strnlen(); per the expected values the limit is in characters and the result in utf-8 bytes; returns 0 or 1 */ +int ut_utf16_utf8_strnlen(void) +{ + if (utf16_utf8_strnlen(c1, 3) != 3) + return 1; + if (utf16_utf8_strnlen(c1, 13) != 6) + return 1; + if (utf16_utf8_strnlen(c2, 6) != 7) + return 1; + if (utf16_utf8_strnlen(c3, 2) != 6) + return 1; + if (utf16_utf8_strnlen(c4, 2) != 8) + return 1; + if (utf16_utf8_strnlen(c4, 3) != 12) + return 1; + return 0; +} +/* Convert c1-c4 with utf16_utf8_strcpy() and compare the results with d1-d4; returns 0 or 1 */ +int ut_utf16_utf8_strcpy(void) +{ + char buf[16]; + char *pos; + + pos = buf; + utf16_utf8_strcpy(&pos, c1); + if (pos - buf != 6) + return 1; + if (strcmp(buf, d1)) + return 1; + + pos = buf; + utf16_utf8_strcpy(&pos, c2); + if (pos - buf != 9) + return 1; + if (strcmp(buf, d2)) + return 1; + + pos = buf; + utf16_utf8_strcpy(&pos, c3); + if (pos - buf != 9) + return 1; + if (strcmp(buf, d3)) + return 1; + + pos = buf; + utf16_utf8_strcpy(&pos, c4); + if (pos - buf != 12) + return 1; + if (strcmp(buf, d4)) + return 1; + + return 0; +} +/* Check utf16_utf8_strncpy() with limits below and above the string length, using a zeroed buffer; returns 0 or 1 */ +int ut_utf16_utf8_strncpy(void) +{ + char buf[16]; + char *pos; + + pos = buf; + memset(buf, 0, sizeof(buf)); + utf16_utf8_strncpy(&pos, c1, 4); + if (pos - buf != 4) + return 1; + if (buf[4]) + return 1; + if (strncmp(buf, d1, 4)) + return 1; + + pos = buf; + memset(buf, 0, sizeof(buf)); + utf16_utf8_strncpy(&pos, c2, 10); + if (pos - buf != 9) + return 1; + if (!buf[4]) + return 1; + if (strncmp(buf, d2, SIZE_MAX)) + return 1; + + pos = buf; + memset(buf, 0, sizeof(buf)); + utf16_utf8_strncpy(&pos, c3, 2); + if (pos - buf != 6) + return 1; + if (buf[6]) + return 1; + if (strncmp(buf, d3, 6)) + return 1; + + pos = buf; + memset(buf, 0, sizeof(buf)); + utf16_utf8_strncpy(&pos, c4, 2); + if (pos - buf != 8) + return 1; + if (buf[8]) + return 1; + if (strncmp(buf, d4, 8)) + return 1; + + pos = buf; + memset(buf, 0, sizeof(buf)); + utf16_utf8_strncpy(&pos, c4, 10); + if (pos - buf != 12) + 
return 1; + if (!buf[5]) + return 1; + if (strncmp(buf, d4, SIZE_MAX)) + return 1; + + return 0; +} +/* Handler for 'ut unicode': runs every test in this file, prints the overall verdict, returns CMD_RET_FAILURE if any test failed */ +int do_ut_unicode(cmd_tbl_t *cmdtp, int flag, int argc, char * const argv[]) +{ + int ret = 0; + + ret |= ut_utf8_get(); + ret |= ut_utf8_put(); + ret |= ut_utf8_utf16_strlen(); + ret |= ut_utf8_utf16_strnlen(); + ret |= ut_utf8_utf16_strcpy(); + ret |= ut_utf8_utf16_strncpy(); + ret |= ut_utf16_get(); + ret |= ut_utf16_put(); + ret |= ut_utf16_utf8_strlen(); + ret |= ut_utf16_utf8_strnlen(); + ret |= ut_utf16_utf8_strcpy(); + ret |= ut_utf16_utf8_strncpy(); + + printf("Test %s\n", ret ? "failed" : "passed"); + + return ret ? CMD_RET_FAILURE : CMD_RET_SUCCESS; +}