
On 11.09.18 22:38, Heinrich Schuchardt wrote:
Up to now the EFI_TEXT_INPUT_PROTOCOL only supported ASCII characters. With the patch it can consume UTF-8 from the console.
Currently only the serial console and the console can deliver UTF-8. Local consoles are restricted to ASCII.
Signed-off-by: Heinrich Schuchardt <xypron.glpk@gmx.de>
v2: drop support for German keyboard; move reading of Unicode code to charset.c
include/charset.h            |   9 +++
lib/charset.c                | 136 ++++++++++++++++++++++-------------
lib/efi_loader/efi_console.c |  13 ++--
test/unicode_ut.c            |   8 +--
4 files changed, 108 insertions(+), 58 deletions(-)
diff --git a/include/charset.h b/include/charset.h index 686db5a1fe..a7de5f6948 100644 --- a/include/charset.h +++ b/include/charset.h @@ -8,11 +8,20 @@ #ifndef __CHARSET_H_ #define __CHARSET_H_
+#include <efi.h>
Yeah ... eh ... no :).
I assume this is just a leftover from the old version?
#include <linux/kernel.h> #include <linux/types.h>
#define MAX_UTF8_PER_UTF16 3
+/**
- console_read_unicode() - read Unicode code point from console
- @code: code point
Please specify this a bit more clearly.
- Return: 0 = success
- */
+int console_read_unicode(s32 *code);
/**
- utf8_get() - get next UTF-8 code point from buffer
diff --git a/lib/charset.c b/lib/charset.c index 72c808ce64..1806b41cc3 100644 --- a/lib/charset.c +++ b/lib/charset.c @@ -5,6 +5,7 @@
- Copyright (c) 2017 Rob Clark
*/
+#include <common.h> #include <charset.h> #include <capitalization.h> #include <malloc.h> @@ -18,67 +19,106 @@ static struct capitalization_table capitalization_table[] = CP437_CAPITALIZATION_TABLE; #endif
-s32 utf8_get(const char **src) +/**
- get_code() - read Unicode code point from UTF-8 stream
- @read_u8: - stream reader
- @src: - string buffer passed to stream reader, optional
- Return: - Unicode code point
- */
+static int get_code(u8 (*read_u8)(void *data), void *data) {
- s32 code = 0;
- unsigned char c;
- s32 ch = 0;
- if (!src || !*src)
return -1;
- if (!**src)
- ch = read_u8(data);
- if (!ch) return 0;
- c = **src;
- if (c >= 0x80) {
++*src;
if (!**src)
return -1;
/*
* We do not expect a continuation byte (0x80 - 0xbf).
* 0x80 is coded as 0xc2 0x80, so we cannot have less than 0xc2
* here.
* The highest code point is 0x10ffff which is coded as
* 0xf4 0x8f 0xbf 0xbf. So we cannot have a byte above 0xf4.
*/
if (c < 0xc2 || code > 0xf4)
return -1;
if (c >= 0xe0) {
if (c >= 0xf0) {
- if (ch >= 0xc2 && ch <= 0xf4) {
int code = 0;
if (ch >= 0xe0) {
if (ch >= 0xf0) { /* 0xf0 - 0xf4 */
c &= 0x07;
code = c << 18;
c = **src;
++*src;
if (!**src)
return -1;
if (c < 0x80 || c > 0xbf)
return -1;
c &= 0x3f;
ch &= 0x07;
code = ch << 18;
ch = read_u8(data);
if (ch < 0x80 || ch > 0xbf)
goto error;
ch &= 0x3f; } else { /* 0xe0 - 0xef */
c &= 0x0f;
ch &= 0x0f; }
code += c << 12;
code += ch << 12; if ((code >= 0xD800 && code <= 0xDFFF) || code >= 0x110000)
return -1;
c = **src;
++*src;
if (!**src)
return -1;
if (c < 0x80 || c > 0xbf)
return -1;
goto error;
ch = read_u8(data);
if (ch < 0x80 || ch > 0xbf)
} /* 0xc0 - 0xdf or continuation byte (0x80 - 0xbf) */goto error;
c &= 0x3f;
code += c << 6;
c = **src;
if (c < 0x80 || c > 0xbf)
return -1;
c &= 0x3f;
ch &= 0x3f;
code += ch << 6;
ch = read_u8(data);
if (ch < 0x80 || ch > 0xbf)
goto error;
ch &= 0x3f;
ch += code;
- } else if (ch >= 0x80) {
}goto error;
- code += c;
- return ch;
+error:
- return '?';
+}
+/**
- read_string() - read byte from character string
- @data: - pointer to string
- Return: - byte read
- The string pointer is incremented if it does not point to '\0'.
- */
+static u8 read_string(void *data)
+{
- const char **src = (const char **)data;
- u8 c;
- if (!src || !*src || !**src)
return 0;
- c = (unsigned char)**src;
Please remove the cast. Btw, you could also write this as
return *(*src)++;
++*src;
- return code;
- return c;
+}
+/**
- read_console() - read byte from console
- @src - not used, needed to match interface
- Return: - byte read
- */
+static u8 read_console(void *data) +{
- return getc();
+}
+int console_read_unicode(s32 *code) +{
- if (!tstc())
/* No input available */
return 1;
Please avoid multi-line indented code without braces.
- /* Read Unicode code */
- *code = get_code(read_console, NULL);
- return 0;
+}
+s32 utf8_get(const char **src) +{
- return get_code(read_string, src);
}
Alex