
On 31.08.18 21:31, Heinrich Schuchardt wrote:
This patch provides a define to initialize a table that maps lowercase to capital letters for Unicode code points 0x0000 - 0xffff.
Signed-off-by: Heinrich Schuchardt <xypron.glpk@gmx.de>
v2: Add shorter tables for code pages 437 and 1250.
MAINTAINERS | 1 + include/capitalization.h | 2028 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 2029 insertions(+) create mode 100644 include/capitalization.h
diff --git a/MAINTAINERS b/MAINTAINERS index 46f826a0fe..8c9cd83347 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -381,6 +381,7 @@ T: git git://github.com/agraf/u-boot.git F: doc/README.uefi F: doc/README.iscsi F: Documentation/efi.rst +F: include/capitalization.h F: include/efi* F: include/pe.h F: include/asm-generic/pe.h diff --git a/include/capitalization.h b/include/capitalization.h new file mode 100644 index 0000000000..2c24e1bf47 --- /dev/null +++ b/include/capitalization.h @@ -0,0 +1,2028 @@ +/* SPDX-License-Identifier: Unicode-DFS-2016 */ +/*
- Capitalization tables
- */
+struct capitalization_table {
- u16 upper;
- u16 lower;
+};
+/*
- Correspondence table for small and capital Unicode letters in the range of
- 0x0000 - 0xffff based on http://www.unicode.org/Public/UCA/11.0.0/allkeys.txt
- */
+#define UNICODE_CAPITALIZATION_TABLE { \
- { 0x0531, /* ARMENIAN CAPITAL LETTER AYB */ \
0x0561, /* ARMENIAN SMALL LETTER AYB */ }, \
[...]
- { 0x2C7F, /* LATIN CAPITAL LETTER Z WITH SWASH TAIL */ \
0x0240, /* LATIN SMALL LETTER Z WITH SWASH TAIL */ }, \
- { 0x0000, /* END OF LIST CAPITAL LETTERS */ \
0x0000, /* END OF LIST SMALL LETTERS */ }, \
+}
+/*
- Correspondence table for small and capital letters of codepage 437.
- */
+#define CP437_CAPITALIZATION_TABLE { \
- { 0x03a6, 0x03c6, }, \
I like how you added a comment to each entry saying exactly which character it is. Please keep that habit in the trimmed-down tables too.
- { 0x03a3, 0x03c3, }, \
- { 0x0041, 0x0061, }, \
- { 0x00c4, 0x00e4, }, \
- { 0x00c5, 0x00e5, }, \
- { 0x00c6, 0x00e6, }, \
- { 0x0042, 0x0062, }, \
- { 0x0043, 0x0063, }, \
- { 0x00c7, 0x00e7, }, \
- { 0x0044, 0x0064, }, \
- { 0x0045, 0x0065, }, \
- { 0x00c9, 0x00e9, }, \
- { 0x0046, 0x0066, }, \
- { 0x0047, 0x0067, }, \
- { 0x0048, 0x0068, }, \
- { 0x0049, 0x0069, }, \
- { 0x004a, 0x006a, }, \
- { 0x004b, 0x006b, }, \
- { 0x004c, 0x006c, }, \
- { 0x004d, 0x006d, }, \
- { 0x004e, 0x006e, }, \
- { 0x00d1, 0x00f1, }, \
- { 0x004f, 0x006f, }, \
- { 0x00d6, 0x00f6, }, \
- { 0x0050, 0x0070, }, \
- { 0x0051, 0x0071, }, \
Most of these are just Latin A to Z. Those are already handled by the conversion you do in code, no? So you can just omit them.
- { 0x0052, 0x0072, }, \
- { 0x0053, 0x0073, }, \
- { 0x0054, 0x0074, }, \
- { 0x0055, 0x0075, }, \
- { 0x00dc, 0x00fc, }, \
- { 0x0056, 0x0076, }, \
- { 0x0057, 0x0077, }, \
- { 0x0058, 0x0078, }, \
- { 0x0059, 0x0079, }, \
- { 0x005a, 0x007a, }, \
- { 0x0000, 0x0000, }, \
... that would leave 11 entries for cp437 ...
+}
+/*
- Correspondence table for small and capital letters of codepage 1250.
- */
+#define CP1250_CAPITALIZATION_TABLE { \
- { 0x0041, 0x0061, }, \
Please sort the list by code point - or any other recognizable sorting order ;).
- { 0x00c1, 0x00e1, }, \
- { 0x0102, 0x0103, }, \
- { 0x00c2, 0x00e2, }, \
- { 0x00c4, 0x00e4, }, \
- { 0x0104, 0x0105, }, \
- { 0x0042, 0x0062, }, \
- { 0x0043, 0x0063, }, \
- { 0x0106, 0x0107, }, \
- { 0x010c, 0x010d, }, \
- { 0x00c7, 0x00e7, }, \
- { 0x0044, 0x0064, }, \
- { 0x010e, 0x010f, }, \
- { 0x0110, 0x0111, }, \
- { 0x0045, 0x0065, }, \
- { 0x00c9, 0x00e9, }, \
- { 0x011a, 0x011b, }, \
- { 0x00cb, 0x00eb, }, \
- { 0x0118, 0x0119, }, \
- { 0x0046, 0x0066, }, \
- { 0x0047, 0x0067, }, \
- { 0x0048, 0x0068, }, \
- { 0x0049, 0x0069, }, \
- { 0x00cd, 0x00ed, }, \
- { 0x00ce, 0x00ee, }, \
- { 0x004a, 0x006a, }, \
- { 0x004b, 0x006b, }, \
- { 0x004c, 0x006c, }, \
- { 0x0139, 0x013a, }, \
- { 0x013d, 0x013e, }, \
- { 0x0141, 0x0142, }, \
- { 0x004d, 0x006d, }, \
- { 0x004e, 0x006e, }, \
- { 0x0143, 0x0144, }, \
- { 0x0147, 0x0148, }, \
- { 0x004f, 0x006f, }, \
- { 0x00d3, 0x00f3, }, \
- { 0x00d4, 0x00f4, }, \
- { 0x00d6, 0x00f6, }, \
- { 0x0150, 0x0151, }, \
- { 0x0050, 0x0070, }, \
- { 0x0051, 0x0071, }, \
- { 0x0052, 0x0072, }, \
- { 0x0154, 0x0155, }, \
- { 0x0158, 0x0159, }, \
- { 0x0053, 0x0073, }, \
- { 0x015a, 0x015b, }, \
- { 0x0160, 0x0161, }, \
- { 0x015e, 0x015f, }, \
- { 0x0054, 0x0074, }, \
- { 0x0164, 0x0165, }, \
- { 0x0162, 0x0163, }, \
- { 0x0055, 0x0075, }, \
- { 0x00da, 0x00fa, }, \
- { 0x00dc, 0x00fc, }, \
- { 0x0170, 0x0171, }, \
- { 0x016e, 0x016f, }, \
- { 0x0056, 0x0076, }, \
- { 0x0057, 0x0077, }, \
- { 0x0058, 0x0078, }, \
- { 0x0059, 0x0079, }, \
- { 0x00dd, 0x00fd, }, \
- { 0x005a, 0x007a, }, \
- { 0x0179, 0x017a, }, \
- { 0x017d, 0x017e, }, \
- { 0x017b, 0x017c, }, \
... and 40 unique points for cp1250.
How about we just combine the two tables into one and call it "western"?
Alex