mirror of
https://github.com/opnsense/src.git
synced 2026-06-08 00:02:14 -04:00
localedata: add some exceptions to utf8proc widths
Hangul Jamo medial vowels and final consonants are reportedly combining characters that won't take up any columns on their own and should be reported as zero-width, so add an exception for these as well to reflect how they work in practice. This conforms to how other implementations (e.g., glibc) treat these characters.

Reviewed by: bapt (earlier version), jkim
Sponsored by: Klara, Inc.
Differential Revision: https://reviews.freebsd.org/D47472
This commit is contained in:
parent
a4b7367eb0
commit
160c36eae4
1 changed files with 17 additions and 1 deletions
|
|
@ -28,6 +28,21 @@
|
|||
|
||||
#include <utf8proc.h>
|
||||
|
||||
/*
 * Return the column width to report for code point `wc`.
 *
 * The value is utf8proc's own width for the code point, except for the
 * locally overridden range handled below.
 */
static int
width_of(int32_t wc)
{
	/* Outside the overridden range: no exception, trust utf8proc. */
	if (wc < 0x1160 || wc > 0x11ff)
		return (utf8proc_charwidth(wc));

	/*
	 * U+1160 through U+11FF: Hangul Jamo medial vowels and final
	 * consonants.  These act more like combining characters, so they
	 * are considered zero-width.
	 */
	return (0);
}
|
||||
|
||||
int
|
||||
main(void)
|
||||
{
|
||||
|
|
@ -43,9 +58,10 @@ main(void)
|
|||
wcc = utf8proc_category(wc);
|
||||
if (wcc == UTF8PROC_CATEGORY_CC)
|
||||
continue;
|
||||
wcw = utf8proc_charwidth(wc);
|
||||
wcw = width_of(wc);
|
||||
if (wcw == 1)
|
||||
continue;
|
||||
|
||||
printf("%04X %d\n", wc, wcw);
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue