diff --git a/src/common/wchar.c b/src/common/wchar.c index 71f564abfad..aadad6ff116 100644 --- a/src/common/wchar.c +++ b/src/common/wchar.c @@ -266,12 +266,22 @@ pg_euccn2wchar_with_len(const unsigned char *from, pg_wchar *to, int len) return cnt; } +/* + * mbverifychar does not accept SS2 or SS3 (CS2 and CS3 are not defined for + * EUC_CN), but mb2wchar_with_len does. Tell a coherent story for code that + * relies on agreement between mb2wchar_with_len and mblen. Invalid text + * datums (e.g. from shared catalogs) reach this. + */ static int pg_euccn_mblen(const unsigned char *s) { int len; - if (IS_HIGHBIT_SET(*s)) + if (*s == SS2) + len = 3; + else if (*s == SS3) + len = 3; + else if (IS_HIGHBIT_SET(*s)) len = 2; else len = 1; @@ -1916,7 +1926,7 @@ pg_encoding_set_invalid(int encoding, char *dst) const pg_wchar_tbl pg_wchar_table[] = { {pg_ascii2wchar_with_len, pg_wchar2single_with_len, pg_ascii_mblen, pg_ascii_dsplen, pg_ascii_verifychar, pg_ascii_verifystr, 1}, /* PG_SQL_ASCII */ {pg_eucjp2wchar_with_len, pg_wchar2euc_with_len, pg_eucjp_mblen, pg_eucjp_dsplen, pg_eucjp_verifychar, pg_eucjp_verifystr, 3}, /* PG_EUC_JP */ - {pg_euccn2wchar_with_len, pg_wchar2euc_with_len, pg_euccn_mblen, pg_euccn_dsplen, pg_euccn_verifychar, pg_euccn_verifystr, 2}, /* PG_EUC_CN */ + {pg_euccn2wchar_with_len, pg_wchar2euc_with_len, pg_euccn_mblen, pg_euccn_dsplen, pg_euccn_verifychar, pg_euccn_verifystr, 3}, /* PG_EUC_CN */ {pg_euckr2wchar_with_len, pg_wchar2euc_with_len, pg_euckr_mblen, pg_euckr_dsplen, pg_euckr_verifychar, pg_euckr_verifystr, 3}, /* PG_EUC_KR */ {pg_euctw2wchar_with_len, pg_wchar2euc_with_len, pg_euctw_mblen, pg_euctw_dsplen, pg_euctw_verifychar, pg_euctw_verifystr, 4}, /* PG_EUC_TW */ {pg_eucjp2wchar_with_len, pg_wchar2euc_with_len, pg_eucjp_mblen, pg_eucjp_dsplen, pg_eucjp_verifychar, pg_eucjp_verifystr, 3}, /* PG_EUC_JIS_2004 */