From 3e0eba19675404ce873cbeadfaff8c99ec94d012 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Mon, 11 May 2026 05:13:51 -0700 Subject: [PATCH] Prevent buffer overrun in unicode_normalize(). Some UTF8 characters decompose to more than a dozen codepoints. It is possible for an input string that fits into well under 1GB to produce more than 4G decomposed codepoints, causing unicode_normalize()'s decomp_size variable to wrap around to a small positive value. This results in a small output buffer allocation and subsequent buffer overrun. To fix, test after each addition to see if we've overrun MaxAllocSize, and break out of the loop early if so. In frontend code we want to just return NULL for this failure (treating it like OOM). In the backend, we can rely on the following palloc() call to throw error. I also tightened things up in the calling functions in varlena.c, using size_t rather than int and allocating the input workspace with palloc_array(). These changes are probably unnecessary given the knowledge that the original input and the normalized output_chars array must fit into 1GB, but it's a lot easier to believe the code is safe with these changes. Reported-by: Xint Code Reported-by: Bruce Dang Author: Tom Lane Co-authored-by: Heikki Linnakangas Backpatch-through: 14 Security: CVE-2026-6473 --- src/backend/utils/adt/varlena.c | 14 +++++++------- src/common/unicode_norm.c | 19 +++++++++++++++++++ 2 files changed, 26 insertions(+), 7 deletions(-) diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c index 8f803be0461..4d334e9314e 100644 --- a/src/backend/utils/adt/varlena.c +++ b/src/backend/utils/adt/varlena.c @@ -6310,18 +6310,18 @@ unicode_normalize_func(PG_FUNCTION_ARGS) text *input = PG_GETARG_TEXT_PP(0); char *formstr = text_to_cstring(PG_GETARG_TEXT_PP(1)); UnicodeNormalizationForm form; - int size; + size_t size; pg_wchar *input_chars; pg_wchar *output_chars; unsigned char *p; text *result; - int i; + size_t i; form = unicode_norm_form_from_string(formstr); /* convert to pg_wchar */ size = pg_mbstrlen_with_len(VARDATA_ANY(input), VARSIZE_ANY_EXHDR(input)); - input_chars = palloc((size + 1) * sizeof(pg_wchar)); + input_chars = palloc_array(pg_wchar, size + 1); p = (unsigned char *) VARDATA_ANY(input); for (i = 0; i < size; i++) { @@ -6376,20 +6376,20 @@ unicode_is_normalized(PG_FUNCTION_ARGS) text *input = PG_GETARG_TEXT_PP(0); char *formstr = text_to_cstring(PG_GETARG_TEXT_PP(1)); UnicodeNormalizationForm form; - int size; + size_t size; pg_wchar *input_chars; pg_wchar *output_chars; unsigned char *p; - int i; + size_t i; UnicodeNormalizationQC quickcheck; - int output_size; + size_t output_size; bool result; form = unicode_norm_form_from_string(formstr); /* convert to pg_wchar */ size = pg_mbstrlen_with_len(VARDATA_ANY(input), VARSIZE_ANY_EXHDR(input)); - input_chars = palloc((size + 1) * sizeof(pg_wchar)); + input_chars = palloc_array(pg_wchar, size + 1); p = (unsigned char *) VARDATA_ANY(input); for (i = 0; i < size; i++) { diff --git a/src/common/unicode_norm.c b/src/common/unicode_norm.c index 06bf921e458..783a37eb3de 100644 --- a/src/common/unicode_norm.c +++ b/src/common/unicode_norm.c @@ -23,6 +23,7 @@ #include "common/unicode_norm_hashfunc.h" #include "common/unicode_normprops_table.h" #include "port/pg_bswap.h" +#include "utils/memutils.h" #else #include "common/unicode_norm_table.h" #endif @@ -420,10 +421,28 @@ unicode_normalize(UnicodeNormalizationForm form, const pg_wchar *input) /* * Calculate how many characters long the decomposed version will be. + * + * Some characters decompose to quite a few code points, so that the + * decomposed version's size could overrun MaxAllocSize, and even 32-bit + * size_t, even though the input string presumably fits in that. In + * frontend we want to just return NULL in that case, so monitor the sum + * and exit early once we'd need more than MaxAllocSize bytes. */ decomp_size = 0; for (p = input; *p; p++) + { decomp_size += get_decomposed_size(*p, compat); + if (unlikely(decomp_size > MaxAllocSize / sizeof(pg_wchar))) + { +#ifndef FRONTEND + /* Exit loop and let palloc() throw error below */ + break; +#else + /* Just return NULL with no explicit error */ + return NULL; +#endif + } + } decomp_chars = (pg_wchar *) ALLOC((decomp_size + 1) * sizeof(pg_wchar)); if (decomp_chars == NULL)