diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c index 8f803be0461..4d334e9314e 100644 --- a/src/backend/utils/adt/varlena.c +++ b/src/backend/utils/adt/varlena.c @@ -6310,18 +6310,18 @@ unicode_normalize_func(PG_FUNCTION_ARGS) text *input = PG_GETARG_TEXT_PP(0); char *formstr = text_to_cstring(PG_GETARG_TEXT_PP(1)); UnicodeNormalizationForm form; - int size; + size_t size; pg_wchar *input_chars; pg_wchar *output_chars; unsigned char *p; text *result; - int i; + size_t i; form = unicode_norm_form_from_string(formstr); /* convert to pg_wchar */ size = pg_mbstrlen_with_len(VARDATA_ANY(input), VARSIZE_ANY_EXHDR(input)); - input_chars = palloc((size + 1) * sizeof(pg_wchar)); + input_chars = palloc_array(pg_wchar, size + 1); p = (unsigned char *) VARDATA_ANY(input); for (i = 0; i < size; i++) { @@ -6376,20 +6376,20 @@ unicode_is_normalized(PG_FUNCTION_ARGS) text *input = PG_GETARG_TEXT_PP(0); char *formstr = text_to_cstring(PG_GETARG_TEXT_PP(1)); UnicodeNormalizationForm form; - int size; + size_t size; pg_wchar *input_chars; pg_wchar *output_chars; unsigned char *p; - int i; + size_t i; UnicodeNormalizationQC quickcheck; - int output_size; + size_t output_size; bool result; form = unicode_norm_form_from_string(formstr); /* convert to pg_wchar */ size = pg_mbstrlen_with_len(VARDATA_ANY(input), VARSIZE_ANY_EXHDR(input)); - input_chars = palloc((size + 1) * sizeof(pg_wchar)); + input_chars = palloc_array(pg_wchar, size + 1); p = (unsigned char *) VARDATA_ANY(input); for (i = 0; i < size; i++) { diff --git a/src/common/unicode_norm.c b/src/common/unicode_norm.c index 06bf921e458..783a37eb3de 100644 --- a/src/common/unicode_norm.c +++ b/src/common/unicode_norm.c @@ -23,6 +23,7 @@ #include "common/unicode_norm_hashfunc.h" #include "common/unicode_normprops_table.h" #include "port/pg_bswap.h" +#include "utils/memutils.h" #else #include "common/unicode_norm_table.h" #endif @@ -420,10 +421,28 @@ unicode_normalize(UnicodeNormalizationForm form, const pg_wchar *input) /* * Calculate how many characters long the decomposed version will be. + * + * Some characters decompose to quite a few code points, so that the + * decomposed version's size could overrun MaxAllocSize, and even 32-bit + * size_t, even though the input string presumably fits in that. In + * frontend we want to just return NULL in that case, so monitor the sum + * and exit early once we'd need more than MaxAllocSize bytes. */ decomp_size = 0; for (p = input; *p; p++) + { decomp_size += get_decomposed_size(*p, compat); + if (unlikely(decomp_size > MaxAllocSize / sizeof(pg_wchar))) + { +#ifndef FRONTEND + /* Exit loop and let palloc() throw error below */ + break; +#else + /* Just return NULL with no explicit error */ + return NULL; +#endif + } + } decomp_chars = (pg_wchar *) ALLOC((decomp_size + 1) * sizeof(pg_wchar)); if (decomp_chars == NULL)