Optimize sorting and deduplicating trigrams

Use templated qsort() so that the comparison function can be
inlined. To speed up qunique(), use a specialized comparison function
that only checks for equality.

Author: David Geier <geidav.pg@gmail.com>
Reviewed-by: Kirill Reshke <reshkekirill@gmail.com>
Discussion: https://www.postgresql.org/message-id/2a76b5ef-4b12-4023-93a1-eed6e64968f3@gmail.com
This commit is contained in:
Heikki Linnakangas 2026-04-07 14:11:25 +03:00
parent 884f9b3c76
commit 9f3755ea07

View file

@ -226,6 +226,55 @@ CMPTRGM_CHOOSE(const void *a, const void *b)
return CMPTRGM(a, b);
}
#define ST_SORT trigram_qsort_signed
#define ST_ELEMENT_TYPE_VOID
#define ST_COMPARE(a, b) CMPTRGM_SIGNED(a, b)
#define ST_SCOPE static
#define ST_DEFINE
#define ST_DECLARE
#include "lib/sort_template.h"
#define ST_SORT trigram_qsort_unsigned
#define ST_ELEMENT_TYPE_VOID
#define ST_COMPARE(a, b) CMPTRGM_UNSIGNED(a, b)
#define ST_SCOPE static
#define ST_DEFINE
#define ST_DECLARE
#include "lib/sort_template.h"
/* Sort an array of trigrams, handling signedess correctly */
static void
trigram_qsort(trgm *array, size_t n)
{
if (GetDefaultCharSignedness())
trigram_qsort_signed(array, n, sizeof(trgm));
else
trigram_qsort_unsigned(array, n, sizeof(trgm));
}
/*
* Compare two trigrams for equality. This has the same signature as
* comparison functions used for sorting, so that this can be used with
* qunique(). This doesn't need separate versions for "signed char" and "
* unsigned char" because equality is the same for both.
*/
static inline int
CMPTRGM_EQ(const void *a, const void *b)
{
char *aa = (char *) a;
char *bb = (char *) b;
return aa[0] != bb[0] || aa[1] != bb[1] || aa[2] != bb[2] ? 1 : 0;
}
/* Deduplicate an array of trigrams */
static size_t
trigram_qunique(trgm *array, size_t n)
{
return qunique(array, n, sizeof(trgm), CMPTRGM_EQ);
}
/*
* Deprecated function.
* Use "pg_trgm.similarity_threshold" GUC variable instead of this function.
@ -281,12 +330,6 @@ show_limit(PG_FUNCTION_ARGS)
PG_RETURN_FLOAT4(similarity_threshold);
}
static int
comp_trgm(const void *a, const void *b)
{
return CMPTRGM(a, b);
}
/*
* Finds first word in string, returns pointer to the word,
* endword points to the character after word
@ -569,8 +612,8 @@ generate_trgm(char *str, int slen)
*/
if (len > 1)
{
qsort(GETARR(trg), len, sizeof(trgm), comp_trgm);
len = qunique(GETARR(trg), len, sizeof(trgm), comp_trgm);
trigram_qsort(GETARR(trg), len);
len = trigram_qunique(GETARR(trg), len);
}
SET_VARSIZE(trg, CALCGTSIZE(ARRKEY, len));
@ -1100,8 +1143,8 @@ generate_wildcard_trgm(const char *str, int slen)
len = arr.length;
if (len > 1)
{
qsort(GETARR(trg), len, sizeof(trgm), comp_trgm);
len = qunique(GETARR(trg), len, sizeof(trgm), comp_trgm);
trigram_qsort(GETARR(trg), len);
len = trigram_qunique(GETARR(trg), len);
}
trg->flag = ARRKEY;