mirror of
https://github.com/postgres/postgres.git
synced 2026-04-24 07:40:59 -04:00
Centralize detection of x86 CPU features
We now maintain an array of booleans that indicate which features were detected at runtime. When code wants to check for a given feature, the array is automatically checked if it has been initialized and if not, a single function checks all features at once. Move all x86 feature detection to pg_cpu_x86.c, and move the CRC function choosing logic to the file where the hardware-specific functions are defined, consistent with more recent hardware-specific files in src/port. Reviewed-by: Zsolt Parragi <zsolt.parragi@percona.com> Discussion: https://postgr.es/m/CANWCAZbgEUFw7LuYSVeJ=Tj98R5HoOB1Ffeqk3aLvbw5rU5NTw@mail.gmail.com
This commit is contained in:
parent
d6d9b96b40
commit
16743db061
5 changed files with 112 additions and 127 deletions
50
src/include/port/pg_cpu.h
Normal file
50
src/include/port/pg_cpu.h
Normal file
|
|
@ -0,0 +1,50 @@
|
|||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* pg_cpu.h
|
||||
* Runtime CPU feature detection
|
||||
*
|
||||
* Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* src/include/port/pg_cpu.h
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
#ifndef PG_CPU_H
|
||||
#define PG_CPU_H
|
||||
|
||||
#if defined(USE_SSE2) || defined(__i386__)
|
||||
|
||||
typedef enum X86FeatureId
|
||||
{
|
||||
/* Have we run feature detection? */
|
||||
INIT_PG_X86,
|
||||
|
||||
/* scalar registers and 128-bit XMM registers */
|
||||
PG_SSE4_2,
|
||||
PG_POPCNT,
|
||||
|
||||
/* 512-bit ZMM registers */
|
||||
PG_AVX512_BW,
|
||||
PG_AVX512_VL,
|
||||
PG_AVX512_VPCLMULQDQ,
|
||||
PG_AVX512_VPOPCNTDQ,
|
||||
} X86FeatureId;
|
||||
#define X86FeaturesSize (PG_AVX512_VPOPCNTDQ + 1)
|
||||
|
||||
extern PGDLLIMPORT bool X86Features[];
|
||||
|
||||
extern void set_x86_features(void);
|
||||
|
||||
static inline bool
|
||||
x86_feature_available(X86FeatureId feature)
|
||||
{
|
||||
if (X86Features[INIT_PG_X86] == false)
|
||||
set_x86_features();
|
||||
|
||||
return X86Features[feature];
|
||||
}
|
||||
|
||||
#endif /* defined(USE_SSE2) || defined(__i386__) */
|
||||
|
||||
#endif /* PG_CPU_H */
|
||||
|
|
@ -1,12 +1,7 @@
|
|||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* pg_cpu_x86.c
|
||||
* Choose between Intel SSE 4.2 and software CRC-32C implementation.
|
||||
*
|
||||
* On first call, checks if the CPU we're running on supports Intel SSE
|
||||
* 4.2. If it does, use the special SSE instructions for CRC-32C
|
||||
* computation. Otherwise, fall back to the pure software implementation
|
||||
* (slicing-by-8).
|
||||
* Runtime CPU feature detection for x86
|
||||
*
|
||||
* Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
|
|
@ -34,9 +29,11 @@
|
|||
#include <immintrin.h>
|
||||
#endif
|
||||
|
||||
#include "port/pg_crc32c.h"
|
||||
#include "port/pg_cpu.h"
|
||||
|
||||
#ifndef USE_SLICING_BY_8_CRC32C
|
||||
|
||||
/* array indexed by enum X86FeatureId */
|
||||
bool X86Features[X86FeaturesSize] = {0};
|
||||
|
||||
/*
|
||||
* Does XGETBV say the ZMM registers are enabled?
|
||||
|
|
@ -58,22 +55,13 @@ zmm_regs_available(void)
|
|||
}
|
||||
|
||||
/*
|
||||
* This gets called on the first call. It replaces the function pointer
|
||||
* so that subsequent calls are routed directly to the chosen implementation.
|
||||
* Parse the CPU ID info for runtime checks.
|
||||
*/
|
||||
static pg_crc32c
|
||||
pg_comp_crc32c_choose(pg_crc32c crc, const void *data, size_t len)
|
||||
void
|
||||
set_x86_features(void)
|
||||
{
|
||||
unsigned int exx[4] = {0, 0, 0, 0};
|
||||
|
||||
/*
|
||||
* Set fallback. We must guard since slicing-by-8 is not visible
|
||||
* everywhere.
|
||||
*/
|
||||
#ifdef USE_SSE42_CRC32C_WITH_RUNTIME_CHECK
|
||||
pg_comp_crc32c = pg_comp_crc32c_sb8;
|
||||
#endif
|
||||
|
||||
#if defined(HAVE__GET_CPUID)
|
||||
__get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]);
|
||||
#elif defined(HAVE__CPUID)
|
||||
|
|
@ -82,36 +70,33 @@ pg_comp_crc32c_choose(pg_crc32c crc, const void *data, size_t len)
|
|||
#error cpuid instruction not available
|
||||
#endif
|
||||
|
||||
if ((exx[2] & (1 << 20)) != 0) /* SSE 4.2 */
|
||||
X86Features[PG_SSE4_2] = exx[2] >> 20 & 1;
|
||||
X86Features[PG_POPCNT] = exx[2] >> 23 & 1;
|
||||
|
||||
/* All these features depend on OSXSAVE */
|
||||
if (exx[2] & (1 << 27))
|
||||
{
|
||||
pg_comp_crc32c = pg_comp_crc32c_sse42;
|
||||
/* second cpuid call on leaf 7 to check extended AVX-512 support */
|
||||
|
||||
if (exx[2] & (1 << 27) && /* OSXSAVE */
|
||||
zmm_regs_available())
|
||||
{
|
||||
/* second cpuid call on leaf 7 to check extended AVX-512 support */
|
||||
|
||||
memset(exx, 0, 4 * sizeof(exx[0]));
|
||||
memset(exx, 0, 4 * sizeof(exx[0]));
|
||||
|
||||
#if defined(HAVE__GET_CPUID_COUNT)
|
||||
__get_cpuid_count(7, 0, &exx[0], &exx[1], &exx[2], &exx[3]);
|
||||
__get_cpuid_count(7, 0, &exx[0], &exx[1], &exx[2], &exx[3]);
|
||||
#elif defined(HAVE__CPUIDEX)
|
||||
__cpuidex(exx, 7, 0);
|
||||
__cpuidex(exx, 7, 0);
|
||||
#endif
|
||||
|
||||
#ifdef USE_AVX512_CRC32C_WITH_RUNTIME_CHECK
|
||||
if (exx[2] & (1 << 10) && /* VPCLMULQDQ */
|
||||
exx[1] & (1 << 31)) /* AVX512-VL */
|
||||
pg_comp_crc32c = pg_comp_crc32c_avx512;
|
||||
#endif
|
||||
if (zmm_regs_available())
|
||||
{
|
||||
X86Features[PG_AVX512_BW] = exx[1] >> 30 & 1;
|
||||
X86Features[PG_AVX512_VL] = exx[1] >> 31 & 1;
|
||||
|
||||
X86Features[PG_AVX512_VPCLMULQDQ] = exx[2] >> 10 & 1;
|
||||
X86Features[PG_AVX512_VPOPCNTDQ] = exx[2] >> 14 & 1;
|
||||
}
|
||||
}
|
||||
|
||||
return pg_comp_crc32c(crc, data, len);
|
||||
X86Features[INIT_PG_X86] = true;
|
||||
}
|
||||
|
||||
pg_crc32c (*pg_comp_crc32c) (pg_crc32c crc, const void *data, size_t len) = pg_comp_crc32c_choose;
|
||||
|
||||
#endif
|
||||
|
||||
#endif /* defined(USE_SSE2) || defined(__i386__) */
|
||||
|
|
|
|||
|
|
@ -19,8 +19,11 @@
|
|||
#include <immintrin.h>
|
||||
#endif
|
||||
|
||||
#include "port/pg_cpu.h"
|
||||
#include "port/pg_crc32c.h"
|
||||
|
||||
static pg_crc32c pg_comp_crc32c_choose(pg_crc32c crc, const void *data, size_t len);
|
||||
|
||||
pg_attribute_no_sanitize_alignment()
|
||||
pg_attribute_target("sse4.2")
|
||||
pg_crc32c
|
||||
|
|
@ -158,4 +161,33 @@ pg_comp_crc32c_avx512(pg_crc32c crc, const void *data, size_t len)
|
|||
return pg_comp_crc32c_sse42(crc0, buf, len);
|
||||
}
|
||||
|
||||
#endif /* USE_AVX512_CRC32C_WITH_RUNTIME_CHECK */
|
||||
|
||||
/*
|
||||
* This gets called on the first call. It replaces the function pointer
|
||||
* so that subsequent calls are routed directly to the chosen implementation.
|
||||
*/
|
||||
static pg_crc32c
|
||||
pg_comp_crc32c_choose(pg_crc32c crc, const void *data, size_t len)
|
||||
{
|
||||
/*
|
||||
* Set fallback. We must guard since slicing-by-8 is not visible
|
||||
* everywhere.
|
||||
*/
|
||||
#ifdef USE_SSE42_CRC32C_WITH_RUNTIME_CHECK
|
||||
pg_comp_crc32c = pg_comp_crc32c_sb8;
|
||||
#endif
|
||||
|
||||
if (x86_feature_available(PG_SSE4_2))
|
||||
pg_comp_crc32c = pg_comp_crc32c_sse42;
|
||||
|
||||
#ifdef USE_AVX512_CRC32C_WITH_RUNTIME_CHECK
|
||||
if (x86_feature_available(PG_AVX512_VL) &&
|
||||
x86_feature_available(PG_AVX512_VPCLMULQDQ))
|
||||
pg_comp_crc32c = pg_comp_crc32c_avx512;
|
||||
#endif
|
||||
|
||||
return pg_comp_crc32c(crc, data, len);
|
||||
}
|
||||
|
||||
pg_crc32c (*pg_comp_crc32c) (pg_crc32c crc, const void *data, size_t len) = pg_comp_crc32c_choose;
|
||||
|
|
|
|||
|
|
@ -14,19 +14,12 @@
|
|||
|
||||
#ifdef HAVE_X86_64_POPCNTQ
|
||||
|
||||
#if defined(HAVE__GET_CPUID) || defined(HAVE__GET_CPUID_COUNT)
|
||||
#include <cpuid.h>
|
||||
#endif
|
||||
|
||||
#ifdef USE_AVX512_POPCNT_WITH_RUNTIME_CHECK
|
||||
#include <immintrin.h>
|
||||
#endif
|
||||
|
||||
#if defined(HAVE__CPUID) || defined(HAVE__CPUIDEX)
|
||||
#include <intrin.h>
|
||||
#endif
|
||||
|
||||
#include "port/pg_bitutils.h"
|
||||
#include "port/pg_cpu.h"
|
||||
|
||||
/*
|
||||
* The SSE4.2 versions are built regardless of whether we are building the
|
||||
|
|
@ -58,84 +51,9 @@ static uint64 pg_popcount_masked_choose(const char *buf, int bytes, bits8 mask);
|
|||
uint64 (*pg_popcount_optimized) (const char *buf, int bytes) = pg_popcount_choose;
|
||||
uint64 (*pg_popcount_masked_optimized) (const char *buf, int bytes, bits8 mask) = pg_popcount_masked_choose;
|
||||
|
||||
/*
|
||||
* Return true if CPUID indicates that the POPCNT instruction is available.
|
||||
*/
|
||||
static bool
|
||||
pg_popcount_sse42_available(void)
|
||||
{
|
||||
unsigned int exx[4] = {0, 0, 0, 0};
|
||||
|
||||
#if defined(HAVE__GET_CPUID)
|
||||
__get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]);
|
||||
#elif defined(HAVE__CPUID)
|
||||
__cpuid(exx, 1);
|
||||
#else
|
||||
#error cpuid instruction not available
|
||||
#endif
|
||||
|
||||
return (exx[2] & (1 << 23)) != 0; /* POPCNT */
|
||||
}
|
||||
|
||||
#ifdef USE_AVX512_POPCNT_WITH_RUNTIME_CHECK
|
||||
|
||||
/*
|
||||
* Does CPUID say there's support for XSAVE instructions?
|
||||
*/
|
||||
static inline bool
|
||||
xsave_available(void)
|
||||
{
|
||||
unsigned int exx[4] = {0, 0, 0, 0};
|
||||
|
||||
#if defined(HAVE__GET_CPUID)
|
||||
__get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]);
|
||||
#elif defined(HAVE__CPUID)
|
||||
__cpuid(exx, 1);
|
||||
#else
|
||||
#error cpuid instruction not available
|
||||
#endif
|
||||
return (exx[2] & (1 << 27)) != 0; /* osxsave */
|
||||
}
|
||||
|
||||
/*
|
||||
* Does XGETBV say the ZMM registers are enabled?
|
||||
*
|
||||
* NB: Caller is responsible for verifying that xsave_available() returns true
|
||||
* before calling this.
|
||||
*/
|
||||
#ifdef HAVE_XSAVE_INTRINSICS
|
||||
pg_attribute_target("xsave")
|
||||
#endif
|
||||
static inline bool
|
||||
zmm_regs_available(void)
|
||||
{
|
||||
#ifdef HAVE_XSAVE_INTRINSICS
|
||||
return (_xgetbv(0) & 0xe6) == 0xe6;
|
||||
#else
|
||||
return false;
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
|
||||
* Does CPUID say there's support for AVX-512 popcount and byte-and-word
|
||||
* instructions?
|
||||
*/
|
||||
static inline bool
|
||||
avx512_popcnt_available(void)
|
||||
{
|
||||
unsigned int exx[4] = {0, 0, 0, 0};
|
||||
|
||||
#if defined(HAVE__GET_CPUID_COUNT)
|
||||
__get_cpuid_count(7, 0, &exx[0], &exx[1], &exx[2], &exx[3]);
|
||||
#elif defined(HAVE__CPUIDEX)
|
||||
__cpuidex(exx, 7, 0);
|
||||
#else
|
||||
#error cpuid instruction not available
|
||||
#endif
|
||||
return (exx[2] & (1 << 14)) != 0 && /* avx512-vpopcntdq */
|
||||
(exx[1] & (1 << 30)) != 0; /* avx512-bw */
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns true if the CPU supports the instructions required for the AVX-512
|
||||
* pg_popcount() implementation.
|
||||
|
|
@ -143,9 +61,8 @@ avx512_popcnt_available(void)
|
|||
static bool
|
||||
pg_popcount_avx512_available(void)
|
||||
{
|
||||
return xsave_available() &&
|
||||
zmm_regs_available() &&
|
||||
avx512_popcnt_available();
|
||||
return x86_feature_available(PG_AVX512_BW) &&
|
||||
x86_feature_available(PG_AVX512_VPOPCNTDQ);
|
||||
}
|
||||
|
||||
#endif /* USE_AVX512_POPCNT_WITH_RUNTIME_CHECK */
|
||||
|
|
@ -159,7 +76,7 @@ pg_popcount_avx512_available(void)
|
|||
static inline void
|
||||
choose_popcount_functions(void)
|
||||
{
|
||||
if (pg_popcount_sse42_available())
|
||||
if (x86_feature_available(PG_POPCNT))
|
||||
{
|
||||
pg_popcount_optimized = pg_popcount_sse42;
|
||||
pg_popcount_masked_optimized = pg_popcount_masked_sse42;
|
||||
|
|
|
|||
|
|
@ -3395,6 +3395,7 @@ X509_NAME
|
|||
X509_NAME_ENTRY
|
||||
X509_STORE
|
||||
X509_STORE_CTX
|
||||
X86FeatureId
|
||||
XLTW_Oper
|
||||
XLogCtlData
|
||||
XLogCtlInsert
|
||||
|
|
|
|||
Loading…
Reference in a new issue