mirror of
https://github.com/postgres/postgres.git
synced 2026-04-10 19:47:07 -04:00
Allow retrieving x86 TSC frequency/flags from CPUID
This adds additional x86-specific CPUID checks for flags needed for determining whether the Time-Stamp Counter (TSC) is usable on a given system, as well as a helper function to retrieve the TSC frequency from CPUID. This is intended for a future patch that will utilize the TSC to lower the overhead of timing instrumentation. In passing, always make pg_cpuid_subleaf reset the variables used for its result, to avoid accidentally using stale results if __get_cpuid_count errors out and the caller doesn't check for it. Author: Lukas Fittl <lukas@fittl.com> Author: David Geier <geidav.pg@gmail.com> Author: Andres Freund <andres@anarazel.de> Reviewed-by: Andres Freund <andres@anarazel.de> Reviewed-by: David Geier <geidav.pg@gmail.com> Reviewed-by: John Naylor <john.naylor@postgresql.org> Reviewed-by: Jakub Wartak <jakub.wartak@enterprisedb.com> (in an earlier version) Discussion: https://www.postgresql.org/message-id/flat/20200612232810.f46nbqkdhbutzqdg%40alap3.anarazel.de
This commit is contained in:
parent
0022622c93
commit
bcb2cf41f9
2 changed files with 145 additions and 5 deletions
|
|
@ -32,8 +32,16 @@ typedef enum X86FeatureId
|
||||||
PG_AVX512_VL,
|
PG_AVX512_VL,
|
||||||
PG_AVX512_VPCLMULQDQ,
|
PG_AVX512_VPCLMULQDQ,
|
||||||
PG_AVX512_VPOPCNTDQ,
|
PG_AVX512_VPOPCNTDQ,
|
||||||
|
|
||||||
|
/* identification */
|
||||||
|
PG_HYPERVISOR,
|
||||||
|
|
||||||
|
/* Time-Stamp Counter (TSC) flags */
|
||||||
|
PG_RDTSCP,
|
||||||
|
PG_TSC_INVARIANT,
|
||||||
|
PG_TSC_ADJUST,
|
||||||
} X86FeatureId;
|
} X86FeatureId;
|
||||||
#define X86FeaturesSize (PG_AVX512_VPOPCNTDQ + 1)
|
#define X86FeaturesSize (PG_TSC_ADJUST + 1)
|
||||||
|
|
||||||
extern PGDLLIMPORT bool X86Features[];
|
extern PGDLLIMPORT bool X86Features[];
|
||||||
|
|
||||||
|
|
@ -48,6 +56,8 @@ x86_feature_available(X86FeatureId feature)
|
||||||
return X86Features[feature];
|
return X86Features[feature];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
extern uint32 x86_tsc_frequency_khz(void);
|
||||||
|
|
||||||
#endif /* defined(USE_SSE2) || defined(__i386__) */
|
#endif /* defined(USE_SSE2) || defined(__i386__) */
|
||||||
|
|
||||||
#endif /* PG_CPU_H */
|
#endif /* PG_CPU_H */
|
||||||
|
|
|
||||||
|
|
@ -80,13 +80,13 @@ pg_cpuid(int leaf, unsigned int *reg)
|
||||||
/*
 * Query CPUID for the given leaf/subleaf pair and store the four result
 * registers in reg[], indexed by EAX, EBX, ECX and EDX.
 *
 * reg[] is always zeroed first, so a caller that ignores the return value
 * never sees stale contents from a previous query.
 *
 * Returns true if the query was issued: with __get_cpuid_count that means
 * the function reported success (returned 1); with __cpuidex it is always
 * true.  Returns false when neither facility is available.
 */
static inline bool
pg_cpuid_subleaf(int leaf, int subleaf, unsigned int *reg)
{
	/* reset unconditionally, see header comment */
	memset(reg, 0, 4 * sizeof(unsigned int));

#if defined(HAVE__GET_CPUID_COUNT)
	return __get_cpuid_count(leaf, subleaf, &reg[EAX], &reg[EBX], &reg[ECX], &reg[EDX]) == 1;
#elif defined(HAVE__CPUIDEX)
	__cpuidex((int *) reg, leaf, subleaf);
	return true;
#else
	return false;
#endif
}
|
||||||
|
|
@ -101,19 +101,24 @@ void
|
||||||
set_x86_features(void)
|
set_x86_features(void)
|
||||||
{
|
{
|
||||||
unsigned int reg[4] = {0};
|
unsigned int reg[4] = {0};
|
||||||
|
bool have_osxsave;
|
||||||
|
|
||||||
pg_cpuid(0x01, reg);
|
pg_cpuid(0x01, reg);
|
||||||
|
|
||||||
X86Features[PG_SSE4_2] = reg[ECX] >> 20 & 1;
|
X86Features[PG_SSE4_2] = reg[ECX] >> 20 & 1;
|
||||||
X86Features[PG_POPCNT] = reg[ECX] >> 23 & 1;
|
X86Features[PG_POPCNT] = reg[ECX] >> 23 & 1;
|
||||||
|
X86Features[PG_HYPERVISOR] = reg[ECX] >> 31 & 1;
|
||||||
|
have_osxsave = reg[ECX] >> 27 & 1;
|
||||||
|
|
||||||
|
pg_cpuid_subleaf(0x07, 0, reg);
|
||||||
|
|
||||||
|
X86Features[PG_TSC_ADJUST] = reg[EBX] >> 1 & 1;
|
||||||
|
|
||||||
/* leaf 7 features that depend on OSXSAVE */
|
/* leaf 7 features that depend on OSXSAVE */
|
||||||
if (reg[ECX] & (1 << 27))
|
if (have_osxsave)
|
||||||
{
|
{
|
||||||
uint32 xcr0_val = 0;
|
uint32 xcr0_val = 0;
|
||||||
|
|
||||||
pg_cpuid_subleaf(0x07, 0, reg);
|
|
||||||
|
|
||||||
#ifdef HAVE_XSAVE_INTRINSICS
|
#ifdef HAVE_XSAVE_INTRINSICS
|
||||||
/* get value of Extended Control Register */
|
/* get value of Extended Control Register */
|
||||||
xcr0_val = _xgetbv(0);
|
xcr0_val = _xgetbv(0);
|
||||||
|
|
@ -135,7 +140,132 @@ set_x86_features(void)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Check for other TSC related flags */
|
||||||
|
pg_cpuid(0x80000001, reg);
|
||||||
|
X86Features[PG_RDTSCP] = reg[EDX] >> 27 & 1;
|
||||||
|
|
||||||
|
pg_cpuid(0x80000007, reg);
|
||||||
|
X86Features[PG_TSC_INVARIANT] = reg[EDX] >> 8 & 1;
|
||||||
|
|
||||||
X86Features[INIT_PG_X86] = true;
|
X86Features[INIT_PG_X86] = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* TSC (Time-stamp Counter) handling code */
|
||||||
|
|
||||||
|
static uint32 x86_hypervisor_tsc_frequency_khz(void);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Determine the TSC frequency of the CPU through CPUID, where supported.
|
||||||
|
*
|
||||||
|
* Needed to interpret the tick value returned by RDTSC/RDTSCP. Return value of
|
||||||
|
* 0 indicates the frequency information was not accessible via CPUID.
|
||||||
|
*/
|
||||||
|
uint32
|
||||||
|
x86_tsc_frequency_khz(void)
|
||||||
|
{
|
||||||
|
unsigned int reg[4] = {0};
|
||||||
|
|
||||||
|
if (x86_feature_available(PG_HYPERVISOR))
|
||||||
|
{
|
||||||
|
uint32 freq = x86_hypervisor_tsc_frequency_khz();
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If the hypervisor specific logic didn't figure out the frequency,
|
||||||
|
* it's possible (although not likely, as often that's hidden from
|
||||||
|
* guests) that the non-virtualized logic can figure out the
|
||||||
|
* frequency.
|
||||||
|
*/
|
||||||
|
if (freq > 0)
|
||||||
|
return freq;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* On modern Intel CPUs, the TSC is implemented by invariant timekeeping
|
||||||
|
* hardware, also called "Always Running Timer", or ART. The ART stays
|
||||||
|
* consistent even if the CPU changes frequency due to changing power
|
||||||
|
* levels.
|
||||||
|
*
|
||||||
|
* As documented in "Determining the Processor Base Frequency" in the
|
||||||
|
* "Intel® 64 and IA-32 Architectures Software Developer's Manual",
|
||||||
|
* February 2026 Edition, we can get the TSC frequency as follows:
|
||||||
|
*
|
||||||
|
* Nominal TSC frequency = ( CPUID.15H:ECX[31:0] * CPUID.15H:EBX[31:0] ) /
|
||||||
|
* CPUID.15H:EAX[31:0]
|
||||||
|
*
|
||||||
|
* With CPUID.15H:ECX representing the nominal core crystal clock
|
||||||
|
* frequency, and EAX/EBX representing values used to translate the TSC
|
||||||
|
* value to that frequency, see "Chapter 20.17 "Time-Stamp Counter" of
|
||||||
|
* that manual.
|
||||||
|
*
|
||||||
|
* Older Intel CPUs, and other vendors do not set CPUID.15H:ECX, and as
|
||||||
|
* such we fall back to alternate approaches.
|
||||||
|
*/
|
||||||
|
pg_cpuid(0x15, reg);
|
||||||
|
if (reg[ECX] > 0)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* EBX not being set indicates invariant TSC is not available. Require
|
||||||
|
* EAX being non-zero too, to avoid a theoretical divide by zero.
|
||||||
|
*/
|
||||||
|
if (reg[EAX] == 0 || reg[EBX] == 0)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
return reg[ECX] / 1000 * reg[EBX] / reg[EAX];
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* When CPUID.15H is not available/incomplete, we can instead try to get
|
||||||
|
* the processor base frequency in MHz from CPUID.16H:EAX, the "Processor
|
||||||
|
* Frequency Information Leaf".
|
||||||
|
*/
|
||||||
|
pg_cpuid(0x16, reg);
|
||||||
|
if (reg[EAX] > 0)
|
||||||
|
return reg[EAX] * 1000;
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Support for reading TSC frequency for hypervisors passing it to a guest VM.
|
||||||
|
*
|
||||||
|
* Two Hypervisors (VMware and KVM) are known to make TSC frequency in KHz
|
||||||
|
* available at the vendor-specific 0x40000010 leaf in the EAX register.
|
||||||
|
*
|
||||||
|
* For some other Hypervisors that have an invariant TSC, e.g. HyperV, we would
|
||||||
|
* need to access a model-specific register (MSR) to get the frequency. MSRs are
|
||||||
|
* separate from CPUID and typically not available for unprivileged processes,
|
||||||
|
* so we can't get the frequency this way.
|
||||||
|
*/
|
||||||
|
#define CPUID_HYPERVISOR_VMWARE(r) (r[EBX] == 0x61774d56 && r[ECX] == 0x4d566572 && r[EDX] == 0x65726177) /* VMwareVMware */
|
||||||
|
#define CPUID_HYPERVISOR_KVM(r) (r[EBX] == 0x4b4d564b && r[ECX] == 0x564b4d56 && r[EDX] == 0x0000004d) /* KVMKVMKVM */
|
||||||
|
static uint32
|
||||||
|
x86_hypervisor_tsc_frequency_khz(void)
|
||||||
|
{
|
||||||
|
unsigned int reg[4] = {0};
|
||||||
|
|
||||||
|
#if defined(HAVE__CPUIDEX)
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The hypervisor is determined using the 0x40000000 Hypervisor
|
||||||
|
* information leaf, which requires use of __cpuidex to set ECX to 0 to
|
||||||
|
* access it.
|
||||||
|
*
|
||||||
|
* The similar __get_cpuid_count function does not work as expected since
|
||||||
|
* it contains a check for __get_cpuid_max, which has been observed to be
|
||||||
|
* lower than the special Hypervisor leaf, despite it being available.
|
||||||
|
*/
|
||||||
|
__cpuidex((int *) reg, 0x40000000, 0);
|
||||||
|
|
||||||
|
if (reg[EAX] >= 0x40000010 && (CPUID_HYPERVISOR_VMWARE(reg) || CPUID_HYPERVISOR_KVM(reg)))
|
||||||
|
{
|
||||||
|
__cpuidex((int *) reg, 0x40000010, 0);
|
||||||
|
if (reg[EAX] > 0)
|
||||||
|
return reg[EAX];
|
||||||
|
}
|
||||||
|
#endif /* HAVE__CPUIDEX */
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
#endif /* defined(USE_SSE2) || defined(__i386__) */
|
#endif /* defined(USE_SSE2) || defined(__i386__) */
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue