mirror of
https://github.com/postgres/postgres.git
synced 2026-04-09 02:56:13 -04:00
Allow retrieving x86 TSC frequency/flags from CPUID
This adds additional x86 specific CPUID checks for flags needed for determining whether the Time-Stamp Counter (TSC) is usable on a given system, as well as a helper function to retrieve the TSC frequency from CPUID. This is intended for a future patch that will utilize the TSC to lower the overhead of timing instrumentation. In passing, always make pg_cpuid_subleaf reset the variables used for its result, to avoid accidentally using stale results if __get_cpuid_count errors out and the caller doesn't check for it. Author: Lukas Fittl <lukas@fittl.com> Author: David Geier <geidav.pg@gmail.com> Author: Andres Freund <andres@anarazel.de> Reviewed-by: Andres Freund <andres@anarazel.de> Reviewed-by: David Geier <geidav.pg@gmail.com> Reviewed-by: John Naylor <john.naylor@postgresql.org> Reviewed-by: Jakub Wartak <jakub.wartak@enterprisedb.com> (in an earlier version) Discussion: https://www.postgresql.org/message-id/flat/20200612232810.f46nbqkdhbutzqdg%40alap3.anarazel.de
This commit is contained in:
parent
0022622c93
commit
bcb2cf41f9
2 changed files with 145 additions and 5 deletions
|
|
@ -32,8 +32,16 @@ typedef enum X86FeatureId
|
|||
PG_AVX512_VL,
|
||||
PG_AVX512_VPCLMULQDQ,
|
||||
PG_AVX512_VPOPCNTDQ,
|
||||
|
||||
/* identification */
|
||||
PG_HYPERVISOR,
|
||||
|
||||
/* Time-Stamp Counter (TSC) flags */
|
||||
PG_RDTSCP,
|
||||
PG_TSC_INVARIANT,
|
||||
PG_TSC_ADJUST,
|
||||
} X86FeatureId;
|
||||
#define X86FeaturesSize (PG_AVX512_VPOPCNTDQ + 1)
|
||||
#define X86FeaturesSize (PG_TSC_ADJUST + 1)
|
||||
|
||||
extern PGDLLIMPORT bool X86Features[];
|
||||
|
||||
|
|
@ -48,6 +56,8 @@ x86_feature_available(X86FeatureId feature)
|
|||
return X86Features[feature];
|
||||
}
|
||||
|
||||
/*
 * Returns the TSC frequency in kHz as determined through CPUID, or 0 if the
 * frequency information was not accessible that way.
 */
extern uint32 x86_tsc_frequency_khz(void);
|
||||
|
||||
#endif /* defined(USE_SSE2) || defined(__i386__) */
|
||||
|
||||
#endif /* PG_CPU_H */
|
||||
|
|
|
|||
|
|
@ -80,13 +80,13 @@ pg_cpuid(int leaf, unsigned int *reg)
|
|||
/*
 * pg_cpuid_subleaf
 *
 * Query CPUID for the given leaf/subleaf and store the four result registers
 * in reg[], indexed by EAX/EBX/ECX/EDX.  reg must point to an array of at
 * least four unsigned ints.
 *
 * reg[] is always zeroed before the query, so a caller that does not check
 * the return value sees all-zero registers instead of stale contents when
 * the query fails or no suitable intrinsic is available.
 *
 * Returns true when __get_cpuid_count reports success (or unconditionally on
 * the __cpuidex path), false otherwise.
 */
static inline bool
pg_cpuid_subleaf(int leaf, int subleaf, unsigned int *reg)
{
	memset(reg, 0, 4 * sizeof(unsigned int));

#if defined(HAVE__GET_CPUID_COUNT)
	return __get_cpuid_count(leaf, subleaf, &reg[EAX], &reg[EBX], &reg[ECX], &reg[EDX]) == 1;
#elif defined(HAVE__CPUIDEX)
	__cpuidex((int *) reg, leaf, subleaf);
	return true;
#else
	/* no way to issue CPUID with a subleaf; reg[] was already cleared above */
	return false;
#endif
}
|
||||
|
|
@ -101,19 +101,24 @@ void
|
|||
set_x86_features(void)
|
||||
{
|
||||
unsigned int reg[4] = {0};
|
||||
bool have_osxsave;
|
||||
|
||||
pg_cpuid(0x01, reg);
|
||||
|
||||
X86Features[PG_SSE4_2] = reg[ECX] >> 20 & 1;
|
||||
X86Features[PG_POPCNT] = reg[ECX] >> 23 & 1;
|
||||
X86Features[PG_HYPERVISOR] = reg[ECX] >> 31 & 1;
|
||||
have_osxsave = reg[ECX] >> 27 & 1;
|
||||
|
||||
pg_cpuid_subleaf(0x07, 0, reg);
|
||||
|
||||
X86Features[PG_TSC_ADJUST] = reg[EBX] >> 1 & 1;
|
||||
|
||||
/* leaf 7 features that depend on OSXSAVE */
|
||||
if (reg[ECX] & (1 << 27))
|
||||
if (have_osxsave)
|
||||
{
|
||||
uint32 xcr0_val = 0;
|
||||
|
||||
pg_cpuid_subleaf(0x07, 0, reg);
|
||||
|
||||
#ifdef HAVE_XSAVE_INTRINSICS
|
||||
/* get value of Extended Control Register */
|
||||
xcr0_val = _xgetbv(0);
|
||||
|
|
@ -135,7 +140,132 @@ set_x86_features(void)
|
|||
}
|
||||
}
|
||||
|
||||
/* Check for other TSC related flags */
|
||||
pg_cpuid(0x80000001, reg);
|
||||
X86Features[PG_RDTSCP] = reg[EDX] >> 27 & 1;
|
||||
|
||||
pg_cpuid(0x80000007, reg);
|
||||
X86Features[PG_TSC_INVARIANT] = reg[EDX] >> 8 & 1;
|
||||
|
||||
X86Features[INIT_PG_X86] = true;
|
||||
}
|
||||
|
||||
/* TSC (Time-stamp Counter) handling code */
|
||||
|
||||
static uint32 x86_hypervisor_tsc_frequency_khz(void);
|
||||
|
||||
/*
|
||||
* Determine the TSC frequency of the CPU through CPUID, where supported.
|
||||
*
|
||||
* Needed to interpret the tick value returned by RDTSC/RDTSCP. Return value of
|
||||
* 0 indicates the frequency information was not accessible via CPUID.
|
||||
*/
|
||||
uint32
|
||||
x86_tsc_frequency_khz(void)
|
||||
{
|
||||
unsigned int reg[4] = {0};
|
||||
|
||||
if (x86_feature_available(PG_HYPERVISOR))
|
||||
{
|
||||
uint32 freq = x86_hypervisor_tsc_frequency_khz();
|
||||
|
||||
/*
|
||||
* If the hypervisor specific logic didn't figure out the frequency,
|
||||
* it's possible (although not likely, as often that's hidden from
|
||||
* guests) that the non-virtualized logic can figure out the
|
||||
* frequency.
|
||||
*/
|
||||
if (freq > 0)
|
||||
return freq;
|
||||
}
|
||||
|
||||
/*
|
||||
* On modern Intel CPUs, the TSC is implemented by invariant timekeeping
|
||||
* hardware, also called "Always Running Timer", or ART. The ART stays
|
||||
* consistent even if the CPU changes frequency due to changing power
|
||||
* levels.
|
||||
*
|
||||
* As documented in "Determining the Processor Base Frequency" in the
|
||||
* "Intel® 64 and IA-32 Architectures Software Developer's Manual",
|
||||
* February 2026 Edition, we can get the TSC frequency as follows:
|
||||
*
|
||||
* Nominal TSC frequency = ( CPUID.15H:ECX[31:0] * CPUID.15H:EBX[31:0] ) /
|
||||
* CPUID.15H:EAX[31:0]
|
||||
*
|
||||
* With CPUID.15H:ECX representing the nominal core crystal clock
|
||||
* frequency, and EAX/EBX representing values used to translate the TSC
|
||||
* value to that frequency, see "Chapter 20.17 "Time-Stamp Counter" of
|
||||
* that manual.
|
||||
*
|
||||
* Older Intel CPUs, and other vendors do not set CPUID.15H:ECX, and as
|
||||
* such we fall back to alternate approaches.
|
||||
*/
|
||||
pg_cpuid(0x15, reg);
|
||||
if (reg[ECX] > 0)
|
||||
{
|
||||
/*
|
||||
* EBX not being set indicates invariant TSC is not available. Require
|
||||
* EAX being non-zero too, to avoid a theoretical divide by zero.
|
||||
*/
|
||||
if (reg[EAX] == 0 || reg[EBX] == 0)
|
||||
return 0;
|
||||
|
||||
return reg[ECX] / 1000 * reg[EBX] / reg[EAX];
|
||||
}
|
||||
|
||||
/*
|
||||
* When CPUID.15H is not available/incomplete, we can instead try to get
|
||||
* the processor base frequency in MHz from CPUID.16H:EAX, the "Processor
|
||||
* Frequency Information Leaf".
|
||||
*/
|
||||
pg_cpuid(0x16, reg);
|
||||
if (reg[EAX] > 0)
|
||||
return reg[EAX] * 1000;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Support for reading TSC frequency for hypervisors passing it to a guest VM.
|
||||
*
|
||||
* Two Hypervisors (VMware and KVM) are known to make TSC frequency in KHz
|
||||
* available at the vendor-specific 0x40000010 leaf in the EAX register.
|
||||
*
|
||||
* For some other Hypervisors that have an invariant TSC, e.g. HyperV, we would
|
||||
* need to access a model-specific register (MSR) to get the frequency. MSRs are
|
||||
* separate from CPUID and typically not available for unprivileged processes,
|
||||
* so we can't get the frequency this way.
|
||||
*/
|
||||
#define CPUID_HYPERVISOR_VMWARE(r) (r[EBX] == 0x61774d56 && r[ECX] == 0x4d566572 && r[EDX] == 0x65726177) /* VMwareVMware */
|
||||
#define CPUID_HYPERVISOR_KVM(r) (r[EBX] == 0x4b4d564b && r[ECX] == 0x564b4d56 && r[EDX] == 0x0000004d) /* KVMKVMKVM */
|
||||
static uint32
|
||||
x86_hypervisor_tsc_frequency_khz(void)
|
||||
{
|
||||
unsigned int reg[4] = {0};
|
||||
|
||||
#if defined(HAVE__CPUIDEX)
|
||||
|
||||
/*
|
||||
* The hypervisor is determined using the 0x40000000 Hypervisor
|
||||
* information leaf, which requires use of __cpuidex to set ECX to 0 to
|
||||
* access it.
|
||||
*
|
||||
* The similar __get_cpuid_count function does not work as expected since
|
||||
* it contains a check for __get_cpuid_max, which has been observed to be
|
||||
* lower than the special Hypervisor leaf, despite it being available.
|
||||
*/
|
||||
__cpuidex((int *) reg, 0x40000000, 0);
|
||||
|
||||
if (reg[EAX] >= 0x40000010 && (CPUID_HYPERVISOR_VMWARE(reg) || CPUID_HYPERVISOR_KVM(reg)))
|
||||
{
|
||||
__cpuidex((int *) reg, 0x40000010, 0);
|
||||
if (reg[EAX] > 0)
|
||||
return reg[EAX];
|
||||
}
|
||||
#endif /* HAVE__CPUIDEX */
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
#endif /* defined(USE_SSE2) || defined(__i386__) */
|
||||
|
|
|
|||
Loading…
Reference in a new issue