diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index 584bc9f49dd..8bdbb6db0f9 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -2533,6 +2533,72 @@ include_dir 'conf.d' + + Timing + + + + timing_clock_source (enum) + + timing_clock_source configuration parameter + + + + + Selects the method for making timing measurements using the OS or + specialized CPU instructions. Possible values are: + + + + auto (automatically chooses TSC + clock source on supported x86-64 CPUs, otherwise uses the OS system + clock) + + + + + system (measures timing using the OS system clock) + + + + + tsc (measures timing with a CPU instruction, e.g. + using RDTSC/RDTSCP on x86-64) + + + + The default is auto. Only superusers can change this + setting. Changing the setting during query execution is not recommended + and may cause interval timings to jump significantly or produce negative + values. + + + + Time-Stamp Counter + TSC + + TSC + If enabled, the TSC clock source, named after the + Time-Stamp Counter on x86-64, will use specialized CPU instructions when + measuring time intervals. This lowers timing overhead compared to reading + the OS system clock, and reduces the measurement error on top of the + actual runtime, for example with EXPLAIN ANALYZE. + + + RDTSC + On x86-64 CPUs the TSC clock source utilizes the + RDTSC instruction for EXPLAIN ANALYZE. + For timings that require higher precision the RDTSCP + instruction is used, which avoids inaccuracies due to CPU instruction + re-ordering. Use of the TSC clock source is not + supported on older x86-64 CPUs and other architectures, and is not + advised on systems that utilize an emulated TSC, as it + is likely slower than the system clock source. 
+ + + + + Background Writer diff --git a/src/backend/executor/instrument.c b/src/backend/executor/instrument.c index 011a9684df0..4c3aec7fdee 100644 --- a/src/backend/executor/instrument.c +++ b/src/backend/executor/instrument.c @@ -16,6 +16,8 @@ #include #include "executor/instrument.h" +#include "portability/instr_time.h" +#include "utils/guc_hooks.h" BufferUsage pgBufferUsage; static BufferUsage save_pgBufferUsage; @@ -52,7 +54,7 @@ InstrStart(Instrumentation *instr) if (!INSTR_TIME_IS_ZERO(instr->starttime)) elog(ERROR, "InstrStart called twice in a row"); else - INSTR_TIME_SET_CURRENT(instr->starttime); + INSTR_TIME_SET_CURRENT_FAST(instr->starttime); } /* save buffer usage totals at start, if needed */ @@ -78,7 +80,7 @@ InstrStopCommon(Instrumentation *instr, instr_time *accum_time) if (INSTR_TIME_IS_ZERO(instr->starttime)) elog(ERROR, "InstrStop called without start"); - INSTR_TIME_SET_CURRENT(endtime); + INSTR_TIME_SET_CURRENT_FAST(endtime); INSTR_TIME_ACCUM_DIFF(*accum_time, endtime, instr->starttime); INSTR_TIME_SET_ZERO(instr->starttime); @@ -345,3 +347,75 @@ WalUsageAccumDiff(WalUsage *dst, const WalUsage *add, const WalUsage *sub) dst->wal_fpi_bytes += add->wal_fpi_bytes - sub->wal_fpi_bytes; dst->wal_buffers_full += add->wal_buffers_full - sub->wal_buffers_full; } + +/* GUC hooks for timing_clock_source */ + +bool +check_timing_clock_source(int *newval, void **extra, GucSource source) +{ + /* + * Do nothing if timing is not initialized. This is only expected on child + * processes in EXEC_BACKEND builds, as GUC hooks can be called during + * InitializeGUCOptions() before InitProcessGlobals() has had a chance to + * run pg_initialize_timing(). Instead, TSC will be initialized via + * restore_backend_variables. 
+ */ +#ifdef EXEC_BACKEND + if (!timing_initialized) + return true; +#else + Assert(timing_initialized); +#endif + +#if PG_INSTR_TSC_CLOCK + pg_initialize_timing_tsc(); + + if (*newval == TIMING_CLOCK_SOURCE_TSC && timing_tsc_frequency_khz <= 0) + { + GUC_check_errdetail("TSC is not supported as timing clock source"); + return false; + } +#endif + + return true; +} + +void +assign_timing_clock_source(int newval, void *extra) +{ +#ifdef EXEC_BACKEND + if (!timing_initialized) + return; +#else + Assert(timing_initialized); +#endif + + /* + * Ignore the return code since the check hook already verified TSC is + * usable if it's explicitly requested. + */ + pg_set_timing_clock_source(newval); +} + +const char * +show_timing_clock_source(void) +{ + switch (timing_clock_source) + { + case TIMING_CLOCK_SOURCE_AUTO: +#if PG_INSTR_TSC_CLOCK + if (pg_current_timing_clock_source() == TIMING_CLOCK_SOURCE_TSC) + return "auto (tsc)"; +#endif + return "auto (system)"; + case TIMING_CLOCK_SOURCE_SYSTEM: + return "system"; +#if PG_INSTR_TSC_CLOCK + case TIMING_CLOCK_SOURCE_TSC: + return "tsc"; +#endif + } + + /* unreachable */ + return "?"; +} diff --git a/src/backend/postmaster/launch_backend.c b/src/backend/postmaster/launch_backend.c index ed0f4f2d234..8f3cfea880c 100644 --- a/src/backend/postmaster/launch_backend.c +++ b/src/backend/postmaster/launch_backend.c @@ -57,6 +57,7 @@ #ifdef EXEC_BACKEND #include "nodes/queryjumble.h" +#include "portability/instr_time.h" #include "storage/pg_shmem.h" #include "storage/spin.h" #endif @@ -129,6 +130,8 @@ typedef struct int MyPMChildSlot; + int32 timing_tsc_frequency_khz; + /* * These are only used by backend processes, but are here because passing * a socket needs some special handling on Windows. 
'client_sock' is an @@ -750,6 +753,8 @@ save_backend_variables(BackendParameters *param, param->MaxBackends = MaxBackends; param->num_pmchild_slots = num_pmchild_slots; + param->timing_tsc_frequency_khz = timing_tsc_frequency_khz; + #ifdef WIN32 param->PostmasterHandle = PostmasterHandle; if (!write_duplicated_handle(&param->initial_signal_pipe, @@ -1004,6 +1009,12 @@ restore_backend_variables(BackendParameters *param) MaxBackends = param->MaxBackends; num_pmchild_slots = param->num_pmchild_slots; + timing_tsc_frequency_khz = param->timing_tsc_frequency_khz; + + /* Re-run logic usually done by assign_timing_clock_source */ + pg_initialize_timing(); + pg_set_timing_clock_source(timing_clock_source); + #ifdef WIN32 PostmasterHandle = param->PostmasterHandle; pgwin32_initial_signal_pipe = param->initial_signal_pipe; diff --git a/src/backend/utils/misc/guc_parameters.dat b/src/backend/utils/misc/guc_parameters.dat index 632f3ba4989..86c1eba5dab 100644 --- a/src/backend/utils/misc/guc_parameters.dat +++ b/src/backend/utils/misc/guc_parameters.dat @@ -3060,6 +3060,17 @@ assign_hook => 'assign_timezone_abbreviations', }, +{ name => 'timing_clock_source', type => 'enum', context => 'PGC_SUSET', group => 'RESOURCES_TIME', + short_desc => 'Controls the clock source used for collecting timing measurements.', + long_desc => 'This enables the use of specialized clock sources, specifically the RDTSC clock source on x86-64 systems (if available), to support timing measurements with lower overhead during EXPLAIN and other instrumentation.', + variable => 'timing_clock_source', + boot_val => 'TIMING_CLOCK_SOURCE_AUTO', + options => 'timing_clock_source_options', + check_hook => 'check_timing_clock_source', + assign_hook => 'assign_timing_clock_source', + show_hook => 'show_timing_clock_source', +}, + { name => 'trace_connection_negotiation', type => 'bool', context => 'PGC_POSTMASTER', group => 'DEVELOPER_OPTIONS', short_desc => 'Logs details of pre-authentication connection 
handshake.', flags => 'GUC_NOT_IN_SAMPLE', diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c index d9ca13baff9..290ccbc543e 100644 --- a/src/backend/utils/misc/guc_tables.c +++ b/src/backend/utils/misc/guc_tables.c @@ -90,6 +90,7 @@ #include "storage/standby.h" #include "tcop/backend_startup.h" #include "tcop/tcopprot.h" +#include "portability/instr_time.h" #include "tsearch/ts_cache.h" #include "utils/builtins.h" #include "utils/bytea.h" @@ -373,6 +374,15 @@ static const struct config_enum_entry huge_pages_options[] = { {NULL, 0, false} }; +static const struct config_enum_entry timing_clock_source_options[] = { + {"auto", TIMING_CLOCK_SOURCE_AUTO, false}, + {"system", TIMING_CLOCK_SOURCE_SYSTEM, false}, +#if PG_INSTR_TSC_CLOCK + {"tsc", TIMING_CLOCK_SOURCE_TSC, false}, +#endif + {NULL, 0, false} +}; + static const struct config_enum_entry huge_pages_status_options[] = { {"off", HUGE_PAGES_OFF, false}, {"on", HUGE_PAGES_ON, false}, @@ -731,6 +741,7 @@ const char *const config_group_names[] = [CONN_AUTH_TCP] = gettext_noop("Connections and Authentication / TCP Settings"), [CONN_AUTH_AUTH] = gettext_noop("Connections and Authentication / Authentication"), [CONN_AUTH_SSL] = gettext_noop("Connections and Authentication / SSL"), + [RESOURCES_TIME] = gettext_noop("Resource Usage / Time"), [RESOURCES_MEM] = gettext_noop("Resource Usage / Memory"), [RESOURCES_DISK] = gettext_noop("Resource Usage / Disk"), [RESOURCES_KERNEL] = gettext_noop("Resource Usage / Kernel Resources"), diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index 2e10eb4a36a..4f2bbf05295 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -196,6 +196,10 @@ #max_files_per_process = 1000 # min 64 # (change requires restart) +# - Time - + +#timing_clock_source = auto # auto, system, tsc (if supported) + # - Background Writer - #bgwriter_delay = 200ms # 
10-10000ms between rounds diff --git a/src/common/instr_time.c b/src/common/instr_time.c index 9271113a287..fc6e1852c30 100644 --- a/src/common/instr_time.c +++ b/src/common/instr_time.c @@ -18,14 +18,17 @@ #include "postgres_fe.h" #endif +#include <math.h> + +#include "port/pg_cpu.h" #include "portability/instr_time.h" /* * Stores what the number of ticks needs to be multiplied with to end up * with nanoseconds using integer math. * - * On certain platforms (currently Windows) the ticks to nanoseconds conversion - * requires floating point math because: + * In certain cases (TSC on x86-64, and QueryPerformanceCounter on Windows) + * the ticks to nanoseconds conversion requires floating point math because: * * sec = ticks / frequency_hz * ns = ticks / frequency_hz * 1,000,000,000 @@ -51,16 +54,26 @@ * value to encourage compilers to generate better assembly, since we can be * sure these values are not negative. * - * On all other platforms we are using clock_gettime(), which uses nanoseconds + * In all other cases we are using clock_gettime(), which uses nanoseconds * as ticks. Hence, we set the multiplier to zero, which causes pg_ticks_to_ns * to return the original value. */ uint64 ticks_per_ns_scaled = 0; uint64 max_ticks_no_overflow = 0; bool timing_initialized = false; +int timing_clock_source = TIMING_CLOCK_SOURCE_AUTO; +bool timing_tsc_enabled = false; +int32 timing_tsc_frequency_khz = -1; + +static void set_ticks_per_ns(void); static void set_ticks_per_ns_system(void); +#if PG_INSTR_TSC_CLOCK +static bool tsc_use_by_default(void); +static void set_ticks_per_ns_for_tsc(void); +#endif + /* * Initializes timing infrastructure. Must be called before making any use * of INSTR* macros. 
@@ -75,6 +88,49 @@ pg_initialize_timing(void) timing_initialized = true; } +bool +pg_set_timing_clock_source(TimingClockSourceType source) +{ + Assert(timing_initialized); + +#if PG_INSTR_TSC_CLOCK + pg_initialize_timing_tsc(); + + switch (source) + { + case TIMING_CLOCK_SOURCE_AUTO: + timing_tsc_enabled = (timing_tsc_frequency_khz > 0) && tsc_use_by_default(); + break; + case TIMING_CLOCK_SOURCE_SYSTEM: + timing_tsc_enabled = false; + break; + case TIMING_CLOCK_SOURCE_TSC: + /* Tell caller TSC is not usable */ + if (timing_tsc_frequency_khz <= 0) + return false; + timing_tsc_enabled = true; + break; + } +#endif + + set_ticks_per_ns(); + timing_clock_source = source; + return true; +} + +static void +set_ticks_per_ns(void) +{ +#if PG_INSTR_TSC_CLOCK + if (timing_tsc_enabled) + { + set_ticks_per_ns_for_tsc(); + return; + } +#endif + set_ticks_per_ns_system(); +} + #ifndef WIN32 static void @@ -104,3 +160,213 @@ set_ticks_per_ns_system(void) } #endif /* WIN32 */ + +/* TSC specific logic */ + +#if PG_INSTR_TSC_CLOCK + +static void tsc_detect_frequency(void); + +/* + * Initialize the TSC clock source by determining its usability and frequency. + * + * This can be called multiple times without causing repeated work, as + * timing_tsc_frequency_khz will be set to 0 if a prior call determined the + * TSC is not usable. On EXEC_BACKEND (Windows), the TSC frequency may also be + * set by restore_backend_variables. + */ +void +pg_initialize_timing_tsc(void) +{ + if (timing_tsc_frequency_khz < 0) + tsc_detect_frequency(); +} + +static void +set_ticks_per_ns_for_tsc(void) +{ + ticks_per_ns_scaled = ((NS_PER_S / 1000) << TICKS_TO_NS_SHIFT) / timing_tsc_frequency_khz; + max_ticks_no_overflow = PG_INT64_MAX / ticks_per_ns_scaled; +} + +/* + * Detect the TSC frequency and whether RDTSCP is available on x86-64. 
+ * + * This can't be reliably determined at compile time, since the + * availability of an "invariant" TSC (that is not affected by CPU + * frequency changes) is dependent on the CPU architecture. Additionally, + * there are cases where TSC availability is impacted by virtualization, + * where a simple cpuid feature check would not be enough. + */ +static void +tsc_detect_frequency(void) +{ + timing_tsc_frequency_khz = 0; + + /* We require RDTSCP support and an invariant TSC, bail if not available */ + if (!x86_feature_available(PG_RDTSCP) || !x86_feature_available(PG_TSC_INVARIANT)) + return; + + /* Determine speed at which the TSC advances */ + timing_tsc_frequency_khz = x86_tsc_frequency_khz(); + if (timing_tsc_frequency_khz > 0) + return; + + /* + * CPUID did not give us the TSC frequency. We can instead measure the + * frequency by comparing ticks against walltime in a calibration loop. + */ + timing_tsc_frequency_khz = pg_tsc_calibrate_frequency(); +} + +/* + * Decides whether to use the TSC clock source if the user did not specify it + * one way or the other, and it is available (checked separately). + * + * Inspired by the Linux kernel's clocksource watchdog disable logic as updated + * in 2021 to reflect the reliability of the TSC on Intel platforms, see + * check_system_tsc_reliable() in arch/x86/kernel/tsc.c, as well as discussion + * in https://lore.kernel.org/lkml/87eekfk8bd.fsf@nanos.tec.linutronix.de/ + * and https://lore.kernel.org/lkml/87a6pimt1f.ffs@nanos.tec.linutronix.de/ + * for reference. + * + * When tsc_detect_frequency determines the TSC is viable (invariant, etc.), and + * we're on an Intel platform (determined via TSC_ADJUST), we consider the TSC + * trustworthy by default, matching the Linux kernel. + * + * On other CPU platforms (e.g. AMD), or in some virtual machines, we don't have + * an easy way to determine the TSC's reliability. 
If on Linux, we can check if + * TSC is the active clocksource, based on it having run the watchdog logic to + * monitor TSC correctness. For other platforms the user must explicitly enable + * it via GUC instead. + */ +static bool +tsc_use_by_default(void) +{ + if (x86_feature_available(PG_TSC_ADJUST)) + return true; + +#if defined(__linux__) + { + FILE *fp; + char buf[128]; + + fp = fopen("/sys/devices/system/clocksource/clocksource0/current_clocksource", "r"); + if (fp) + { + bool is_tsc = (fgets(buf, sizeof(buf), fp) != NULL && + strcmp(buf, "tsc\n") == 0); + + fclose(fp); + if (is_tsc) + return true; + } + } +#endif + + return false; +} + +/* + * Calibrate the TSC frequency by comparing TSC ticks against walltime. + * + * Takes initial TSC and system clock snapshots, then loops, recomputing the + * frequency each TSC_CALIBRATION_SKIPS iterations from cumulative TSC + * ticks divided by elapsed time. + * + * Once the frequency estimate stabilizes (consecutive iterations agree), we + * consider it converged and the frequency in KHz is returned. If either too + * many iterations or a time limit passes without convergence, 0 is returned. + */ +#define TSC_CALIBRATION_MAX_NS (50 * NS_PER_MS) +#define TSC_CALIBRATION_ITERATIONS 1000000 +#define TSC_CALIBRATION_SKIPS 100 +#define TSC_CALIBRATION_STABLE_CYCLES 10 +uint32 +pg_tsc_calibrate_frequency(void) +{ + instr_time initial_wall; + int64 initial_tsc; + double freq_khz = 0; + double prev_freq_khz = 0; + int stable_count = 0; + int64 prev_tsc; + int saved_clock_source = timing_clock_source; + + /* + * Frequency must be initialized to avoid recursion via + * pg_set_timing_clock_source. 
+ */ + Assert(timing_tsc_frequency_khz >= 0); + + /* Ensure INSTR_* calls below work on system time */ + pg_set_timing_clock_source(TIMING_CLOCK_SOURCE_SYSTEM); + + INSTR_TIME_SET_CURRENT(initial_wall); + + initial_tsc = pg_rdtscp(); + prev_tsc = initial_tsc; + + for (int i = 0; i < TSC_CALIBRATION_ITERATIONS; i++) + { + instr_time now_wall; + int64 now_tsc; + int64 elapsed_ns; + int64 elapsed_ticks; + + INSTR_TIME_SET_CURRENT(now_wall); + + now_tsc = pg_rdtscp(); + + INSTR_TIME_SUBTRACT(now_wall, initial_wall); + elapsed_ns = INSTR_TIME_GET_NANOSEC(now_wall); + + /* Safety: bail out if we've taken too long */ + if (elapsed_ns >= TSC_CALIBRATION_MAX_NS) + break; + + elapsed_ticks = now_tsc - initial_tsc; + + /* + * Skip if TSC hasn't advanced, or we walked backwards for some + * reason. + */ + if (now_tsc == prev_tsc || elapsed_ns <= 0 || elapsed_ticks <= 0) + continue; + + /* + * We only measure frequency every TSC_CALIBRATION_SKIPS to avoid + * stabilizing based on just a handful of RDTSC instructions. + */ + if (i % TSC_CALIBRATION_SKIPS != 0) + continue; + + freq_khz = ((double) elapsed_ticks / elapsed_ns) * 1000 * 1000; + + /* + * Once freq_khz / prev_freq_khz is small, check if it stays that way. + * If it does for long enough, we've got a winner frequency. 
+ */ + if (prev_freq_khz != 0 && fabs(1 - freq_khz / prev_freq_khz) < 0.0001) + { + stable_count++; + if (stable_count >= TSC_CALIBRATION_STABLE_CYCLES) + break; + } + else + stable_count = 0; + + prev_tsc = now_tsc; + prev_freq_khz = freq_khz; + } + + /* Restore the previous clock source */ + pg_set_timing_clock_source(saved_clock_source); + + if (stable_count < TSC_CALIBRATION_STABLE_CYCLES) + return 0; /* did not converge */ + + return (uint32) freq_khz; +} + +#endif /* PG_INSTR_TSC_CLOCK */ diff --git a/src/include/portability/instr_time.h b/src/include/portability/instr_time.h index 115f5176317..5da5eb2c057 100644 --- a/src/include/portability/instr_time.h +++ b/src/include/portability/instr_time.h @@ -4,9 +4,10 @@ * portable high-precision interval timing * * This file provides an abstraction layer to hide portability issues in - * interval timing. On Unix we use clock_gettime(), and on Windows we use - * QueryPerformanceCounter(). These macros also give some breathing room to - * use other high-precision-timing APIs. + * interval timing. On x86 we use the RDTSC/RDTSCP instruction directly in + * certain cases, or alternatively clock_gettime() on Unix-like systems and + * QueryPerformanceCounter() on Windows. These macros also give some breathing + * room to use other high-precision-timing APIs. * * The basic data type is instr_time, which all callers should treat as an * opaque typedef. 
instr_time can store either an absolute time (of @@ -17,7 +18,11 @@ * * INSTR_TIME_SET_ZERO(t) set t to zero (memset is acceptable too) * - * INSTR_TIME_SET_CURRENT(t) set t to current time + * INSTR_TIME_SET_CURRENT_FAST(t) set t to current time without waiting + * for instructions in out-of-order window + * + * INSTR_TIME_SET_CURRENT(t) set t to current time while waiting for + * instructions in OOO to retire * * * INSTR_TIME_ADD(x, y) x += y @@ -86,28 +91,99 @@ typedef struct instr_time /* * PG_INSTR_TICKS_TO_NS controls whether pg_ticks_to_ns/pg_ns_to_ticks needs to * check ticks_per_ns_scaled and potentially convert ticks <=> nanoseconds. + * + * PG_INSTR_TSC_CLOCK controls whether the TSC clock source is compiled in, and + * potentially used based on timing_tsc_enabled. */ -#ifdef WIN32 +#if defined(__x86_64__) || defined(_M_X64) #define PG_INSTR_TICKS_TO_NS 1 +#define PG_INSTR_TSC_CLOCK 1 +#elif defined(WIN32) +#define PG_INSTR_TICKS_TO_NS 1 +#define PG_INSTR_TSC_CLOCK 0 #else #define PG_INSTR_TICKS_TO_NS 0 +#define PG_INSTR_TSC_CLOCK 0 #endif /* * Variables used to translate ticks to nanoseconds, initialized by - * pg_initialize_timing. + * pg_initialize_timing and adjusted by pg_set_timing_clock_source calls or + * changes of the "timing_clock_source" GUC. + * + * Note that changing these values after setting an instr_time and before + * reading/converting it will lead to incorrect results. This is technically + * possible because the GUC can be changed at runtime, but unlikely, and we + * allow changing this at runtime to simplify testing of different sources. 
*/ extern PGDLLIMPORT uint64 ticks_per_ns_scaled; extern PGDLLIMPORT uint64 max_ticks_no_overflow; extern PGDLLIMPORT bool timing_initialized; +typedef enum +{ + TIMING_CLOCK_SOURCE_AUTO, + TIMING_CLOCK_SOURCE_SYSTEM, +#if PG_INSTR_TSC_CLOCK + TIMING_CLOCK_SOURCE_TSC +#endif +} TimingClockSourceType; + +extern int timing_clock_source; + /* * Initialize timing infrastructure * - * This must be called at least once before using INSTR_TIME_SET_CURRENT* macros. + * This must be called at least once before using INSTR_TIME_SET_CURRENT* + * macros. + * + * If you want to use the TSC clock source in a client program, + * pg_set_timing_clock_source() needs to also be called. */ extern void pg_initialize_timing(void); +/* + * Sets the time source to be used. Mainly intended for frontend programs, + * the backend should set it via the timing_clock_source GUC instead. + * + * Returns false if the clock source could not be set, for example when TSC + * is not available despite being explicitly set. + */ +extern bool pg_set_timing_clock_source(TimingClockSourceType source); + +/* Whether to actually use TSC based on availability and GUC settings. */ +extern PGDLLIMPORT bool timing_tsc_enabled; + +/* + * TSC frequency in kHz, set during initialization. + * + * -1 = not yet initialized, 0 = TSC not usable, >0 = frequency in kHz. + */ +extern PGDLLIMPORT int32 timing_tsc_frequency_khz; + +#if PG_INSTR_TSC_CLOCK + +extern void pg_initialize_timing_tsc(void); + +extern uint32 pg_tsc_calibrate_frequency(void); + +#endif /* PG_INSTR_TSC_CLOCK */ + +/* + * Returns the current timing clock source effectively in use, resolving + * TIMING_CLOCK_SOURCE_AUTO to either TIMING_CLOCK_SOURCE_SYSTEM or + * TIMING_CLOCK_SOURCE_TSC. 
+ */ +static inline TimingClockSourceType +pg_current_timing_clock_source(void) +{ +#if PG_INSTR_TSC_CLOCK + if (timing_tsc_enabled) + return TIMING_CLOCK_SOURCE_TSC; +#endif + return TIMING_CLOCK_SOURCE_SYSTEM; +} + #ifndef WIN32 /* On POSIX, use clock_gettime() for system clock source */ @@ -125,24 +201,27 @@ extern void pg_initialize_timing(void); * than CLOCK_MONOTONIC. In particular, as of macOS 10.12, Apple provides * CLOCK_MONOTONIC_RAW which is both faster to read and higher resolution than * their version of CLOCK_MONOTONIC. + * + * Note this does not get used in case the TSC clock source logic is used, + * which directly calls architecture specific timing instructions (e.g. RDTSC). */ #if defined(__darwin__) && defined(CLOCK_MONOTONIC_RAW) -#define PG_INSTR_CLOCK CLOCK_MONOTONIC_RAW +#define PG_INSTR_SYSTEM_CLOCK CLOCK_MONOTONIC_RAW #elif defined(CLOCK_MONOTONIC) -#define PG_INSTR_CLOCK CLOCK_MONOTONIC +#define PG_INSTR_SYSTEM_CLOCK CLOCK_MONOTONIC #else -#define PG_INSTR_CLOCK CLOCK_REALTIME +#define PG_INSTR_SYSTEM_CLOCK CLOCK_REALTIME #endif static inline instr_time -pg_get_ticks(void) +pg_get_ticks_system(void) { instr_time now; struct timespec tmp; Assert(timing_initialized); - clock_gettime(PG_INSTR_CLOCK, &tmp); + clock_gettime(PG_INSTR_SYSTEM_CLOCK, &tmp); now.ticks = tmp.tv_sec * NS_PER_S + tmp.tv_nsec; return now; @@ -153,7 +232,7 @@ pg_get_ticks(void) /* On Windows, use QueryPerformanceCounter() for system clock source */ static inline instr_time -pg_get_ticks(void) +pg_get_ticks_system(void) { instr_time now; LARGE_INTEGER tmp; @@ -248,6 +327,84 @@ pg_ns_to_ticks(int64 ns) #endif /* PG_INSTR_TICKS_TO_NS */ } +#if PG_INSTR_TSC_CLOCK + +#ifdef _MSC_VER +#include +#endif /* defined(_MSC_VER) */ + +/* Helpers to abstract compiler differences for reading the x86 TSC. 
*/ +static inline int64 +pg_rdtsc(void) +{ +#ifdef _MSC_VER + return __rdtsc(); +#else + return __builtin_ia32_rdtsc(); +#endif /* defined(_MSC_VER) */ +} + +static inline int64 +pg_rdtscp(void) +{ + uint32 unused; + +#ifdef _MSC_VER + return __rdtscp(&unused); +#else + return __builtin_ia32_rdtscp(&unused); +#endif /* defined(_MSC_VER) */ +} + +/* + * Marked always_inline due to a shortcoming in gcc's heuristics leading to + * only inlining the function partially. + * See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=124795 + */ +static pg_attribute_always_inline instr_time +pg_get_ticks(void) +{ + if (likely(timing_tsc_enabled)) + { + instr_time now; + + now.ticks = pg_rdtscp(); + return now; + } + + return pg_get_ticks_system(); +} + +static pg_attribute_always_inline instr_time +pg_get_ticks_fast(void) +{ + if (likely(timing_tsc_enabled)) + { + instr_time now; + + now.ticks = pg_rdtsc(); + return now; + } + + return pg_get_ticks_system(); +} + +#else + +static pg_attribute_always_inline instr_time +pg_get_ticks(void) +{ + return pg_get_ticks_system(); +} + +static pg_attribute_always_inline instr_time +pg_get_ticks_fast(void) +{ + return pg_get_ticks_system(); +} + +#endif /* PG_INSTR_TSC_CLOCK */ + /* * Common macros */ @@ -256,6 +413,9 @@ pg_ns_to_ticks(int64 ns) #define INSTR_TIME_SET_ZERO(t) ((t).ticks = 0) +#define INSTR_TIME_SET_CURRENT_FAST(t) \ + ((t) = pg_get_ticks_fast()) + #define INSTR_TIME_SET_CURRENT(t) \ ((t) = pg_get_ticks()) diff --git a/src/include/utils/guc_hooks.h b/src/include/utils/guc_hooks.h index b01697c1f60..307f4fbaefe 100644 --- a/src/include/utils/guc_hooks.h +++ b/src/include/utils/guc_hooks.h @@ -163,6 +163,9 @@ extern const char *show_timezone(void); extern bool check_timezone_abbreviations(char **newval, void **extra, GucSource source); extern void assign_timezone_abbreviations(const char *newval, void *extra); +extern void assign_timing_clock_source(int newval, void *extra); +extern bool check_timing_clock_source(int *newval, 
void **extra, GucSource source); +extern const char *show_timing_clock_source(void); extern bool check_transaction_buffers(int *newval, void **extra, GucSource source); extern bool check_transaction_deferrable(bool *newval, void **extra, GucSource source); extern bool check_transaction_isolation(int *newval, void **extra, GucSource source); diff --git a/src/include/utils/guc_tables.h b/src/include/utils/guc_tables.h index 71a80161961..63440b8e36c 100644 --- a/src/include/utils/guc_tables.h +++ b/src/include/utils/guc_tables.h @@ -60,6 +60,7 @@ enum config_group CONN_AUTH_TCP, CONN_AUTH_AUTH, CONN_AUTH_SSL, + RESOURCES_TIME, RESOURCES_MEM, RESOURCES_DISK, RESOURCES_KERNEL, diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index 637c669a146..a998bb5e882 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -3185,6 +3185,7 @@ TimeoutId TimeoutType Timestamp TimestampTz +TimingClockSourceType TmFromChar TmToChar ToastAttrInfo