diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 584bc9f49dd..8bdbb6db0f9 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -2533,6 +2533,72 @@ include_dir 'conf.d'
+
+ Timing
+
+
+
+ timing_clock_source (enum)
+
+ timing_clock_source configuration parameter
+
+
+
+
+ Selects the method for making timing measurements using the OS or
+ specialized CPU instructions. Possible values are:
+
+
+
+ auto (automatically chooses TSC
+ clock source on supported x86-64 CPUs, otherwise uses the OS system
+ clock)
+
+
+
+
+ system (measures timing using the OS system clock)
+
+
+
+
+ tsc (measures timing with a CPU instruction, e.g.
+ using RDTSC/RDTSCP on x86-64)
+
+
+
+ The default is auto. Only superusers and users with
+ the appropriate SET privilege can change this setting.
+ Changing the setting while a query is executing is not recommended, as
+ interval timings already in progress may jump significantly or come out
+ negative.
+
+
+
+ Time-Stamp Counter
+ TSC
+
+ TSC
+ If enabled, the TSC clock source, named after the
+ Time-Stamp Counter on x86-64, uses specialized CPU instructions to
+ measure time intervals. This has lower overhead than reading the OS
+ system clock, so the act of measuring adds less to the reported runtime,
+ for example with EXPLAIN ANALYZE.
+
+
+ RDTSC
+ On x86-64 CPUs the TSC clock source uses the
+ RDTSC instruction for EXPLAIN ANALYZE.
+ For timings that require higher precision the RDTSCP
+ instruction is used instead, which avoids inaccuracies caused by CPU
+ instruction re-ordering. The TSC clock source is not
+ supported on older x86-64 CPUs or on other architectures. It is also not
+ advised on systems that use an emulated TSC, where it
+ is likely to be slower than the system clock source.
+
+
+
+
+
Background Writer
diff --git a/src/backend/executor/instrument.c b/src/backend/executor/instrument.c
index 011a9684df0..4c3aec7fdee 100644
--- a/src/backend/executor/instrument.c
+++ b/src/backend/executor/instrument.c
@@ -16,6 +16,8 @@
#include
#include "executor/instrument.h"
+#include "portability/instr_time.h"
+#include "utils/guc_hooks.h"
BufferUsage pgBufferUsage;
static BufferUsage save_pgBufferUsage;
@@ -52,7 +54,7 @@ InstrStart(Instrumentation *instr)
if (!INSTR_TIME_IS_ZERO(instr->starttime))
elog(ERROR, "InstrStart called twice in a row");
else
- INSTR_TIME_SET_CURRENT(instr->starttime);
+ INSTR_TIME_SET_CURRENT_FAST(instr->starttime);
}
/* save buffer usage totals at start, if needed */
@@ -78,7 +80,7 @@ InstrStopCommon(Instrumentation *instr, instr_time *accum_time)
if (INSTR_TIME_IS_ZERO(instr->starttime))
elog(ERROR, "InstrStop called without start");
- INSTR_TIME_SET_CURRENT(endtime);
+ INSTR_TIME_SET_CURRENT_FAST(endtime);
INSTR_TIME_ACCUM_DIFF(*accum_time, endtime, instr->starttime);
INSTR_TIME_SET_ZERO(instr->starttime);
@@ -345,3 +347,75 @@ WalUsageAccumDiff(WalUsage *dst, const WalUsage *add, const WalUsage *sub)
dst->wal_fpi_bytes += add->wal_fpi_bytes - sub->wal_fpi_bytes;
dst->wal_buffers_full += add->wal_buffers_full - sub->wal_buffers_full;
}
+
+/* GUC hooks for timing_clock_source */
+
+/*
+ * GUC check hook for timing_clock_source.
+ *
+ * Accepts every value except an explicit request for the TSC clock source
+ * when no usable TSC frequency is known (timing_tsc_frequency_khz <= 0); in
+ * that case an error detail is set and false is returned. The "extra" and
+ * "source" arguments are not used.
+ */
+bool
+check_timing_clock_source(int *newval, void **extra, GucSource source)
+{
+ /*
+ * Do nothing if timing is not initialized. This is only expected on child
+ * processes in EXEC_BACKEND builds, as GUC hooks can be called during
+ * InitializeGUCOptions() before InitProcessGlobals() has had a chance to
+ * run pg_initialize_timing(). Instead, TSC will be initialized via
+ * restore_backend_variables.
+ */
+#ifdef EXEC_BACKEND
+ if (!timing_initialized)
+ return true;
+#else
+ Assert(timing_initialized);
+#endif
+
+#if PG_INSTR_TSC_CLOCK
+ /* Probe the TSC if that has not happened yet, so the check below is valid */
+ pg_initialize_timing_tsc();
+
+ if (*newval == TIMING_CLOCK_SOURCE_TSC && timing_tsc_frequency_khz <= 0)
+ {
+ GUC_check_errdetail("TSC is not supported as timing clock source");
+ return false;
+ }
+#endif
+
+ return true;
+}
+
+/*
+ * GUC assign hook for timing_clock_source: makes newval the active clock
+ * source by calling pg_set_timing_clock_source(). The "extra" argument is
+ * not used.
+ */
+void
+assign_timing_clock_source(int newval, void *extra)
+{
+ /*
+ * As in check_timing_clock_source, the hook can run before timing has
+ * been initialized in EXEC_BACKEND child processes; skip the switch then.
+ */
+#ifdef EXEC_BACKEND
+ if (!timing_initialized)
+ return;
+#else
+ Assert(timing_initialized);
+#endif
+
+ /*
+ * Ignore the return code since the check hook already verified TSC is
+ * usable if it's explicitly requested.
+ */
+ pg_set_timing_clock_source(newval);
+}
+
+/*
+ * GUC show hook for timing_clock_source.
+ *
+ * For "auto" the result also names the clock source that is effectively in
+ * use, e.g. "auto (tsc)"; the explicit settings are shown as-is.
+ */
+const char *
+show_timing_clock_source(void)
+{
+	int			setting = timing_clock_source;
+	const char *shown = "?";	/* only for values outside the enum */
+
+	if (setting == TIMING_CLOCK_SOURCE_AUTO)
+	{
+		shown = "auto (system)";
+#if PG_INSTR_TSC_CLOCK
+		if (pg_current_timing_clock_source() == TIMING_CLOCK_SOURCE_TSC)
+			shown = "auto (tsc)";
+#endif
+	}
+	else if (setting == TIMING_CLOCK_SOURCE_SYSTEM)
+		shown = "system";
+#if PG_INSTR_TSC_CLOCK
+	else if (setting == TIMING_CLOCK_SOURCE_TSC)
+		shown = "tsc";
+#endif
+
+	return shown;
+}
diff --git a/src/backend/postmaster/launch_backend.c b/src/backend/postmaster/launch_backend.c
index ed0f4f2d234..8f3cfea880c 100644
--- a/src/backend/postmaster/launch_backend.c
+++ b/src/backend/postmaster/launch_backend.c
@@ -57,6 +57,7 @@
#ifdef EXEC_BACKEND
#include "nodes/queryjumble.h"
+#include "portability/instr_time.h"
#include "storage/pg_shmem.h"
#include "storage/spin.h"
#endif
@@ -129,6 +130,8 @@ typedef struct
int MyPMChildSlot;
+ int32 timing_tsc_frequency_khz;
+
/*
* These are only used by backend processes, but are here because passing
* a socket needs some special handling on Windows. 'client_sock' is an
@@ -750,6 +753,8 @@ save_backend_variables(BackendParameters *param,
param->MaxBackends = MaxBackends;
param->num_pmchild_slots = num_pmchild_slots;
+ param->timing_tsc_frequency_khz = timing_tsc_frequency_khz;
+
#ifdef WIN32
param->PostmasterHandle = PostmasterHandle;
if (!write_duplicated_handle(¶m->initial_signal_pipe,
@@ -1004,6 +1009,12 @@ restore_backend_variables(BackendParameters *param)
MaxBackends = param->MaxBackends;
num_pmchild_slots = param->num_pmchild_slots;
+ timing_tsc_frequency_khz = param->timing_tsc_frequency_khz;
+
+ /* Re-run logic usually done by assign_timing_clock_source */
+ pg_initialize_timing();
+ pg_set_timing_clock_source(timing_clock_source);
+
#ifdef WIN32
PostmasterHandle = param->PostmasterHandle;
pgwin32_initial_signal_pipe = param->initial_signal_pipe;
diff --git a/src/backend/utils/misc/guc_parameters.dat b/src/backend/utils/misc/guc_parameters.dat
index 632f3ba4989..86c1eba5dab 100644
--- a/src/backend/utils/misc/guc_parameters.dat
+++ b/src/backend/utils/misc/guc_parameters.dat
@@ -3060,6 +3060,17 @@
assign_hook => 'assign_timezone_abbreviations',
},
+{ name => 'timing_clock_source', type => 'enum', context => 'PGC_SUSET', group => 'RESOURCES_TIME',
+ short_desc => 'Controls the clock source used for collecting timing measurements.',
+ long_desc => 'This enables the use of specialized clock sources, specifically the RDTSC clock source on x86-64 systems (if available), to support timing measurements with lower overhead during EXPLAIN and other instrumentation.',
+ variable => 'timing_clock_source',
+ boot_val => 'TIMING_CLOCK_SOURCE_AUTO',
+ options => 'timing_clock_source_options',
+ check_hook => 'check_timing_clock_source',
+ assign_hook => 'assign_timing_clock_source',
+ show_hook => 'show_timing_clock_source',
+},
+
{ name => 'trace_connection_negotiation', type => 'bool', context => 'PGC_POSTMASTER', group => 'DEVELOPER_OPTIONS',
short_desc => 'Logs details of pre-authentication connection handshake.',
flags => 'GUC_NOT_IN_SAMPLE',
diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c
index d9ca13baff9..290ccbc543e 100644
--- a/src/backend/utils/misc/guc_tables.c
+++ b/src/backend/utils/misc/guc_tables.c
@@ -90,6 +90,7 @@
#include "storage/standby.h"
#include "tcop/backend_startup.h"
#include "tcop/tcopprot.h"
+#include "portability/instr_time.h"
#include "tsearch/ts_cache.h"
#include "utils/builtins.h"
#include "utils/bytea.h"
@@ -373,6 +374,15 @@ static const struct config_enum_entry huge_pages_options[] = {
{NULL, 0, false}
};
+/*
+ * Accepted values for the timing_clock_source GUC. "tsc" is only offered in
+ * builds where PG_INSTR_TSC_CLOCK is enabled.
+ */
+static const struct config_enum_entry timing_clock_source_options[] = {
+ {"auto", TIMING_CLOCK_SOURCE_AUTO, false},
+ {"system", TIMING_CLOCK_SOURCE_SYSTEM, false},
+#if PG_INSTR_TSC_CLOCK
+ {"tsc", TIMING_CLOCK_SOURCE_TSC, false},
+#endif
+ {NULL, 0, false}
+};
+
static const struct config_enum_entry huge_pages_status_options[] = {
{"off", HUGE_PAGES_OFF, false},
{"on", HUGE_PAGES_ON, false},
@@ -731,6 +741,7 @@ const char *const config_group_names[] =
[CONN_AUTH_TCP] = gettext_noop("Connections and Authentication / TCP Settings"),
[CONN_AUTH_AUTH] = gettext_noop("Connections and Authentication / Authentication"),
[CONN_AUTH_SSL] = gettext_noop("Connections and Authentication / SSL"),
+ [RESOURCES_TIME] = gettext_noop("Resource Usage / Time"),
[RESOURCES_MEM] = gettext_noop("Resource Usage / Memory"),
[RESOURCES_DISK] = gettext_noop("Resource Usage / Disk"),
[RESOURCES_KERNEL] = gettext_noop("Resource Usage / Kernel Resources"),
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index 2e10eb4a36a..4f2bbf05295 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -196,6 +196,10 @@
#max_files_per_process = 1000 # min 64
# (change requires restart)
+# - Time -
+
+#timing_clock_source = auto # auto, system, tsc (if supported)
+
# - Background Writer -
#bgwriter_delay = 200ms # 10-10000ms between rounds
diff --git a/src/common/instr_time.c b/src/common/instr_time.c
index 9271113a287..fc6e1852c30 100644
--- a/src/common/instr_time.c
+++ b/src/common/instr_time.c
@@ -18,14 +18,17 @@
#include "postgres_fe.h"
#endif
+#include <math.h>
+
+#include "port/pg_cpu.h"
#include "portability/instr_time.h"
/*
* Stores what the number of ticks needs to be multiplied with to end up
* with nanoseconds using integer math.
*
- * On certain platforms (currently Windows) the ticks to nanoseconds conversion
- * requires floating point math because:
+ * In certain cases (TSC on x86-64, and QueryPerformanceCounter on Windows)
+ * the ticks to nanoseconds conversion requires floating point math because:
*
* sec = ticks / frequency_hz
* ns = ticks / frequency_hz * 1,000,000,000
@@ -51,16 +54,26 @@
* value to encourage compilers to generate better assembly, since we can be
* sure these values are not negative.
*
- * On all other platforms we are using clock_gettime(), which uses nanoseconds
+ * In all other cases we are using clock_gettime(), which uses nanoseconds
* as ticks. Hence, we set the multiplier to zero, which causes pg_ticks_to_ns
* to return the original value.
*/
uint64 ticks_per_ns_scaled = 0;
uint64 max_ticks_no_overflow = 0;
bool timing_initialized = false;
+/* Requested clock source (a TimingClockSourceType), see timing_clock_source GUC */
+int timing_clock_source = TIMING_CLOCK_SOURCE_AUTO;
+/* Whether timing reads currently use the TSC instead of the system clock */
+bool timing_tsc_enabled = false;
+/* TSC frequency in kHz; -1 = not yet initialized, 0 = TSC not usable */
+int32 timing_tsc_frequency_khz = -1;
+
+static void set_ticks_per_ns(void);
static void set_ticks_per_ns_system(void);
+#if PG_INSTR_TSC_CLOCK
+static bool tsc_use_by_default(void);
+static void set_ticks_per_ns_for_tsc(void);
+#endif
+
/*
* Initializes timing infrastructure. Must be called before making any use
* of INSTR* macros.
@@ -75,6 +88,49 @@ pg_initialize_timing(void)
timing_initialized = true;
}
+/*
+ * Switch the clock source used for timing measurements.
+ *
+ * Returns true once the requested source is in effect. Returns false, without
+ * switching, if TSC was requested explicitly but no usable TSC frequency is
+ * known.
+ */
+bool
+pg_set_timing_clock_source(TimingClockSourceType source)
+{
+	Assert(timing_initialized);
+
+#if PG_INSTR_TSC_CLOCK
+	/* Probe the TSC first; this does nothing once it has been probed */
+	pg_initialize_timing_tsc();
+
+	if (source == TIMING_CLOCK_SOURCE_TSC)
+	{
+		/* Explicit request: tell the caller TSC is not usable */
+		if (timing_tsc_frequency_khz <= 0)
+			return false;
+		timing_tsc_enabled = true;
+	}
+	else if (source == TIMING_CLOCK_SOURCE_SYSTEM)
+		timing_tsc_enabled = false;
+	else if (source == TIMING_CLOCK_SOURCE_AUTO)
+		timing_tsc_enabled = (timing_tsc_frequency_khz > 0) && tsc_use_by_default();
+#endif
+
+	/* Recompute the tick conversion factors for whichever source is active */
+	set_ticks_per_ns();
+	timing_clock_source = source;
+
+	return true;
+}
+
+/*
+ * Set up the ticks-to-nanoseconds conversion for the clock source that is
+ * currently active: the TSC if timing_tsc_enabled, else the system clock.
+ */
+static void
+set_ticks_per_ns(void)
+{
+#if PG_INSTR_TSC_CLOCK
+	if (timing_tsc_enabled)
+		set_ticks_per_ns_for_tsc();
+	else
+#endif
+		set_ticks_per_ns_system();
+}
+
#ifndef WIN32
static void
@@ -104,3 +160,213 @@ set_ticks_per_ns_system(void)
}
#endif /* WIN32 */
+
+/* TSC specific logic */
+
+#if PG_INSTR_TSC_CLOCK
+
+static void tsc_detect_frequency(void);
+
+/*
+ * Initialize the TSC clock source: find out whether it is usable and, if so,
+ * at which frequency it advances.
+ *
+ * Calling this repeatedly is cheap. Detection only runs while
+ * timing_tsc_frequency_khz is still negative, and a detection that finds the
+ * TSC unusable leaves it at 0. On EXEC_BACKEND (Windows), the frequency may
+ * also have been set already by restore_backend_variables.
+ */
+void
+pg_initialize_timing_tsc(void)
+{
+	/* Already known to be usable (> 0) or unusable (0): nothing to do */
+	if (timing_tsc_frequency_khz >= 0)
+		return;
+
+	tsc_detect_frequency();
+}
+
+/*
+ * Compute the ticks-to-nanoseconds conversion factors for the TSC clock
+ * source from timing_tsc_frequency_khz.
+ *
+ * Must only be called once a usable (positive) TSC frequency is known, since
+ * the frequency is used as a divisor.
+ */
+static void
+set_ticks_per_ns_for_tsc(void)
+{
+	/* Guard the division against an unprobed (-1) or unusable (0) TSC */
+	Assert(timing_tsc_frequency_khz > 0);
+
+	/*
+	 * Nanoseconds per tick is (NS_PER_S / 1000) / frequency_khz; it is kept
+	 * as a fixed-point value scaled up by TICKS_TO_NS_SHIFT bits.
+	 */
+	ticks_per_ns_scaled = ((NS_PER_S / 1000) << TICKS_TO_NS_SHIFT) / timing_tsc_frequency_khz;
+
+	/* Largest tick count whose product with the factor fits in an int64 */
+	max_ticks_no_overflow = PG_INT64_MAX / ticks_per_ns_scaled;
+}
+
+/*
+ * Detect the TSC frequency and whether RDTSCP is available on x86-64.
+ *
+ * This can't be reliably determined at compile time, since the
+ * availability of an "invariant" TSC (that is not affected by CPU
+ * frequency changes) is dependent on the CPU architecture. Additionally,
+ * there are cases where TSC availability is impacted by virtualization,
+ * where a simple cpuid feature check would not be enough.
+ *
+ * On return timing_tsc_frequency_khz is either the frequency in kHz, or 0
+ * if the TSC is not usable.
+ */
+static void
+tsc_detect_frequency(void)
+{
+ /*
+ * Mark the TSC as probed-but-unusable up front. The early return below
+ * relies on this, and pg_tsc_calibrate_frequency() asserts that the
+ * frequency is no longer negative before it runs.
+ */
+ timing_tsc_frequency_khz = 0;
+
+ /* We require RDTSCP support and an invariant TSC, bail if not available */
+ if (!x86_feature_available(PG_RDTSCP) || !x86_feature_available(PG_TSC_INVARIANT))
+ return;
+
+ /* Determine speed at which the TSC advances */
+ timing_tsc_frequency_khz = x86_tsc_frequency_khz();
+ if (timing_tsc_frequency_khz > 0)
+ return;
+
+ /*
+ * CPUID did not give us the TSC frequency. We can instead measure the
+ * frequency by comparing ticks against walltime in a calibration loop.
+ */
+ timing_tsc_frequency_khz = pg_tsc_calibrate_frequency();
+}
+
+/*
+ * Decide whether "auto" should pick the TSC clock source, given that the TSC
+ * is available (the caller checks that separately).
+ *
+ * The policy is inspired by the Linux kernel's clocksource watchdog disable
+ * logic as updated in 2021 to reflect the reliability of the TSC on Intel
+ * platforms; see check_system_tsc_reliable() in arch/x86/kernel/tsc.c and the
+ * discussion in
+ * https://lore.kernel.org/lkml/87eekfk8bd.fsf@nanos.tec.linutronix.de/
+ * and https://lore.kernel.org/lkml/87a6pimt1f.ffs@nanos.tec.linutronix.de/
+ *
+ * - If the CPU reports TSC_ADJUST (taken to indicate an Intel platform), the
+ *   TSC is trusted by default, matching the Linux kernel.
+ * - On other CPU platforms (e.g. AMD), or in some virtual machines, there is
+ *   no easy way to determine the TSC's reliability. On Linux we trust it
+ *   only if TSC is the kernel's active clocksource, since the kernel has
+ *   then run its watchdog logic to monitor TSC correctness.
+ * - Everywhere else the user must enable the TSC explicitly via the GUC.
+ */
+static bool
+tsc_use_by_default(void)
+{
+	bool		trusted = x86_feature_available(PG_TSC_ADJUST);
+
+#if defined(__linux__)
+	if (!trusted)
+	{
+		FILE	   *sysfs;
+
+		sysfs = fopen("/sys/devices/system/clocksource/clocksource0/current_clocksource", "r");
+		if (sysfs != NULL)
+		{
+			char		line[128];
+
+			/* The file holds the clocksource name followed by a newline */
+			if (fgets(line, sizeof(line), sysfs) != NULL)
+				trusted = (strcmp(line, "tsc\n") == 0);
+
+			fclose(sysfs);
+		}
+	}
+#endif
+
+	return trusted;
+}
+
+/*
+ * Calibrate the TSC frequency by comparing TSC ticks against walltime.
+ *
+ * Takes initial TSC and system clock snapshots, then loops, recomputing the
+ * frequency each TSC_CALIBRATION_SKIPS iterations from cumulative TSC
+ * ticks divided by elapsed time.
+ *
+ * Once the frequency estimate stabilizes (TSC_CALIBRATION_STABLE_CYCLES
+ * estimates in a row each differ from the previous one by less than 0.01%),
+ * we consider it converged and the frequency in kHz is returned. If either
+ * too many iterations or a time limit passes without convergence, 0 is
+ * returned.
+ *
+ * The clock source is switched to the system clock while calibrating and
+ * the previous setting is restored before returning.
+ */
+/* Give up once this much wall-clock time has elapsed */
+#define TSC_CALIBRATION_MAX_NS (50 * NS_PER_MS)
+/* Upper bound on loop iterations */
+#define TSC_CALIBRATION_ITERATIONS 1000000
+/* Only every this many iterations produces a frequency estimate */
+#define TSC_CALIBRATION_SKIPS 100
+/* Number of consecutive agreeing estimates required for convergence */
+#define TSC_CALIBRATION_STABLE_CYCLES 10
+uint32
+pg_tsc_calibrate_frequency(void)
+{
+ instr_time initial_wall;
+ int64 initial_tsc;
+ double freq_khz = 0;
+ double prev_freq_khz = 0;
+ int stable_count = 0;
+ int64 prev_tsc;
+ int saved_clock_source = timing_clock_source;
+
+ /*
+ * Frequency must be initialized to avoid recursion via
+ * pg_set_timing_clock_source.
+ */
+ Assert(timing_tsc_frequency_khz >= 0);
+
+ /* Ensure INSTR_* calls below work on system time */
+ pg_set_timing_clock_source(TIMING_CLOCK_SOURCE_SYSTEM);
+
+ INSTR_TIME_SET_CURRENT(initial_wall);
+
+ initial_tsc = pg_rdtscp();
+ prev_tsc = initial_tsc;
+
+ for (int i = 0; i < TSC_CALIBRATION_ITERATIONS; i++)
+ {
+ instr_time now_wall;
+ int64 now_tsc;
+ int64 elapsed_ns;
+ int64 elapsed_ticks;
+
+ INSTR_TIME_SET_CURRENT(now_wall);
+
+ now_tsc = pg_rdtscp();
+
+ INSTR_TIME_SUBTRACT(now_wall, initial_wall);
+ elapsed_ns = INSTR_TIME_GET_NANOSEC(now_wall);
+
+ /* Safety: bail out if we've taken too long */
+ if (elapsed_ns >= TSC_CALIBRATION_MAX_NS)
+ break;
+
+ elapsed_ticks = now_tsc - initial_tsc;
+
+ /*
+ * Skip if TSC hasn't advanced, or we walked backwards for some
+ * reason.
+ */
+ if (now_tsc == prev_tsc || elapsed_ns <= 0 || elapsed_ticks <= 0)
+ continue;
+
+ /*
+ * We only measure frequency every TSC_CALIBRATION_SKIPS to avoid
+ * stabilizing based on just a handful of RDTSC instructions.
+ */
+ if (i % TSC_CALIBRATION_SKIPS != 0)
+ continue;
+
+ /* ticks per nanosecond, scaled by 1000 * 1000 to give kHz */
+ freq_khz = ((double) elapsed_ticks / elapsed_ns) * 1000 * 1000;
+
+ /*
+ * Once freq_khz / prev_freq_khz is close to 1 (the estimate moved by
+ * less than 0.01%), check if it stays that way. If it does for long
+ * enough, we've got a winner frequency.
+ */
+ if (prev_freq_khz != 0 && fabs(1 - freq_khz / prev_freq_khz) < 0.0001)
+ {
+ stable_count++;
+ if (stable_count >= TSC_CALIBRATION_STABLE_CYCLES)
+ break;
+ }
+ else
+ stable_count = 0;
+
+ prev_tsc = now_tsc;
+ prev_freq_khz = freq_khz;
+ }
+
+ /* Restore the previous clock source */
+ pg_set_timing_clock_source(saved_clock_source);
+
+ if (stable_count < TSC_CALIBRATION_STABLE_CYCLES)
+ return 0; /* did not converge */
+
+ return (uint32) freq_khz;
+}
+
+#endif /* PG_INSTR_TSC_CLOCK */
diff --git a/src/include/portability/instr_time.h b/src/include/portability/instr_time.h
index 115f5176317..5da5eb2c057 100644
--- a/src/include/portability/instr_time.h
+++ b/src/include/portability/instr_time.h
@@ -4,9 +4,10 @@
* portable high-precision interval timing
*
* This file provides an abstraction layer to hide portability issues in
- * interval timing. On Unix we use clock_gettime(), and on Windows we use
- * QueryPerformanceCounter(). These macros also give some breathing room to
- * use other high-precision-timing APIs.
+ * interval timing. On x86 we use the RDTSC/RDTSCP instruction directly in
+ * certain cases, or alternatively clock_gettime() on Unix-like systems and
+ * QueryPerformanceCounter() on Windows. These macros also give some breathing
+ * room to use other high-precision-timing APIs.
*
* The basic data type is instr_time, which all callers should treat as an
* opaque typedef. instr_time can store either an absolute time (of
@@ -17,7 +18,11 @@
*
* INSTR_TIME_SET_ZERO(t) set t to zero (memset is acceptable too)
*
- * INSTR_TIME_SET_CURRENT(t) set t to current time
+ * INSTR_TIME_SET_CURRENT_FAST(t) set t to current time without waiting
+ * for earlier instructions still in the CPU's out-of-order window
+ *
+ * INSTR_TIME_SET_CURRENT(t) set t to current time after waiting for
+ * earlier instructions in the out-of-order window to retire
*
*
* INSTR_TIME_ADD(x, y) x += y
@@ -86,28 +91,99 @@ typedef struct instr_time
/*
* PG_INSTR_TICKS_TO_NS controls whether pg_ticks_to_ns/pg_ns_to_ticks needs to
* check ticks_per_ns_scaled and potentially convert ticks <=> nanoseconds.
+ *
+ * PG_INSTR_TSC_CLOCK controls whether the TSC clock source is compiled in, and
+ * potentially used based on timing_tsc_enabled.
*/
-#ifdef WIN32
+#if defined(__x86_64__) || defined(_M_X64)
#define PG_INSTR_TICKS_TO_NS 1
+#define PG_INSTR_TSC_CLOCK 1
+#elif defined(WIN32)
+#define PG_INSTR_TICKS_TO_NS 1
+#define PG_INSTR_TSC_CLOCK 0
#else
#define PG_INSTR_TICKS_TO_NS 0
+#define PG_INSTR_TSC_CLOCK 0
#endif
/*
* Variables used to translate ticks to nanoseconds, initialized by
- * pg_initialize_timing.
+ * pg_initialize_timing and adjusted by pg_set_timing_clock_source calls or
+ * changes of the "timing_clock_source" GUC.
+ *
+ * Note that changing these values after setting an instr_time and before
+ * reading/converting it will lead to incorrect results. This is technically
+ * possible because the GUC can be changed at runtime, but unlikely, and we
+ * allow changing this at runtime to simplify testing of different sources.
*/
extern PGDLLIMPORT uint64 ticks_per_ns_scaled;
extern PGDLLIMPORT uint64 max_ticks_no_overflow;
extern PGDLLIMPORT bool timing_initialized;
+/*
+ * Clock sources selectable through the timing_clock_source GUC.
+ *
+ * TIMING_CLOCK_SOURCE_TSC only exists in builds with PG_INSTR_TSC_CLOCK.
+ */
+typedef enum
+{
+	TIMING_CLOCK_SOURCE_AUTO,
+	TIMING_CLOCK_SOURCE_SYSTEM,
+#if PG_INSTR_TSC_CLOCK
+	TIMING_CLOCK_SOURCE_TSC
+#endif
+} TimingClockSourceType;
+
+/*
+ * Current clock source setting (a TimingClockSourceType stored as int).
+ * Marked PGDLLIMPORT like the other variables exported from this header.
+ */
+extern PGDLLIMPORT int timing_clock_source;
+
/*
* Initialize timing infrastructure
*
- * This must be called at least once before using INSTR_TIME_SET_CURRENT* macros.
+ * This must be called at least once before using INSTR_TIME_SET_CURRENT*
+ * macros.
+ *
+ * If you want to use the TSC clock source in a client program,
+ * pg_set_timing_clock_source() needs to also be called.
*/
extern void pg_initialize_timing(void);
+/*
+ * Sets the time source to be used. Mainly intended for frontend programs,
+ * the backend should set it via the timing_clock_source GUC instead.
+ *
+ * Returns false if the clock source could not be set, for example when TSC
+ * is not available despite being explicitly set.
+ */
+extern bool pg_set_timing_clock_source(TimingClockSourceType source);
+
+/* Whether to actually use TSC based on availability and GUC settings. */
+extern PGDLLIMPORT bool timing_tsc_enabled;
+
+/*
+ * TSC frequency in kHz, set during initialization.
+ *
+ * -1 = not yet initialized, 0 = TSC not usable, >0 = frequency in kHz.
+ */
+extern PGDLLIMPORT int32 timing_tsc_frequency_khz;
+
+#if PG_INSTR_TSC_CLOCK
+
+extern void pg_initialize_timing_tsc(void);
+
+extern uint32 pg_tsc_calibrate_frequency(void);
+
+#endif /* PG_INSTR_TSC_CLOCK */
+
+/*
+ * Reports which clock source is effectively in use right now. The result is
+ * never TIMING_CLOCK_SOURCE_AUTO: it is TIMING_CLOCK_SOURCE_TSC when the TSC
+ * is compiled in and enabled, and TIMING_CLOCK_SOURCE_SYSTEM otherwise.
+ */
+static inline TimingClockSourceType
+pg_current_timing_clock_source(void)
+{
+	TimingClockSourceType effective = TIMING_CLOCK_SOURCE_SYSTEM;
+
+#if PG_INSTR_TSC_CLOCK
+	if (timing_tsc_enabled)
+		effective = TIMING_CLOCK_SOURCE_TSC;
+#endif
+
+	return effective;
+}
+
#ifndef WIN32
/* On POSIX, use clock_gettime() for system clock source */
@@ -125,24 +201,27 @@ extern void pg_initialize_timing(void);
* than CLOCK_MONOTONIC. In particular, as of macOS 10.12, Apple provides
* CLOCK_MONOTONIC_RAW which is both faster to read and higher resolution than
* their version of CLOCK_MONOTONIC.
+ *
+ * Note this does not get used in case the TSC clock source logic is used,
+ * which directly calls architecture specific timing instructions (e.g. RDTSC).
*/
#if defined(__darwin__) && defined(CLOCK_MONOTONIC_RAW)
-#define PG_INSTR_CLOCK CLOCK_MONOTONIC_RAW
+#define PG_INSTR_SYSTEM_CLOCK CLOCK_MONOTONIC_RAW
#elif defined(CLOCK_MONOTONIC)
-#define PG_INSTR_CLOCK CLOCK_MONOTONIC
+#define PG_INSTR_SYSTEM_CLOCK CLOCK_MONOTONIC
#else
-#define PG_INSTR_CLOCK CLOCK_REALTIME
+#define PG_INSTR_SYSTEM_CLOCK CLOCK_REALTIME
#endif
static inline instr_time
-pg_get_ticks(void)
+pg_get_ticks_system(void)
{
instr_time now;
struct timespec tmp;
Assert(timing_initialized);
- clock_gettime(PG_INSTR_CLOCK, &tmp);
+ clock_gettime(PG_INSTR_SYSTEM_CLOCK, &tmp);
now.ticks = tmp.tv_sec * NS_PER_S + tmp.tv_nsec;
return now;
@@ -153,7 +232,7 @@ pg_get_ticks(void)
/* On Windows, use QueryPerformanceCounter() for system clock source */
static inline instr_time
-pg_get_ticks(void)
+pg_get_ticks_system(void)
{
instr_time now;
LARGE_INTEGER tmp;
@@ -248,6 +327,84 @@ pg_ns_to_ticks(int64 ns)
#endif /* PG_INSTR_TICKS_TO_NS */
}
+#if PG_INSTR_TSC_CLOCK
+
+#ifdef _MSC_VER
+#include <intrin.h>
+#endif							/* defined(_MSC_VER) */
+
+/*
+ * Helpers to abstract compiler differences for reading the x86 TSC.
+ *
+ * pg_rdtsc() reads the counter through the compiler's RDTSC intrinsic:
+ * __rdtsc() on MSVC (declared in <intrin.h>), __builtin_ia32_rdtsc()
+ * elsewhere.
+ */
+static inline int64
+pg_rdtsc(void)
+{
+#ifdef _MSC_VER
+	return __rdtsc();
+#else
+	return __builtin_ia32_rdtsc();
+#endif							/* defined(_MSC_VER) */
+}
+
+/*
+ * Read the TSC through the compiler's RDTSCP intrinsic: __rdtscp() on MSVC,
+ * __builtin_ia32_rdtscp() elsewhere.
+ */
+static inline int64
+pg_rdtscp(void)
+{
+	uint32		aux;			/* second output of the intrinsic, ignored */
+	int64		tsc;
+
+#ifdef _MSC_VER
+	tsc = __rdtscp(&aux);
+#else
+	tsc = __builtin_ia32_rdtscp(&aux);
+#endif							/* defined(_MSC_VER) */
+
+	return tsc;
+}
+
+/*
+ * Marked always_inline due to a shortcoming in gcc's heuristics leading to
+ * only inlining the function partially.
+ * See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=124795
+ */
+static pg_attribute_always_inline instr_time
+pg_get_ticks(void)
+{
+	instr_time	now;
+
+	/* Read the TSC via RDTSCP when enabled, else ask the system clock */
+	if (likely(timing_tsc_enabled))
+		now.ticks = pg_rdtscp();
+	else
+		now = pg_get_ticks_system();
+
+	return now;
+}
+
+/*
+ * Like pg_get_ticks(), but reads the TSC with RDTSC instead of RDTSCP when
+ * the TSC clock source is enabled. With the system clock source the two
+ * functions behave the same.
+ */
+static pg_attribute_always_inline instr_time
+pg_get_ticks_fast(void)
+{
+	instr_time	now;
+
+	if (likely(timing_tsc_enabled))
+		now.ticks = pg_rdtsc();
+	else
+		now = pg_get_ticks_system();
+
+	return now;
+}
+
+#else
+
+/* Without the TSC clock source compiled in, always read the system clock. */
+static pg_attribute_always_inline instr_time
+pg_get_ticks(void)
+{
+ return pg_get_ticks_system();
+}
+
+/*
+ * Without the TSC clock source compiled in there is no faster variant, so
+ * this reads the system clock exactly like pg_get_ticks().
+ */
+static pg_attribute_always_inline instr_time
+pg_get_ticks_fast(void)
+{
+ return pg_get_ticks_system();
+}
+
+#endif /* PG_INSTR_TSC_CLOCK */
+
/*
* Common macros
*/
@@ -256,6 +413,9 @@ pg_ns_to_ticks(int64 ns)
#define INSTR_TIME_SET_ZERO(t) ((t).ticks = 0)
+/*
+ * Set t to the current time via pg_get_ticks_fast(), which reads the TSC
+ * with RDTSC rather than RDTSCP when the TSC clock source is active.
+ */
+#define INSTR_TIME_SET_CURRENT_FAST(t) \
+ ((t) = pg_get_ticks_fast())
+
#define INSTR_TIME_SET_CURRENT(t) \
((t) = pg_get_ticks())
diff --git a/src/include/utils/guc_hooks.h b/src/include/utils/guc_hooks.h
index b01697c1f60..307f4fbaefe 100644
--- a/src/include/utils/guc_hooks.h
+++ b/src/include/utils/guc_hooks.h
@@ -163,6 +163,9 @@ extern const char *show_timezone(void);
extern bool check_timezone_abbreviations(char **newval, void **extra,
GucSource source);
extern void assign_timezone_abbreviations(const char *newval, void *extra);
+extern void assign_timing_clock_source(int newval, void *extra);
+extern bool check_timing_clock_source(int *newval, void **extra, GucSource source);
+extern const char *show_timing_clock_source(void);
extern bool check_transaction_buffers(int *newval, void **extra, GucSource source);
extern bool check_transaction_deferrable(bool *newval, void **extra, GucSource source);
extern bool check_transaction_isolation(int *newval, void **extra, GucSource source);
diff --git a/src/include/utils/guc_tables.h b/src/include/utils/guc_tables.h
index 71a80161961..63440b8e36c 100644
--- a/src/include/utils/guc_tables.h
+++ b/src/include/utils/guc_tables.h
@@ -60,6 +60,7 @@ enum config_group
CONN_AUTH_TCP,
CONN_AUTH_AUTH,
CONN_AUTH_SSL,
+ RESOURCES_TIME,
RESOURCES_MEM,
RESOURCES_DISK,
RESOURCES_KERNEL,
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 637c669a146..a998bb5e882 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -3185,6 +3185,7 @@ TimeoutId
TimeoutType
Timestamp
TimestampTz
+TimingClockSourceType
TmFromChar
TmToChar
ToastAttrInfo