From 821ff5e8fa1df2e09fea157ee3e298eef6cf4ec9 Mon Sep 17 00:00:00 2001 From: Evan Hunt Date: Wed, 28 Oct 2015 22:19:18 -0700 Subject: [PATCH] [master] isc_atomic_storeq()/stats improvements 4248. [func] Add an isc_atomic_storeq() function, use it in stats counters to improve performance. [RT #39972] [RT #39979] --- CHANGES | 4 + bin/tests/atomic/t_atomic.c | 162 ++++++++++++++++++++++++++-- configure | 5 + configure.in | 4 + lib/isc/include/isc/platform.h.in | 10 +- lib/isc/stats.c | 84 ++++++++++----- lib/isc/tests/print_test.c | 6 ++ lib/isc/win32/include/isc/atomic.h | 14 ++- lib/isc/x86_32/include/isc/atomic.h | 24 ++++- lib/isc/x86_64/include/isc/atomic.h | 11 +- win32utils/Configure | 2 + 11 files changed, 285 insertions(+), 41 deletions(-) diff --git a/CHANGES b/CHANGES index 6fb6191cbf..2b2845b0d8 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,7 @@ +4248. [func] Add an isc_atomic_storeq() function, use it in + stats counters to improve performance. + [RT #39972] [RT #39979] + 4247. [port] Require both HAVE_JSON and JSON_C_VERSION to be defined to report json library version. 
[RT #41045] diff --git a/bin/tests/atomic/t_atomic.c b/bin/tests/atomic/t_atomic.c index f63ddf1869..b73da02ad3 100644 --- a/bin/tests/atomic/t_atomic.c +++ b/bin/tests/atomic/t_atomic.c @@ -65,14 +65,10 @@ typedef struct { counter_t counters[TASKS]; -void do_xaddq(isc_task_t *task, isc_event_t *ev); - #if defined(ISC_PLATFORM_HAVEXADD) -isc_int32_t counter_32; +static isc_int32_t counter_32; -void do_xadd(isc_task_t *task, isc_event_t *ev); - -void +static void do_xadd(isc_task_t *task, isc_event_t *ev) { counter_t *state = (counter_t *)ev->ev_arg; int i; @@ -128,11 +124,9 @@ test_atomic_xadd() { #endif #if defined(ISC_PLATFORM_HAVEXADDQ) -isc_int64_t counter_64; +static isc_int64_t counter_64; -void do_xaddq(isc_task_t *task, isc_event_t *ev); - -void +static void do_xaddq(isc_task_t *task, isc_event_t *ev) { counter_t *state = (counter_t *)ev->ev_arg; int i; @@ -188,6 +182,150 @@ test_atomic_xaddq() { } #endif +static isc_int32_t store_32; + +static void +do_store(isc_task_t *task, isc_event_t *ev) { + counter_t *state = (counter_t *)ev->ev_arg; + int i; + isc_uint8_t r; + isc_uint32_t val; + + r = random() % 256; + val = (r << 24) | (r << 16) | (r << 8) | r; + + for (i = 0 ; i < COUNTS_PER_ITERATION ; i++) { + isc_atomic_store(&store_32, val); + } + + state->iteration++; + if (state->iteration < ITERATIONS) { + isc_task_send(task, &ev); + } else { + isc_event_free(&ev); + } +} + +static void +test_atomic_store() { + int test_result; + isc_task_t *tasks[TASKS]; + isc_event_t *event; + int i; + isc_uint8_t r; + isc_uint32_t val; + + t_assert("test_atomic_store", 1, T_REQUIRED, "%s", + "ensure that isc_atomic_store() works."); + + setup(); + + memset(counters, 0, sizeof(counters)); + store_32 = 0; + + /* + * Create our tasks, and allocate an event to get the counters + * going. 
+ */ + for (i = 0 ; i < TASKS ; i++) { + tasks[i] = NULL; + CHECK(isc_task_create(task_manager, 0, &tasks[i])); + event = isc_event_allocate(mctx, NULL, 1000, do_store, + &counters[i], + sizeof(struct isc_event)); + isc_task_sendanddetach(&tasks[i], &event); + } + + teardown(); + + test_result = T_PASS; + r = store_32 & 0xff; + val = (r << 24) | (r << 16) | (r << 8) | r; + t_info("32-bit store 0x%x, expected 0x%x\n", + (isc_uint32_t) store_32, val); + if ((isc_uint32_t) store_32 != val) + test_result = T_FAIL; + t_result(test_result); + + store_32 = 0; +} + +#if defined(ISC_PLATFORM_HAVEATOMICSTOREQ) +static isc_int64_t store_64; + +static void +do_storeq(isc_task_t *task, isc_event_t *ev) { + counter_t *state = (counter_t *)ev->ev_arg; + int i; + isc_uint8_t r; + isc_uint64_t val; + + r = random() % 256; + val = (((isc_uint64_t) r << 24) | + ((isc_uint64_t) r << 16) | + ((isc_uint64_t) r << 8) | + (isc_uint64_t) r); + val |= ((isc_uint64_t) val << 32); + + for (i = 0 ; i < COUNTS_PER_ITERATION ; i++) { + isc_atomic_storeq(&store_64, val); + } + + state->iteration++; + if (state->iteration < ITERATIONS) { + isc_task_send(task, &ev); + } else { + isc_event_free(&ev); + } +} + +static void +test_atomic_storeq() { + int test_result; + isc_task_t *tasks[TASKS]; + isc_event_t *event; + int i; + isc_uint8_t r; + isc_uint64_t val; + + t_assert("test_atomic_storeq", 1, T_REQUIRED, "%s", + "ensure that isc_atomic_storeq() works."); + + setup(); + + memset(counters, 0, sizeof(counters)); + store_64 = 0; + + /* + * Create our tasks, and allocate an event to get the counters going. 
+ */ + for (i = 0 ; i < TASKS ; i++) { + tasks[i] = NULL; + CHECK(isc_task_create(task_manager, 0, &tasks[i])); + event = isc_event_allocate(mctx, NULL, 1000, do_storeq, + &counters[i], sizeof(struct isc_event)); + isc_task_sendanddetach(&tasks[i], &event); + } + + teardown(); + + test_result = T_PASS; + r = store_64 & 0xff; + val = (((isc_uint64_t) r << 24) | + ((isc_uint64_t) r << 16) | + ((isc_uint64_t) r << 8) | + (isc_uint64_t) r); + val |= ((isc_uint64_t) val << 32); + t_info("64-bit store 0x%"ISC_PRINT_QUADFORMAT"x, expected 0x%"ISC_PRINT_QUADFORMAT"x\n", + (isc_uint64_t) store_64, val); + if ((isc_uint64_t) store_64 != val) + test_result = T_FAIL; + t_result(test_result); + + store_64 = 0; +} +#endif /* ISC_PLATFORM_HAVEATOMICSTOREQ */ + testspec_t T_testlist[] = { #if defined(ISC_PLATFORM_HAVEXADD) @@ -195,6 +333,10 @@ testspec_t T_testlist[] = { #endif #if defined(ISC_PLATFORM_HAVEXADDQ) { (PFV) test_atomic_xaddq, "test_atomic_xaddq" }, +#endif + { (PFV) test_atomic_store, "test_atomic_store" }, +#if defined(ISC_PLATFORM_HAVEXADDQ) + { (PFV) test_atomic_storeq, "test_atomic_storeq" }, #endif { (PFV) 0, NULL } }; diff --git a/configure b/configure index 4284f78e51..864645b6bd 100755 --- a/configure +++ b/configure @@ -717,6 +717,7 @@ ISC_PLATFORM_USEMACASM ISC_PLATFORM_USESTDASM ISC_PLATFORM_USEOSFASM ISC_PLATFORM_USEGCCASM +ISC_PLATFORM_HAVEATOMICSTOREQ ISC_PLATFORM_HAVEATOMICSTORE ISC_PLATFORM_HAVECMPXCHG ISC_PLATFORM_HAVEXADDQ @@ -19854,14 +19855,17 @@ if test "$have_atomic" = "yes"; then ISC_PLATFORM_HAVEATOMICSTORE="#define ISC_PLATFORM_HAVEATOMICSTORE 1" if test "$have_xaddq" = "yes"; then ISC_PLATFORM_HAVEXADDQ="#define ISC_PLATFORM_HAVEXADDQ 1" + ISC_PLATFORM_HAVEATOMICSTOREQ="#define ISC_PLATFORM_HAVEATOMICSTOREQ 1" else ISC_PLATFORM_HAVEXADDQ="#undef ISC_PLATFORM_HAVEXADDQ" + ISC_PLATFORM_HAVEATOMICSTOREQ="#undef ISC_PLATFORM_HAVEATOMICSTOREQ" fi else ISC_PLATFORM_HAVEXADD="#undef ISC_PLATFORM_HAVEXADD" ISC_PLATFORM_HAVECMPXCHG="#undef 
ISC_PLATFORM_HAVECMPXCHG" ISC_PLATFORM_HAVEATOMICSTORE="#undef ISC_PLATFORM_HAVEATOMICSTORE" ISC_PLATFORM_HAVEXADDQ="#undef ISC_PLATFORM_HAVEXADDQ" + ISC_PLATFORM_HAVEATOMICSTOREQ="#undef ISC_PLATFORM_HAVEATOMICSTOREQ" fi @@ -19874,6 +19878,7 @@ fi + ISC_ARCH_DIR=$arch diff --git a/configure.in b/configure.in index 339022bde3..cf06cd9ad0 100644 --- a/configure.in +++ b/configure.in @@ -3841,20 +3841,24 @@ if test "$have_atomic" = "yes"; then ISC_PLATFORM_HAVEATOMICSTORE="#define ISC_PLATFORM_HAVEATOMICSTORE 1" if test "$have_xaddq" = "yes"; then ISC_PLATFORM_HAVEXADDQ="#define ISC_PLATFORM_HAVEXADDQ 1" + ISC_PLATFORM_HAVEATOMICSTOREQ="#define ISC_PLATFORM_HAVEATOMICSTOREQ 1" else ISC_PLATFORM_HAVEXADDQ="#undef ISC_PLATFORM_HAVEXADDQ" + ISC_PLATFORM_HAVEATOMICSTOREQ="#undef ISC_PLATFORM_HAVEATOMICSTOREQ" fi else ISC_PLATFORM_HAVEXADD="#undef ISC_PLATFORM_HAVEXADD" ISC_PLATFORM_HAVECMPXCHG="#undef ISC_PLATFORM_HAVECMPXCHG" ISC_PLATFORM_HAVEATOMICSTORE="#undef ISC_PLATFORM_HAVEATOMICSTORE" ISC_PLATFORM_HAVEXADDQ="#undef ISC_PLATFORM_HAVEXADDQ" + ISC_PLATFORM_HAVEATOMICSTOREQ="#undef ISC_PLATFORM_HAVEATOMICSTOREQ" fi AC_SUBST(ISC_PLATFORM_HAVEXADD) AC_SUBST(ISC_PLATFORM_HAVEXADDQ) AC_SUBST(ISC_PLATFORM_HAVECMPXCHG) AC_SUBST(ISC_PLATFORM_HAVEATOMICSTORE) +AC_SUBST(ISC_PLATFORM_HAVEATOMICSTOREQ) AC_SUBST(ISC_PLATFORM_USEGCCASM) AC_SUBST(ISC_PLATFORM_USEOSFASM) diff --git a/lib/isc/include/isc/platform.h.in b/lib/isc/include/isc/platform.h.in index 3431712c44..75cdec33a3 100644 --- a/lib/isc/include/isc/platform.h.in +++ b/lib/isc/include/isc/platform.h.in @@ -288,11 +288,17 @@ @ISC_PLATFORM_HAVEXADDQ@ /* - * If the "atomic swap" operation is available on this architecture, - * ISC_PLATFORM_HAVEATOMICSTORE" will be defined. + * If the 32-bit "atomic swap" operation is available on this + * architecture, ISC_PLATFORM_HAVEATOMICSTORE" will be defined. 
*/ @ISC_PLATFORM_HAVEATOMICSTORE@ +/* + * If the 64-bit "atomic swap" operation is available on this + * architecture, ISC_PLATFORM_HAVEATOMICSTOREQ will be defined. + */ +@ISC_PLATFORM_HAVEATOMICSTOREQ@ + /* * If the "compare-and-exchange" operation is available on this architecture, * ISC_PLATFORM_HAVECMPXCHG will be defined. diff --git a/lib/isc/stats.c b/lib/isc/stats.c index 3a685e59f8..61f3ec2e35 100644 --- a/lib/isc/stats.c +++ b/lib/isc/stats.c @@ -35,13 +35,45 @@ #define ISC_STATS_MAGIC ISC_MAGIC('S', 't', 'a', 't') #define ISC_STATS_VALID(x) ISC_MAGIC_VALID(x, ISC_STATS_MAGIC) -#ifndef ISC_STATS_USEMULTIFIELDS -#if defined(ISC_RWLOCK_USEATOMIC) && defined(ISC_PLATFORM_HAVEXADD) && !defined(ISC_PLATFORM_HAVEXADDQ) +/*% + * Local macro confirming presence of 64-bit + * increment and store operations, just to make + * the later macros simpler + */ +#if defined(ISC_PLATFORM_HAVEXADDQ) && defined(ISC_PLATFORM_HAVEATOMICSTOREQ) +#define ISC_STATS_HAVEATOMICQ 1 +#else +#define ISC_STATS_HAVEATOMICQ 0 +#endif + +/*% + * Only lock the counters if 64-bit atomic operations are + * not available but cheap atomic lock operations are. + * On a modern 64-bit system this should never be the case. + * + * Normal locks are too expensive to be used whenever a counter + * is updated. + */ +#if !ISC_STATS_HAVEATOMICQ && defined(ISC_RWLOCK_HAVEATOMIC) +#define ISC_STATS_LOCKCOUNTERS 1 +#else +#define ISC_STATS_LOCKCOUNTERS 0 +#endif + +/*% + * If 64-bit atomic operations are not available but + * 32-bit operations are then split the counter into two, + * using the atomic operations to try to ensure that any carry + * from the low word is correctly carried into the high word.
+ * + * Otherwise, just rely on standard 64-bit data types + * and operations + */ +#if !ISC_STATS_HAVEATOMICQ && defined(ISC_PLATFORM_HAVEXADD) #define ISC_STATS_USEMULTIFIELDS 1 #else #define ISC_STATS_USEMULTIFIELDS 0 #endif -#endif /* ISC_STATS_USEMULTIFIELDS */ #if ISC_STATS_USEMULTIFIELDS typedef struct { @@ -65,7 +97,7 @@ struct isc_stats { * Locked by counterlock or unlocked if efficient rwlock is not * available. */ -#ifdef ISC_RWLOCK_USEATOMIC +#if ISC_STATS_LOCKCOUNTERS isc_rwlock_t counterlock; #endif isc_stat_t *counters; @@ -111,7 +143,7 @@ create_stats(isc_mem_t *mctx, int ncounters, isc_stats_t **statsp) { goto clean_counters; } -#ifdef ISC_RWLOCK_USEATOMIC +#if ISC_STATS_LOCKCOUNTERS result = isc_rwlock_init(&stats->counterlock, 0, 0); if (result != ISC_R_SUCCESS) goto clean_copiedcounters; @@ -131,7 +163,7 @@ create_stats(isc_mem_t *mctx, int ncounters, isc_stats_t **statsp) { clean_counters: isc_mem_put(mctx, stats->counters, sizeof(isc_stat_t) * ncounters); -#ifdef ISC_RWLOCK_USEATOMIC +#if ISC_STATS_LOCKCOUNTERS clean_copiedcounters: isc_mem_put(mctx, stats->copiedcounters, sizeof(isc_stat_t) * ncounters); @@ -177,7 +209,7 @@ isc_stats_detach(isc_stats_t **statsp) { sizeof(isc_stat_t) * stats->ncounters); UNLOCK(&stats->lock); DESTROYLOCK(&stats->lock); -#ifdef ISC_RWLOCK_USEATOMIC +#if ISC_STATS_LOCKCOUNTERS isc_rwlock_destroy(&stats->counterlock); #endif isc_mem_putanddetach(&stats->mctx, stats, sizeof(*stats)); @@ -198,7 +230,7 @@ static inline void incrementcounter(isc_stats_t *stats, int counter) { isc_int32_t prev; -#ifdef ISC_RWLOCK_USEATOMIC +#if ISC_STATS_LOCKCOUNTERS /* * We use a "read" lock to prevent other threads from reading the * counter while we "writing" a counter field. 
The write access itself @@ -219,7 +251,7 @@ incrementcounter(isc_stats_t *stats, int counter) { */ if (prev == (isc_int32_t)0xffffffff) isc_atomic_xadd((isc_int32_t *)&stats->counters[counter].hi, 1); -#elif defined(ISC_PLATFORM_HAVEXADDQ) +#elif ISC_STATS_HAVEATOMICQ UNUSED(prev); isc_atomic_xaddq((isc_int64_t *)&stats->counters[counter], 1); #else @@ -227,7 +259,7 @@ incrementcounter(isc_stats_t *stats, int counter) { stats->counters[counter]++; #endif -#ifdef ISC_RWLOCK_USEATOMIC +#if ISC_STATS_LOCKCOUNTERS isc_rwlock_unlock(&stats->counterlock, isc_rwlocktype_read); #endif } @@ -236,7 +268,7 @@ static inline void decrementcounter(isc_stats_t *stats, int counter) { isc_int32_t prev; -#ifdef ISC_RWLOCK_USEATOMIC +#if ISC_STATS_LOCKCOUNTERS isc_rwlock_lock(&stats->counterlock, isc_rwlocktype_read); #endif @@ -245,7 +277,7 @@ decrementcounter(isc_stats_t *stats, int counter) { if (prev == 0) isc_atomic_xadd((isc_int32_t *)&stats->counters[counter].hi, -1); -#elif defined(ISC_PLATFORM_HAVEXADDQ) +#elif ISC_STATS_HAVEATOMICQ UNUSED(prev); isc_atomic_xaddq((isc_int64_t *)&stats->counters[counter], -1); #else @@ -253,7 +285,7 @@ decrementcounter(isc_stats_t *stats, int counter) { stats->counters[counter]--; #endif -#ifdef ISC_RWLOCK_USEATOMIC +#if ISC_STATS_LOCKCOUNTERS isc_rwlock_unlock(&stats->counterlock, isc_rwlocktype_read); #endif } @@ -262,7 +294,7 @@ static void copy_counters(isc_stats_t *stats) { int i; -#ifdef ISC_RWLOCK_USEATOMIC +#if ISC_STATS_LOCKCOUNTERS /* * We use a "write" lock before "reading" the statistics counters as * an exclusive lock. 
@@ -270,19 +302,21 @@ copy_counters(isc_stats_t *stats) { isc_rwlock_lock(&stats->counterlock, isc_rwlocktype_write); #endif -#if ISC_STATS_USEMULTIFIELDS for (i = 0; i < stats->ncounters; i++) { +#if ISC_STATS_USEMULTIFIELDS stats->copiedcounters[i] = - (isc_uint64_t)(stats->counters[i].hi) << 32 | - stats->counters[i].lo; - } + (isc_uint64_t)(stats->counters[i].hi) << 32 | + stats->counters[i].lo; +#elif ISC_STATS_HAVEATOMICQ + /* use xaddq(..., 0) as an atomic load */ + stats->copiedcounters[i] = + (isc_uint64_t)isc_atomic_xaddq((isc_int64_t *)&stats->counters[i], 0); #else - UNUSED(i); - memmove(stats->copiedcounters, stats->counters, - stats->ncounters * sizeof(isc_stat_t)); + stats->copiedcounters[i] = stats->counters[i]; #endif + } -#ifdef ISC_RWLOCK_USEATOMIC +#if ISC_STATS_LOCKCOUNTERS isc_rwlock_unlock(&stats->counterlock, isc_rwlocktype_write); #endif } @@ -335,7 +369,7 @@ isc_stats_set(isc_stats_t *stats, isc_uint64_t val, REQUIRE(ISC_STATS_VALID(stats)); REQUIRE(counter < stats->ncounters); -#ifdef ISC_RWLOCK_USEATOMIC +#if ISC_STATS_LOCKCOUNTERS /* * We use a "write" lock before "reading" the statistics counters as * an exclusive lock. 
@@ -346,11 +380,13 @@ isc_stats_set(isc_stats_t *stats, isc_uint64_t val, #if ISC_STATS_USEMULTIFIELDS stats->counters[counter].hi = (isc_uint32_t)((val >> 32) & 0xffffffff); stats->counters[counter].lo = (isc_uint32_t)(val & 0xffffffff); +#elif ISC_STATS_HAVEATOMICQ + isc_atomic_storeq((isc_int64_t *)&stats->counters[counter], val); #else stats->counters[counter] = val; #endif -#ifdef ISC_RWLOCK_USEATOMIC +#if ISC_STATS_LOCKCOUNTERS isc_rwlock_unlock(&stats->counterlock, isc_rwlocktype_write); #endif } diff --git a/lib/isc/tests/print_test.c b/lib/isc/tests/print_test.c index 7d0342ba8a..19bcfaf85e 100644 --- a/lib/isc/tests/print_test.c +++ b/lib/isc/tests/print_test.c @@ -117,6 +117,12 @@ ATF_TC_BODY(snprintf, tc) { n = isc_print_snprintf(buf, sizeof(buf), "%zo", size); ATF_CHECK_EQ(n, 4); ATF_CHECK_STREQ(buf, "1750"); + + zz = 0xf5f5f5f5f5f5f5f5; + memset(buf, 0xff, sizeof(buf)); + n = isc_print_snprintf(buf, sizeof(buf), "0x%"ISC_PRINT_QUADFORMAT"x", zz); + ATF_CHECK_EQ(n, 18); + ATF_CHECK_STREQ(buf, "0xf5f5f5f5f5f5f5f5"); } ATF_TC(fprintf); diff --git a/lib/isc/win32/include/isc/atomic.h b/lib/isc/win32/include/isc/atomic.h index 1c99c44097..f0bcd901fe 100644 --- a/lib/isc/win32/include/isc/atomic.h +++ b/lib/isc/win32/include/isc/atomic.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2013 Internet Systems Consortium, Inc. ("ISC") + * Copyright (C) 2013, 2015 Internet Systems Consortium, Inc. ("ISC") * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -43,7 +43,7 @@ isc_atomic_xaddq(isc_int64_t *p, isc_int64_t val) { #endif /* - * This routine atomically stores the value 'val' in 'p'. + * This routine atomically stores the value 'val' in 'p' (32-bit version). 
*/ #ifdef ISC_PLATFORM_HAVEATOMICSTORE static __inline void @@ -52,6 +52,16 @@ isc_atomic_store(isc_int32_t *p, isc_int32_t val) { } #endif +/* + * This routine atomically stores the value 'val' in 'p' (64-bit version). + */ +#ifdef ISC_PLATFORM_HAVEATOMICSTOREQ +static __inline void +isc_atomic_storeq(isc_int64_t *p, isc_int64_t val) { + (void) _InterlockedExchange64((__int64 *)p, (__int64)val); +} +#endif + /* * This routine atomically replaces the value in 'p' with 'val', if the * original value is equal to 'cmpval'. The original value is returned in any diff --git a/lib/isc/x86_32/include/isc/atomic.h b/lib/isc/x86_32/include/isc/atomic.h index bf2148cb33..fa0ec15dbd 100644 --- a/lib/isc/x86_32/include/isc/atomic.h +++ b/lib/isc/x86_32/include/isc/atomic.h @@ -62,7 +62,7 @@ isc_atomic_xaddq(isc_int64_t *p, isc_int64_t val) { #endif /* ISC_PLATFORM_HAVEXADDQ */ /* - * This routine atomically stores the value 'val' in 'p'. + * This routine atomically stores the value 'val' in 'p' (32-bit version). */ static __inline__ void isc_atomic_store(isc_int32_t *p, isc_int32_t val) { @@ -81,6 +81,28 @@ isc_atomic_store(isc_int32_t *p, isc_int32_t val) { : "memory"); } +#ifdef ISC_PLATFORM_HAVEATOMICSTOREQ +/* + * This routine atomically stores the value 'val' in 'p' (64-bit version). + */ +static __inline__ void +isc_atomic_storeq(isc_int64_t *p, isc_int64_t val) { + __asm__ volatile( +#ifdef ISC_PLATFORM_USETHREADS + /* + * xchg should automatically lock memory, but we add it + * explicitly just in case (it at least doesn't harm) + */ + "lock;" +#endif + + "xchgq %1, %0" + : + : "r"(val), "m"(*p) + : "memory"); +} +#endif /* ISC_PLATFORM_HAVEATOMICSTOREQ */ + /* * This routine atomically replaces the value in 'p' with 'val', if the * original value is equal to 'cmpval'. 
The original value is returned in any diff --git a/lib/isc/x86_64/include/isc/atomic.h b/lib/isc/x86_64/include/isc/atomic.h index f57bd2a786..3f4a5c3459 100644 --- a/lib/isc/x86_64/include/isc/atomic.h +++ b/lib/isc/x86_64/include/isc/atomic.h @@ -100,6 +100,9 @@ isc_atomic_cmpxchg(isc_int32_t *p, isc_int32_t cmpval, isc_int32_t val) { UNUSED(val); __asm ( + /* + * p is %rdi, cmpval is %esi, val is %edx. + */ "movl %edx, %ecx\n" "movl %esi, %eax\n" "movq %rdi, %rdx\n" @@ -108,8 +111,12 @@ isc_atomic_cmpxchg(isc_int32_t *p, isc_int32_t cmpval, isc_int32_t val) { "lock;" #endif /* - * If (%rdi) == %eax then (%rdi) := %edx. - * %eax is set to old (%ecx), which will be the return value. + * If [%rdi] == %eax then [%rdi] := %ecx (equal to %edx + * from above), and %eax is untouched (equal to %esi) + * from above. + * + * Else if [%rdi] != %eax then [%rdi] := [%rdi] + * (rewritten in write cycle) and %eax := [%rdi]. */ "cmpxchgl %ecx, (%rdx)" ); diff --git a/win32utils/Configure b/win32utils/Configure index 25fb03ff4e..2476d45c40 100644 --- a/win32utils/Configure +++ b/win32utils/Configure @@ -383,6 +383,7 @@ my @substdefh = ("AES_SIT", my %configdefp; my @substdefp = ("ISC_PLATFORM_HAVEATOMICSTORE", + "ISC_PLATFORM_HAVEATOMICSTOREQ", "ISC_PLATFORM_HAVECMPXCHG", "ISC_PLATFORM_HAVEXADD", "ISC_PLATFORM_HAVEXADDQ", @@ -1305,6 +1306,7 @@ if ($enable_intrinsics eq "yes") { $configdefp{"ISC_PLATFORM_HAVEXADD"} = 1; if ($want_x64 eq "yes") { $configdefp{"ISC_PLATFORM_HAVEXADDQ"} = 1; + $configdefp{"ISC_PLATFORM_HAVEATOMICSTOREQ"} = 1; } $configdefp{"ISC_PLATFORM_HAVEATOMICSTORE"} = 1; $configdefp{"ISC_PLATFORM_HAVECMPXCHG"} = 1;