From afb8a2bbc4ade1ccd45f84f15fcce0fcff487345 Mon Sep 17 00:00:00 2001 From: Martin Dimitrov Date: Tue, 19 May 2026 20:36:48 -0700 Subject: [PATCH] bitops: use unsigned char pointer for SIMD scan arithmetic Refactor redisBitposScanAVX512/AVX2 to take unsigned char *p instead of unsigned long *l, replacing the repeated (unsigned long*)((unsigned char*)l + N) casts with simple p += N pointer arithmetic. At the call sites, work with the existing 'c' char pointer throughout the SIMD phase and cast to unsigned long * only once before the scalar tail loop. --- src/bitops.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/bitops.c b/src/bitops.c index 83a90e935..816af5c80 100644 --- a/src/bitops.c +++ b/src/bitops.c @@ -390,17 +390,17 @@ static inline long long redisPopcountAuto(const unsigned char *p, long count) { * ----------------------------------------------------------------------- */ #ifdef HAVE_AVX512 ATTRIBUTE_TARGET_AVX512 -static unsigned long redisBitposScanAVX512(unsigned long *l, +static unsigned long redisBitposScanAVX512(unsigned char *p, unsigned long count, int bit) { unsigned long scanned = 0; __m512i skip = bit ? _mm512_setzero_si512() : _mm512_set1_epi64(-1LL); while (count >= 64) { - __m512i data = _mm512_loadu_si512(l); + __m512i data = _mm512_loadu_si512(p); __mmask8 eq = _mm512_cmpeq_epi64_mask(data, skip); if (eq != 0xFF) break; - l = (unsigned long *)((unsigned char *)l + 64); + p += 64; count -= 64; scanned += 64; } @@ -410,17 +410,17 @@ static unsigned long redisBitposScanAVX512(unsigned long *l, #ifdef HAVE_AVX2 ATTRIBUTE_TARGET_AVX2 -static unsigned long redisBitposScanAVX2(unsigned long *l, +static unsigned long redisBitposScanAVX2(unsigned char *p, unsigned long count, int bit) { unsigned long scanned = 0; __m256i skip = bit ? _mm256_setzero_si256() : _mm256_set1_epi8((char)0xFF); while (count >= 32) { - __m256i data = _mm256_loadu_si256((__m256i *)l); + __m256i data = _mm256_loadu_si256((__m256i *)p); int eq = _mm256_movemask_epi8(_mm256_cmpeq_epi8(data, skip)); if (eq != -1) break; - l = (unsigned long *)((unsigned char *)l + 32); + p += 32; count -= 32; scanned += 32; } @@ -468,7 +468,6 @@ long long redisBitpos(void *s, unsigned long count, int bit) { /* Skip bits with full word step. Use SIMD when available for the * bulk of the scan, then fall through to scalar for the tail. */ - l = (unsigned long*) c; if (!found) { skipval = bit ? 0 : ULONG_MAX; @@ -478,8 +477,8 @@ long long redisBitpos(void *s, unsigned long count, int bit) { #if defined(HAVE_AVX512) if (BITOP_USE_AVX512 && count >= 64) { - unsigned long advanced = redisBitposScanAVX512(l, count, bit); - l = (unsigned long*)((unsigned char*)l + advanced); + unsigned long advanced = redisBitposScanAVX512(c, count, bit); + c += advanced; count -= advanced; pos += advanced * 8; useAVX = 1; @@ -488,8 +487,8 @@ long long redisBitpos(void *s, unsigned long count, int bit) { #if defined(HAVE_AVX2) if (!useAVX && BITOP_USE_AVX2 && count >= 32) { - unsigned long advanced = redisBitposScanAVX2(l, count, bit); - l = (unsigned long*)((unsigned char*)l + advanced); + unsigned long advanced = redisBitposScanAVX2(c, count, bit); + c += advanced; count -= advanced; pos += advanced * 8; } @@ -497,12 +496,14 @@ long long redisBitpos(void *s, unsigned long count, int bit) { /* Scalar word-at-a-time scan handles the tail after SIMD and * serves as the sole scan path when SIMD is unavailable. */ + l = (unsigned long *)c; while (count >= sizeof(*l)) { if (*l != skipval) break; l++; count -= sizeof(*l); pos += sizeof(*l)*8; } + c = (unsigned char *)l; } /* Load bytes into "word" considering the first byte as the most significant @@ -512,7 +513,6 @@ long long redisBitpos(void *s, unsigned long count, int bit) { * * Note that the loading is designed to work even when the bytes left * (count) are less than a full word. We pad it with zero on the right. */ - c = (unsigned char*)l; for (j = 0; j < sizeof(*l); j++) { word <<= 8; if (count) {