Add MSan and integrate it with CI (#13916)

## Description
Memory sanitizer (MSAN) is used to detect use-of-uninitialized memory
issues. While Address Sanitizer catches a wide range of memory safety
issues, it doesn't specifically detect uninitialized memory usage.
Therefore, Memory Sanitizer complements Address Sanitizer. This PR adds
MSAN run to the daily build, with the possibility of incorporating it
into the ci.yml workflow in the future if needed.

Changes in source files fix false-positive issues and they should not
introduce any runtime implications.

Note: Valgrind performs similar checks to both ASAN and MSAN but
sanitizers run significantly faster.

## Limitations
- Memory sanitizer is only supported by Clang.
- MSAN documentation states that all dependencies, including the
standard library, must be compiled with MSAN. However, it also mentions
there are interceptors for common libc functions, so compiling the
standard library with the MSAN flag is not strictly necessary.
Therefore, we are not compiling libc with MSAN.

---------

Co-authored-by: Ozan Tezcan <ozantezcan@gmail.com>
This commit is contained in:
Mincho Paskalev 2025-05-09 11:44:54 +03:00 committed by GitHub
parent 8148e4116e
commit fdbf88032c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
13 changed files with 147 additions and 47 deletions

View file

@ -606,6 +606,50 @@ jobs:
if: true && !contains(github.event.inputs.skiptests, 'unittest')
run: ./src/redis-server test all
test-sanitizer-memory:
runs-on: ubuntu-latest
if: |
(github.event_name == 'workflow_dispatch' || (github.event_name != 'workflow_dispatch' && github.repository == 'redis/redis')) &&
!contains(github.event.inputs.skipjobs, 'sanitizer')
timeout-minutes: 14400
env:
CC: clang # MSan work only with clang
steps:
- name: prep
if: github.event_name == 'workflow_dispatch'
run: |
echo "GITHUB_REPOSITORY=${{github.event.inputs.use_repo}}" >> $GITHUB_ENV
echo "GITHUB_HEAD_REF=${{github.event.inputs.use_git_ref}}" >> $GITHUB_ENV
echo "skipjobs: ${{github.event.inputs.skipjobs}}"
echo "skiptests: ${{github.event.inputs.skiptests}}"
echo "test_args: ${{github.event.inputs.test_args}}"
echo "cluster_test_args: ${{github.event.inputs.cluster_test_args}}"
- uses: actions/checkout@v4
with:
repository: ${{ env.GITHUB_REPOSITORY }}
ref: ${{ env.GITHUB_HEAD_REF }}
- name: make
run: make SANITIZER=memory REDIS_CFLAGS='-DREDIS_TEST -Werror -DDEBUG_ASSERTIONS'
- name: testprep
run: |
sudo apt-get update
sudo apt-get install tcl8.6 tclx -y
- name: test
if: true && !contains(github.event.inputs.skiptests, 'redis')
run: ./runtest --accurate --verbose --dump-logs ${{github.event.inputs.test_args}}
- name: module api test
if: true && !contains(github.event.inputs.skiptests, 'modules')
run: SANITIZER=memory CFLAGS='-Werror' ./runtest-moduleapi --verbose --dump-logs ${{github.event.inputs.test_args}}
- name: sentinel tests
if: true && !contains(github.event.inputs.skiptests, 'sentinel')
run: ./runtest-sentinel ${{github.event.inputs.cluster_test_args}}
- name: cluster tests
if: true && !contains(github.event.inputs.skiptests, 'cluster')
run: ./runtest-cluster ${{github.event.inputs.cluster_test_args}}
- name: unittest
if: true && !contains(github.event.inputs.skiptests, 'unittest')
run: ./src/redis-server test all
test-sanitizer-undefined:
runs-on: ubuntu-latest
if: |

31
deps/Makefile vendored
View file

@ -12,6 +12,23 @@ BINCOLOR="\033[37;1m"
MAKECOLOR="\033[32;1m"
ENDCOLOR="\033[0m"
DEPS_CFLAGS := $(CFLAGS)
DEPS_LDFLAGS := $(LDFLAGS)
CLANG := $(findstring clang,$(shell sh -c '$(CC) --version | head -1'))
# MSan looks for errors related to uninitialized memory.
# Make sure to build the dependencies with MSan as it needs all the code to be instrumented.
# A library could be used to initialize memory but if it's not build with --fsanitize=memory then
# MSan doesn't know about it and will spit false positive error when that memory is then used.
ifeq ($(SANITIZER),memory)
ifeq (clang, $(CLANG))
DEPS_CFLAGS+=-fsanitize=memory -fsanitize-memory-track-origins=2 -fno-sanitize-recover=all -fno-omit-frame-pointer
DEPS_LDFLAGS+=-fsanitize=memory
else
$(error "MemorySanitizer needs to be compiled and linked with clang. Please use CC=clang")
endif
endif
default:
@echo "Explicit target required"
@ -53,31 +70,31 @@ endif
hiredis: .make-prerequisites
@printf '%b %b\n' $(MAKECOLOR)MAKE$(ENDCOLOR) $(BINCOLOR)$@$(ENDCOLOR)
cd hiredis && $(MAKE) static $(HIREDIS_MAKE_FLAGS)
cd hiredis && $(MAKE) static $(HIREDIS_MAKE_FLAGS) CFLAGS="$(DEPS_CFLAGS)" LDFLAGS="$(DEPS_LDFLAGS)"
.PHONY: hiredis
linenoise: .make-prerequisites
@printf '%b %b\n' $(MAKECOLOR)MAKE$(ENDCOLOR) $(BINCOLOR)$@$(ENDCOLOR)
cd linenoise && $(MAKE)
cd linenoise && $(MAKE) CFLAGS="$(DEPS_CFLAGS)" LDFLAGS="$(DEPS_LDFLAGS)"
.PHONY: linenoise
hdr_histogram: .make-prerequisites
@printf '%b %b\n' $(MAKECOLOR)MAKE$(ENDCOLOR) $(BINCOLOR)$@$(ENDCOLOR)
cd hdr_histogram && $(MAKE)
cd hdr_histogram && $(MAKE) CFLAGS="$(DEPS_CFLAGS)" LDFLAGS="$(DEPS_LDFLAGS)"
.PHONY: hdr_histogram
fpconv: .make-prerequisites
@printf '%b %b\n' $(MAKECOLOR)MAKE$(ENDCOLOR) $(BINCOLOR)$@$(ENDCOLOR)
cd fpconv && $(MAKE)
cd fpconv && $(MAKE) CFLAGS="$(DEPS_CFLAGS)" LDFLAGS="$(DEPS_LDFLAGS)"
.PHONY: fpconv
fast_float: .make-prerequisites
@printf '%b %b\n' $(MAKECOLOR)MAKE$(ENDCOLOR) $(BINCOLOR)$@$(ENDCOLOR)
cd fast_float && $(MAKE) libfast_float
cd fast_float && $(MAKE) libfast_float CFLAGS="$(DEPS_CFLAGS)" LDFLAGS="$(DEPS_LDFLAGS)"
.PHONY: fast_float
@ -86,8 +103,8 @@ ifeq ($(uname_S),SunOS)
LUA_CFLAGS= -D__C99FEATURES__=1
endif
LUA_CFLAGS+= -Wall -DLUA_ANSI -DENABLE_CJSON_GLOBAL -DREDIS_STATIC='' -DLUA_USE_MKSTEMP $(CFLAGS)
LUA_LDFLAGS+= $(LDFLAGS)
LUA_CFLAGS+= -Wall -DLUA_ANSI -DENABLE_CJSON_GLOBAL -DREDIS_STATIC='' -DLUA_USE_MKSTEMP $(DEPS_CFLAGS)
LUA_LDFLAGS+= $(DEPS_LDFLAGS)
ifeq ($(LUA_DEBUG),yes)
LUA_CFLAGS+= -O0 -g -DLUA_USE_APICHECK
else

View file

@ -118,12 +118,24 @@ else
ifeq ($(SANITIZER),thread)
CFLAGS+=-fsanitize=thread -fno-sanitize-recover=all -fno-omit-frame-pointer
LDFLAGS+=-fsanitize=thread
else
ifeq ($(SANITIZER),memory)
ifeq (clang, $(CLANG))
export CXX:=clang
export LD:=clang
MALLOC=libc # MSan provides its own allocator so make sure not to use jemalloc as they clash
CFLAGS+=-fsanitize=memory -fsanitize-memory-track-origins=2 -fno-sanitize-recover=all -fno-omit-frame-pointer
LDFLAGS+=-fsanitize=memory
else
$(error "MemorySanitizer needs to be compiled and linked with clang. Please use CC=clang")
endif
else
$(error "unknown sanitizer=${SANITIZER}")
endif
endif
endif
endif
endif
# Override default settings if possible
-include .make-settings

View file

@ -168,6 +168,12 @@
#define REDIS_NO_SANITIZE(sanitizer)
#endif
#if defined(__clang__)
#define REDIS_NO_SANITIZE_MSAN(sanitizer) REDIS_NO_SANITIZE(sanitizer)
#else
#define REDIS_NO_SANITIZE_MSAN(sanitizer)
#endif
/* Define rdb_fsync_range to sync_file_range() on Linux, otherwise we use
* the plain fsync() call. */
#if (defined(__linux__) && defined(SYNC_FILE_RANGE_WAIT_BEFORE))

View file

@ -1378,6 +1378,7 @@ static void* getAndSetMcontextEip(ucontext_t *uc, void *eip) {
#undef NOT_SUPPORTED
}
REDIS_NO_SANITIZE_MSAN("memory")
REDIS_NO_SANITIZE("address")
void logStackContent(void **sp) {
if (server.hide_user_data_from_log) {
@ -2729,7 +2730,7 @@ static size_t get_ready_to_signal_threads_tids(int sig_num, pid_t tids[TIDS_MAX_
pid_t calling_tid = syscall(SYS_gettid);
int current_thread_index = -1;
long nread;
char buff[PATH_MAX];
char buff[PATH_MAX] = {0};
/* readdir() is not async-signal-safe (AS-safe).
Hence, we read the file using SYS_getdents64, which is considered AS-sync*/

View file

@ -96,6 +96,12 @@
# define NO_SANITIZE(sanitizer)
#endif
#if defined(__clang__)
#define NO_SANITIZE_MSAN(sanitizer) NO_SANITIZE(sanitizer)
#else
#define NO_SANITIZE_MSAN(sanitizer)
#endif
/*
* compressed format
*
@ -105,6 +111,7 @@
*
*/
NO_SANITIZE("alignment")
NO_SANITIZE_MSAN("memory")
size_t
lzf_compress (const void *const in_data, size_t in_len,
void *out_data, size_t out_len

View file

@ -36,16 +36,6 @@
#define ULONG_ZEROONE 0x5555555555555555UL
#endif
#if defined(__has_attribute)
#if __has_attribute(no_sanitize)
#define NO_SANITIZE(sanitizer) __attribute__((no_sanitize(sanitizer)))
#endif
#endif
#if !defined(NO_SANITIZE)
#define NO_SANITIZE(sanitizer)
#endif
static struct winsize ws;
size_t progress_printed; /* Printed chars in screen-wide progress bar. */
size_t progress_full; /* How many chars to write to fill the progress bar. */
@ -268,7 +258,7 @@ int memtest_test(unsigned long *m, size_t bytes, int passes, int interactive) {
* the cache. */
#define MEMTEST_DECACHE_SIZE (1024*8)
NO_SANITIZE("undefined")
REDIS_NO_SANITIZE("undefined")
int memtest_preserving_test(unsigned long *m, size_t bytes, int passes) {
unsigned long backup[MEMTEST_BACKUP_WORDS];
unsigned long *p = m;

View file

@ -438,6 +438,7 @@ static void clientDone(client c) {
}
}
REDIS_NO_SANITIZE_MSAN("memory")
static void readHandler(aeEventLoop *el, int fd, void *privdata, int mask) {
client c = privdata;
void *reply = NULL;

View file

@ -74,8 +74,8 @@ static void maybeConvertToIntset(robj *set) {
if (setTypeSize(set) > intsetMaxEntries()) return; /* can't use intset */
intset *is = intsetNew();
char *str;
size_t len;
int64_t llval;
size_t len = 0;
int64_t llval = 0;
setTypeIterator *si = setTypeInitIterator(set);
while (setTypeNext(si, &str, &len, &llval) != -1) {
if (str) {
@ -371,7 +371,7 @@ int setTypeNext(setTypeIterator *si, char **str, size_t *len, int64_t *llele) {
}
if (lpi == NULL) return -1;
si->lpi = lpi;
unsigned int l;
unsigned int l = 0;
*str = (char *)lpGetValue(lpi, &l, (long long *)llele);
*len = (size_t)l;
} else {
@ -388,9 +388,9 @@ int setTypeNext(setTypeIterator *si, char **str, size_t *len, int64_t *llele) {
* This function is the way to go for write operations where COW is not
* an issue. */
sds setTypeNextObject(setTypeIterator *si) {
int64_t intele;
int64_t intele = 0;
char *str;
size_t len;
size_t len = 0;
if (setTypeNext(si, &str, &len, &intele) == -1) return NULL;
if (str != NULL) return sdsnewlen(str, len);
@ -522,8 +522,8 @@ int setTypeConvertAndExpand(robj *setobj, int enc, unsigned long cap, int panic)
}
unsigned char *lp = lpNew(estcap);
char *str;
size_t len;
int64_t llele;
size_t len = 0;
int64_t llele = 0;
si = setTypeInitIterator(setobj);
while (setTypeNext(si, &str, &len, &llele) != -1) {
if (str != NULL)
@ -574,8 +574,8 @@ robj *setTypeDup(robj *o) {
dictExpand(set->ptr, dictSize(d));
si = setTypeInitIterator(o);
char *str;
size_t len;
int64_t intobj;
size_t len = 0;
int64_t intobj = 0;
while (setTypeNext(si, &str, &len, &intobj) != -1) {
setTypeAdd(set, (sds)str);
}
@ -818,8 +818,8 @@ void spopWithCountCommand(client *c) {
/* Common iteration vars. */
char *str;
size_t len;
int64_t llele;
size_t len = 0;
int64_t llele = 0;
unsigned long remaining = size-count; /* Elements left after SPOP. */
/* If we are here, the number of requested elements is less than the
@ -839,7 +839,7 @@ void spopWithCountCommand(client *c) {
unsigned char **ps = zmalloc(sizeof(char *) * count);
for (unsigned long i = 0; i < count; i++) {
p = lpNextRandom(lp, p, &index, count - i, 1);
unsigned int len;
unsigned int len = 0;
str = (char *)lpGetValue(p, &len, (long long *)&llele);
if (str) {
@ -903,7 +903,7 @@ void spopWithCountCommand(client *c) {
unsigned char **ps = zmalloc(sizeof(char *) * remaining);
for (unsigned long i = 0; i < remaining; i++) {
p = lpNextRandom(lp, p, &index, remaining - i, 1);
unsigned int len;
unsigned int len = 0;
str = (char *)lpGetValue(p, &len, (long long *)&llele);
setTypeAddAux(newset, str, len, llele, 0);
ps[i] = p;
@ -1030,8 +1030,8 @@ void srandmemberWithCountCommand(client *c) {
int uniq = 1;
robj *set;
char *str;
size_t len;
int64_t llele;
size_t len = 0;
int64_t llele = 0;
dict *d;
@ -1232,8 +1232,8 @@ void srandmemberWithCountCommand(client *c) {
void srandmemberCommand(client *c) {
robj *set;
char *str;
size_t len;
int64_t llele;
size_t len = 0;
int64_t llele = 0;
if (c->argc == 3) {
srandmemberWithCountCommand(c);
@ -1288,8 +1288,8 @@ void sinterGenericCommand(client *c, robj **setkeys,
setTypeIterator *si;
robj *dstset = NULL;
char *str;
size_t len;
int64_t intobj;
size_t len = 0;
int64_t intobj = 0;
void *replylen = NULL;
unsigned long j, cardinality = 0;
int encoding, empty = 0;
@ -1453,8 +1453,8 @@ void sinterCommand(client *c) {
void smembersCommand(client *c) {
setTypeIterator *si;
char *str;
size_t len;
int64_t intobj;
size_t len = 0;
int64_t intobj = 0;
robj *setobj = lookupKeyRead(c->db, c->argv[1]);
if (checkType(c,setobj,OBJ_SET)) return;
if (!setobj) {
@ -1523,8 +1523,8 @@ void sunionDiffGenericCommand(client *c, robj **setkeys, int setnum,
robj *dstset = NULL;
int dstset_encoding = OBJ_ENCODING_INTSET;
char *str;
size_t len;
int64_t llval;
size_t len = 0;
int64_t llval = 0;
int encoding;
int j, cardinality = 0;
int diff_algo = 1;

View file

@ -257,7 +257,7 @@ static inline int64_t lpGetIntegerIfValid(unsigned char *ele, int *valid) {
/* The following code path should never be used for how listpacks work:
* they should always be able to store an int64_t value in integer
* encoded form. However the implementation may change. */
long long ll;
long long ll = 0;
int ret = string2ll((char*)e,v,&ll);
if (valid)
*valid = ret;

View file

@ -11,9 +11,27 @@ else # Linux, others
SHOBJ_LDFLAGS ?= -shared
endif
CLANG := $(findstring clang,$(shell sh -c '$(CC) --version | head -1'))
ifeq ($(SANITIZER),memory)
ifeq (clang, $(CLANG))
LD=clang
MALLOC=libc
CFLAGS+=-fsanitize=memory -fsanitize-memory-track-origins=2 -fno-sanitize-recover=all -fno-omit-frame-pointer
LDFLAGS+=-fsanitize=memory
else
$(error "MemorySanitizer needs to be compiled and linked with clang. Please use CC=clang")
endif
endif
# This is a hack to override the default CC. When running with SANITIZER=memory
# tough we want to keep the compiler as clang as MSan is not supported for gcc
ifeq ($(uname_S),Linux)
LD = gcc
CC = gcc
ifneq ($(SANITIZER),memory)
LD = gcc
CC = gcc
endif
endif
# OS X 11.x doesn't have /usr/lib/libSystem.dylib and needs an explicit setting.

View file

@ -294,7 +294,7 @@ proc spawn_server {config_file stdout stderr args} {
# ASAN_OPTIONS environment variable is for address sanitizer. If a test
# tries to allocate huge memory area and expects allocator to return
# NULL, address sanitizer throws an error without this setting.
set pid [exec /usr/bin/env ASAN_OPTIONS=allocator_may_return_null=1 {*}$cmd >> $stdout 2>> $stderr &]
set pid [exec /usr/bin/env ASAN_OPTIONS=allocator_may_return_null=1 MSAN_OPTIONS=allocator_may_return_null=1 {*}$cmd >> $stdout 2>> $stderr &]
}
if {$::wait_server} {

View file

@ -70,11 +70,15 @@ proc sanitizer_errors_from_file {filename} {
set lines [split [exec cat $filename] "\n"]
foreach line $lines {
# Ignore huge allocation warnings
# Ignore huge allocation warnings for both ASan and MSan
if ([string match {*WARNING: AddressSanitizer failed to allocate*} $line]) {
continue
}
if ([string match {*WARNING: MemorySanitizer failed to allocate*} $line]) {
continue
}
# GCC UBSAN output does not contain 'Sanitizer' but 'runtime error'.
if {[string match {*runtime error*} $line] ||
[string match {*Sanitizer*} $line]} {