haproxy/include/haproxy/pool-t.h
Willy Tarreau 0bae075928 MEDIUM: pools: add CONFIG_HAP_NO_GLOBAL_POOLS and CONFIG_HAP_GLOBAL_POOLS
We've reached a point where the global pools represent a significant
bottleneck with threads. On a 64-core machine, the performance was
divided by 8 between 32 and 64 H2 connections only because there were
not enough entries in the local caches to avoid picking from the global
pools, and the contention on the list there was very high. It becomes
obvious that we need to have an array of lists, but that will require
more changes.

In parallel, standard memory allocators have improved, with tcmalloc
and jemalloc finding their way into mainstream systems, and glibc
having upgraded to a thread-aware ptmalloc variant. Keeping this level
of contention here isn't justified anymore when we have both the local
per-thread pool caches and a fast process-wide allocator.

For these reasons, this patch introduces a new compile time setting
CONFIG_HAP_NO_GLOBAL_POOLS which is set by default when threads are
enabled with thread local pool caches, and we know we have a fast
thread-aware memory allocator (currently set for glibc>=2.26). In this
case we entirely bypass the global pool and directly use the standard
memory allocator when missing objects from the local pools. It is also
possible to force it at compile time when a good allocator is used with
another setup.

It is still possible to re-enable the global pools using
CONFIG_HAP_GLOBAL_POOLS, if a corner case is discovered regarding the
operating system's default allocator, or when building with a recent
libc but a different allocator which provides other benefits but does
not scale well with threads.
2021-03-05 08:30:08 +01:00

130 lines
4.4 KiB
C

/*
* include/haproxy/pool-t.h
* Memory pools configuration and type definitions.
*
* Copyright (C) 2000-2020 Willy Tarreau - w@1wt.eu
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation, version 2.1
* exclusively.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef _HAPROXY_POOL_T_H
#define _HAPROXY_POOL_T_H
#include <haproxy/api-t.h>
#include <haproxy/list-t.h>
#include <haproxy/thread-t.h>
/* Lock-less memory pools are enabled when threads are in use and the
 * architecture offers a double-word CAS. They are not supported in debugging
 * modes: any of DEBUG_NO_LOCKLESS_POOLS, DEBUG_UAF or DEBUG_FAIL_ALLOC turns
 * them off.
 */
#if defined(USE_THREAD) && defined(HA_HAVE_CAS_DW) && \
    !(defined(DEBUG_NO_LOCKLESS_POOLS) || defined(DEBUG_UAF) || defined(DEBUG_FAIL_ALLOC))
#define CONFIG_HAP_LOCKLESS_POOLS
#endif
/* With threads, local pools allow the locking cost to be amortized. They are
 * enabled whenever USE_THREAD is set, unless one of DEBUG_NO_LOCAL_POOLS,
 * DEBUG_UAF or DEBUG_FAIL_ALLOC is defined.
 */
#if defined(USE_THREAD) && \
    !(defined(DEBUG_NO_LOCAL_POOLS) || defined(DEBUG_UAF) || defined(DEBUG_FAIL_ALLOC))
#define CONFIG_HAP_LOCAL_POOLS
#endif
/* On modern architectures with many threads, a fast memory allocator, and
 * local pools, the global pools with their single list can be way slower than
 * the standard allocator which already has its own per-thread arenas. In this
 * case we disable global pools. The global pools may still be enforced
 * using CONFIG_HAP_GLOBAL_POOLS though.
 *
 * CONFIG_HAP_NO_GLOBAL_POOLS may also be forced from the build command line.
 * When it is already defined it is left untouched, so that a forced value
 * (e.g. -DCONFIG_HAP_NO_GLOBAL_POOLS, which expands to 1) is not redefined
 * here with a different replacement list.
 */
#if !defined(CONFIG_HAP_NO_GLOBAL_POOLS) && !defined(CONFIG_HAP_GLOBAL_POOLS) && \
    defined(USE_THREAD) && defined(HA_HAVE_FAST_MALLOC) && defined(CONFIG_HAP_LOCAL_POOLS)
#define CONFIG_HAP_NO_GLOBAL_POOLS
#endif
/* Pool flags (MEM_F_*). Sharing between pools of very similar size is the
 * default behaviour; building with DEBUG_DONT_SHARE_POOLS neutralizes the
 * MEM_F_SHARED flag by turning it into 0.
 * NOTE(review): the meaning of MEM_F_EXACT is not visible in this header;
 * presumably it requests an exact size match - confirm against its users.
 */
#ifdef DEBUG_DONT_SHARE_POOLS
#define MEM_F_SHARED 0
#else
#define MEM_F_SHARED 0x1
#endif
#define MEM_F_EXACT 0x2
/* By default, free objects are linked by a pointer stored at the beginning of
 * the memory area. When DEBUG_MEMORY_POOLS is set, the allocated area is
 * inflated by the size of a pointer so that the link is placed at the end
 * of the objects. Hence free objects in pools remain intact. In addition,
 * this location is used to keep a pointer to the pool the object was
 * allocated from, and verify it's freed into the appropriate one.
 *
 * POOL_EXTRA is the number of extra bytes added to each object, and
 * POOL_LINK(pool, item) yields the address (void **) of the link slot of
 * <item>. With DEBUG_MEMORY_POOLS, <pool> must point to something exposing a
 * <size> field; otherwise <pool> is not evaluated. Both variants are fully
 * parenthesized so that the result can safely be combined with postfix
 * operators such as POOL_LINK(p, i)[0].
 */
#ifdef DEBUG_MEMORY_POOLS
#define POOL_EXTRA (sizeof(void *))
#define POOL_LINK(pool, item) ((void **)(((char *)(item)) + ((pool)->size)))
#else
#define POOL_EXTRA (0)
#define POOL_LINK(pool, item) ((void **)(item))
#endif
/* MAX_BASE_POOLS may be preset by the build; it falls back to 64 otherwise.
 * NOTE(review): POOL_AVG_SAMPLES is presumably the number of samples used for
 * the pools' <needed_avg> average - confirm against its users.
 */
#if !defined(MAX_BASE_POOLS)
#define MAX_BASE_POOLS 64
#endif
#define POOL_AVG_SAMPLES 1024
/* Head of a cache of objects belonging to one pool: it anchors the list of
 * cached objects and keeps the object size and the number of objects.
 */
struct pool_cache_head {
	struct list list;       /* objects of this pool are chained from here */
	size_t size;            /* size of one object */
	unsigned int count;     /* how many objects this pool holds */
};
/* Linkage of a cached object: each item is attached to two lists at once,
 * one per pool and one ordered by LRU.
 */
struct pool_cache_item {
	struct list by_pool;    /* attaches the item to the objects of its pool */
	struct list by_lru;     /* attaches the item to the LRU-ordered objects */
};
/* Pair made of a free list pointer and a sequence number, in the same order
 * as the first two fields of struct pool_head with lockless pools.
 * NOTE(review): presumably the operand of the double-word CAS used by the
 * lock-less pools - confirm against the pool code.
 */
struct pool_free_list {
	void **free_list;       /* free list pointer */
	uintptr_t seq;          /* sequence number */
};
/* Descriptor of a memory pool, aligned on 64 bytes. Note that even with
 * lockless pools the lock is still needed, though only for the flush()
 * operation.
 */
struct pool_head {
	/* NOTE(review): presumably the chain of free objects - confirm */
	void **free_list;
#ifdef CONFIG_HAP_LOCKLESS_POOLS
	uintptr_t seq;                  /* only present with lockless pools */
#endif
	__decl_thread(HA_SPINLOCK_T lock); /* spin lock */

	/* usage accounting */
	unsigned int used;              /* number of chunks currently in use */
	unsigned int needed_avg;        /* floating indicator between used and allocated */
	unsigned int allocated;         /* number of chunks that have been allocated */
	unsigned int limit;             /* hard limit on the number of chunks */
	unsigned int minavail;          /* number of chunks expected to be used */

	/* pool properties */
	unsigned int size;              /* size of a chunk */
	unsigned int flags;             /* MEM_F_* */
	unsigned int users;             /* how many pools share this zone */
	unsigned int failed;            /* count of failed allocations */
	struct list list;               /* member of the list of all known pools */
	char name[12];                  /* pool name */
} __attribute__((aligned(64)));
#endif /* _HAPROXY_POOL_T_H */
/*
* Local variables:
* c-indent-level: 8
* c-basic-offset: 8
* End:
*/