haproxy/include/types/global.h

346 lines
14 KiB
C
Raw Normal View History

/*
* include/types/global.h
* Global variables.
*
* Copyright (C) 2000-2012 Willy Tarreau - w@1wt.eu
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation, version 2.1
* exclusively.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef _TYPES_GLOBAL_H
#define _TYPES_GLOBAL_H
#include <netinet/in.h>
#include <common/config.h>
#include <common/initcall.h>
#include <common/hathreads.h>
#include <common/standard.h>
#include <types/listener.h>
#include <types/proxy.h>
#include <types/task.h>
#include <types/vars.h>
/* Provide a default for UNIX_MAX_PATH when nothing defined it earlier. */
#if !defined(UNIX_MAX_PATH)
# define UNIX_MAX_PATH 108
#endif
/* Bit flags for the modes of operation, as held in global.mode. */
#define MODE_DEBUG         0x001
#define MODE_DAEMON        0x002
#define MODE_QUIET         0x004
#define MODE_CHECK         0x008
#define MODE_VERBOSE       0x010
#define MODE_STARTING      0x020
#define MODE_FOREGROUND    0x040
#define MODE_MWORKER       0x080  /* Master Worker */
#define MODE_MWORKER_WAIT  0x100  /* Master Worker wait mode */
/* Last checks to perform; which ones apply depends on config options. */
#define LSTCHK_CAP_BIND  (1 << 0)  /* check that we can bind to any port */
#define LSTCHK_NETADM    (1 << 1)  /* check that we have CAP_NET_ADMIN */
/* Global tuning options */

/* available polling mechanisms */
#define GTUNE_USE_SELECT         (1<<0)
#define GTUNE_USE_POLL           (1<<1)
#define GTUNE_USE_EPOLL          (1<<2)
#define GTUNE_USE_KQUEUE         (1<<3)

/* platform-specific options */
#define GTUNE_USE_SPLICE         (1<<4)
#define GTUNE_USE_GAI            (1<<5)
#define GTUNE_USE_REUSEPORT      (1<<6)
#define GTUNE_RESOLVE_DONTFAIL   (1<<7)
#define GTUNE_SOCKET_TRANSFER    (1<<8)
#define GTUNE_NOEXIT_ONFAILURE   (1<<9)
#define GTUNE_USE_SYSTEMD        (1<<10) /* master-worker mode with systemd support ("-Ws"): status reported through sd_notify() */
#define GTUNE_BUSY_POLLING       (1<<11) /* "busy-polling": pollers never sleep while waiting for an I/O event or a timeout */
#define GTUNE_LISTENER_MQ        (1<<12)
#define GTUNE_SET_DUMPABLE       (1<<13)
#define GTUNE_USE_EVPORTS        (1<<14)
#define GTUNE_STRICT_LIMITS      (1<<15)
#define GTUNE_INSECURE_FORK      (1<<16) /* "insecure-fork-wanted": do not forbid process/thread creation in the workers */
#define GTUNE_INSECURE_SETUID    (1<<17)
/* Possible SSL verify modes for the server side. */
enum {
	SSL_SERVER_VERIFY_NONE     = 0,
	SSL_SERVER_VERIFY_REQUIRED = 1,
};
/* FIXME : this will have to be redefined correctly */
struct global {
int uid;
int gid;
int external_check;
int nbproc;
int nbthread;
unsigned int hard_stop_after; /* maximum time allowed to perform a soft-stop */
int maxconn, hardmaxconn;
int maxsslconn;
int ssl_session_max_cost; /* how many bytes an SSL session may cost */
int ssl_handshake_max_cost; /* how many bytes an SSL handshake may use */
int ssl_used_frontend; /* non-zero if SSL is used in a frontend */
int ssl_used_backend; /* non-zero if SSL is used in a backend */
int ssl_used_async_engines; /* number of used async engines */
unsigned int ssl_server_verify; /* default verify mode on servers side */
struct freq_ctr conn_per_sec;
struct freq_ctr sess_per_sec;
struct freq_ctr ssl_per_sec;
struct freq_ctr ssl_fe_keys_per_sec;
struct freq_ctr ssl_be_keys_per_sec;
struct freq_ctr comp_bps_in; /* bytes per second, before http compression */
struct freq_ctr comp_bps_out; /* bytes per second, after http compression */
struct freq_ctr out_32bps; /* #of 32-byte blocks emitted per second */
unsigned long long out_bytes; /* total #of bytes emitted */
int cps_lim, cps_max;
int sps_lim, sps_max;
int ssl_lim, ssl_max;
int ssl_fe_keys_max, ssl_be_keys_max;
unsigned int shctx_lookups, shctx_misses;
int comp_rate_lim; /* HTTP compression rate limit */
int maxpipes; /* max # of pipes */
int maxsock; /* max # of sockets */
int rlimit_nofile; /* default ulimit-n value : 0=unset */
int rlimit_memmax_all; /* default all-process memory limit in megs ; 0=unset */
int rlimit_memmax; /* default per-process memory limit in megs ; 0=unset */
long maxzlibmem; /* max RAM for zlib in bytes */
int mode;
unsigned int req_count; /* request counter (HTTP or TCP session) for logs and unique_id */
int last_checks;
int spread_checks;
int max_spread_checks;
int max_syslog_len;
char *chroot;
char *pidfile;
char *node, *desc; /* node name & description */
struct buffer log_tag; /* name for syslog */
struct list logsrvs;
char *log_send_hostname; /* set hostname in syslog header */
char *server_state_base; /* path to a directory where server state files can be found */
char *server_state_file; /* path to the file where server states are loaded from */
struct {
int maxpollevents; /* max number of poll events at once */
int maxaccept; /* max number of consecutive accept() */
int options; /* various tuning options */
int runqueue_depth;/* max number of tasks to run at once */
int recv_enough; /* how many input bytes at once are "enough" */
int bufsize; /* buffer size in bytes, defaults to BUFSIZE */
int maxrewrite; /* buffer max rewrite size in bytes, defaults to MAXREWRITE */
MAJOR: session: only wake up as many sessions as available buffers permit We've already experimented with three wake up algorithms when releasing buffers : the first naive one used to wake up far too many sessions, causing many of them not to get any buffer. The second approach which was still in use prior to this patch consisted in waking up either 1 or 2 sessions depending on the number of FDs we had released. And this was still inaccurate. The third one tried to cover the accuracy issues of the second and took into consideration the number of FDs the sessions would be willing to use, but most of the time we ended up waking up too many of them for nothing, or deadlocking by lack of buffers. This patch completely removes the need to allocate two buffers at once. Instead it splits allocations into critical and non-critical ones and implements a reserve in the pool for this. The deadlock situation happens when all buffers are be allocated for requests pending in a maxconn-limited server queue, because then there's no more way to allocate buffers for responses, and these responses are critical to release the servers's connection in order to release the pending requests. In fact maxconn on a server creates a dependence between sessions and particularly between oldest session's responses and latest session's requests. Thus, it is mandatory to get a free buffer for a response in order to release a server connection which will permit to release a request buffer. Since we definitely have non-symmetrical buffers, we need to implement this logic in the buffer allocation mechanism. What this commit does is implement a reserve of buffers which can only be allocated for responses and that will never be allocated for requests. This is made possible by the requester indicating how much margin it wants to leave after the allocation succeeds. 
Thus it is a cooperative allocation mechanism : the requester (process_session() in general) prefers not to get a buffer in order to respect other's need for response buffers. The session management code always knows if a buffer will be used for requests or responses, so that is not difficult : - either there's an applet on the initiator side and we really need the request buffer (since currently the applet is called in the context of the session) - or we have a connection and we really need the response buffer (in order to support building and sending an error message back) This reserve ensures that we don't take all allocatable buffers for requests waiting in a queue. The downside is that all the extra buffers are really allocated to ensure they can be allocated. But with small values it is not an issue. With this change, we don't observe any more deadlocks even when running with maxconn 1 on a server under severely constrained memory conditions. The code becomes a bit tricky, it relies on the scheduler's run queue to estimate how many sessions are already expected to run so that it doesn't wake up everyone with too few resources. A better solution would probably consist in having two queues, one for urgent requests and one for normal requests. A failed allocation for a session dealing with an error, a connection event, or the need for a response (or request when there's an applet on the left) would go to the urgent request queue, while other requests would go to the other queue. Urgent requests would be served from 1 entry in the pool, while the regular ones would be served only according to the reserve. Despite not yet having this, it works remarkably well. This mechanism is quite efficient, we don't perform too many wake up calls anymore. For 1 million sessions elapsed during massive memory contention, we observe about 4.5M calls to process_session() compared to 4.0M without memory constraints. 
Previously we used to observe up to 16M calls, which rougly means 12M failures. During a test run under high memory constraints (limit enforced to 27 MB instead of the 58 MB normally needed), performance used to drop by 53% prior to this patch. Now with this patch instead it *increases* by about 1.5%. The best effect of this change is that by limiting the memory usage to about 2/3 to 3/4 of what is needed by default, it's possible to increase performance by up to about 18% mainly due to the fact that pools are reused more often and remain hot in the CPU cache (observed on regular HTTP traffic with 20k objects, buffers.limit = maxconn/10, buffers.reserve = limit/2). Below is an example of scenario which used to cause a deadlock previously : - connection is received - two buffers are allocated in process_session() then released - one is allocated when receiving an HTTP request - the second buffer is allocated then released in process_session() for request parsing then connection establishment. - poll() says we can send, so the request buffer is sent and released - process session gets notified that the connection is now established and allocates two buffers then releases them - all other sessions do the same till one cannot get the request buffer without hitting the margin - and now the server responds. stream_interface allocates the response buffer and manages to get it since it's higher priority being for a response. - but process_session() cannot allocate the request buffer anymore => We could end up with all buffers used by responses so that none may be allocated for a request in process_session(). When the applet processing leaves the session context, the test will have to be changed so that we always allocate a response buffer regardless of the left side (eg: H2->H1 gateway). 
A final improvement would consists in being able to only retry the failed I/O operation without waking up a task, but to date all experiments to achieve this have proven not to be reliable enough.
2014-11-26 19:11:56 -05:00
int reserved_bufs; /* how many buffers can only be allocated for response */
int buf_limit; /* if not null, how many total buffers may only be allocated */
int client_sndbuf; /* set client sndbuf to this value if not null */
int client_rcvbuf; /* set client rcvbuf to this value if not null */
int server_sndbuf; /* set server sndbuf to this value if not null */
int server_rcvbuf; /* set server rcvbuf to this value if not null */
int chksize; /* check buffer size in bytes, defaults to BUFSIZE */
int pipesize; /* pipe size in bytes, system defaults if zero */
int max_http_hdr; /* max number of HTTP headers, use MAX_HTTP_HDR if zero */
int requri_len; /* max len of request URI, use REQURI_LEN if zero */
int cookie_len; /* max length of cookie captures */
int pattern_cache; /* max number of entries in the pattern cache. */
int sslcachesize; /* SSL cache size in session, defaults to 20000 */
int comp_maxlevel; /* max HTTP compression level */
int pool_low_ratio; /* max ratio of FDs used before we stop using new idle connections */
int pool_high_ratio; /* max ratio of FDs used before we start killing idle connections when creating new connections */
int pool_low_count; /* max number of opened fd before we stop using new idle connections */
int pool_high_count; /* max number of opened fd before we start killing idle connections when creating new connections */
unsigned short idle_timer; /* how long before an empty buffer is considered idle (ms) */
} tune;
struct {
char *prefix; /* path prefix of unix bind socket */
struct { /* UNIX socket permissions */
uid_t uid; /* -1 to leave unchanged */
gid_t gid; /* -1 to leave unchanged */
mode_t mode; /* 0 to leave unchanged */
} ux;
} unix_bind;
struct proxy *stats_fe; /* the frontend holding the stats settings */
struct vars vars; /* list of variables for the process scope. */
#ifdef USE_CPU_AFFINITY
struct {
unsigned long proc[MAX_PROCS]; /* list of CPU masks for the 32/64 first processes */
BUG/MEDIUM: threads: cpu-map designating a single thread/process are ignored Since commit 81492c989 ("MINOR: threads: flatten the per-thread cpu-map"), we don't keep the proc*thread matrix anymore to represent the full binding possibilities, but only the proc and thread ones. The problem is that the per-process binding is not the same for each thread and for the process, and the proc[] array was assumed to store the per-proc first thread value when doing this change. Worse, the logic present there tries to deal with thread ranges and process ranges in a way which automatically exclused the other possibility (since ranges cannot be used on both) but as such fails to apply changes if neither the process nor the thread is expressed as a range. The real problem comes from the fact that specifying cpu-map 1/1 doesn't yet reveal if the per-process mask or the per-thread mask needs to be updated. In practice it's the thread one but then the current storage doesn't allow to store the binding of the first thread of each other process in nbproc>1 configurations. When removing the proc*thread matrix, what ought to have been kept was both the thread column for process 1 and the process line for threads 1, but instead only the thread column was kept. This patch reintroduces the storage of the configuration for the first thread of each process so that it is again possible to store either the per-thread or per-process configuration. As a partial workaround for existing configurations, it is possible to systematically indicate at least two processes or two threads at once and map them by pairs or more so that at least two values are present in the range. E.g : # set processes 1-4 to cpus 0-3 : cpu-map auto:1-4/1 0 1 2 3 # or: cpu-map 1-2/1 0 1 cpu-map 2-3/1 2 3 # set threads 1-4 to cpus 0-3 : cpu-map auto:1/1-4 0 1 2 3 # or : cpu-map 1/1-2 0 1 cpu-map 3/3-4 2 3 This fix must be backported to 2.0.
2019-07-16 09:10:34 -04:00
unsigned long proc_t1[MAX_PROCS]; /* list of CPU masks for the 1st thread of each process */
unsigned long thread[MAX_THREADS]; /* list of CPU masks for the 32/64 first threads of the 1st process */
} cpu_map;
#endif
};
/* Option bits for mworker_proc. */
#define PROC_O_TYPE_MASTER   0x00000001
#define PROC_O_TYPE_WORKER   0x00000002
#define PROC_O_TYPE_PROG     0x00000004
/* 0x00000008 unused */
#define PROC_O_LEAVING       0x00000010  /* this process should be leaving */
/* 0x00000020 to 0x00000080 unused */
#define PROC_O_START_RELOAD  0x00000100  /* Start the process even if the master was re-executed */
/* Describes one process when running in master worker mode. */
struct mworker_proc {
	int pid;
	int options;            /* PROC_O_* option bits */
	char *id;
	char **command;
	char *path;
	char *version;
	int ipc_fd[2];          /* 0 is master side, 1 is worker side */
	int relative_pid;
	int reloads;
	int timestamp;
	struct server *srv;     /* the server entry in the master proxy */
	struct list list;
	int uid;
	int gid;
};
/* Declarations only: these objects are defined elsewhere. */
extern struct global global;
extern int pid;                         /* current process id */
extern int relative_pid;                /* process id starting at 1 */
extern unsigned long pid_bit;           /* bit corresponding to the process id */
extern unsigned long all_proc_mask;     /* mask of all processes */
extern int actconn;                     /* # of active sessions */
extern int listeners;
extern int jobs;                        /* # of active jobs (listeners, sessions, open devices) */
extern int unstoppable_jobs;            /* # of active jobs that can't be stopped during a soft stop */
extern int active_peers;                /* # of active peers (connection attempts and successes) */
extern int connected_peers;             /* # of really connected peers */
extern THREAD_LOCAL struct buffer trash;
extern int nb_oldpids;                  /* contains the number of old pids found */
extern const int zero;
extern const int one;
extern const struct linger nolinger;
extern int stopping;                    /* non zero means stopping in progress */
extern int killed;                      /* >0 means a hard-stop is triggered, >1 means hard-stop immediately */
extern char hostname[MAX_HOSTNAME_LEN];
extern char localpeer[MAX_HOSTNAME_LEN];
extern unsigned int warned;             /* bitfield of a few warnings to emit just once */
extern volatile unsigned long sleeping_thread_mask;
extern struct list proc_list;           /* list of process in mworker mode */
extern struct mworker_proc *proc_self;  /* process structure of current process */
extern int master;                      /* 1 if in master, 0 otherwise */
extern unsigned int rlim_fd_cur_at_boot;
extern unsigned int rlim_fd_max_at_boot;
extern int atexit_flag;
/* Bit values to go with "warned" above. The low bits are currently unassigned:
 *   0x00000001 (previously: WARN_BLOCK_DEPRECATED)
 *   0x00000002
 *   0x00000004 (previously: WARN_REDISPATCH_DEPRECATED)
 *   0x00000008 (previously: WARN_CLITO_DEPRECATED)
 *   0x00000010 (previously: WARN_SRVTO_DEPRECATED)
 *   0x00000020 (previously: WARN_CONTO_DEPRECATED)
 */
#define WARN_FORCECLOSE_DEPRECATED  0x00000040
/* To be used with "warned" and the WARN_* bits. Returns 1 when any bit of
 * <warning> is already set in "warned"; otherwise sets those bits and
 * returns 0.
 */
static inline int already_warned(unsigned int warning)
{
	int seen = (warned & warning) != 0;

	if (!seen)
		warned |= warning;
	return seen;
}
/* Returns <mask> when it is non-zero, otherwise falls back to all_proc_mask. */
static inline unsigned long proc_mask(unsigned long mask)
{
	if (!mask)
		mask = all_proc_mask;
	return mask;
}
/* Returns <mask> when it is non-zero, otherwise falls back to all_threads_mask. */
static inline unsigned long thread_mask(unsigned long mask)
{
	if (!mask)
		mask = all_threads_mask;
	return mask;
}
int tell_old_pids(int sig);
int delete_oldpid(int pid);
void deinit(void);

/* Registration functions. Each one has a matching REGISTER_* convenience
 * macro in this header. The callback pointer types that use an empty
 * parameter list are deliberately left untouched so that existing
 * registrants keep compiling unchanged.
 */
void hap_register_build_opts(const char *str, int must_free);
void hap_register_post_check(int (*fct)());
void hap_register_post_proxy_check(int (*fct)(struct proxy *));
void hap_register_post_server_check(int (*fct)(struct server *));
void hap_register_post_deinit(void (*fct)());
void hap_register_proxy_deinit(void (*fct)(struct proxy *));
void hap_register_server_deinit(void (*fct)(struct server *));
void hap_register_per_thread_alloc(int (*fct)());
void hap_register_per_thread_init(int (*fct)());
void hap_register_per_thread_deinit(void (*fct)());
void hap_register_per_thread_free(int (*fct)());

void mworker_accept_wrapper(int fd);
/* Declared with (void) so that it is a real prototype: calls passing
 * arguments are rejected at compile time.
 */
void mworker_reload(void);
/* Convenience wrappers: each expands to an INITCALL at stage STG_REGISTER
 * that invokes the corresponding hap_register_*() function.
 */

/* static build options declared from a file */
#define REGISTER_BUILD_OPTS(str) \
	INITCALL2(STG_REGISTER, hap_register_build_opts, (str), 0)

/* post-check callback declared from a file */
#define REGISTER_POST_CHECK(fct) \
	INITCALL1(STG_REGISTER, hap_register_post_check, (fct))

/* post-proxy-check callback declared from a file */
#define REGISTER_POST_PROXY_CHECK(fct) \
	INITCALL1(STG_REGISTER, hap_register_post_proxy_check, (fct))

/* post-server-check callback declared from a file */
#define REGISTER_POST_SERVER_CHECK(fct) \
	INITCALL1(STG_REGISTER, hap_register_post_server_check, (fct))

/* post-deinit callback declared from a file */
#define REGISTER_POST_DEINIT(fct) \
	INITCALL1(STG_REGISTER, hap_register_post_deinit, (fct))

/* proxy-deinit callback declared from a file */
#define REGISTER_PROXY_DEINIT(fct) \
	INITCALL1(STG_REGISTER, hap_register_proxy_deinit, (fct))

/* server-deinit callback declared from a file */
#define REGISTER_SERVER_DEINIT(fct) \
	INITCALL1(STG_REGISTER, hap_register_server_deinit, (fct))

/* per-thread allocation callback declared from a file */
#define REGISTER_PER_THREAD_ALLOC(fct) \
	INITCALL1(STG_REGISTER, hap_register_per_thread_alloc, (fct))

/* per-thread init callback declared from a file */
#define REGISTER_PER_THREAD_INIT(fct) \
	INITCALL1(STG_REGISTER, hap_register_per_thread_init, (fct))

/* per-thread deinit callback declared from a file */
#define REGISTER_PER_THREAD_DEINIT(fct) \
	INITCALL1(STG_REGISTER, hap_register_per_thread_deinit, (fct))

/* per-thread free callback declared from a file */
#define REGISTER_PER_THREAD_FREE(fct) \
	INITCALL1(STG_REGISTER, hap_register_per_thread_free, (fct))
#endif /* _TYPES_GLOBAL_H */
/*
* Local variables:
* c-indent-level: 8
* c-basic-offset: 8
* End:
*/