haproxy/src/haproxy.c

3300 lines
93 KiB
C
Raw Normal View History

/*
* HA-Proxy : High Availability-enabled HTTP/TCP proxy
[RELEASE] Released version 2.0-dev1 Released version 2.0-dev1 with the following main changes : - MINOR: mux-h2: only increase the connection window with the first update - REGTESTS: remove the expected window updates from H2 handshakes - BUG/MINOR: mux-h2: make empty HEADERS frame return a connection error - BUG/MEDIUM: mux-h2: mark that we have too many CS once we have more than the max - MEDIUM: mux-h2: remove padlen during headers phase - MINOR: h2: add a bit-based frame type representation - MINOR: mux-h2: remove useless check for empty frame length in h2s_decode_headers() - MEDIUM: mux-h2: decode HEADERS frames before allocating the stream - MINOR: mux-h2: make h2c_send_rst_stream() use the dummy stream's error code - MINOR: mux-h2: add a new dummy stream for the REFUSED_STREAM error code - MINOR: mux-h2: fail stream creation more cleanly using RST_STREAM - MINOR: buffers: add a new b_move() function - MINOR: mux-h2: make h2_peek_frame_hdr() support an offset - MEDIUM: mux-h2: handle decoding of CONTINUATION frames - CLEANUP: mux-h2: remove misleading comments about CONTINUATION - BUG/MEDIUM: servers: Don't try to reuse connection if we switched server. - BUG/MEDIUM: tasks: Decrement tasks_run_queue in tasklet_free(). - BUG/MINOR: htx: send the proper authenticate header when using http-request auth - BUG/MEDIUM: mux_h2: Don't add to the idle list if we're full. - BUG/MEDIUM: servers: Fail if we fail to allocate a conn_stream. - BUG/MAJOR: servers: Use the list api correctly to avoid crashes. - BUG/MAJOR: servers: Correctly use LIST_ELEM(). - BUG/MAJOR: sessions: Use an unlimited number of servers for the conn list. - BUG/MEDIUM: servers: Flag the stream_interface on handshake error. - MEDIUM: servers: Be smarter when switching connections. - MEDIUM: sessions: Keep track of which connections are idle. - MINOR: payload: add sample fetch for TLS ALPN - BUG/MEDIUM: log: don't mark log FDs as non-blocking on terminals - MINOR: channel: Add the function channel_add_input - MINOR: stats/htx: Call channel_add_input instead of updating channel state by hand - BUG/MEDIUM: cache: Be sure to end the forwarding when XFER length is unknown - BUG/MAJOR: htx: Return the good block address after a defrag - MINOR: lb: allow redispatch when using consistent hash - CLEANUP: mux-h2: fix end-of-stream flag name when processing headers - BUG/MEDIUM: mux-h2: always restart reading if data are available - BUG/MINOR: mux-h2: set the stream-full flag when leaving h2c_decode_headers() - BUG/MINOR: mux-h2: don't check the CS count in h2c_bck_handle_headers() - BUG/MINOR: mux-h2: mark end-of-stream after processing response HEADERS, not before - BUG/MINOR: mux-h2: only update rxbuf's length for H1 headers - BUG/MEDIUM: mux-h1: use per-direction flags to indicate transitions - BUG/MEDIUM: mux-h1: make HTX chunking consistent with H2 - BUG/MAJOR: stream-int: Update the stream expiration date in stream_int_notify() - BUG/MEDIUM: proto-htx: Set SI_FL_NOHALF on server side when request is done - BUG/MEDIUM: mux-h1: Add a task to handle connection timeouts - MINOR: mux-h2: make h2c_decode_headers() return a status, not a count - MINOR: mux-h2: add a new dummy stream : h2_error_stream - MEDIUM: mux-h2: make h2c_decode_headers() support recoverable errors - BUG/MINOR: mux-h2: detect when the HTX EOM block cannot be added after headers - MINOR: mux-h2: remove a misleading and impossible test - CLEANUP: mux-h2: clean the stream error path on HEADERS frame processing - MINOR: mux-h2: check for too many streams only for idle streams - MINOR: mux-h2: set H2_SF_HEADERS_RCVD when a HEADERS frame was decoded - BUG/MEDIUM: mux-h2: decode trailers in HEADERS frames - MINOR: h2: add h2_make_h1_trailers to turn H2 headers to H1 trailers - MEDIUM: mux-h2: pass trailers to H1 (legacy mode) - MINOR: htx: add a new function to add a block without filling it - MINOR: h2: add h2_make_htx_trailers to turn H2 headers to HTX trailers - MEDIUM: mux-h2: pass trailers to HTX - MINOR: mux-h1: parse the content-length header on output and set H1_MF_CLEN - BUG/MEDIUM: mux-h1: don't enforce chunked encoding on requests - MINOR: mux-h2: make HTX_BLK_EOM processing idempotent - MINOR: h1: make the H1 headers block parser able to parse headers only - MEDIUM: mux-h2: emit HEADERS frames when facing HTX trailers blocks - MINOR: stream/htx: Add info about the HTX structs in "show sess all" command - MINOR: stream: Add the subscription events of SIs in "show sess all" command - MINOR: mux-h1: Add the subscription events in "show fd" command - BUG/MEDIUM: h1: Get the h1m state when restarting the headers parsing - BUG/MINOR: cache/htx: Be sure to count partial trailers - BUG/MEDIUM: h1: In h1_init(), wake the tasklet instead of calling h1_recv(). - BUG/MEDIUM: server: Defer the mux init until after xprt has been initialized. - MINOR: connections: Remove a stall comment. - BUG/MEDIUM: cli: make "show sess" really thread-safe - BUILD: add a new file "version.c" to carry version updates - MINOR: stream/htx: add the HTX flags output in "show sess all" - MINOR: stream/cli: fix the location of the waiting flag in "show sess all" - MINOR: stream/cli: report more info about the HTTP messages on "show sess all" - BUG/MINOR: lua: bad args are returned for Lua actions - BUG/MEDIUM: lua: dead lock when Lua tasks are trigerred - MINOR: htx: Add an helper function to get the max space usable for a block - MINOR: channel/htx: Add HTX version for some helper functions - BUG/MEDIUM: cache/htx: Respect the reserve when cached objects are served - BUG/MINOR: stats/htx: Respect the reserve when the stats page is dumped - DOC: regtest: make it clearer what the purpose of the "broken" series is - REGTEST: mailers: add new test for 'mailers' section - REGTEST: Add a reg test for health-checks over SSL/TLS. - BUG/MINOR: mux-h1: Close connection on shutr only when shutw was really done - MEDIUM: mux-h1: Clarify how shutr/shutw are handled - BUG/MINOR: compression: Disable it if another one is already in progress - BUG/MINOR: filters: Detect cache+compression config on legacy HTTP streams - BUG/MINOR: cache: Disable the cache if any compression filter precedes it - REGTEST: Add some informatoin to test results. - MINOR: htx: Add a function to truncate all blocks after a specific offset - MINOR: channel/htx: Add the HTX version of channel_truncate/erase - BUG/MINOR: proto_htx: Use HTX versions to truncate or erase a buffer - BUG/CRITICAL: mux-h2: re-check the frame length when PRIORITY is used - DOC: Fix typo in req.ssl_alpn example (commit 4afdd138424ab...) - DOC: http-request cache-use / http-response cache-store expects cache name - REGTEST: "capture (request|response)" regtest. - BUG/MINOR: lua/htx: Respect the reserve when data are send from an HTX applet - REGTEST: filters: add compression test - BUG/MEDIUM: init: Initialize idle_orphan_conns for first server in server-template - BUG/MEDIUM: ssl: Disable anti-replay protection and set max data with 0RTT. - DOC: Be a bit more explicit about allow-0rtt security implications. - MINOR: mux-h1: make the mux_h1_ops struct static - BUILD: makefile: add an EXTRA_OBJS variable to help build optional code - BUG/MEDIUM: connection: properly unregister the mux on failed initialization - BUG/MAJOR: cache: fix confusion between zero and uninitialized cache key - REGTESTS: test case for map_regm commit 271022150d - REGTESTS: Basic tests for concat,strcmp,word,field,ipmask converters - REGTESTS: Basic tests for using maps to redirect requests / select backend - DOC: REGTESTS README varnishtest -Dno-htx= define. - MINOR: spoe: Make the SPOE filter compatible with HTX proxies - MINOR: checks: Store the proxy in checks. - BUG/MEDIUM: checks: Avoid having an associated server for email checks. - REGTEST: Switch to vtest. - REGTEST: Adapt reg test doc files to vtest. - BUG/MEDIUM: h1: Make sure we destroy an inactive connectin that did shutw. - BUG/MINOR: base64: dec func ignores padding for output size checking - BUG/MEDIUM: ssl: missing allocation failure checks loading tls key file - MINOR: ssl: add support of aes256 bits ticket keys on file and cli. - BUG/MINOR: backend: don't use url_param_name as a hint for BE_LB_ALGO_PH - BUG/MINOR: backend: balance uri specific options were lost across defaults - BUG/MINOR: backend: BE_LB_LKUP_CHTREE is a value, not a bit - MINOR: backend: move url_param_name/len to lbprm.arg_str/len - MINOR: backend: make headers and RDP cookie also use arg_str/len - MINOR: backend: add new fields in lbprm to store more LB options - MINOR: backend: make the header hash use arg_opt1 for use_domain_only - MINOR: backend: remap the balance uri settings to lbprm.arg_opt{1,2,3} - MINOR: backend: move hash_balance_factor out of chash - MEDIUM: backend: move all LB algo parameters into an union - MINOR: backend: make the random algorithm support a number of draws - BUILD/MEDIUM: da: Necessary code changes for new buffer API. - BUG/MINOR: stick_table: Prevent conn_cur from underflowing - BUG: 51d: Changes to the buffer API in 1.9 were not applied to the 51Degrees code. - BUG/MEDIUM: stats: Get the right scope pointer depending on HTX is used or not - DOC: add a missing space in the documentation for bc_http_major - REGTEST: checks basic stats webpage functionality - BUG/MEDIUM: servers: Make assign_tproxy_address work when ALPN is set. - BUG/MEDIUM: connections: Add the CO_FL_CONNECTED flag if a send succeeded. - DOC: add github issue templates - MINOR: cfgparse: Extract some code to be re-used. - CLEANUP: cfgparse: Return asap from cfg_parse_peers(). - CLEANUP: cfgparse: Code reindentation. - MINOR: cfgparse: Useless frontend initialization in "peers" sections. - MINOR: cfgparse: Rework peers frontend init. - MINOR: cfgparse: Simplication. - MINOR: cfgparse: Make "peer" lines be parsed as "server" lines. - MINOR: peers: Make outgoing connection to SSL/TLS peers work. - MINOR: cfgparse: SSL/TLS binding in "peers" sections. - DOC: peers: SSL/TLS documentation for "peers" - BUG/MINOR: startup: certain goto paths in init_pollers fail to free - BUG/MEDIUM: checks: fix recent regression on agent-check making it crash - BUG/MINOR: server: don't always trust srv_check_health when loading a server state - BUG/MINOR: check: Wake the check task if the check is finished in wake_srv_chk() - BUG/MEDIUM: ssl: Fix handling of TLS 1.3 KeyUpdate messages - DOC: mention the effect of nf_conntrack_tcp_loose on src/dst - BUG/MINOR: proto-htx: Return an error if all headers cannot be received at once - BUG/MEDIUM: mux-h2/htx: Respect the channel's reserve - BUG/MINOR: mux-h1: Apply the reserve on the channel's buffer only - BUG/MINOR: mux-h1: avoid copying output over itself in zero-copy - BUG/MAJOR: mux-h2: don't destroy the stream on failed allocation in h2_snd_buf() - BUG/MEDIUM: backend: also remove from idle list muxes that have no more room - BUG/MEDIUM: mux-h2: properly abort on trailers decoding errors - MINOR: h2: declare new sets of frame types - BUG/MINOR: mux-h2: CONTINUATION in closed state must always return GOAWAY - BUG/MINOR: mux-h2: headers-type frames in HREM are always a connection error - BUG/MINOR: mux-h2: make it possible to set the error code on an already closed stream - BUG/MINOR: hpack: return a compression error on invalid table size updates - MINOR: server: make sure pool-max-conn is >= -1 - BUG/MINOR: stream: take care of synchronous errors when trying to send - CLEANUP: server: fix indentation mess on idle connections - BUG/MINOR: mux-h2: always check the stream ID limit in h2_avail_streams() - BUG/MINOR: mux-h2: refuse to allocate a stream with too high an ID - BUG/MEDIUM: backend: never try to attach to a mux having no more stream available - MINOR: server: add a max-reuse parameter - MINOR: mux-h2: always consider a server's max-reuse parameter - MEDIUM: stream-int: always mark pending outgoing SI_ST_CON - MINOR: stream: don't wait before retrying after a failed connection reuse - MEDIUM: h2: always parse and deduplicate the content-length header - BUG/MINOR: mux-h2: always compare content-length to the sum of DATA frames - CLEANUP: h2: Remove debug printf in mux_h2.c - MINOR: cfgparse: make the process/thread parser support a maximum value - MINOR: threads: make MAX_THREADS configurable at build time - DOC: nbthread is no longer experimental. - BUG/MINOR: listener: always fill the source address for accepted socketpairs - BUG/MINOR: mux-h2: do not report available outgoing streams after GOAWAY - BUG/MINOR: spoe: corrected fragmentation string size - BUG/MINOR: task: fix possibly missed event in inter-thread wakeups - BUG/MEDIUM: servers: Attempt to reuse an unfinished connection on retry. - BUG/MEDIUM: backend: always call si_detach_endpoint() on async connection failure - SCRIPTS: add the issue tracker URL to the announce script - MINOR: peers: Extract some code to be reused. - CLEANUP: peers: Indentation fixes. - MINOR: peers: send code factorization. - MINOR: peers: Add new functions to send code and reduce the I/O handler. - MEDIUM: peers: synchronizaiton code factorization to reduce the size of the I/O handler. - MINOR: peers: Move update receive code to reduce the size of the I/O handler. - MINOR: peers: Move ack, switch and definition receive code to reduce the size of the I/O handler. - MINOR: peers: Move high level receive code to reduce the size of I/O handler. - CLEANUP: peers: Be more generic. - MINOR: peers: move error handling to reduce the size of the I/O handler. - MINOR: peers: move messages treatment code to reduce the size of the I/O handler. - MINOR: peers: move send code to reduce the size of the I/O handler. - CLEANUP: peers: Remove useless statements. - MINOR: peers: move "hello" message treatment code to reduce the size of the I/O handler. - MINOR: peers: move peer initializations code to reduce the size of the I/O handler. - CLEANUP: peers: factor the error handling code in peer_treet_updatemsg() - CLEANUP: peers: factor error handling in peer_treat_definedmsg() - BUILD/MINOR: peers: shut up a build warning introduced during last cleanup - BUG/MEDIUM: mux-h2: only close connection on request frames on closed streams - CLEANUP: mux-h2: remove two useless but misleading assignments - BUG/MEDIUM: checks: Check that conn_install_mux succeeded. - BUG/MEDIUM: servers: Only destroy a conn_stream we just allocated. - BUG/MEDIUM: servers: Don't add an incomplete conn to the server idle list. - BUG/MEDIUM: checks: Don't try to set ALPN if connection failed. - BUG/MEDIUM: h2: In h2_send(), stop the loop if we failed to alloc a buf. - BUG/MEDIUM: peers: Handle mux creation failure. - BUG/MEDIUM: servers: Close the connection if we failed to install the mux. - BUG/MEDIUM: compression: Rewrite strong ETags - BUG/MINOR: deinit: tcp_rep.inspect_rules not deinit, add to deinit - CLEANUP: mux-h2: remove misleading leftover test on h2s' nullity - BUG/MEDIUM: mux-h2: wake up flow-controlled streams on initial window update - BUG/MEDIUM: mux-h2: fix two half-closed to closed transitions - BUG/MEDIUM: mux-h2: make sure never to send GOAWAY on too old streams - BUG/MEDIUM: mux-h2: do not abort HEADERS frame before decoding them - BUG/MINOR: mux-h2: make sure response HEADERS are not received in other states than OPEN and HLOC - MINOR: h2: add a generic frame checker - MEDIUM: mux-h2: check the frame validity before considering the stream state - CLEANUP: mux-h2: remove stream ID and frame length checks from the frame parsers - BUG/MINOR: mux-h2: make sure request trailers on aborted streams don't break the connection - DOC: compression: Update the reasons for disabled compression - BUG/MEDIUM: buffer: Make sure b_is_null handles buffers waiting for allocation. - DOC: htx: make it clear that htxbuf() and htx_from_buf() always return valid pointers - MINOR: htx: never check for null htx pointer in htx_is_{,not_}empty() - MINOR: mux-h2: consistently rely on the htx variable to detect the mode - BUG/MEDIUM: peers: Peer addresses parsing broken. - BUG/MEDIUM: mux-h1: Don't add "transfer-encoding" if message-body is forbidden - BUG/MEDIUM: connections: Don't forget to remove CO_FL_SESS_IDLE. - BUG/MINOR: stream: don't close the front connection when facing a backend error - BUG/MEDIUM: mux-h2: wait for the mux buffer to be empty before closing the connection - MINOR: stream-int: add a new flag to mention that we want the connection to be killed - MINOR: connstream: have a new flag CS_FL_KILL_CONN to kill a connection - BUG/MEDIUM: mux-h2: do not close the connection on aborted streams - BUG/MINOR: server: fix logic flaw in idle connection list management - MINOR: mux-h2: max-concurrent-streams should be unsigned - MINOR: mux-h2: make sure to only check concurrency limit on the frontend - MINOR: mux-h2: learn and store the peer's advertised MAX_CONCURRENT_STREAMS setting - BUG/MEDIUM: mux-h2: properly consider the peer's advertised max-concurrent-streams - MINOR: xref: Add missing barriers. - MINOR: muxes: Don't bother to LIST_DEL(&conn->list) before calling conn_free(). - MINOR: debug: Add an option that causes random allocation failures. - BUG/MEDIUM: backend: always release the previous connection into its own target srv_list - BUG/MEDIUM: htx: check the HTX compatibility in dynamic use-backend rules - BUG/MINOR: tune.fail-alloc: Don't forget to initialize ret. - BUG/MINOR: backend: check srv_conn before dereferencing it - BUG/MEDIUM: mux-h2: always omit :scheme and :path for the CONNECT method - BUG/MEDIUM: mux-h2: always set :authority on request output - BUG/MEDIUM: stream: Don't forget to free s->unique_id in stream_free(). - BUG/MINOR: threads: fix the process range of thread masks - BUG/MINOR: config: fix bind line thread mask validation - CLEANUP: threads: fix misleading comment about all_threads_mask - CLEANUP: threads: use nbits to calculate the thread mask - OPTIM: listener: optimize cache-line packing for struct listener - MINOR: tools: improve the popcount() operation - MINOR: config: keep an all_proc_mask like we have all_threads_mask - MINOR: global: add proc_mask() and thread_mask() - MINOR: config: simplify bind_proc processing using proc_mask() - MINOR: threads: make use of thread_mask() to simplify some thread calculations - BUG/MINOR: compression: properly report compression stats in HTX mode - BUG/MINOR: task: close a tiny race in the inter-thread wakeup - BUG/MAJOR: config: verify that targets of track-sc and stick rules are present - BUG/MAJOR: spoe: verify that backends used by SPOE cover all their callers' processes - BUG/MAJOR: htx/backend: Make all tests on HTTP messages compatible with HTX - BUG/MINOR: config: make sure to count the error on incorrect track-sc/stick rules - DOC: ssl: Clarify when pre TLSv1.3 cipher can be used - DOC: ssl: Stop documenting ciphers example to use - BUG/MINOR: spoe: do not assume agent->rt is valid on exit - BUG/MINOR: lua: initialize the correct idle conn lists for the SSL sockets - BUG/MEDIUM: spoe: initialization depending on nbthread must be done last - BUG/MEDIUM: server: initialize the idle conns list after parsing the config - BUG/MEDIUM: server: initialize the orphaned conns lists and tasks at the end - MINOR: config: make MAX_PROCS configurable at build time - BUG/MAJOR: spoe: Don't try to get agent config during SPOP healthcheck - BUG/MINOR: config: Reinforce validity check when a process number is parsed - BUG/MEDIUM: peers: check that p->srv actually exists before using p->srv->use_ssl - CONTRIB: contrib/prometheus-exporter: Add a Prometheus exporter for HAProxy - BUG/MINOR: mux-h1: verify the request's version before dropping connection: keep-alive - BUG: 51d: In Hash Trie, multi header matching was affected by the header names stored globaly. - MEDIUM: 51d: Enabled multi threaded operation in the 51Degrees module. - BUG/MAJOR: stream: avoid double free on unique_id - BUILD/MINOR: stream: avoid a build warning with threads disabled - BUILD/MINOR: tools: fix build warning in the date conversion functions - BUILD/MINOR: peers: remove an impossible null test in intencode() - BUILD/MINOR: htx: fix some potential null-deref warnings with http_find_stline - BUG/MEDIUM: peers: Missing peer initializations. - BUG/MEDIUM: http_fetch: fix the "base" and "base32" fetch methods in HTX mode - BUG/MEDIUM: proto_htx: Fix data size update if end of the cookie is removed - BUG/MEDIUM: http_fetch: fix "req.body_len" and "req.body_size" fetch methods in HTX mode - BUILD/MEDIUM: initcall: Fix build on MacOS. - BUG/MEDIUM: mux-h2/htx: Always set CS flags before exiting h2_rcv_buf() - MINOR: h2/htx: Set the flag HTX_SL_F_BODYLESS for messages without body - BUG/MINOR: mux-h1: Add "transfer-encoding" header on outgoing requests if needed - BUG/MINOR: mux-h2: Don't add ":status" pseudo-header on trailers - BUG/MINOR: proto-htx: Consider a XFER_LEN message as chunked by default - BUG/MEDIUM: h2/htx: Correctly handle interim responses when HTX is enabled - MINOR: mux-h2: Set HTX extra value when possible - BUG/MEDIUM: htx: count the amount of copied data towards the final count - MINOR: mux-h2: make the H2 MAX_FRAME_SIZE setting configurable - BUG/MEDIUM: mux-h2/htx: send an empty DATA frame on empty HTX trailers - BUG/MEDIUM: servers: Use atomic operations when handling curr_idle_conns. - BUG/MEDIUM: servers: Add a per-thread counter of idle connections. - MINOR: fd: add a new my_closefrom() function to close all FDs - MINOR: checks: use my_closefrom() to close all FDs - MINOR: fd: implement an optimised my_closefrom() function - BUG/MINOR: fd: make sure my_closefrom() doesn't miss some FDs - BUG/MAJOR: fd/threads, task/threads: ensure all spin locks are unlocked - BUG/MAJOR: listener: Make sure the listener exist before using it. - MINOR: fd: Use closefrom() as my_closefrom() if supported. - BUG/MEDIUM: mux-h1: Report the right amount of data xferred in h1_rcv_buf() - BUG/MINOR: channel: Set CF_WROTE_DATA when outgoing data are skipped - MINOR: htx: Add function to drain data from an HTX message - MINOR: channel/htx: Add function to skips output bytes from an HTX channel - BUG/MAJOR: cache/htx: Set the start-line offset when a cached object is served - BUG/MEDIUM: cache: Get objects from the cache only for GET and HEAD requests - BUG/MINOR: cache/htx: Return only the headers of cached objects to HEAD requests - BUG/MINOR: mux-h1: Always initilize h1m variable in h1_process_input() - BUG/MEDIUM: proto_htx: Fix functions applying regex filters on HTX messages - BUG/MEDIUM: h2: advertise to servers that we don't support push - MINOR: standard: Add a function to parse uints (dotted notation). - MINOR: arg: Add support for ARGT_PBUF_FNUM arg type. - MINOR: http_fetch: add "req.ungrpc" sample fetch for gRPC. - MINOR: sample: Add two sample converters for protocol buffers. - DOC: sample: Add gRPC related documentation.
2019-02-26 10:43:49 -05:00
* Copyright 2000-2019 Willy Tarreau <willy@haproxy.org>.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
* Please refer to RFC7230 - RFC7235 informations about HTTP protocol, and
* RFC6265 for informations about cookies usage. More generally, the IETF HTTP
* Working Group's web site should be consulted for protocol related changes :
*
* http://ftp.ics.uci.edu/pub/ietf/http/
*
* Pending bugs (may be not fixed because never reproduced) :
* - solaris only : sometimes, an HTTP proxy with only a dispatch address causes
* the proxy to terminate (no core) if the client breaks the connection during
* the response. Seen on 1.1.8pre4, but never reproduced. May not be related to
* the snprintf() bug since requests were simple (GET / HTTP/1.0), but may be
* related to missing setsid() (fixed in 1.1.15)
* - a proxy with an invalid config will prevent the startup even if disabled.
*
* ChangeLog has moved to the CHANGELOG file.
*
*/
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <ctype.h>
#include <dirent.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/tcp.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <net/if.h>
#include <netdb.h>
#include <fcntl.h>
#include <errno.h>
#include <signal.h>
#include <stdarg.h>
#include <sys/resource.h>
#include <sys/wait.h>
#include <time.h>
#include <syslog.h>
#include <grp.h>
#ifdef USE_CPU_AFFINITY
#include <sched.h>
#if defined(__FreeBSD__) || defined(__DragonFly__)
#include <sys/param.h>
#ifdef __FreeBSD__
#include <sys/cpuset.h>
#endif
#include <pthread_np.h>
#endif
#endif
#if defined(USE_PRCTL)
#include <sys/prctl.h>
#endif
#ifdef DEBUG_FULL
#include <assert.h>
#endif
MEDIUM: mworker: Add systemd `Type=notify` support This patch adds support for `Type=notify` to the systemd unit. Supporting `Type=notify` improves both starting as well as reloading of the unit, because systemd will be let known when the action completed. See this quote from `systemd.service(5)`: > Note however that reloading a daemon by sending a signal (as with the > example line above) is usually not a good choice, because this is an > asynchronous operation and hence not suitable to order reloads of > multiple services against each other. It is strongly recommended to > set ExecReload= to a command that not only triggers a configuration > reload of the daemon, but also synchronously waits for it to complete. By making systemd aware of a reload in progress it is able to wait until the reload actually succeeded. This patch introduces both a new `USE_SYSTEMD` build option which controls including the sd-daemon library as well as a `-Ws` runtime option which runs haproxy in master-worker mode with systemd support. When haproxy is running in master-worker mode with systemd support it will send status messages to systemd using `sd_notify(3)` in the following cases: - The master process forked off the worker processes (READY=1) - The master process entered the `mworker_reload()` function (RELOADING=1) - The master process received the SIGUSR1 or SIGTERM signal (STOPPING=1) Change the unit file to specify `Type=notify` and replace master-worker mode (`-W`) with master-worker mode with systemd support (`-Ws`). Future evolutions of this feature could include making use of the `STATUS` feature of `sd_notify()` to send information about the number of active connections to systemd. This would require bidirectional communication between the master and the workers and thus is left for future work.
2017-11-20 09:58:35 -05:00
#if defined(USE_SYSTEMD)
#include <systemd/sd-daemon.h>
#endif
#include <common/base64.h>
#include <common/cfgparse.h>
#include <common/chunk.h>
#include <common/compat.h>
#include <common/config.h>
#include <common/defaults.h>
#include <common/errors.h>
#include <common/initcall.h>
#include <common/memory.h>
#include <common/mini-clist.h>
MAJOR: namespace: add Linux network namespace support This patch makes it possible to create binds and servers in separate namespaces. This can be used to proxy between multiple completely independent virtual networks (with possibly overlapping IP addresses) and a non-namespace-aware proxy implementation that supports the proxy protocol (v2). The setup is something like this: net1 on VLAN 1 (namespace 1) -\ net2 on VLAN 2 (namespace 2) -- haproxy ==== proxy (namespace 0) net3 on VLAN 3 (namespace 3) -/ The proxy is configured to make server connections through haproxy and sending the expected source/target addresses to haproxy using the proxy protocol. The network namespace setup on the haproxy node is something like this: = 8< = $ cat setup.sh ip netns add 1 ip link add link eth1 type vlan id 1 ip link set eth1.1 netns 1 ip netns exec 1 ip addr add 192.168.91.2/24 dev eth1.1 ip netns exec 1 ip link set eth1.$id up ... = 8< = = 8< = $ cat haproxy.cfg frontend clients bind 127.0.0.1:50022 namespace 1 transparent default_backend scb backend server mode tcp server server1 192.168.122.4:2222 namespace 2 send-proxy-v2 = 8< = A bind line creates the listener in the specified namespace, and connections originating from that listener also have their network namespace set to that of the listener. A server line either forces the connection to be made in a specified namespace or may use the namespace from the client-side connection if that was set. For more documentation please read the documentation included in the patch itself. Signed-off-by: KOVACS Tamas <ktamas@balabit.com> Signed-off-by: Sarkozi Laszlo <laszlo.sarkozi@balabit.com> Signed-off-by: KOVACS Krisztian <hidden@balabit.com>
2014-11-17 09:11:45 -05:00
#include <common/namespace.h>
#include <common/openssl-compat.h>
#include <common/regex.h>
#include <common/standard.h>
#include <common/time.h>
#include <common/uri_auth.h>
#include <common/version.h>
#include <common/hathreads.h>
#include <types/capture.h>
#include <types/cli.h>
MAJOR: filters: Add filters support This patch adds the support of filters in HAProxy. The main idea is to have a way to "easely" extend HAProxy by adding some "modules", called filters, that will be able to change HAProxy behavior in a programmatic way. To do so, many entry points has been added in code to let filters to hook up to different steps of the processing. A filter must define a flt_ops sutrctures (see include/types/filters.h for details). This structure contains all available callbacks that a filter can define: struct flt_ops { /* * Callbacks to manage the filter lifecycle */ int (*init) (struct proxy *p); void (*deinit)(struct proxy *p); int (*check) (struct proxy *p); /* * Stream callbacks */ void (*stream_start) (struct stream *s); void (*stream_accept) (struct stream *s); void (*session_establish)(struct stream *s); void (*stream_stop) (struct stream *s); /* * HTTP callbacks */ int (*http_start) (struct stream *s, struct http_msg *msg); int (*http_start_body) (struct stream *s, struct http_msg *msg); int (*http_start_chunk) (struct stream *s, struct http_msg *msg); int (*http_data) (struct stream *s, struct http_msg *msg); int (*http_last_chunk) (struct stream *s, struct http_msg *msg); int (*http_end_chunk) (struct stream *s, struct http_msg *msg); int (*http_chunk_trailers)(struct stream *s, struct http_msg *msg); int (*http_end_body) (struct stream *s, struct http_msg *msg); void (*http_end) (struct stream *s, struct http_msg *msg); void (*http_reset) (struct stream *s, struct http_msg *msg); int (*http_pre_process) (struct stream *s, struct http_msg *msg); int (*http_post_process) (struct stream *s, struct http_msg *msg); void (*http_reply) (struct stream *s, short status, const struct chunk *msg); }; To declare and use a filter, in the configuration, the "filter" keyword must be used in a listener/frontend section: frontend test ... filter <FILTER-NAME> [OPTIONS...] The filter referenced by the <FILTER-NAME> must declare a configuration parser on its own name to fill flt_ops and filter_conf field in the proxy's structure. An exemple will be provided later to make it perfectly clear. For now, filters cannot be used in backend section. But this is only a matter of time. Documentation will also be added later. This is the first commit of a long list about filters. It is possible to have several filters on the same listener/frontend. These filters are stored in an array of at most MAX_FILTERS elements (define in include/types/filters.h). Again, this will be replaced later by a list of filters. The filter API has been highly refactored. Main changes are: * Now, HA supports an infinite number of filters per proxy. To do so, filters are stored in list. * Because filters are stored in list, filters state has been moved from the channel structure to the filter structure. This is cleaner because there is no more info about filters in channel structure. * It is possible to defined filters on backends only. For such filters, stream_start/stream_stop callbacks are not called. Of course, it is possible to mix frontend and backend filters. * Now, TCP streams are also filtered. All callbacks without the 'http_' prefix are called for all kind of streams. In addition, 2 new callbacks were added to filter data exchanged through a TCP stream: - tcp_data: it is called when new data are available or when old unprocessed data are still waiting. - tcp_forward_data: it is called when some data can be consumed. * New callbacks attached to channel were added: - channel_start_analyze: it is called when a filter is ready to process data exchanged through a channel. 2 new analyzers (a frontend and a backend) are attached to channels to call this callback. For a frontend filter, it is called before any other analyzer. For a backend filter, it is called when a backend is attached to a stream. So some processing cannot be filtered in that case. - channel_analyze: it is called before each analyzer attached to a channel, expects analyzers responsible for data sending. - channel_end_analyze: it is called when all other analyzers have finished their processing. A new analyzers is attached to channels to call this callback. For a TCP stream, this is always the last one called. For a HTTP one, the callback is called when a request/response ends, so it is called one time for each request/response. * 'session_established' callback has been removed. Everything that is done in this callback can be handled by 'channel_start_analyze' on the response channel. * 'http_pre_process' and 'http_post_process' callbacks have been replaced by 'channel_analyze'. * 'http_start' callback has been replaced by 'http_headers'. This new one is called just before headers sending and parsing of the body. * 'http_end' callback has been replaced by 'channel_end_analyze'. * It is possible to set a forwarder for TCP channels. It was already possible to do it for HTTP ones. * Forwarders can partially consumed forwardable data. For this reason a new HTTP message state was added before HTTP_MSG_DONE : HTTP_MSG_ENDING. Now all filters can define corresponding callbacks (http_forward_data and tcp_forward_data). Each filter owns 2 offsets relative to buf->p, next and forward, to track, respectively, input data already parsed but not forwarded yet by the filter and parsed data considered as forwarded by the filter. A any time, we have the warranty that a filter cannot parse or forward more input than previous ones. And, of course, it cannot forward more input than it has parsed. 2 macros has been added to retrieve these offets: FLT_NXT and FLT_FWD. In addition, 2 functions has been added to change the 'next size' and the 'forward size' of a filter. When a filter parses input data, it can alter these data, so the size of these data can vary. This action has an effet on all previous filters that must be handled. To do so, the function 'filter_change_next_size' must be called, passing the size variation. In the same spirit, if a filter alter forwarded data, it must call the function 'filter_change_forward_size'. 'filter_change_next_size' can be called in 'http_data' and 'tcp_data' callbacks and only these ones. And 'filter_change_forward_size' can be called in 'http_forward_data' and 'tcp_forward_data' callbacks and only these ones. The data changes are the filter responsability, but with some limitation. It must not change already parsed/forwarded data or data that previous filters have not parsed/forwarded yet. Because filters can be used on backends, when we the backend is set for a stream, we add filters defined for this backend in the filter list of the stream. But we must only do that when the backend and the frontend of the stream are not the same. Else same filters are added a second time leading to undefined behavior. The HTTP compression code had to be moved. So it simplifies http_response_forward_body function. To do so, the way the data are forwarded has changed. Now, a filter (and only one) can forward data. In a commit to come, this limitation will be removed to let all filters take part to data forwarding. There are 2 new functions that filters should use to deal with this feature: * flt_set_http_data_forwarder: This function sets the filter (using its id) that will forward data for the specified HTTP message. It is possible if it was not already set by another filter _AND_ if no data was yet forwarded (msg->msg_state <= HTTP_MSG_BODY). It returns -1 if an error occurs. * flt_http_data_forwarder: This function returns the filter id that will forward data for the specified HTTP message. If there is no forwarder set, it returns -1. When an HTTP data forwarder is set for the response, the HTTP compression is disabled. Of course, this is not definitive.
2015-04-30 05:48:27 -04:00
#include <types/filters.h>
#include <types/global.h>
#include <types/acl.h>
#include <types/peers.h>
#include <proto/acl.h>
#include <proto/activity.h>
#include <proto/arg.h>
#include <proto/auth.h>
#include <proto/backend.h>
#include <proto/channel.h>
#include <proto/cli.h>
#include <proto/connection.h>
#include <proto/fd.h>
MAJOR: filters: Add filters support This patch adds the support of filters in HAProxy. The main idea is to have a way to "easely" extend HAProxy by adding some "modules", called filters, that will be able to change HAProxy behavior in a programmatic way. To do so, many entry points has been added in code to let filters to hook up to different steps of the processing. A filter must define a flt_ops sutrctures (see include/types/filters.h for details). This structure contains all available callbacks that a filter can define: struct flt_ops { /* * Callbacks to manage the filter lifecycle */ int (*init) (struct proxy *p); void (*deinit)(struct proxy *p); int (*check) (struct proxy *p); /* * Stream callbacks */ void (*stream_start) (struct stream *s); void (*stream_accept) (struct stream *s); void (*session_establish)(struct stream *s); void (*stream_stop) (struct stream *s); /* * HTTP callbacks */ int (*http_start) (struct stream *s, struct http_msg *msg); int (*http_start_body) (struct stream *s, struct http_msg *msg); int (*http_start_chunk) (struct stream *s, struct http_msg *msg); int (*http_data) (struct stream *s, struct http_msg *msg); int (*http_last_chunk) (struct stream *s, struct http_msg *msg); int (*http_end_chunk) (struct stream *s, struct http_msg *msg); int (*http_chunk_trailers)(struct stream *s, struct http_msg *msg); int (*http_end_body) (struct stream *s, struct http_msg *msg); void (*http_end) (struct stream *s, struct http_msg *msg); void (*http_reset) (struct stream *s, struct http_msg *msg); int (*http_pre_process) (struct stream *s, struct http_msg *msg); int (*http_post_process) (struct stream *s, struct http_msg *msg); void (*http_reply) (struct stream *s, short status, const struct chunk *msg); }; To declare and use a filter, in the configuration, the "filter" keyword must be used in a listener/frontend section: frontend test ... filter <FILTER-NAME> [OPTIONS...] The filter referenced by the <FILTER-NAME> must declare a configuration parser on its own name to fill flt_ops and filter_conf field in the proxy's structure. An exemple will be provided later to make it perfectly clear. For now, filters cannot be used in backend section. But this is only a matter of time. Documentation will also be added later. This is the first commit of a long list about filters. It is possible to have several filters on the same listener/frontend. These filters are stored in an array of at most MAX_FILTERS elements (define in include/types/filters.h). Again, this will be replaced later by a list of filters. The filter API has been highly refactored. Main changes are: * Now, HA supports an infinite number of filters per proxy. To do so, filters are stored in list. * Because filters are stored in list, filters state has been moved from the channel structure to the filter structure. This is cleaner because there is no more info about filters in channel structure. * It is possible to defined filters on backends only. For such filters, stream_start/stream_stop callbacks are not called. Of course, it is possible to mix frontend and backend filters. * Now, TCP streams are also filtered. All callbacks without the 'http_' prefix are called for all kind of streams. In addition, 2 new callbacks were added to filter data exchanged through a TCP stream: - tcp_data: it is called when new data are available or when old unprocessed data are still waiting. - tcp_forward_data: it is called when some data can be consumed. * New callbacks attached to channel were added: - channel_start_analyze: it is called when a filter is ready to process data exchanged through a channel. 2 new analyzers (a frontend and a backend) are attached to channels to call this callback. For a frontend filter, it is called before any other analyzer. For a backend filter, it is called when a backend is attached to a stream. So some processing cannot be filtered in that case. - channel_analyze: it is called before each analyzer attached to a channel, expects analyzers responsible for data sending. - channel_end_analyze: it is called when all other analyzers have finished their processing. A new analyzers is attached to channels to call this callback. For a TCP stream, this is always the last one called. For a HTTP one, the callback is called when a request/response ends, so it is called one time for each request/response. * 'session_established' callback has been removed. Everything that is done in this callback can be handled by 'channel_start_analyze' on the response channel. * 'http_pre_process' and 'http_post_process' callbacks have been replaced by 'channel_analyze'. * 'http_start' callback has been replaced by 'http_headers'. This new one is called just before headers sending and parsing of the body. * 'http_end' callback has been replaced by 'channel_end_analyze'. * It is possible to set a forwarder for TCP channels. It was already possible to do it for HTTP ones. * Forwarders can partially consumed forwardable data. For this reason a new HTTP message state was added before HTTP_MSG_DONE : HTTP_MSG_ENDING. Now all filters can define corresponding callbacks (http_forward_data and tcp_forward_data). Each filter owns 2 offsets relative to buf->p, next and forward, to track, respectively, input data already parsed but not forwarded yet by the filter and parsed data considered as forwarded by the filter. A any time, we have the warranty that a filter cannot parse or forward more input than previous ones. And, of course, it cannot forward more input than it has parsed. 2 macros has been added to retrieve these offets: FLT_NXT and FLT_FWD. In addition, 2 functions has been added to change the 'next size' and the 'forward size' of a filter. When a filter parses input data, it can alter these data, so the size of these data can vary. This action has an effet on all previous filters that must be handled. To do so, the function 'filter_change_next_size' must be called, passing the size variation. In the same spirit, if a filter alter forwarded data, it must call the function 'filter_change_forward_size'. 'filter_change_next_size' can be called in 'http_data' and 'tcp_data' callbacks and only these ones. And 'filter_change_forward_size' can be called in 'http_forward_data' and 'tcp_forward_data' callbacks and only these ones. The data changes are the filter responsability, but with some limitation. It must not change already parsed/forwarded data or data that previous filters have not parsed/forwarded yet. Because filters can be used on backends, when we the backend is set for a stream, we add filters defined for this backend in the filter list of the stream. But we must only do that when the backend and the frontend of the stream are not the same. Else same filters are added a second time leading to undefined behavior. The HTTP compression code had to be moved. So it simplifies http_response_forward_body function. To do so, the way the data are forwarded has changed. Now, a filter (and only one) can forward data. In a commit to come, this limitation will be removed to let all filters take part to data forwarding. There are 2 new functions that filters should use to deal with this feature: * flt_set_http_data_forwarder: This function sets the filter (using its id) that will forward data for the specified HTTP message. It is possible if it was not already set by another filter _AND_ if no data was yet forwarded (msg->msg_state <= HTTP_MSG_BODY). It returns -1 if an error occurs. * flt_http_data_forwarder: This function returns the filter id that will forward data for the specified HTTP message. If there is no forwarder set, it returns -1. When an HTTP data forwarder is set for the response, the HTTP compression is disabled. Of course, this is not definitive.
2015-04-30 05:48:27 -04:00
#include <proto/filters.h>
#include <proto/hlua.h>
#include <proto/http_rules.h>
#include <proto/listener.h>
#include <proto/log.h>
#include <proto/mworker.h>
#include <proto/pattern.h>
#include <proto/protocol.h>
#include <proto/http_ana.h>
#include <proto/proxy.h>
#include <proto/queue.h>
#include <proto/server.h>
#include <proto/session.h>
REORG/MAJOR: session: rename the "session" entity to "stream" With HTTP/2, we'll have to support multiplexed streams. A stream is in fact the largest part of what we currently call a session, it has buffers, logs, etc. In order to catch any error, this commit removes any reference to the struct session and tries to rename most "session" occurrences in function names to "stream" and "sess" to "strm" when that's related to a session. The files stream.{c,h} were added and session.{c,h} removed. The session will be reintroduced later and a few parts of the stream will progressively be moved overthere. It will more or less contain only what we need in an embryonic session. Sample fetch functions and converters will have to change a bit so that they'll use an L5 (session) instead of what's currently called "L4" which is in fact L6 for now. Once all changes are completed, we should see approximately this : L7 - http_txn L6 - stream L5 - session L4 - connection | applet There will be at most one http_txn per stream, and a same session will possibly be referenced by multiple streams. A connection will point to a session and to a stream. The session will hold all the information we need to keep even when we don't yet have a stream. Some more cleanup is needed because some code was already far from being clean. The server queue management still refers to sessions at many places while comments talk about connections. This will have to be cleaned up once we have a server-side connection pool manager. Stream flags "SN_*" still need to be renamed, it doesn't seem like any of them will need to move to the session.
2015-04-02 18:22:06 -04:00
#include <proto/stream.h>
#include <proto/signal.h>
#include <proto/task.h>
#include <proto/dns.h>
#include <proto/vars.h>
#include <proto/ssl_sock.h>
BUILD: re-implement an initcall variant without using executable sections The current initcall implementation relies on dedicated sections (one section per init stage) to store the initcall descriptors. Then upon startup, these sections are scanned from beginning to end and all items found there are called in sequence. On platforms like AIX or Cygwin it seems difficult to figure the beginning and end of sections as the linker doesn't seem to provide the corresponding symbols. In order to replace this, this patch simply implements an array of single linked (one per init stage) which are fed using constructors for each register call. These constructors are declared static, with a name depending on their line number in the file, in order to avoid name clashes. The final effect is the same, except that the method is slightly more expensive in that it explicitly produces code to register these initcalls : $ size haproxy.sections haproxy.constructor text data bss dec hex filename 4060312 249176 1457652 5767140 57ffe4 haproxy.sections 4062862 260408 1457652 5780922 5835ba haproxy.constructor This mechanism is enabled as an alternative to the default one when build option USE_OBSOLETE_LINKER is set. This option is currently enabled by default only on AIX and Cygwin, and may be attempted for any target which fails to build complaining about missing symbols __start_init_* and/or __stop_init_*. Once confirmed as a reliable fix, this will likely have to be backported to 1.9 where AIX and Cygwin do not build anymore.
2019-03-29 16:30:17 -04:00
/* array of init calls for older platforms */
DECLARE_INIT_STAGES;
/* list of config files */
static struct list cfg_cfgfiles = LIST_HEAD_INIT(cfg_cfgfiles);
int pid; /* current process id */
2007-11-26 10:13:36 -05:00
int relative_pid = 1; /* process id starting at 1 */
unsigned long pid_bit = 1; /* bit corresponding to the process id */
unsigned long all_proc_mask = 1; /* mask of all processes */
volatile unsigned long sleeping_thread_mask; /* Threads that are about to sleep in poll() */
/* global options */
struct global global = {
.hard_stop_after = TICK_ETERNITY,
.nbproc = 1,
.nbthread = 0,
.req_count = 0,
.logsrvs = LIST_HEAD_INIT(global.logsrvs),
.maxzlibmem = 0,
.comp_rate_lim = 0,
.ssl_server_verify = SSL_SERVER_VERIFY_REQUIRED,
.unix_bind = {
.ux = {
.uid = -1,
.gid = -1,
.mode = 0,
}
},
.tune = {
.options = GTUNE_LISTENER_MQ,
.bufsize = (BUFSIZE + 2*sizeof(void *) - 1) & -(2*sizeof(void *)),
.maxrewrite = -1,
.chksize = (BUFSIZE + 2*sizeof(void *) - 1) & -(2*sizeof(void *)),
MAJOR: session: only wake up as many sessions as available buffers permit We've already experimented with three wake up algorithms when releasing buffers : the first naive one used to wake up far too many sessions, causing many of them not to get any buffer. The second approach which was still in use prior to this patch consisted in waking up either 1 or 2 sessions depending on the number of FDs we had released. And this was still inaccurate. The third one tried to cover the accuracy issues of the second and took into consideration the number of FDs the sessions would be willing to use, but most of the time we ended up waking up too many of them for nothing, or deadlocking by lack of buffers. This patch completely removes the need to allocate two buffers at once. Instead it splits allocations into critical and non-critical ones and implements a reserve in the pool for this. The deadlock situation happens when all buffers are be allocated for requests pending in a maxconn-limited server queue, because then there's no more way to allocate buffers for responses, and these responses are critical to release the servers's connection in order to release the pending requests. In fact maxconn on a server creates a dependence between sessions and particularly between oldest session's responses and latest session's requests. Thus, it is mandatory to get a free buffer for a response in order to release a server connection which will permit to release a request buffer. Since we definitely have non-symmetrical buffers, we need to implement this logic in the buffer allocation mechanism. What this commit does is implement a reserve of buffers which can only be allocated for responses and that will never be allocated for requests. This is made possible by the requester indicating how much margin it wants to leave after the allocation succeeds. Thus it is a cooperative allocation mechanism : the requester (process_session() in general) prefers not to get a buffer in order to respect other's need for response buffers. The session management code always knows if a buffer will be used for requests or responses, so that is not difficult : - either there's an applet on the initiator side and we really need the request buffer (since currently the applet is called in the context of the session) - or we have a connection and we really need the response buffer (in order to support building and sending an error message back) This reserve ensures that we don't take all allocatable buffers for requests waiting in a queue. The downside is that all the extra buffers are really allocated to ensure they can be allocated. But with small values it is not an issue. With this change, we don't observe any more deadlocks even when running with maxconn 1 on a server under severely constrained memory conditions. The code becomes a bit tricky, it relies on the scheduler's run queue to estimate how many sessions are already expected to run so that it doesn't wake up everyone with too few resources. A better solution would probably consist in having two queues, one for urgent requests and one for normal requests. A failed allocation for a session dealing with an error, a connection event, or the need for a response (or request when there's an applet on the left) would go to the urgent request queue, while other requests would go to the other queue. Urgent requests would be served from 1 entry in the pool, while the regular ones would be served only according to the reserve. Despite not yet having this, it works remarkably well. This mechanism is quite efficient, we don't perform too many wake up calls anymore. For 1 million sessions elapsed during massive memory contention, we observe about 4.5M calls to process_session() compared to 4.0M without memory constraints. Previously we used to observe up to 16M calls, which rougly means 12M failures. During a test run under high memory constraints (limit enforced to 27 MB instead of the 58 MB normally needed), performance used to drop by 53% prior to this patch. Now with this patch instead it *increases* by about 1.5%. The best effect of this change is that by limiting the memory usage to about 2/3 to 3/4 of what is needed by default, it's possible to increase performance by up to about 18% mainly due to the fact that pools are reused more often and remain hot in the CPU cache (observed on regular HTTP traffic with 20k objects, buffers.limit = maxconn/10, buffers.reserve = limit/2). Below is an example of scenario which used to cause a deadlock previously : - connection is received - two buffers are allocated in process_session() then released - one is allocated when receiving an HTTP request - the second buffer is allocated then released in process_session() for request parsing then connection establishment. - poll() says we can send, so the request buffer is sent and released - process session gets notified that the connection is now established and allocates two buffers then releases them - all other sessions do the same till one cannot get the request buffer without hitting the margin - and now the server responds. stream_interface allocates the response buffer and manages to get it since it's higher priority being for a response. - but process_session() cannot allocate the request buffer anymore => We could end up with all buffers used by responses so that none may be allocated for a request in process_session(). When the applet processing leaves the session context, the test will have to be changed so that we always allocate a response buffer regardless of the left side (eg: H2->H1 gateway). A final improvement would consists in being able to only retry the failed I/O operation without waking up a task, but to date all experiments to achieve this have proven not to be reliable enough.
2014-11-26 19:11:56 -05:00
.reserved_bufs = RESERVED_BUFS,
.pattern_cache = DEFAULT_PAT_LRU_SIZE,
.pool_low_ratio = 20,
.pool_high_ratio = 25,
.max_http_hdr = MAX_HTTP_HDR,
#ifdef USE_OPENSSL
.sslcachesize = SSLCACHESIZE,
#endif
.comp_maxlevel = 1,
#ifdef DEFAULT_IDLE_TIMER
.idle_timer = DEFAULT_IDLE_TIMER,
#else
.idle_timer = 1000, /* 1 second */
#endif
},
#ifdef USE_OPENSSL
#ifdef DEFAULT_MAXSSLCONN
.maxsslconn = DEFAULT_MAXSSLCONN,
#endif
#endif
/* others NULL OK */
};
/*********************************************************************/
int stopping; /* non zero means stopping in progress */
int killed; /* non zero means a hard-stop is triggered */
int jobs = 0; /* number of active jobs (conns, listeners, active tasks, ...) */
int unstoppable_jobs = 0; /* number of active jobs that can't be stopped during a soft stop */
int active_peers = 0; /* number of active peers (connection attempts and connected) */
int connected_peers = 0; /* number of connected peers (verified ones) */
/* Here we store informations about the pids of the processes we may pause
* or kill. We will send them a signal every 10 ms until we can bind to all
* our ports. With 200 retries, that's about 2 seconds.
*/
#define MAX_START_RETRIES 200
static int *oldpids = NULL;
static int oldpids_sig; /* use USR1 or TERM */
/* Path to the unix socket we use to retrieve listener sockets from the old process */
static const char *old_unixsocket;
static char *cur_unixsocket = NULL;
int atexit_flag = 0;
int nb_oldpids = 0;
const int zero = 0;
const int one = 1;
const struct linger nolinger = { .l_onoff = 1, .l_linger = 0 };
char hostname[MAX_HOSTNAME_LEN];
char localpeer[MAX_HOSTNAME_LEN];
/* used from everywhere just to drain results we don't want to read and which
* recent versions of gcc increasingly and annoyingly complain about.
*/
int shut_your_big_mouth_gcc_int = 0;
static char **next_argv = NULL;
struct list proc_list = LIST_HEAD_INIT(proc_list);
int master = 0; /* 1 if in master, 0 if in child */
unsigned int rlim_fd_cur_at_boot = 0;
unsigned int rlim_fd_max_at_boot = 0;
struct mworker_proc *proc_self = NULL;
/* list of the temporarily limited listeners because of lack of resource */
struct list global_listener_queue = LIST_HEAD_INIT(global_listener_queue);
struct task *global_listener_queue_task;
static struct task *manage_global_listener_queue(struct task *t, void *context, unsigned short state);
static void *run_thread_poll_loop(void *data);
/* bitfield of a few warnings to emit just once (WARN_*) */
unsigned int warned = 0;
/* master CLI configuration (-S flag) */
struct list mworker_cli_conf = LIST_HEAD_INIT(mworker_cli_conf);
/* These are strings to be reported in the output of "haproxy -vv". They may
* either be constants (in which case must_free must be zero) or dynamically
* allocated strings to pass to free() on exit, and in this case must_free
* must be non-zero.
*/
struct list build_opts_list = LIST_HEAD_INIT(build_opts_list);
struct build_opts_str {
struct list list;
const char *str;
int must_free;
};
/* These functions are called just after the point where the program exits
* after a config validity check, so they are generally suited for resource
* allocation and slow initializations that should be skipped during basic
* config checks. The functions must return 0 on success, or a combination
* of ERR_* flags (ERR_WARN, ERR_ABORT, ERR_FATAL, ...). The 2 latter cause
* and immediate exit, so the function must have emitted any useful error.
*/
struct list post_check_list = LIST_HEAD_INIT(post_check_list);
struct post_check_fct {
struct list list;
int (*fct)();
};
/* These functions are called for each thread just after the thread creation
* and before running the init functions. They should be used to do per-thread
* (re-)allocations that are needed by subsequent functoins. They must return 0
* if an error occurred. */
struct list per_thread_alloc_list = LIST_HEAD_INIT(per_thread_alloc_list);
struct per_thread_alloc_fct {
struct list list;
int (*fct)();
};
/* These functions are called for each thread just after the thread creation
* and before running the scheduler. They should be used to do per-thread
* initializations. They must return 0 if an error occurred. */
struct list per_thread_init_list = LIST_HEAD_INIT(per_thread_init_list);
struct per_thread_init_fct {
struct list list;
int (*fct)();
};
/* These functions are called when freeing the global sections at the end of
* deinit, after everything is stopped. They don't return anything. They should
* not release shared resources that are possibly used by other deinit
* functions, only close/release what is private. Use the per_thread_free_list
* to release shared resources.
*/
struct list post_deinit_list = LIST_HEAD_INIT(post_deinit_list);
struct post_deinit_fct {
struct list list;
void (*fct)();
};
/* These functions are called when freeing the global sections at the end of
* deinit, after the thread deinit functions, to release unneeded memory
* allocations. They don't return anything, and they work in best effort mode
* as their sole goal is to make valgrind mostly happy.
*/
struct list per_thread_free_list = LIST_HEAD_INIT(per_thread_free_list);
struct per_thread_free_fct {
struct list list;
int (*fct)();
};
/* These functions are called for each thread just after the scheduler loop and
* before exiting the thread. They don't return anything and, as for post-deinit
* functions, they work in best effort mode as their sole goal is to make
* valgrind mostly happy. */
struct list per_thread_deinit_list = LIST_HEAD_INIT(per_thread_deinit_list);
struct per_thread_deinit_fct {
struct list list;
void (*fct)();
};
/*********************************************************************/
/* general purpose functions ***************************************/
/*********************************************************************/
/* used to register some build option strings at boot. Set must_free to
* non-zero if the string must be freed upon exit.
*/
void hap_register_build_opts(const char *str, int must_free)
{
struct build_opts_str *b;
b = calloc(1, sizeof(*b));
if (!b) {
fprintf(stderr, "out of memory\n");
exit(1);
}
b->str = str;
b->must_free = must_free;
LIST_ADDQ(&build_opts_list, &b->list);
}
/* used to register some initialization functions to call after the checks. */
void hap_register_post_check(int (*fct)())
{
struct post_check_fct *b;
b = calloc(1, sizeof(*b));
if (!b) {
fprintf(stderr, "out of memory\n");
exit(1);
}
b->fct = fct;
LIST_ADDQ(&post_check_list, &b->list);
}
/* used to register some de-initialization functions to call after everything
* has stopped.
*/
void hap_register_post_deinit(void (*fct)())
{
struct post_deinit_fct *b;
b = calloc(1, sizeof(*b));
if (!b) {
fprintf(stderr, "out of memory\n");
exit(1);
}
b->fct = fct;
LIST_ADDQ(&post_deinit_list, &b->list);
}
/* used to register some allocation functions to call for each thread. */
void hap_register_per_thread_alloc(int (*fct)())
{
struct per_thread_alloc_fct *b;
b = calloc(1, sizeof(*b));
if (!b) {
fprintf(stderr, "out of memory\n");
exit(1);
}
b->fct = fct;
LIST_ADDQ(&per_thread_alloc_list, &b->list);
}
/* used to register some initialization functions to call for each thread. */
void hap_register_per_thread_init(int (*fct)())
{
struct per_thread_init_fct *b;
b = calloc(1, sizeof(*b));
if (!b) {
fprintf(stderr, "out of memory\n");
exit(1);
}
b->fct = fct;
LIST_ADDQ(&per_thread_init_list, &b->list);
}
/* used to register some de-initialization functions to call for each thread. */
void hap_register_per_thread_deinit(void (*fct)())
{
struct per_thread_deinit_fct *b;
b = calloc(1, sizeof(*b));
if (!b) {
fprintf(stderr, "out of memory\n");
exit(1);
}
b->fct = fct;
LIST_ADDQ(&per_thread_deinit_list, &b->list);
}
/* used to register some free functions to call for each thread. */
void hap_register_per_thread_free(int (*fct)())
{
struct per_thread_free_fct *b;
b = calloc(1, sizeof(*b));
if (!b) {
fprintf(stderr, "out of memory\n");
exit(1);
}
b->fct = fct;
LIST_ADDQ(&per_thread_free_list, &b->list);
}
static void display_version()
{
printf("HA-Proxy version %s %s - https://haproxy.org/\n", haproxy_version, haproxy_date);
}
static void display_build_opts()
{
struct build_opts_str *item;
printf("Build options :"
#ifdef BUILD_TARGET
"\n TARGET = " BUILD_TARGET
#endif
#ifdef BUILD_CPU
"\n CPU = " BUILD_CPU
#endif
#ifdef BUILD_CC
"\n CC = " BUILD_CC
#endif
#ifdef BUILD_CFLAGS
"\n CFLAGS = " BUILD_CFLAGS
#endif
#ifdef BUILD_OPTIONS
"\n OPTIONS = " BUILD_OPTIONS
#endif
#ifdef BUILD_FEATURES
"\n\nFeature list : " BUILD_FEATURES
#endif
"\n\nDefault settings :"
"\n bufsize = %d, maxrewrite = %d, maxpollevents = %d"
"\n\n",
BUFSIZE, MAXREWRITE, MAX_POLL_EVENTS);
list_for_each_entry(item, &build_opts_list, list) {
puts(item->str);
}
putchar('\n');
list_pollers(stdout);
putchar('\n');
list_mux_proto(stdout);
putchar('\n');
list_services(stdout);
putchar('\n');
list_filters(stdout);
putchar('\n');
}
/*
* This function prints the command line usage and exits
*/
static void usage(char *name)
{
display_version();
fprintf(stderr,
"Usage : %s [-f <cfgfile|cfgdir>]* [ -vdV"
"D ] [ -n <maxconn> ] [ -N <maxpconn> ]\n"
" [ -p <pidfile> ] [ -m <max megs> ] [ -C <dir> ] [-- <cfgfile>*]\n"
" -v displays version ; -vv shows known build options.\n"
" -d enters debug mode ; -db only disables background mode.\n"
" -dM[<byte>] poisons memory with <byte> (defaults to 0x50)\n"
" -V enters verbose mode (disables quiet mode)\n"
" -D goes daemon ; -C changes to <dir> before loading files.\n"
" -W master-worker mode.\n"
MEDIUM: mworker: Add systemd `Type=notify` support This patch adds support for `Type=notify` to the systemd unit. Supporting `Type=notify` improves both starting as well as reloading of the unit, because systemd will be let known when the action completed. See this quote from `systemd.service(5)`: > Note however that reloading a daemon by sending a signal (as with the > example line above) is usually not a good choice, because this is an > asynchronous operation and hence not suitable to order reloads of > multiple services against each other. It is strongly recommended to > set ExecReload= to a command that not only triggers a configuration > reload of the daemon, but also synchronously waits for it to complete. By making systemd aware of a reload in progress it is able to wait until the reload actually succeeded. This patch introduces both a new `USE_SYSTEMD` build option which controls including the sd-daemon library as well as a `-Ws` runtime option which runs haproxy in master-worker mode with systemd support. When haproxy is running in master-worker mode with systemd support it will send status messages to systemd using `sd_notify(3)` in the following cases: - The master process forked off the worker processes (READY=1) - The master process entered the `mworker_reload()` function (RELOADING=1) - The master process received the SIGUSR1 or SIGTERM signal (STOPPING=1) Change the unit file to specify `Type=notify` and replace master-worker mode (`-W`) with master-worker mode with systemd support (`-Ws`). Future evolutions of this feature could include making use of the `STATUS` feature of `sd_notify()` to send information about the number of active connections to systemd. This would require bidirectional communication between the master and the workers and thus is left for future work.
2017-11-20 09:58:35 -05:00
#if defined(USE_SYSTEMD)
" -Ws master-worker mode with systemd notify support.\n"
#endif
" -q quiet mode : don't display messages\n"
" -c check mode : only check config files and exit\n"
" -n sets the maximum total # of connections (uses ulimit -n)\n"
" -m limits the usable amount of memory (in MB)\n"
" -N sets the default, per-proxy maximum # of connections (%d)\n"
" -L set local peer name (default to hostname)\n"
" -p writes pids of all children to this file\n"
#if defined(USE_EPOLL)
" -de disables epoll() usage even when available\n"
#endif
#if defined(USE_KQUEUE)
" -dk disables kqueue() usage even when available\n"
#endif
#if defined(USE_EVPORTS)
" -dv disables event ports usage even when available\n"
#endif
#if defined(USE_POLL)
" -dp disables poll() usage even when available\n"
#endif
#if defined(USE_LINUX_SPLICE)
" -dS disables splice usage (broken on old kernels)\n"
#endif
#if defined(USE_GETADDRINFO)
" -dG disables getaddrinfo() usage\n"
#endif
#if defined(SO_REUSEPORT)
" -dR disables SO_REUSEPORT usage\n"
#endif
" -dr ignores server address resolution failures\n"
" -dV disables SSL verify on servers side\n"
" -sf/-st [pid ]* finishes/terminates old pids.\n"
" -x <unix_socket> get listening sockets from a unix socket\n"
" -S <bind>[,<bind options>...] new master CLI\n"
"\n",
name, cfg_maxpconn);
exit(1);
}
/*********************************************************************/
/* more specific functions ***************************************/
/*********************************************************************/
/* sends the signal <sig> to all pids found in <oldpids>. Returns the number of
* pids the signal was correctly delivered to.
*/
int tell_old_pids(int sig)
{
int p;
int ret = 0;
for (p = 0; p < nb_oldpids; p++)
if (kill(oldpids[p], sig) == 0)
ret++;
return ret;
}
/*
* remove a pid forom the olpid array and decrease nb_oldpids
* return 1 pid was found otherwise return 0
*/
int delete_oldpid(int pid)
{
int i;
for (i = 0; i < nb_oldpids; i++) {
if (oldpids[i] == pid) {
oldpids[i] = oldpids[nb_oldpids - 1];
oldpids[nb_oldpids - 1] = 0;
nb_oldpids--;
return 1;
}
}
return 0;
}
static void get_cur_unixsocket()
{
/* if -x was used, try to update the stat socket if not available anymore */
if (global.stats_fe) {
struct bind_conf *bind_conf;
/* pass through all stats socket */
list_for_each_entry(bind_conf, &global.stats_fe->conf.bind, by_fe) {
struct listener *l;
list_for_each_entry(l, &bind_conf->listeners, by_bind) {
if (l->addr.ss_family == AF_UNIX &&
(bind_conf->level & ACCESS_FD_LISTENERS)) {
const struct sockaddr_un *un;
un = (struct sockaddr_un *)&l->addr;
/* priority to old_unixsocket */
if (!cur_unixsocket) {
cur_unixsocket = strdup(un->sun_path);
} else {
if (old_unixsocket && !strcmp(un->sun_path, old_unixsocket)) {
free(cur_unixsocket);
cur_unixsocket = strdup(old_unixsocket);
return;
}
}
}
}
}
}
if (!cur_unixsocket && old_unixsocket)
cur_unixsocket = strdup(old_unixsocket);
}
/*
* When called, this function reexec haproxy with -sf followed by current
* children PIDs and possibly old children PIDs if they didn't leave yet.
*/
void mworker_reload()
{
int next_argc = 0;
char *msg = NULL;
struct rlimit limit;
struct per_thread_deinit_fct *ptdf;
mworker_block_signals();
MEDIUM: mworker: Add systemd `Type=notify` support This patch adds support for `Type=notify` to the systemd unit. Supporting `Type=notify` improves both starting as well as reloading of the unit, because systemd will be let known when the action completed. See this quote from `systemd.service(5)`: > Note however that reloading a daemon by sending a signal (as with the > example line above) is usually not a good choice, because this is an > asynchronous operation and hence not suitable to order reloads of > multiple services against each other. It is strongly recommended to > set ExecReload= to a command that not only triggers a configuration > reload of the daemon, but also synchronously waits for it to complete. By making systemd aware of a reload in progress it is able to wait until the reload actually succeeded. This patch introduces both a new `USE_SYSTEMD` build option which controls including the sd-daemon library as well as a `-Ws` runtime option which runs haproxy in master-worker mode with systemd support. When haproxy is running in master-worker mode with systemd support it will send status messages to systemd using `sd_notify(3)` in the following cases: - The master process forked off the worker processes (READY=1) - The master process entered the `mworker_reload()` function (RELOADING=1) - The master process received the SIGUSR1 or SIGTERM signal (STOPPING=1) Change the unit file to specify `Type=notify` and replace master-worker mode (`-W`) with master-worker mode with systemd support (`-Ws`). Future evolutions of this feature could include making use of the `STATUS` feature of `sd_notify()` to send information about the number of active connections to systemd. This would require bidirectional communication between the master and the workers and thus is left for future work.
2017-11-20 09:58:35 -05:00
#if defined(USE_SYSTEMD)
if (global.tune.options & GTUNE_USE_SYSTEMD)
sd_notify(0, "RELOADING=1");
#endif
setenv("HAPROXY_MWORKER_REEXEC", "1", 1);
mworker_proc_list_to_env(); /* put the children description in the env */
/* during the reload we must ensure that every FDs that can't be
* reuse (ie those that are not referenced in the proc_list)
* are closed or they will leak. */
/* close the listeners FD */
mworker_cli_proxy_stop();
if (getenv("HAPROXY_MWORKER_WAIT_ONLY") == NULL) {
/* close the poller FD and the thread waker pipe FD */
list_for_each_entry(ptdf, &per_thread_deinit_list, list)
ptdf->fct();
if (fdtab)
deinit_pollers();
}
#if defined(USE_OPENSSL) && (HA_OPENSSL_VERSION_NUMBER >= 0x10101000L)
if (global.ssl_used_frontend || global.ssl_used_backend)
/* close random device FDs */
RAND_keep_random_devices_open(0);
#endif
/* restore the initial FD limits */
limit.rlim_cur = rlim_fd_cur_at_boot;
limit.rlim_max = rlim_fd_max_at_boot;
if (setrlimit(RLIMIT_NOFILE, &limit) == -1) {
getrlimit(RLIMIT_NOFILE, &limit);
ha_warning("Failed to restore initial FD limits (cur=%u max=%u), using cur=%u max=%u\n",
rlim_fd_cur_at_boot, rlim_fd_max_at_boot,
(unsigned int)limit.rlim_cur, (unsigned int)limit.rlim_max);
}
/* compute length */
while (next_argv[next_argc])
next_argc++;
/* 1 for haproxy -sf, 2 for -x /socket */
next_argv = realloc(next_argv, (next_argc + 1 + 2 + mworker_child_nb() + nb_oldpids + 1) * sizeof(char *));
if (next_argv == NULL)
goto alloc_error;
/* add -sf <PID>* to argv */
if (mworker_child_nb() > 0) {
struct mworker_proc *child;
next_argv[next_argc++] = "-sf";
list_for_each_entry(child, &proc_list, list) {
if (!(child->options & (PROC_O_TYPE_WORKER|PROC_O_TYPE_PROG)))
continue;
next_argv[next_argc] = memprintf(&msg, "%d", child->pid);
if (next_argv[next_argc] == NULL)
goto alloc_error;
msg = NULL;
next_argc++;
}
}
next_argv[next_argc] = NULL;
/* add the -x option with the stat socket */
if (cur_unixsocket) {
next_argv[next_argc++] = "-x";
next_argv[next_argc++] = (char *)cur_unixsocket;
next_argv[next_argc++] = NULL;
}
ha_warning("Reexecuting Master process\n");
execvp(next_argv[0], next_argv);
ha_warning("Failed to reexecute the master process [%d]: %s\n", pid, strerror(errno));
return;
alloc_error:
ha_warning("Failed to reexecute the master process [%d]: Cannot allocate memory\n", pid);
return;
}
static void mworker_loop()
{
#if defined(USE_SYSTEMD)
if (global.tune.options & GTUNE_USE_SYSTEMD)
sd_notifyf(0, "READY=1\nMAINPID=%lu", (unsigned long)getpid());
#endif
/* Busy polling makes no sense in the master :-) */
global.tune.options &= ~GTUNE_BUSY_POLLING;
master = 1;
signal_unregister(SIGUSR1);
signal_unregister(SIGHUP);
signal_unregister(SIGQUIT);
signal_register_fct(SIGTERM, mworker_catch_sigterm, SIGTERM);
signal_register_fct(SIGUSR1, mworker_catch_sigterm, SIGUSR1);
signal_register_fct(SIGINT, mworker_catch_sigterm, SIGINT);
signal_register_fct(SIGHUP, mworker_catch_sighup, SIGHUP);
signal_register_fct(SIGUSR2, mworker_catch_sighup, SIGUSR2);
signal_register_fct(SIGCHLD, mworker_catch_sigchld, SIGCHLD);
mworker_unblock_signals();
mworker_cleanlisteners();
mworker_cleantasks();
mworker_catch_sigchld(NULL); /* ensure we clean the children in case
some SIGCHLD were lost */
global.nbthread = 1;
relative_pid = 1;
pid_bit = 1;
all_proc_mask = 1;
#ifdef USE_THREAD
tid_bit = 1;
all_threads_mask = 1;
#endif
jobs++; /* this is the "master" job, we want to take care of the
signals even if there is no listener so the poll loop don't
leave */
fork_poller();
run_thread_poll_loop(0);
}
/*
* Reexec the process in failure mode, instead of exiting
*/
void reexec_on_failure()
{
if (!atexit_flag)
return;
setenv("HAPROXY_MWORKER_WAIT_ONLY", "1", 1);
ha_warning("Reexecuting Master process in waitpid mode\n");
mworker_reload();
}
/*
* upon SIGUSR1, let's have a soft stop. Note that soft_stop() broadcasts
* a signal zero to all subscribers. This means that it's as easy as
* subscribing to signal 0 to get informed about an imminent shutdown.
*/
static void sig_soft_stop(struct sig_handler *sh)
{
soft_stop();
signal_unregister_handler(sh);
pool_gc(NULL);
}
/*
* upon SIGTTOU, we pause everything
*/
static void sig_pause(struct sig_handler *sh)
{
pause_proxies();
pool_gc(NULL);
}
/*
* upon SIGTTIN, let's have a soft stop.
*/
static void sig_listen(struct sig_handler *sh)
{
resume_proxies();
}
/*
* this function dumps every server's state when the process receives SIGHUP.
*/
static void sig_dump_state(struct sig_handler *sh)
{
struct proxy *p = proxies_list;
ha_warning("SIGHUP received, dumping servers states.\n");
while (p) {
struct server *s = p->srv;
send_log(p, LOG_NOTICE, "SIGHUP received, dumping servers states for proxy %s.\n", p->id);
while (s) {
chunk_printf(&trash,
"SIGHUP: Server %s/%s is %s. Conn: %d act, %d pend, %lld tot.",
p->id, s->id,
(s->cur_state != SRV_ST_STOPPED) ? "UP" : "DOWN",
s->cur_sess, s->nbpend, s->counters.cum_sess);
ha_warning("%s\n", trash.area);
send_log(p, LOG_NOTICE, "%s\n", trash.area);
s = s->next;
}
/* FIXME: those info are a bit outdated. We should be able to distinguish between FE and BE. */
if (!p->srv) {
chunk_printf(&trash,
"SIGHUP: Proxy %s has no servers. Conn: act(FE+BE): %d+%d, %d pend (%d unass), tot(FE+BE): %lld+%lld.",
p->id,
p->feconn, p->beconn, p->totpend, p->nbpend, p->fe_counters.cum_conn, p->be_counters.cum_conn);
} else if (p->srv_act == 0) {
chunk_printf(&trash,
"SIGHUP: Proxy %s %s ! Conn: act(FE+BE): %d+%d, %d pend (%d unass), tot(FE+BE): %lld+%lld.",
p->id,
(p->srv_bck) ? "is running on backup servers" : "has no server available",
p->feconn, p->beconn, p->totpend, p->nbpend, p->fe_counters.cum_conn, p->be_counters.cum_conn);
} else {
chunk_printf(&trash,
"SIGHUP: Proxy %s has %d active servers and %d backup servers available."
" Conn: act(FE+BE): %d+%d, %d pend (%d unass), tot(FE+BE): %lld+%lld.",
p->id, p->srv_act, p->srv_bck,
p->feconn, p->beconn, p->totpend, p->nbpend, p->fe_counters.cum_conn, p->be_counters.cum_conn);
}
ha_warning("%s\n", trash.area);
send_log(p, LOG_NOTICE, "%s\n", trash.area);
p = p->next;
}
}
static void dump(struct sig_handler *sh)
{
/* dump memory usage then free everything possible */
dump_pools();
pool_gc(NULL);
}
/*
* This function dup2 the stdio FDs (0,1,2) with <fd>, then closes <fd>
* If <fd> < 0, it opens /dev/null and use it to dup
*
* In the case of chrooting, you have to open /dev/null before the chroot, and
* pass the <fd> to this function
*/
static void stdio_quiet(int fd)
{
if (fd < 0)
fd = open("/dev/null", O_RDWR, 0);
if (fd > -1) {
fclose(stdin);
fclose(stdout);
fclose(stderr);
dup2(fd, 0);
dup2(fd, 1);
dup2(fd, 2);
if (fd > 2)
close(fd);
return;
}
ha_alert("Cannot open /dev/null\n");
exit(EXIT_FAILURE);
}
/* This function checks if cfg_cfgfiles contains directories.
* If it finds one, it adds all the files (and only files) it contains
* in cfg_cfgfiles in place of the directory (and removes the directory).
* It adds the files in lexical order.
* It adds only files with .cfg extension.
* It doesn't add files with name starting with '.'
*/
static void cfgfiles_expand_directories(void)
{
struct wordlist *wl, *wlb;
char *err = NULL;
list_for_each_entry_safe(wl, wlb, &cfg_cfgfiles, list) {
struct stat file_stat;
struct dirent **dir_entries = NULL;
int dir_entries_nb;
int dir_entries_it;
if (stat(wl->s, &file_stat)) {
ha_alert("Cannot open configuration file/directory %s : %s\n",
wl->s,
strerror(errno));
exit(1);
}
if (!S_ISDIR(file_stat.st_mode))
continue;
/* from this point wl->s is a directory */
dir_entries_nb = scandir(wl->s, &dir_entries, NULL, alphasort);
if (dir_entries_nb < 0) {
ha_alert("Cannot open configuration directory %s : %s\n",
wl->s,
strerror(errno));
exit(1);
}
/* for each element in the directory wl->s */
for (dir_entries_it = 0; dir_entries_it < dir_entries_nb; dir_entries_it++) {
struct dirent *dir_entry = dir_entries[dir_entries_it];
char *filename = NULL;
char *d_name_cfgext = strstr(dir_entry->d_name, ".cfg");
/* don't add filename that begin with .
* only add filename with .cfg extension
*/
if (dir_entry->d_name[0] == '.' ||
!(d_name_cfgext && d_name_cfgext[4] == '\0'))
goto next_dir_entry;
if (!memprintf(&filename, "%s/%s", wl->s, dir_entry->d_name)) {
ha_alert("Cannot load configuration files %s : out of memory.\n",
filename);
exit(1);
}
if (stat(filename, &file_stat)) {
ha_alert("Cannot open configuration file %s : %s\n",
wl->s,
strerror(errno));
exit(1);
}
/* don't add anything else than regular file in cfg_cfgfiles
* this way we avoid loops
*/
if (!S_ISREG(file_stat.st_mode))
goto next_dir_entry;
if (!list_append_word(&wl->list, filename, &err)) {
ha_alert("Cannot load configuration files %s : %s\n",
filename,
err);
exit(1);
}
next_dir_entry:
free(filename);
free(dir_entry);
}
free(dir_entries);
/* remove the current directory (wl) from cfg_cfgfiles */
free(wl->s);
LIST_DEL(&wl->list);
free(wl);
}
free(err);
}
static int get_old_sockets(const char *unixsocket)
{
char *cmsgbuf = NULL, *tmpbuf = NULL;
int *tmpfd = NULL;
struct sockaddr_un addr;
struct cmsghdr *cmsg;
struct msghdr msghdr;
struct iovec iov;
struct xfer_sock_list *xfer_sock = NULL;
struct timeval tv = { .tv_sec = 1, .tv_usec = 0 };
int sock = -1;
int ret = -1;
int ret2 = -1;
int fd_nb;
int got_fd = 0;
int i = 0;
size_t maxoff = 0, curoff = 0;
memset(&msghdr, 0, sizeof(msghdr));
cmsgbuf = malloc(CMSG_SPACE(sizeof(int)) * MAX_SEND_FD);
if (!cmsgbuf) {
ha_warning("Failed to allocate memory to send sockets\n");
goto out;
}
sock = socket(PF_UNIX, SOCK_STREAM, 0);
if (sock < 0) {
ha_warning("Failed to connect to the old process socket '%s'\n",
unixsocket);
goto out;
}
strncpy(addr.sun_path, unixsocket, sizeof(addr.sun_path));
addr.sun_path[sizeof(addr.sun_path) - 1] = 0;
addr.sun_family = PF_UNIX;
ret = connect(sock, (struct sockaddr *)&addr, sizeof(addr));
if (ret < 0) {
ha_warning("Failed to connect to the old process socket '%s'\n",
unixsocket);
goto out;
}
setsockopt(sock, SOL_SOCKET, SO_RCVTIMEO, (void *)&tv, sizeof(tv));
iov.iov_base = &fd_nb;
iov.iov_len = sizeof(fd_nb);
msghdr.msg_iov = &iov;
msghdr.msg_iovlen = 1;
send(sock, "_getsocks\n", strlen("_getsocks\n"), 0);
/* First, get the number of file descriptors to be received */
if (recvmsg(sock, &msghdr, MSG_WAITALL) != sizeof(fd_nb)) {
ha_warning("Failed to get the number of sockets to be transferred !\n");
goto out;
}
if (fd_nb == 0) {
ret = 0;
goto out;
}
tmpbuf = malloc(fd_nb * (1 + MAXPATHLEN + 1 + IFNAMSIZ + sizeof(int)));
if (tmpbuf == NULL) {
ha_warning("Failed to allocate memory while receiving sockets\n");
goto out;
}
tmpfd = malloc(fd_nb * sizeof(int));
if (tmpfd == NULL) {
ha_warning("Failed to allocate memory while receiving sockets\n");
goto out;
}
msghdr.msg_control = cmsgbuf;
msghdr.msg_controllen = CMSG_SPACE(sizeof(int)) * MAX_SEND_FD;
iov.iov_len = MAX_SEND_FD * (1 + MAXPATHLEN + 1 + IFNAMSIZ + sizeof(int));
do {
int ret3;
iov.iov_base = tmpbuf + curoff;
ret = recvmsg(sock, &msghdr, 0);
if (ret == -1 && errno == EINTR)
continue;
if (ret <= 0)
break;
/* Send an ack to let the sender know we got the sockets
* and it can send some more
*/
do {
ret3 = send(sock, &got_fd, sizeof(got_fd), 0);
} while (ret3 == -1 && errno == EINTR);
for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg != NULL;
cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
if (cmsg->cmsg_level == SOL_SOCKET &&
cmsg->cmsg_type == SCM_RIGHTS) {
size_t totlen = cmsg->cmsg_len -
CMSG_LEN(0);
if (totlen / sizeof(int) + got_fd > fd_nb) {
ha_warning("Got to many sockets !\n");
goto out;
}
/*
* Be paranoid and use memcpy() to avoid any
* potential alignement issue.
*/
memcpy(&tmpfd[got_fd], CMSG_DATA(cmsg), totlen);
got_fd += totlen / sizeof(int);
}
}
curoff += ret;
} while (got_fd < fd_nb);
if (got_fd != fd_nb) {
ha_warning("We didn't get the expected number of sockets (expecting %d got %d)\n",
fd_nb, got_fd);
goto out;
}
maxoff = curoff;
curoff = 0;
for (i = 0; i < got_fd; i++) {
int fd = tmpfd[i];
socklen_t socklen;
int len;
xfer_sock = calloc(1, sizeof(*xfer_sock));
if (!xfer_sock) {
ha_warning("Failed to allocate memory in get_old_sockets() !\n");
break;
}
xfer_sock->fd = -1;
socklen = sizeof(xfer_sock->addr);
if (getsockname(fd, (struct sockaddr *)&xfer_sock->addr, &socklen) != 0) {
ha_warning("Failed to get socket address\n");
free(xfer_sock);
xfer_sock = NULL;
continue;
}
if (curoff >= maxoff) {
ha_warning("Inconsistency while transferring sockets\n");
goto out;
}
len = tmpbuf[curoff++];
if (len > 0) {
/* We have a namespace */
if (curoff + len > maxoff) {
ha_warning("Inconsistency while transferring sockets\n");
goto out;
}
xfer_sock->namespace = malloc(len + 1);
if (!xfer_sock->namespace) {
ha_warning("Failed to allocate memory while transferring sockets\n");
goto out;
}
memcpy(xfer_sock->namespace, &tmpbuf[curoff], len);
xfer_sock->namespace[len] = 0;
curoff += len;
}
if (curoff >= maxoff) {
ha_warning("Inconsistency while transferring sockets\n");
goto out;
}
len = tmpbuf[curoff++];
if (len > 0) {
/* We have an interface */
if (curoff + len > maxoff) {
ha_warning("Inconsistency while transferring sockets\n");
goto out;
}
xfer_sock->iface = malloc(len + 1);
if (!xfer_sock->iface) {
ha_warning("Failed to allocate memory while transferring sockets\n");
goto out;
}
memcpy(xfer_sock->iface, &tmpbuf[curoff], len);
xfer_sock->iface[len] = 0;
curoff += len;
}
if (curoff + sizeof(int) > maxoff) {
ha_warning("Inconsistency while transferring sockets\n");
goto out;
}
memcpy(&xfer_sock->options, &tmpbuf[curoff],
sizeof(xfer_sock->options));
curoff += sizeof(xfer_sock->options);
xfer_sock->fd = fd;
if (xfer_sock_list)
xfer_sock_list->prev = xfer_sock;
xfer_sock->next = xfer_sock_list;
xfer_sock->prev = NULL;
xfer_sock_list = xfer_sock;
xfer_sock = NULL;
}
ret2 = 0;
out:
/* If we failed midway make sure to close the remaining
* file descriptors
*/
if (tmpfd != NULL && i < got_fd) {
for (; i < got_fd; i++) {
close(tmpfd[i]);
}
}
free(tmpbuf);
free(tmpfd);
free(cmsgbuf);
if (sock != -1)
close(sock);
if (xfer_sock) {
free(xfer_sock->namespace);
free(xfer_sock->iface);
if (xfer_sock->fd != -1)
close(xfer_sock->fd);
free(xfer_sock);
}
return (ret2);
}
/*
* copy and cleanup the current argv
* Remove the -sf /-st parameters
* Return an allocated copy of argv
*/
static char **copy_argv(int argc, char **argv)
{
char **newargv;
int i = 0, j = 0;
newargv = calloc(argc + 2, sizeof(char *));
if (newargv == NULL) {
ha_warning("Cannot allocate memory\n");
return NULL;
}
while (i < argc) {
/* -sf or -st or -x */
if (i > 0 && argv[i][0] == '-' &&
((argv[i][1] == 's' && (argv[i][2] == 'f' || argv[i][2] == 't')) || argv[i][1] == 'x' )) {
/* list of pids to finish ('f') or terminate ('t') or unix socket (-x) */
i++;
while (i < argc && argv[i][0] != '-') {
i++;
}
continue;
}
newargv[j++] = argv[i++];
}
return newargv;
}
/* considers splicing proxies' maxconn, computes the ideal global.maxpipes
* setting, and returns it. It may return -1 meaning "unlimited" if some
* unlimited proxies have been found and the global.maxconn value is not yet
* set. It may also return a value greater than maxconn if it's not yet set.
* Note that a value of zero means there is no need for pipes. -1 is never
* returned if global.maxconn is valid.
*/
static int compute_ideal_maxpipes()
{
struct proxy *cur;
int nbfe = 0, nbbe = 0;
int unlimited = 0;
int pipes;
int max;
for (cur = proxies_list; cur; cur = cur->next) {
if (cur->options2 & (PR_O2_SPLIC_ANY)) {
if (cur->cap & PR_CAP_FE) {
max = cur->maxconn;
nbfe += max;
if (!max) {
unlimited = 1;
break;
}
}
if (cur->cap & PR_CAP_BE) {
max = cur->fullconn ? cur->fullconn : global.maxconn;
nbbe += max;
if (!max) {
unlimited = 1;
break;
}
}
}
}
pipes = MAX(nbfe, nbbe);
if (global.maxconn) {
if (pipes > global.maxconn || unlimited)
pipes = global.maxconn;
} else if (unlimited) {
pipes = -1;
}
return pipes >= 4 ? pipes / 4 : pipes;
}
/* considers global.maxsocks, global.maxpipes, async engines, SSL frontends and
* rlimits and computes an ideal maxconn. It's meant to be called only when
* maxsock contains the sum of listening FDs, before it is updated based on
* maxconn and pipes. If there are not enough FDs left, DEFAULT_MAXCONN (by
* default 100) is returned as it is expected that it will even run on tight
* environments, and will maintain compatibility with previous packages that
* used to rely on this value as the default one. The system will emit a
* warning indicating how many FDs are missing anyway if needed.
*/
static int compute_ideal_maxconn()
{
int ssl_sides = !!global.ssl_used_frontend + !!global.ssl_used_backend;
int engine_fds = global.ssl_used_async_engines * ssl_sides;
int pipes = compute_ideal_maxpipes();
int remain = rlim_fd_cur_at_boot;
int maxconn;
/* we have to take into account these elements :
* - number of engine_fds, which inflates the number of FD needed per
* connection by this number.
* - number of pipes per connection on average : for the unlimited
* case, this is 0.5 pipe FDs per connection, otherwise it's a
* fixed value of 2*pipes.
* - two FDs per connection
*/
/* subtract listeners and checks */
remain -= global.maxsock;
/* one epoll_fd/kqueue_fd per thread */
remain -= global.nbthread;
/* one wake-up pipe (2 fd) per thread */
remain -= 2 * global.nbthread;
/* Fixed pipes values : we only subtract them if they're not larger
* than the remaining FDs because pipes are optional.
*/
if (pipes >= 0 && pipes * 2 < remain)
remain -= pipes * 2;
if (pipes < 0) {
/* maxsock = maxconn * 2 + maxconn/4 * 2 + maxconn * engine_fds.
* = maxconn * (2 + 0.5 + engine_fds)
* = maxconn * (4 + 1 + 2*engine_fds) / 2
*/
maxconn = 2 * remain / (5 + 2 * engine_fds);
} else {
/* maxsock = maxconn * 2 + maxconn * engine_fds.
* = maxconn * (2 + engine_fds)
*/
maxconn = remain / (2 + engine_fds);
}
return MAX(maxconn, DEFAULT_MAXCONN);
}
/*
* This function initializes all the necessary variables. It only returns
* if everything is OK. If something fails, it exits.
*/
static void init(int argc, char **argv)
{
int arg_mode = 0; /* MODE_DEBUG, ... */
char *tmp;
char *cfg_pidfile = NULL;
int err_code = 0;
char *err_msg = NULL;
struct wordlist *wl;
char *progname;
char *change_dir = NULL;
MAJOR: filters: Add filters support This patch adds the support of filters in HAProxy. The main idea is to have a way to "easely" extend HAProxy by adding some "modules", called filters, that will be able to change HAProxy behavior in a programmatic way. To do so, many entry points has been added in code to let filters to hook up to different steps of the processing. A filter must define a flt_ops sutrctures (see include/types/filters.h for details). This structure contains all available callbacks that a filter can define: struct flt_ops { /* * Callbacks to manage the filter lifecycle */ int (*init) (struct proxy *p); void (*deinit)(struct proxy *p); int (*check) (struct proxy *p); /* * Stream callbacks */ void (*stream_start) (struct stream *s); void (*stream_accept) (struct stream *s); void (*session_establish)(struct stream *s); void (*stream_stop) (struct stream *s); /* * HTTP callbacks */ int (*http_start) (struct stream *s, struct http_msg *msg); int (*http_start_body) (struct stream *s, struct http_msg *msg); int (*http_start_chunk) (struct stream *s, struct http_msg *msg); int (*http_data) (struct stream *s, struct http_msg *msg); int (*http_last_chunk) (struct stream *s, struct http_msg *msg); int (*http_end_chunk) (struct stream *s, struct http_msg *msg); int (*http_chunk_trailers)(struct stream *s, struct http_msg *msg); int (*http_end_body) (struct stream *s, struct http_msg *msg); void (*http_end) (struct stream *s, struct http_msg *msg); void (*http_reset) (struct stream *s, struct http_msg *msg); int (*http_pre_process) (struct stream *s, struct http_msg *msg); int (*http_post_process) (struct stream *s, struct http_msg *msg); void (*http_reply) (struct stream *s, short status, const struct chunk *msg); }; To declare and use a filter, in the configuration, the "filter" keyword must be used in a listener/frontend section: frontend test ... filter <FILTER-NAME> [OPTIONS...] The filter referenced by the <FILTER-NAME> must declare a configuration parser on its own name to fill flt_ops and filter_conf field in the proxy's structure. An exemple will be provided later to make it perfectly clear. For now, filters cannot be used in backend section. But this is only a matter of time. Documentation will also be added later. This is the first commit of a long list about filters. It is possible to have several filters on the same listener/frontend. These filters are stored in an array of at most MAX_FILTERS elements (define in include/types/filters.h). Again, this will be replaced later by a list of filters. The filter API has been highly refactored. Main changes are: * Now, HA supports an infinite number of filters per proxy. To do so, filters are stored in list. * Because filters are stored in list, filters state has been moved from the channel structure to the filter structure. This is cleaner because there is no more info about filters in channel structure. * It is possible to defined filters on backends only. For such filters, stream_start/stream_stop callbacks are not called. Of course, it is possible to mix frontend and backend filters. * Now, TCP streams are also filtered. All callbacks without the 'http_' prefix are called for all kind of streams. In addition, 2 new callbacks were added to filter data exchanged through a TCP stream: - tcp_data: it is called when new data are available or when old unprocessed data are still waiting. - tcp_forward_data: it is called when some data can be consumed. * New callbacks attached to channel were added: - channel_start_analyze: it is called when a filter is ready to process data exchanged through a channel. 2 new analyzers (a frontend and a backend) are attached to channels to call this callback. For a frontend filter, it is called before any other analyzer. For a backend filter, it is called when a backend is attached to a stream. So some processing cannot be filtered in that case. - channel_analyze: it is called before each analyzer attached to a channel, expects analyzers responsible for data sending. - channel_end_analyze: it is called when all other analyzers have finished their processing. A new analyzers is attached to channels to call this callback. For a TCP stream, this is always the last one called. For a HTTP one, the callback is called when a request/response ends, so it is called one time for each request/response. * 'session_established' callback has been removed. Everything that is done in this callback can be handled by 'channel_start_analyze' on the response channel. * 'http_pre_process' and 'http_post_process' callbacks have been replaced by 'channel_analyze'. * 'http_start' callback has been replaced by 'http_headers'. This new one is called just before headers sending and parsing of the body. * 'http_end' callback has been replaced by 'channel_end_analyze'. * It is possible to set a forwarder for TCP channels. It was already possible to do it for HTTP ones. * Forwarders can partially consumed forwardable data. For this reason a new HTTP message state was added before HTTP_MSG_DONE : HTTP_MSG_ENDING. Now all filters can define corresponding callbacks (http_forward_data and tcp_forward_data). Each filter owns 2 offsets relative to buf->p, next and forward, to track, respectively, input data already parsed but not forwarded yet by the filter and parsed data considered as forwarded by the filter. A any time, we have the warranty that a filter cannot parse or forward more input than previous ones. And, of course, it cannot forward more input than it has parsed. 2 macros has been added to retrieve these offets: FLT_NXT and FLT_FWD. In addition, 2 functions has been added to change the 'next size' and the 'forward size' of a filter. When a filter parses input data, it can alter these data, so the size of these data can vary. This action has an effet on all previous filters that must be handled. To do so, the function 'filter_change_next_size' must be called, passing the size variation. In the same spirit, if a filter alter forwarded data, it must call the function 'filter_change_forward_size'. 'filter_change_next_size' can be called in 'http_data' and 'tcp_data' callbacks and only these ones. And 'filter_change_forward_size' can be called in 'http_forward_data' and 'tcp_forward_data' callbacks and only these ones. The data changes are the filter responsability, but with some limitation. It must not change already parsed/forwarded data or data that previous filters have not parsed/forwarded yet. Because filters can be used on backends, when we the backend is set for a stream, we add filters defined for this backend in the filter list of the stream. But we must only do that when the backend and the frontend of the stream are not the same. Else same filters are added a second time leading to undefined behavior. The HTTP compression code had to be moved. So it simplifies http_response_forward_body function. To do so, the way the data are forwarded has changed. Now, a filter (and only one) can forward data. In a commit to come, this limitation will be removed to let all filters take part to data forwarding. There are 2 new functions that filters should use to deal with this feature: * flt_set_http_data_forwarder: This function sets the filter (using its id) that will forward data for the specified HTTP message. It is possible if it was not already set by another filter _AND_ if no data was yet forwarded (msg->msg_state <= HTTP_MSG_BODY). It returns -1 if an error occurs. * flt_http_data_forwarder: This function returns the filter id that will forward data for the specified HTTP message. If there is no forwarder set, it returns -1. When an HTTP data forwarder is set for the response, the HTTP compression is disabled. Of course, this is not definitive.
2015-04-30 05:48:27 -04:00
struct proxy *px;
struct post_check_fct *pcf;
int ideal_maxconn;
global.mode = MODE_STARTING;
next_argv = copy_argv(argc, argv);
if (!init_trash_buffers(1)) {
ha_alert("failed to initialize trash buffers.\n");
exit(1);
}
/* NB: POSIX does not make it mandatory for gethostname() to NULL-terminate
* the string in case of truncation, and at least FreeBSD appears not to do
* it.
*/
memset(hostname, 0, sizeof(hostname));
gethostname(hostname, sizeof(hostname) - 1);
memset(localpeer, 0, sizeof(localpeer));
memcpy(localpeer, hostname, (sizeof(hostname) > sizeof(localpeer) ? sizeof(localpeer) : sizeof(hostname)) - 1);
setenv("HAPROXY_LOCALPEER", localpeer, 1);
/*
* Initialize the previously static variables.
*/
totalconn = actconn = listeners = stopping = 0;
killed = 0;
#ifdef HAPROXY_MEMMAX
global.rlimit_memmax_all = HAPROXY_MEMMAX;
#endif
tzset();
tv_update_date(-1,-1);
start_date = now;
srandom(now_ms - getpid());
if (init_acl() != 0)
exit(1);
/* Initialise lua. */
hlua_init();
/* Initialize process vars */
vars_init(&global.vars, SCOPE_PROC);
global.tune.options |= GTUNE_USE_SELECT; /* select() is always available */
#if defined(USE_POLL)
global.tune.options |= GTUNE_USE_POLL;
#endif
#if defined(USE_EPOLL)
global.tune.options |= GTUNE_USE_EPOLL;
#endif
#if defined(USE_KQUEUE)
global.tune.options |= GTUNE_USE_KQUEUE;
#endif
#if defined(USE_EVPORTS)
global.tune.options |= GTUNE_USE_EVPORTS;
#endif
#if defined(USE_LINUX_SPLICE)
global.tune.options |= GTUNE_USE_SPLICE;
#endif
#if defined(USE_GETADDRINFO)
global.tune.options |= GTUNE_USE_GAI;
#endif
#if defined(SO_REUSEPORT)
global.tune.options |= GTUNE_USE_REUSEPORT;
#endif
pid = getpid();
progname = *argv;
while ((tmp = strchr(progname, '/')) != NULL)
progname = tmp + 1;
/* the process name is used for the logs only */
chunk_initstr(&global.log_tag, strdup(progname));
argc--; argv++;
while (argc > 0) {
char *flag;
if (**argv == '-') {
flag = *argv+1;
/* 1 arg */
if (*flag == 'v') {
display_version();
if (flag[1] == 'v') /* -vv */
display_build_opts();
exit(0);
}
#if defined(USE_EPOLL)
else if (*flag == 'd' && flag[1] == 'e')
global.tune.options &= ~GTUNE_USE_EPOLL;
#endif
#if defined(USE_POLL)
else if (*flag == 'd' && flag[1] == 'p')
global.tune.options &= ~GTUNE_USE_POLL;
#endif
#if defined(USE_KQUEUE)
else if (*flag == 'd' && flag[1] == 'k')
global.tune.options &= ~GTUNE_USE_KQUEUE;
#endif
#if defined(USE_EVPORTS)
else if (*flag == 'd' && flag[1] == 'v')
global.tune.options &= ~GTUNE_USE_EVPORTS;
#endif
#if defined(USE_LINUX_SPLICE)
else if (*flag == 'd' && flag[1] == 'S')
global.tune.options &= ~GTUNE_USE_SPLICE;
#endif
#if defined(USE_GETADDRINFO)
else if (*flag == 'd' && flag[1] == 'G')
global.tune.options &= ~GTUNE_USE_GAI;
#endif
#if defined(SO_REUSEPORT)
else if (*flag == 'd' && flag[1] == 'R')
global.tune.options &= ~GTUNE_USE_REUSEPORT;
#endif
else if (*flag == 'd' && flag[1] == 'V')
global.ssl_server_verify = SSL_SERVER_VERIFY_NONE;
else if (*flag == 'V')
arg_mode |= MODE_VERBOSE;
else if (*flag == 'd' && flag[1] == 'b')
arg_mode |= MODE_FOREGROUND;
else if (*flag == 'd' && flag[1] == 'M')
mem_poison_byte = flag[2] ? strtol(flag + 2, NULL, 0) : 'P';
else if (*flag == 'd' && flag[1] == 'r')
global.tune.options |= GTUNE_RESOLVE_DONTFAIL;
else if (*flag == 'd')
arg_mode |= MODE_DEBUG;
else if (*flag == 'c')
arg_mode |= MODE_CHECK;
else if (*flag == 'D')
arg_mode |= MODE_DAEMON;
MEDIUM: mworker: Add systemd `Type=notify` support This patch adds support for `Type=notify` to the systemd unit. Supporting `Type=notify` improves both starting as well as reloading of the unit, because systemd will be let known when the action completed. See this quote from `systemd.service(5)`: > Note however that reloading a daemon by sending a signal (as with the > example line above) is usually not a good choice, because this is an > asynchronous operation and hence not suitable to order reloads of > multiple services against each other. It is strongly recommended to > set ExecReload= to a command that not only triggers a configuration > reload of the daemon, but also synchronously waits for it to complete. By making systemd aware of a reload in progress it is able to wait until the reload actually succeeded. This patch introduces both a new `USE_SYSTEMD` build option which controls including the sd-daemon library as well as a `-Ws` runtime option which runs haproxy in master-worker mode with systemd support. When haproxy is running in master-worker mode with systemd support it will send status messages to systemd using `sd_notify(3)` in the following cases: - The master process forked off the worker processes (READY=1) - The master process entered the `mworker_reload()` function (RELOADING=1) - The master process received the SIGUSR1 or SIGTERM signal (STOPPING=1) Change the unit file to specify `Type=notify` and replace master-worker mode (`-W`) with master-worker mode with systemd support (`-Ws`). Future evolutions of this feature could include making use of the `STATUS` feature of `sd_notify()` to send information about the number of active connections to systemd. This would require bidirectional communication between the master and the workers and thus is left for future work.
2017-11-20 09:58:35 -05:00
else if (*flag == 'W' && flag[1] == 's') {
arg_mode |= MODE_MWORKER | MODE_FOREGROUND;
MEDIUM: mworker: Add systemd `Type=notify` support This patch adds support for `Type=notify` to the systemd unit. Supporting `Type=notify` improves both starting as well as reloading of the unit, because systemd will be let known when the action completed. See this quote from `systemd.service(5)`: > Note however that reloading a daemon by sending a signal (as with the > example line above) is usually not a good choice, because this is an > asynchronous operation and hence not suitable to order reloads of > multiple services against each other. It is strongly recommended to > set ExecReload= to a command that not only triggers a configuration > reload of the daemon, but also synchronously waits for it to complete. By making systemd aware of a reload in progress it is able to wait until the reload actually succeeded. This patch introduces both a new `USE_SYSTEMD` build option which controls including the sd-daemon library as well as a `-Ws` runtime option which runs haproxy in master-worker mode with systemd support. When haproxy is running in master-worker mode with systemd support it will send status messages to systemd using `sd_notify(3)` in the following cases: - The master process forked off the worker processes (READY=1) - The master process entered the `mworker_reload()` function (RELOADING=1) - The master process received the SIGUSR1 or SIGTERM signal (STOPPING=1) Change the unit file to specify `Type=notify` and replace master-worker mode (`-W`) with master-worker mode with systemd support (`-Ws`). Future evolutions of this feature could include making use of the `STATUS` feature of `sd_notify()` to send information about the number of active connections to systemd. This would require bidirectional communication between the master and the workers and thus is left for future work.
2017-11-20 09:58:35 -05:00
#if defined(USE_SYSTEMD)
global.tune.options |= GTUNE_USE_SYSTEMD;
#else
ha_alert("master-worker mode with systemd support (-Ws) requested, but not compiled. Use master-worker mode (-W) if you are not using Type=notify in your unit file or recompile with USE_SYSTEMD=1.\n\n");
MEDIUM: mworker: Add systemd `Type=notify` support This patch adds support for `Type=notify` to the systemd unit. Supporting `Type=notify` improves both starting as well as reloading of the unit, because systemd will be let known when the action completed. See this quote from `systemd.service(5)`: > Note however that reloading a daemon by sending a signal (as with the > example line above) is usually not a good choice, because this is an > asynchronous operation and hence not suitable to order reloads of > multiple services against each other. It is strongly recommended to > set ExecReload= to a command that not only triggers a configuration > reload of the daemon, but also synchronously waits for it to complete. By making systemd aware of a reload in progress it is able to wait until the reload actually succeeded. This patch introduces both a new `USE_SYSTEMD` build option which controls including the sd-daemon library as well as a `-Ws` runtime option which runs haproxy in master-worker mode with systemd support. When haproxy is running in master-worker mode with systemd support it will send status messages to systemd using `sd_notify(3)` in the following cases: - The master process forked off the worker processes (READY=1) - The master process entered the `mworker_reload()` function (RELOADING=1) - The master process received the SIGUSR1 or SIGTERM signal (STOPPING=1) Change the unit file to specify `Type=notify` and replace master-worker mode (`-W`) with master-worker mode with systemd support (`-Ws`). Future evolutions of this feature could include making use of the `STATUS` feature of `sd_notify()` to send information about the number of active connections to systemd. This would require bidirectional communication between the master and the workers and thus is left for future work.
2017-11-20 09:58:35 -05:00
usage(progname);
#endif
}
else if (*flag == 'W')
arg_mode |= MODE_MWORKER;
else if (*flag == 'q')
arg_mode |= MODE_QUIET;
else if (*flag == 'x') {
if (argc <= 1 || argv[1][0] == '-') {
ha_alert("Unix socket path expected with the -x flag\n\n");
usage(progname);
}
if (old_unixsocket)
ha_warning("-x option already set, overwriting the value\n");
old_unixsocket = argv[1];
argv++;
argc--;
}
else if (*flag == 'S') {
struct wordlist *c;
if (argc <= 1 || argv[1][0] == '-') {
ha_alert("Socket and optional bind parameters expected with the -S flag\n");
usage(progname);
}
if ((c = malloc(sizeof(*c))) == NULL || (c->s = strdup(argv[1])) == NULL) {
ha_alert("Cannot allocate memory\n");
exit(EXIT_FAILURE);
}
LIST_ADD(&mworker_cli_conf, &c->list);
argv++;
argc--;
}
else if (*flag == 's' && (flag[1] == 'f' || flag[1] == 't')) {
/* list of pids to finish ('f') or terminate ('t') */
if (flag[1] == 'f')
oldpids_sig = SIGUSR1; /* finish then exit */
else
oldpids_sig = SIGTERM; /* terminate immediately */
while (argc > 1 && argv[1][0] != '-') {
char * endptr = NULL;
oldpids = realloc(oldpids, (nb_oldpids + 1) * sizeof(int));
if (!oldpids) {
ha_alert("Cannot allocate old pid : out of memory.\n");
exit(1);
}
argc--; argv++;
errno = 0;
oldpids[nb_oldpids] = strtol(*argv, &endptr, 10);
if (errno) {
ha_alert("-%2s option: failed to parse {%s}: %s\n",
flag,
*argv, strerror(errno));
exit(1);
} else if (endptr && strlen(endptr)) {
while (isspace(*endptr)) endptr++;
if (*endptr != 0) {
ha_alert("-%2s option: some bytes unconsumed in PID list {%s}\n",
flag, endptr);
exit(1);
}
}
if (oldpids[nb_oldpids] <= 0)
usage(progname);
nb_oldpids++;
}
}
else if (flag[0] == '-' && flag[1] == 0) { /* "--" */
/* now that's a cfgfile list */
argv++; argc--;
while (argc > 0) {
if (!list_append_word(&cfg_cfgfiles, *argv, &err_msg)) {
ha_alert("Cannot load configuration file/directory %s : %s\n",
*argv,
err_msg);
exit(1);
}
argv++; argc--;
}
break;
}
else { /* >=2 args */
argv++; argc--;
if (argc == 0)
usage(progname);
switch (*flag) {
case 'C' : change_dir = *argv; break;
case 'n' : cfg_maxconn = atol(*argv); break;
case 'm' : global.rlimit_memmax_all = atol(*argv); break;
case 'N' : cfg_maxpconn = atol(*argv); break;
case 'L' :
strncpy(localpeer, *argv, sizeof(localpeer) - 1);
setenv("HAPROXY_LOCALPEER", localpeer, 1);
break;
case 'f' :
if (!list_append_word(&cfg_cfgfiles, *argv, &err_msg)) {
ha_alert("Cannot load configuration file/directory %s : %s\n",
*argv,
err_msg);
exit(1);
}
break;
case 'p' : cfg_pidfile = *argv; break;
default: usage(progname);
}
}
}
else
usage(progname);
argv++; argc--;
}
global.mode |= (arg_mode & (MODE_DAEMON | MODE_MWORKER | MODE_FOREGROUND | MODE_VERBOSE
| MODE_QUIET | MODE_CHECK | MODE_DEBUG));
if (getenv("HAPROXY_MWORKER_WAIT_ONLY")) {
unsetenv("HAPROXY_MWORKER_WAIT_ONLY");
global.mode |= MODE_MWORKER_WAIT;
global.mode &= ~MODE_MWORKER;
}
if ((global.mode & MODE_MWORKER) && (getenv("HAPROXY_MWORKER_REEXEC") != NULL)) {
atexit_flag = 1;
atexit(reexec_on_failure);
}
if (change_dir && chdir(change_dir) < 0) {
ha_alert("Could not change to directory %s : %s\n", change_dir, strerror(errno));
exit(1);
}
global.maxsock = 10; /* reserve 10 fds ; will be incremented by socket eaters */
init_default_instance();
/* in wait mode, we don't try to read the configuration files */
if (!(global.mode & MODE_MWORKER_WAIT)) {
struct buffer *trash = get_trash_chunk();
/* handle cfgfiles that are actually directories */
cfgfiles_expand_directories();
if (LIST_ISEMPTY(&cfg_cfgfiles))
usage(progname);
list_for_each_entry(wl, &cfg_cfgfiles, list) {
int ret;
if (trash->data)
chunk_appendf(trash, ";");
chunk_appendf(trash, "%s", wl->s);
ret = readcfgfile(wl->s);
if (ret == -1) {
ha_alert("Could not open configuration file %s : %s\n",
wl->s, strerror(errno));
exit(1);
}
if (ret & (ERR_ABORT|ERR_FATAL))
ha_alert("Error(s) found in configuration file : %s\n", wl->s);
err_code |= ret;
if (err_code & ERR_ABORT)
exit(1);
}
/* do not try to resolve arguments nor to spot inconsistencies when
* the configuration contains fatal errors caused by files not found
* or failed memory allocations.
*/
if (err_code & (ERR_ABORT|ERR_FATAL)) {
ha_alert("Fatal errors found in configuration.\n");
exit(1);
}
if (trash->data)
setenv("HAPROXY_CFGFILES", trash->area, 1);
MEDIUM: config: don't check config validity when there are fatal errors Overall we do have an issue with the severity of a number of errors. Most fatal errors are reported with ERR_FATAL (which prevents startup) and not ERR_ABORT (which stops parsing ASAP), but check_config_validity() is still called on ERR_FATAL, and will most of the time report bogus errors. This is what caused smp_resolve_args() to be called on a number of unparsable ACLs, and it also is what reports incorrect ordering or unresolvable section names when certain entries could not be properly parsed. This patch stops this domino effect by simply aborting before trying to further check and resolve the configuration when it's already know that there are fatal errors. A concrete example comes from this config : userlist users : user foo insecure-password bar listen foo bind :1234 mode htttp timeout client 10S timeout server 10s timeout connect 10s stats uri /stats stats http-request auth unless { http_auth(users) } http-request redirect location /index.html if { path / } It contains a colon after the userlist name, a typo in the client timeout value, another one in "mode http" which cause some other configuration elements not to be properly handled. Previously it would confusingly report : [ALERT] 108/114851 (20224) : parsing [err-report.cfg:1] : 'userlist' cannot handle unexpected argument ':'. [ALERT] 108/114851 (20224) : parsing [err-report.cfg:6] : unknown proxy mode 'htttp'. [ALERT] 108/114851 (20224) : parsing [err-report.cfg:7] : unexpected character 'S' in 'timeout client' [ALERT] 108/114851 (20224) : Error(s) found in configuration file : err-report.cfg [ALERT] 108/114851 (20224) : parsing [err-report.cfg:11] : unable to find userlist 'users' referenced in arg 1 of ACL keyword 'http_auth' in proxy 'foo'. [WARNING] 108/114851 (20224) : config : missing timeouts for proxy 'foo'. | While not properly invalid, you will certainly encounter various problems | with such a configuration. To fix this, please ensure that all following | timeouts are set to a non-zero value: 'client', 'connect', 'server'. [WARNING] 108/114851 (20224) : config : 'stats' statement ignored for proxy 'foo' as it requires HTTP mode. [WARNING] 108/114851 (20224) : config : 'http-request' rules ignored for proxy 'foo' as they require HTTP mode. [ALERT] 108/114851 (20224) : Fatal errors found in configuration. The "requires HTTP mode" errors are just pollution resulting from the improper spelling of this mode earlier. The unresolved reference to the userlist is caused by the extra colon on the declaration, and the warning regarding the missing timeouts is caused by the wrong character. Now it more accurately reports : [ALERT] 108/114900 (20225) : parsing [err-report.cfg:1] : 'userlist' cannot handle unexpected argument ':'. [ALERT] 108/114900 (20225) : parsing [err-report.cfg:6] : unknown proxy mode 'htttp'. [ALERT] 108/114900 (20225) : parsing [err-report.cfg:7] : unexpected character 'S' in 'timeout client' [ALERT] 108/114900 (20225) : Error(s) found in configuration file : err-report.cfg [ALERT] 108/114900 (20225) : Fatal errors found in configuration. Despite not really a fix, this patch should be backported at least to 1.7, possibly even 1.6, and 1.5 since it hardens the config parser against certain bad situations like the recently reported use-after-free and the last null dereference.
2017-04-19 05:24:07 -04:00
}
if (global.mode & MODE_MWORKER) {
int proc;
struct mworker_proc *tmproc;
setenv("HAPROXY_MWORKER", "1", 1);
if (getenv("HAPROXY_MWORKER_REEXEC") == NULL) {
tmproc = calloc(1, sizeof(*tmproc));
if (!tmproc) {
ha_alert("Cannot allocate process structures.\n");
exit(EXIT_FAILURE);
}
tmproc->options |= PROC_O_TYPE_MASTER; /* master */
tmproc->reloads = 0;
tmproc->relative_pid = 0;
tmproc->pid = pid;
tmproc->timestamp = start_date.tv_sec;
tmproc->ipc_fd[0] = -1;
tmproc->ipc_fd[1] = -1;
proc_self = tmproc;
LIST_ADDQ(&proc_list, &tmproc->list);
}
for (proc = 0; proc < global.nbproc; proc++) {
tmproc = calloc(1, sizeof(*tmproc));
if (!tmproc) {
ha_alert("Cannot allocate process structures.\n");
exit(EXIT_FAILURE);
}
tmproc->options |= PROC_O_TYPE_WORKER; /* worker */
tmproc->pid = -1;
tmproc->reloads = 0;
tmproc->timestamp = -1;
tmproc->relative_pid = 1 + proc;
tmproc->ipc_fd[0] = -1;
tmproc->ipc_fd[1] = -1;
if (mworker_cli_sockpair_new(tmproc, proc) < 0) {
exit(EXIT_FAILURE);
}
LIST_ADDQ(&proc_list, &tmproc->list);
}
}
if (global.mode & (MODE_MWORKER|MODE_MWORKER_WAIT)) {
struct wordlist *it, *c;
mworker_env_to_proc_list(); /* get the info of the children in the env */
if (!LIST_ISEMPTY(&mworker_cli_conf)) {
if (mworker_cli_proxy_create() < 0) {
ha_alert("Can't create the master's CLI.\n");
exit(EXIT_FAILURE);
}
list_for_each_entry_safe(c, it, &mworker_cli_conf, list) {
if (mworker_cli_proxy_new_listener(c->s) < 0) {
ha_alert("Can't create the master's CLI.\n");
exit(EXIT_FAILURE);
}
LIST_DEL(&c->list);
free(c->s);
free(c);
}
}
}
err_code |= check_config_validity();
if (err_code & (ERR_ABORT|ERR_FATAL)) {
ha_alert("Fatal errors found in configuration.\n");
exit(1);
}
pattern_finalize_config();
/* recompute the amount of per-process memory depending on nbproc and
* the shared SSL cache size (allowed to exist in all processes).
*/
if (global.rlimit_memmax_all) {
#if defined (USE_OPENSSL) && !defined(USE_PRIVATE_CACHE)
int64_t ssl_cache_bytes = global.tune.sslcachesize * 200LL;
global.rlimit_memmax =
((((int64_t)global.rlimit_memmax_all * 1048576LL) -
ssl_cache_bytes) / global.nbproc +
ssl_cache_bytes + 1048575LL) / 1048576LL;
#else
global.rlimit_memmax = global.rlimit_memmax_all / global.nbproc;
#endif
}
#ifdef USE_NS
MAJOR: namespace: add Linux network namespace support This patch makes it possible to create binds and servers in separate namespaces. This can be used to proxy between multiple completely independent virtual networks (with possibly overlapping IP addresses) and a non-namespace-aware proxy implementation that supports the proxy protocol (v2). The setup is something like this: net1 on VLAN 1 (namespace 1) -\ net2 on VLAN 2 (namespace 2) -- haproxy ==== proxy (namespace 0) net3 on VLAN 3 (namespace 3) -/ The proxy is configured to make server connections through haproxy and sending the expected source/target addresses to haproxy using the proxy protocol. The network namespace setup on the haproxy node is something like this: = 8< = $ cat setup.sh ip netns add 1 ip link add link eth1 type vlan id 1 ip link set eth1.1 netns 1 ip netns exec 1 ip addr add 192.168.91.2/24 dev eth1.1 ip netns exec 1 ip link set eth1.$id up ... = 8< = = 8< = $ cat haproxy.cfg frontend clients bind 127.0.0.1:50022 namespace 1 transparent default_backend scb backend server mode tcp server server1 192.168.122.4:2222 namespace 2 send-proxy-v2 = 8< = A bind line creates the listener in the specified namespace, and connections originating from that listener also have their network namespace set to that of the listener. A server line either forces the connection to be made in a specified namespace or may use the namespace from the client-side connection if that was set. For more documentation please read the documentation included in the patch itself. Signed-off-by: KOVACS Tamas <ktamas@balabit.com> Signed-off-by: Sarkozi Laszlo <laszlo.sarkozi@balabit.com> Signed-off-by: KOVACS Krisztian <hidden@balabit.com>
2014-11-17 09:11:45 -05:00
err_code |= netns_init();
if (err_code & (ERR_ABORT|ERR_FATAL)) {
ha_alert("Failed to initialize namespace support.\n");
MAJOR: namespace: add Linux network namespace support This patch makes it possible to create binds and servers in separate namespaces. This can be used to proxy between multiple completely independent virtual networks (with possibly overlapping IP addresses) and a non-namespace-aware proxy implementation that supports the proxy protocol (v2). The setup is something like this: net1 on VLAN 1 (namespace 1) -\ net2 on VLAN 2 (namespace 2) -- haproxy ==== proxy (namespace 0) net3 on VLAN 3 (namespace 3) -/ The proxy is configured to make server connections through haproxy and sending the expected source/target addresses to haproxy using the proxy protocol. The network namespace setup on the haproxy node is something like this: = 8< = $ cat setup.sh ip netns add 1 ip link add link eth1 type vlan id 1 ip link set eth1.1 netns 1 ip netns exec 1 ip addr add 192.168.91.2/24 dev eth1.1 ip netns exec 1 ip link set eth1.$id up ... = 8< = = 8< = $ cat haproxy.cfg frontend clients bind 127.0.0.1:50022 namespace 1 transparent default_backend scb backend server mode tcp server server1 192.168.122.4:2222 namespace 2 send-proxy-v2 = 8< = A bind line creates the listener in the specified namespace, and connections originating from that listener also have their network namespace set to that of the listener. A server line either forces the connection to be made in a specified namespace or may use the namespace from the client-side connection if that was set. For more documentation please read the documentation included in the patch itself. Signed-off-by: KOVACS Tamas <ktamas@balabit.com> Signed-off-by: Sarkozi Laszlo <laszlo.sarkozi@balabit.com> Signed-off-by: KOVACS Krisztian <hidden@balabit.com>
2014-11-17 09:11:45 -05:00
exit(1);
}
#endif
/* Apply server states */
apply_server_state();
for (px = proxies_list; px; px = px->next)
srv_compute_all_admin_states(px);
/* Apply servers' configured address */
err_code |= srv_init_addr();
if (err_code & (ERR_ABORT|ERR_FATAL)) {
ha_alert("Failed to initialize server(s) addr.\n");
exit(1);
}
if (global.mode & MODE_CHECK) {
struct peers *pr;
struct proxy *px;
for (pr = cfg_peers; pr; pr = pr->next)
if (pr->peers_fe)
break;
for (px = proxies_list; px; px = px->next)
if (px->state == PR_STNEW && !LIST_ISEMPTY(&px->conf.listeners))
break;
if (pr || px) {
/* At least one peer or one listener has been found */
qfprintf(stdout, "Configuration file is valid\n");
exit(0);
}
qfprintf(stdout, "Configuration file has no error but will not start (no listener) => exit(2).\n");
exit(2);
}
global_listener_queue_task = task_new(MAX_THREADS_MASK);
if (!global_listener_queue_task) {
ha_alert("Out of memory when initializing global task\n");
exit(1);
}
/* very simple initialization, users will queue the task if needed */
global_listener_queue_task->context = NULL; /* not even a context! */
global_listener_queue_task->process = manage_global_listener_queue;
CLEANUP: channel: use "channel" instead of "buffer" in function names This is a massive rename of most functions which should make use of the word "channel" instead of the word "buffer" in their names. In concerns the following ones (new names) : unsigned long long channel_forward(struct channel *buf, unsigned long long bytes); static inline void channel_init(struct channel *buf) static inline int channel_input_closed(struct channel *buf) static inline int channel_output_closed(struct channel *buf) static inline void channel_check_timeouts(struct channel *b) static inline void channel_erase(struct channel *buf) static inline void channel_shutr_now(struct channel *buf) static inline void channel_shutw_now(struct channel *buf) static inline void channel_abort(struct channel *buf) static inline void channel_stop_hijacker(struct channel *buf) static inline void channel_auto_connect(struct channel *buf) static inline void channel_dont_connect(struct channel *buf) static inline void channel_auto_close(struct channel *buf) static inline void channel_dont_close(struct channel *buf) static inline void channel_auto_read(struct channel *buf) static inline void channel_dont_read(struct channel *buf) unsigned long long channel_forward(struct channel *buf, unsigned long long bytes) Some functions provided by channel.[ch] have kept their "buffer" name because they are really designed to act on the buffer according to some information gathered from the channel. They have been moved together to the same place in the file for better readability but they were not changed at all. The "buffer" memory pool was also renamed "channel".
2012-08-27 18:06:31 -04:00
/* now we know the buffer size, we can initialize the channels and buffers */
init_buffer();
list_for_each_entry(pcf, &post_check_list, list) {
err_code |= pcf->fct();
if (err_code & (ERR_ABORT|ERR_FATAL))
exit(1);
}
if (cfg_maxconn > 0)
global.maxconn = cfg_maxconn;
if (global.stats_fe)
global.maxsock += global.stats_fe->maxconn;
if (cfg_peers) {
/* peers also need to bypass global maxconn */
struct peers *p = cfg_peers;
for (p = cfg_peers; p; p = p->next)
if (p->peers_fe)
global.maxsock += p->peers_fe->maxconn;
}
if (cfg_pidfile) {
free(global.pidfile);
global.pidfile = strdup(cfg_pidfile);
}
/* Now we want to compute the maxconn and possibly maxsslconn values.
* It's a bit tricky. Maxconn defaults to the pre-computed value based
* on rlim_fd_cur and the number of FDs in use due to the configuration,
* and maxsslconn defaults to DEFAULT_MAXSSLCONN. On top of that we can
* enforce a lower limit based on memmax.
*
* If memmax is set, then it depends on which values are set. If
* maxsslconn is set, we use memmax to determine how many cleartext
* connections may be added, and set maxconn to the sum of the two.
* If maxconn is set and not maxsslconn, maxsslconn is computed from
* the remaining amount of memory between memmax and the cleartext
* connections. If neither are set, then it is considered that all
* connections are SSL-capable, and maxconn is computed based on this,
* then maxsslconn accordingly. We need to know if SSL is used on the
* frontends, backends, or both, because when it's used on both sides,
* we need twice the value for maxsslconn, but we only count the
* handshake once since it is not performed on the two sides at the
* same time (frontend-side is terminated before backend-side begins).
* The SSL stack is supposed to have filled ssl_session_cost and
* ssl_handshake_cost during its initialization. In any case, if
* SYSTEM_MAXCONN is set, we still enforce it as an upper limit for
* maxconn in order to protect the system.
*/
ideal_maxconn = compute_ideal_maxconn();
if (!global.rlimit_memmax) {
if (global.maxconn == 0) {
global.maxconn = ideal_maxconn;
if (global.mode & (MODE_VERBOSE|MODE_DEBUG))
fprintf(stderr, "Note: setting global.maxconn to %d.\n", global.maxconn);
}
}
#ifdef USE_OPENSSL
else if (!global.maxconn && !global.maxsslconn &&
(global.ssl_used_frontend || global.ssl_used_backend)) {
/* memmax is set, compute everything automatically. Here we want
* to ensure that all SSL connections will be served. We take
* care of the number of sides where SSL is used, and consider
* the worst case : SSL used on both sides and doing a handshake
* simultaneously. Note that we can't have more than maxconn
* handshakes at a time by definition, so for the worst case of
* two SSL conns per connection, we count a single handshake.
*/
int sides = !!global.ssl_used_frontend + !!global.ssl_used_backend;
int64_t mem = global.rlimit_memmax * 1048576ULL;
mem -= global.tune.sslcachesize * 200; // about 200 bytes per SSL cache entry
mem -= global.maxzlibmem;
mem = mem * MEM_USABLE_RATIO;
global.maxconn = mem /
REORG/MAJOR: session: rename the "session" entity to "stream" With HTTP/2, we'll have to support multiplexed streams. A stream is in fact the largest part of what we currently call a session, it has buffers, logs, etc. In order to catch any error, this commit removes any reference to the struct session and tries to rename most "session" occurrences in function names to "stream" and "sess" to "strm" when that's related to a session. The files stream.{c,h} were added and session.{c,h} removed. The session will be reintroduced later and a few parts of the stream will progressively be moved overthere. It will more or less contain only what we need in an embryonic session. Sample fetch functions and converters will have to change a bit so that they'll use an L5 (session) instead of what's currently called "L4" which is in fact L6 for now. Once all changes are completed, we should see approximately this : L7 - http_txn L6 - stream L5 - session L4 - connection | applet There will be at most one http_txn per stream, and a same session will possibly be referenced by multiple streams. A connection will point to a session and to a stream. The session will hold all the information we need to keep even when we don't yet have a stream. Some more cleanup is needed because some code was already far from being clean. The server queue management still refers to sessions at many places while comments talk about connections. This will have to be cleaned up once we have a server-side connection pool manager. Stream flags "SN_*" still need to be renamed, it doesn't seem like any of them will need to move to the session.
2015-04-02 18:22:06 -04:00
((STREAM_MAX_COST + 2 * global.tune.bufsize) + // stream + 2 buffers per stream
sides * global.ssl_session_max_cost + // SSL buffers, one per side
global.ssl_handshake_max_cost); // 1 handshake per connection max
global.maxconn = MIN(global.maxconn, ideal_maxconn);
global.maxconn = round_2dig(global.maxconn);
#ifdef SYSTEM_MAXCONN
if (global.maxconn > SYSTEM_MAXCONN)
global.maxconn = SYSTEM_MAXCONN;
#endif /* SYSTEM_MAXCONN */
global.maxsslconn = sides * global.maxconn;
if (global.mode & (MODE_VERBOSE|MODE_DEBUG))
fprintf(stderr, "Note: setting global.maxconn to %d and global.maxsslconn to %d.\n",
global.maxconn, global.maxsslconn);
}
else if (!global.maxsslconn &&
(global.ssl_used_frontend || global.ssl_used_backend)) {
/* memmax and maxconn are known, compute maxsslconn automatically.
* maxsslconn being forced, we don't know how many of it will be
* on each side if both sides are being used. The worst case is
* when all connections use only one SSL instance because
* handshakes may be on two sides at the same time.
*/
int sides = !!global.ssl_used_frontend + !!global.ssl_used_backend;
int64_t mem = global.rlimit_memmax * 1048576ULL;
int64_t sslmem;
mem -= global.tune.sslcachesize * 200; // about 200 bytes per SSL cache entry
mem -= global.maxzlibmem;
mem = mem * MEM_USABLE_RATIO;
REORG/MAJOR: session: rename the "session" entity to "stream" With HTTP/2, we'll have to support multiplexed streams. A stream is in fact the largest part of what we currently call a session, it has buffers, logs, etc. In order to catch any error, this commit removes any reference to the struct session and tries to rename most "session" occurrences in function names to "stream" and "sess" to "strm" when that's related to a session. The files stream.{c,h} were added and session.{c,h} removed. The session will be reintroduced later and a few parts of the stream will progressively be moved overthere. It will more or less contain only what we need in an embryonic session. Sample fetch functions and converters will have to change a bit so that they'll use an L5 (session) instead of what's currently called "L4" which is in fact L6 for now. Once all changes are completed, we should see approximately this : L7 - http_txn L6 - stream L5 - session L4 - connection | applet There will be at most one http_txn per stream, and a same session will possibly be referenced by multiple streams. A connection will point to a session and to a stream. The session will hold all the information we need to keep even when we don't yet have a stream. Some more cleanup is needed because some code was already far from being clean. The server queue management still refers to sessions at many places while comments talk about connections. This will have to be cleaned up once we have a server-side connection pool manager. Stream flags "SN_*" still need to be renamed, it doesn't seem like any of them will need to move to the session.
2015-04-02 18:22:06 -04:00
sslmem = mem - global.maxconn * (int64_t)(STREAM_MAX_COST + 2 * global.tune.bufsize);
global.maxsslconn = sslmem / (global.ssl_session_max_cost + global.ssl_handshake_max_cost);
global.maxsslconn = round_2dig(global.maxsslconn);
if (sslmem <= 0 || global.maxsslconn < sides) {
ha_alert("Cannot compute the automatic maxsslconn because global.maxconn is already too "
"high for the global.memmax value (%d MB). The absolute maximum possible value "
"without SSL is %d, but %d was found and SSL is in use.\n",
global.rlimit_memmax,
(int)(mem / (STREAM_MAX_COST + 2 * global.tune.bufsize)),
global.maxconn);
exit(1);
}
if (global.maxsslconn > sides * global.maxconn)
global.maxsslconn = sides * global.maxconn;
if (global.mode & (MODE_VERBOSE|MODE_DEBUG))
fprintf(stderr, "Note: setting global.maxsslconn to %d\n", global.maxsslconn);
}
#endif
else if (!global.maxconn) {
/* memmax and maxsslconn are known/unused, compute maxconn automatically */
int sides = !!global.ssl_used_frontend + !!global.ssl_used_backend;
int64_t mem = global.rlimit_memmax * 1048576ULL;
int64_t clearmem;
if (global.ssl_used_frontend || global.ssl_used_backend)
mem -= global.tune.sslcachesize * 200; // about 200 bytes per SSL cache entry
mem -= global.maxzlibmem;
mem = mem * MEM_USABLE_RATIO;
clearmem = mem;
if (sides)
clearmem -= (global.ssl_session_max_cost + global.ssl_handshake_max_cost) * (int64_t)global.maxsslconn;
REORG/MAJOR: session: rename the "session" entity to "stream" With HTTP/2, we'll have to support multiplexed streams. A stream is in fact the largest part of what we currently call a session, it has buffers, logs, etc. In order to catch any error, this commit removes any reference to the struct session and tries to rename most "session" occurrences in function names to "stream" and "sess" to "strm" when that's related to a session. The files stream.{c,h} were added and session.{c,h} removed. The session will be reintroduced later and a few parts of the stream will progressively be moved overthere. It will more or less contain only what we need in an embryonic session. Sample fetch functions and converters will have to change a bit so that they'll use an L5 (session) instead of what's currently called "L4" which is in fact L6 for now. Once all changes are completed, we should see approximately this : L7 - http_txn L6 - stream L5 - session L4 - connection | applet There will be at most one http_txn per stream, and a same session will possibly be referenced by multiple streams. A connection will point to a session and to a stream. The session will hold all the information we need to keep even when we don't yet have a stream. Some more cleanup is needed because some code was already far from being clean. The server queue management still refers to sessions at many places while comments talk about connections. This will have to be cleaned up once we have a server-side connection pool manager. Stream flags "SN_*" still need to be renamed, it doesn't seem like any of them will need to move to the session.
2015-04-02 18:22:06 -04:00
global.maxconn = clearmem / (STREAM_MAX_COST + 2 * global.tune.bufsize);
global.maxconn = MIN(global.maxconn, ideal_maxconn);
global.maxconn = round_2dig(global.maxconn);
#ifdef SYSTEM_MAXCONN
if (global.maxconn > SYSTEM_MAXCONN)
global.maxconn = SYSTEM_MAXCONN;
#endif /* SYSTEM_MAXCONN */
if (clearmem <= 0 || !global.maxconn) {
ha_alert("Cannot compute the automatic maxconn because global.maxsslconn is already too "
"high for the global.memmax value (%d MB). The absolute maximum possible value "
"is %d, but %d was found.\n",
global.rlimit_memmax,
(int)(mem / (global.ssl_session_max_cost + global.ssl_handshake_max_cost)),
global.maxsslconn);
exit(1);
}
if (global.mode & (MODE_VERBOSE|MODE_DEBUG)) {
if (sides && global.maxsslconn > sides * global.maxconn) {
fprintf(stderr, "Note: global.maxsslconn is forced to %d which causes global.maxconn "
"to be limited to %d. Better reduce global.maxsslconn to get more "
"room for extra connections.\n", global.maxsslconn, global.maxconn);
}
fprintf(stderr, "Note: setting global.maxconn to %d\n", global.maxconn);
}
}
if (!global.maxpipes)
global.maxpipes = compute_ideal_maxpipes();
global.hardmaxconn = global.maxconn; /* keep this max value */
global.maxsock += global.maxconn * 2; /* each connection needs two sockets */
global.maxsock += global.maxpipes * 2; /* each pipe needs two FDs */
global.maxsock += global.nbthread; /* one epoll_fd/kqueue_fd per thread */
global.maxsock += 2 * global.nbthread; /* one wake-up pipe (2 fd) per thread */
/* compute fd used by async engines */
if (global.ssl_used_async_engines) {
int sides = !!global.ssl_used_frontend + !!global.ssl_used_backend;
global.maxsock += global.maxconn * sides * global.ssl_used_async_engines;
}
/* update connection pool thresholds */
global.tune.pool_low_count = ((long long)global.maxsock * global.tune.pool_low_ratio + 99) / 100;
global.tune.pool_high_count = ((long long)global.maxsock * global.tune.pool_high_ratio + 99) / 100;
proxy_adjust_all_maxconn();
if (global.tune.maxpollevents <= 0)
global.tune.maxpollevents = MAX_POLL_EVENTS;
if (global.tune.runqueue_depth <= 0)
global.tune.runqueue_depth = RUNQUEUE_DEPTH;
if (global.tune.recv_enough == 0)
global.tune.recv_enough = MIN_RECV_AT_ONCE_ENOUGH;
if (global.tune.maxrewrite < 0)
global.tune.maxrewrite = MAXREWRITE;
if (global.tune.maxrewrite >= global.tune.bufsize / 2)
global.tune.maxrewrite = global.tune.bufsize / 2;
if (arg_mode & (MODE_DEBUG | MODE_FOREGROUND)) {
/* command line debug mode inhibits configuration mode */
global.mode &= ~(MODE_DAEMON | MODE_QUIET);
global.mode |= (arg_mode & (MODE_DEBUG | MODE_FOREGROUND));
}
if (arg_mode & MODE_DAEMON) {
/* command line daemon mode inhibits foreground and debug modes mode */
global.mode &= ~(MODE_DEBUG | MODE_FOREGROUND);
global.mode |= arg_mode & MODE_DAEMON;
}
global.mode |= (arg_mode & (MODE_QUIET | MODE_VERBOSE));
if ((global.mode & MODE_DEBUG) && (global.mode & (MODE_DAEMON | MODE_QUIET))) {
ha_warning("<debug> mode incompatible with <quiet> and <daemon>. Keeping <debug> only.\n");
global.mode &= ~(MODE_DAEMON | MODE_QUIET);
}
if ((global.nbproc > 1) && !(global.mode & (MODE_DAEMON | MODE_MWORKER))) {
if (!(global.mode & (MODE_FOREGROUND | MODE_DEBUG)))
ha_warning("<nbproc> is only meaningful in daemon mode or master-worker mode. Setting limit to 1 process.\n");
global.nbproc = 1;
}
if (global.nbproc < 1)
global.nbproc = 1;
if (global.nbthread < 1)
global.nbthread = 1;
/* Realloc trash buffers because global.tune.bufsize may have changed */
if (!init_trash_buffers(0)) {
ha_alert("failed to initialize trash buffers.\n");
exit(1);
}
if (!init_log_buffers()) {
ha_alert("failed to initialize log buffers.\n");
exit(1);
}
/*
* Note: we could register external pollers here.
* Built-in pollers have been registered before main().
*/
if (!(global.tune.options & GTUNE_USE_KQUEUE))
disable_poller("kqueue");
if (!(global.tune.options & GTUNE_USE_EVPORTS))
disable_poller("evports");
if (!(global.tune.options & GTUNE_USE_EPOLL))
disable_poller("epoll");
if (!(global.tune.options & GTUNE_USE_POLL))
disable_poller("poll");
if (!(global.tune.options & GTUNE_USE_SELECT))
disable_poller("select");
/* Note: we could disable any poller by name here */
if (global.mode & (MODE_VERBOSE|MODE_DEBUG)) {
list_pollers(stderr);
fprintf(stderr, "\n");
list_filters(stderr);
}
if (!init_pollers()) {
ha_alert("No polling mechanism available.\n"
" It is likely that haproxy was built with TARGET=generic and that FD_SETSIZE\n"
" is too low on this platform to support maxconn and the number of listeners\n"
" and servers. You should rebuild haproxy specifying your system using TARGET=\n"
" in order to support other polling systems (poll, epoll, kqueue) or reduce the\n"
" global maxconn setting to accommodate the system's limitation. For reference,\n"
" FD_SETSIZE=%d on this system, global.maxconn=%d resulting in a maximum of\n"
" %d file descriptors. You should thus reduce global.maxconn by %d. Also,\n"
" check build settings using 'haproxy -vv'.\n\n",
FD_SETSIZE, global.maxconn, global.maxsock, (global.maxsock + 1 - FD_SETSIZE) / 2);
exit(1);
}
if (global.mode & (MODE_VERBOSE|MODE_DEBUG)) {
printf("Using %s() as the polling mechanism.\n", cur_poller.name);
}
if (!global.node)
global.node = strdup(hostname);
if (!hlua_post_init())
exit(1);
free(err_msg);
}
static void deinit_acl_cond(struct acl_cond *cond)
{
struct acl_term_suite *suite, *suiteb;
struct acl_term *term, *termb;
if (!cond)
return;
list_for_each_entry_safe(suite, suiteb, &cond->suites, list) {
list_for_each_entry_safe(term, termb, &suite->terms, list) {
LIST_DEL(&term->list);
free(term);
}
LIST_DEL(&suite->list);
free(suite);
}
free(cond);
}
static void deinit_tcp_rules(struct list *rules)
{
struct act_rule *trule, *truleb;
list_for_each_entry_safe(trule, truleb, rules, list) {
LIST_DEL(&trule->list);
deinit_acl_cond(trule->cond);
free(trule);
}
}
static void deinit_stick_rules(struct list *rules)
{
struct sticking_rule *rule, *ruleb;
list_for_each_entry_safe(rule, ruleb, rules, list) {
LIST_DEL(&rule->list);
deinit_acl_cond(rule->cond);
release_sample_expr(rule->expr);
free(rule);
}
}
void deinit(void)
{
struct proxy *p = proxies_list, *p0;
struct cap_hdr *h,*h_next;
struct server *s,*s_next;
struct listener *l,*l_next;
struct acl_cond *cond, *condb;
[MEDIUM] Fix memory freeing at exit New functions implemented: - deinit_pollers: called at the end of deinit()) - prune_acl: called via list_for_each_entry_safe Add missing pool_destroy2 calls: - p->hdr_idx_pool - pool2_tree64 Implement all task stopping: - health-check: needs new "struct task" in the struct server - queue processing: queue_mgt - appsess_refresh: appsession_refresh before (idle system): ==6079== LEAK SUMMARY: ==6079== definitely lost: 1,112 bytes in 75 blocks. ==6079== indirectly lost: 53,356 bytes in 2,090 blocks. ==6079== possibly lost: 52 bytes in 1 blocks. ==6079== still reachable: 150,996 bytes in 504 blocks. ==6079== suppressed: 0 bytes in 0 blocks. after (idle system): ==6945== LEAK SUMMARY: ==6945== definitely lost: 7,644 bytes in 137 blocks. ==6945== indirectly lost: 9,913 bytes in 587 blocks. ==6945== possibly lost: 0 bytes in 0 blocks. ==6945== still reachable: 0 bytes in 0 blocks. ==6945== suppressed: 0 bytes in 0 blocks. before (running system for ~2m): ==9343== LEAK SUMMARY: ==9343== definitely lost: 1,112 bytes in 75 blocks. ==9343== indirectly lost: 54,199 bytes in 2,122 blocks. ==9343== possibly lost: 52 bytes in 1 blocks. ==9343== still reachable: 151,128 bytes in 509 blocks. ==9343== suppressed: 0 bytes in 0 blocks. after (running system for ~2m): ==11616== LEAK SUMMARY: ==11616== definitely lost: 7,644 bytes in 137 blocks. ==11616== indirectly lost: 9,981 bytes in 591 blocks. ==11616== possibly lost: 0 bytes in 0 blocks. ==11616== still reachable: 4 bytes in 1 blocks. ==11616== suppressed: 0 bytes in 0 blocks. Still not perfect but significant improvement.
2008-05-29 17:53:44 -04:00
struct acl *acl, *aclb;
struct switching_rule *rule, *ruleb;
struct server_rule *srule, *sruleb;
struct redirect_rule *rdr, *rdrb;
struct wordlist *wl, *wlb;
struct uri_auth *uap, *ua = NULL;
struct logsrv *log, *logb;
struct logformat_node *lf, *lfb;
struct bind_conf *bind_conf, *bind_back;
struct build_opts_str *bol, *bolb;
struct post_deinit_fct *pdf;
int i;
deinit_signals();
while (p) {
free(p->conf.file);
free(p->id);
free(p->check_req);
free(p->cookie_name);
free(p->cookie_domain);
free(p->lbprm.arg_str);
free(p->capture_name);
free(p->monitor_uri);
free(p->rdp_cookie_name);
if (p->conf.logformat_string != default_http_log_format &&
p->conf.logformat_string != default_tcp_log_format &&
p->conf.logformat_string != clf_http_log_format)
free(p->conf.logformat_string);
free(p->conf.lfs_file);
free(p->conf.uniqueid_format_string);
free(p->conf.uif_file);
if ((p->lbprm.algo & BE_LB_LKUP) == BE_LB_LKUP_MAP)
free(p->lbprm.map.srv);
if (p->conf.logformat_sd_string != default_rfc5424_sd_log_format)
free(p->conf.logformat_sd_string);
free(p->conf.lfsd_file);
for (i = 0; i < HTTP_ERR_SIZE; i++)
chunk_destroy(&p->errmsg[i]);
list_for_each_entry_safe(cond, condb, &p->mon_fail_cond, list) {
LIST_DEL(&cond->list);
prune_acl_cond(cond);
free(cond);
}
/* build a list of unique uri_auths */
if (!ua)
ua = p->uri_auth;
else {
/* check if p->uri_auth is unique */
for (uap = ua; uap; uap=uap->next)
if (uap == p->uri_auth)
break;
if (!uap && p->uri_auth) {
/* add it, if it is */
p->uri_auth->next = ua;
ua = p->uri_auth;
}
}
[MEDIUM] Fix memory freeing at exit New functions implemented: - deinit_pollers: called at the end of deinit()) - prune_acl: called via list_for_each_entry_safe Add missing pool_destroy2 calls: - p->hdr_idx_pool - pool2_tree64 Implement all task stopping: - health-check: needs new "struct task" in the struct server - queue processing: queue_mgt - appsess_refresh: appsession_refresh before (idle system): ==6079== LEAK SUMMARY: ==6079== definitely lost: 1,112 bytes in 75 blocks. ==6079== indirectly lost: 53,356 bytes in 2,090 blocks. ==6079== possibly lost: 52 bytes in 1 blocks. ==6079== still reachable: 150,996 bytes in 504 blocks. ==6079== suppressed: 0 bytes in 0 blocks. after (idle system): ==6945== LEAK SUMMARY: ==6945== definitely lost: 7,644 bytes in 137 blocks. ==6945== indirectly lost: 9,913 bytes in 587 blocks. ==6945== possibly lost: 0 bytes in 0 blocks. ==6945== still reachable: 0 bytes in 0 blocks. ==6945== suppressed: 0 bytes in 0 blocks. before (running system for ~2m): ==9343== LEAK SUMMARY: ==9343== definitely lost: 1,112 bytes in 75 blocks. ==9343== indirectly lost: 54,199 bytes in 2,122 blocks. ==9343== possibly lost: 52 bytes in 1 blocks. ==9343== still reachable: 151,128 bytes in 509 blocks. ==9343== suppressed: 0 bytes in 0 blocks. after (running system for ~2m): ==11616== LEAK SUMMARY: ==11616== definitely lost: 7,644 bytes in 137 blocks. ==11616== indirectly lost: 9,981 bytes in 591 blocks. ==11616== possibly lost: 0 bytes in 0 blocks. ==11616== still reachable: 4 bytes in 1 blocks. ==11616== suppressed: 0 bytes in 0 blocks. Still not perfect but significant improvement.
2008-05-29 17:53:44 -04:00
list_for_each_entry_safe(acl, aclb, &p->acl, list) {
LIST_DEL(&acl->list);
prune_acl(acl);
free(acl);
}
list_for_each_entry_safe(srule, sruleb, &p->server_rules, list) {
LIST_DEL(&srule->list);
prune_acl_cond(srule->cond);
free(srule->cond);
free(srule);
}
list_for_each_entry_safe(rule, ruleb, &p->switching_rules, list) {
LIST_DEL(&rule->list);
if (rule->cond) {
prune_acl_cond(rule->cond);
free(rule->cond);
}
free(rule->file);
free(rule);
}
list_for_each_entry_safe(rdr, rdrb, &p->redirect_rules, list) {
LIST_DEL(&rdr->list);
if (rdr->cond) {
prune_acl_cond(rdr->cond);
free(rdr->cond);
}
free(rdr->rdr_str);
list_for_each_entry_safe(lf, lfb, &rdr->rdr_fmt, list) {
LIST_DEL(&lf->list);
free(lf);
}
free(rdr);
}
list_for_each_entry_safe(log, logb, &p->logsrvs, list) {
LIST_DEL(&log->list);
free(log);
}
list_for_each_entry_safe(lf, lfb, &p->logformat, list) {
LIST_DEL(&lf->list);
release_sample_expr(lf->expr);
free(lf->arg);
free(lf);
}
list_for_each_entry_safe(lf, lfb, &p->logformat_sd, list) {
LIST_DEL(&lf->list);
release_sample_expr(lf->expr);
free(lf->arg);
free(lf);
}
deinit_tcp_rules(&p->tcp_req.inspect_rules);
deinit_tcp_rules(&p->tcp_rep.inspect_rules);
deinit_tcp_rules(&p->tcp_req.l4_rules);
deinit_stick_rules(&p->storersp_rules);
deinit_stick_rules(&p->sticking_rules);
h = p->req_cap;
while (h) {
h_next = h->next;
free(h->name);
pool_destroy(h->pool);
free(h);
h = h_next;
}/* end while(h) */
h = p->rsp_cap;
while (h) {
h_next = h->next;
free(h->name);
pool_destroy(h->pool);
free(h);
h = h_next;
}/* end while(h) */
[MEDIUM] Fix memory freeing at exit New functions implemented: - deinit_pollers: called at the end of deinit()) - prune_acl: called via list_for_each_entry_safe Add missing pool_destroy2 calls: - p->hdr_idx_pool - pool2_tree64 Implement all task stopping: - health-check: needs new "struct task" in the struct server - queue processing: queue_mgt - appsess_refresh: appsession_refresh before (idle system): ==6079== LEAK SUMMARY: ==6079== definitely lost: 1,112 bytes in 75 blocks. ==6079== indirectly lost: 53,356 bytes in 2,090 blocks. ==6079== possibly lost: 52 bytes in 1 blocks. ==6079== still reachable: 150,996 bytes in 504 blocks. ==6079== suppressed: 0 bytes in 0 blocks. after (idle system): ==6945== LEAK SUMMARY: ==6945== definitely lost: 7,644 bytes in 137 blocks. ==6945== indirectly lost: 9,913 bytes in 587 blocks. ==6945== possibly lost: 0 bytes in 0 blocks. ==6945== still reachable: 0 bytes in 0 blocks. ==6945== suppressed: 0 bytes in 0 blocks. before (running system for ~2m): ==9343== LEAK SUMMARY: ==9343== definitely lost: 1,112 bytes in 75 blocks. ==9343== indirectly lost: 54,199 bytes in 2,122 blocks. ==9343== possibly lost: 52 bytes in 1 blocks. ==9343== still reachable: 151,128 bytes in 509 blocks. ==9343== suppressed: 0 bytes in 0 blocks. after (running system for ~2m): ==11616== LEAK SUMMARY: ==11616== definitely lost: 7,644 bytes in 137 blocks. ==11616== indirectly lost: 9,981 bytes in 591 blocks. ==11616== possibly lost: 0 bytes in 0 blocks. ==11616== still reachable: 4 bytes in 1 blocks. ==11616== suppressed: 0 bytes in 0 blocks. Still not perfect but significant improvement.
2008-05-29 17:53:44 -04:00
s = p->srv;
while (s) {
s_next = s->next;
[MEDIUM] Fix memory freeing at exit New functions implemented: - deinit_pollers: called at the end of deinit()) - prune_acl: called via list_for_each_entry_safe Add missing pool_destroy2 calls: - p->hdr_idx_pool - pool2_tree64 Implement all task stopping: - health-check: needs new "struct task" in the struct server - queue processing: queue_mgt - appsess_refresh: appsession_refresh before (idle system): ==6079== LEAK SUMMARY: ==6079== definitely lost: 1,112 bytes in 75 blocks. ==6079== indirectly lost: 53,356 bytes in 2,090 blocks. ==6079== possibly lost: 52 bytes in 1 blocks. ==6079== still reachable: 150,996 bytes in 504 blocks. ==6079== suppressed: 0 bytes in 0 blocks. after (idle system): ==6945== LEAK SUMMARY: ==6945== definitely lost: 7,644 bytes in 137 blocks. ==6945== indirectly lost: 9,913 bytes in 587 blocks. ==6945== possibly lost: 0 bytes in 0 blocks. ==6945== still reachable: 0 bytes in 0 blocks. ==6945== suppressed: 0 bytes in 0 blocks. before (running system for ~2m): ==9343== LEAK SUMMARY: ==9343== definitely lost: 1,112 bytes in 75 blocks. ==9343== indirectly lost: 54,199 bytes in 2,122 blocks. ==9343== possibly lost: 52 bytes in 1 blocks. ==9343== still reachable: 151,128 bytes in 509 blocks. ==9343== suppressed: 0 bytes in 0 blocks. after (running system for ~2m): ==11616== LEAK SUMMARY: ==11616== definitely lost: 7,644 bytes in 137 blocks. ==11616== indirectly lost: 9,981 bytes in 591 blocks. ==11616== possibly lost: 0 bytes in 0 blocks. ==11616== still reachable: 4 bytes in 1 blocks. ==11616== suppressed: 0 bytes in 0 blocks. Still not perfect but significant improvement.
2008-05-29 17:53:44 -04:00
task_destroy(s->check.task);
task_destroy(s->agent.task);
[MEDIUM] Fix memory freeing at exit New functions implemented: - deinit_pollers: called at the end of deinit()) - prune_acl: called via list_for_each_entry_safe Add missing pool_destroy2 calls: - p->hdr_idx_pool - pool2_tree64 Implement all task stopping: - health-check: needs new "struct task" in the struct server - queue processing: queue_mgt - appsess_refresh: appsession_refresh before (idle system): ==6079== LEAK SUMMARY: ==6079== definitely lost: 1,112 bytes in 75 blocks. ==6079== indirectly lost: 53,356 bytes in 2,090 blocks. ==6079== possibly lost: 52 bytes in 1 blocks. ==6079== still reachable: 150,996 bytes in 504 blocks. ==6079== suppressed: 0 bytes in 0 blocks. after (idle system): ==6945== LEAK SUMMARY: ==6945== definitely lost: 7,644 bytes in 137 blocks. ==6945== indirectly lost: 9,913 bytes in 587 blocks. ==6945== possibly lost: 0 bytes in 0 blocks. ==6945== still reachable: 0 bytes in 0 blocks. ==6945== suppressed: 0 bytes in 0 blocks. before (running system for ~2m): ==9343== LEAK SUMMARY: ==9343== definitely lost: 1,112 bytes in 75 blocks. ==9343== indirectly lost: 54,199 bytes in 2,122 blocks. ==9343== possibly lost: 52 bytes in 1 blocks. ==9343== still reachable: 151,128 bytes in 509 blocks. ==9343== suppressed: 0 bytes in 0 blocks. after (running system for ~2m): ==11616== LEAK SUMMARY: ==11616== definitely lost: 7,644 bytes in 137 blocks. ==11616== indirectly lost: 9,981 bytes in 591 blocks. ==11616== possibly lost: 0 bytes in 0 blocks. ==11616== still reachable: 4 bytes in 1 blocks. ==11616== suppressed: 0 bytes in 0 blocks. Still not perfect but significant improvement.
2008-05-29 17:53:44 -04:00
if (s->check.wait_list.tasklet)
tasklet_free(s->check.wait_list.tasklet);
if (s->agent.wait_list.tasklet)
tasklet_free(s->agent.wait_list.tasklet);
task_destroy(s->warmup);
free(s->id);
free(s->cookie);
free(s->check.bi.area);
free(s->check.bo.area);
free(s->agent.bi.area);
free(s->agent.bo.area);
free(s->agent.send_string);
MAJOR: dns: Refactor the DNS code This is a huge patch with many changes, all about the DNS. Initially, the idea was to update the DNS part to ease the threads support integration. But quickly, I started to refactor some parts. And after several iterations, it was impossible for me to commit the different parts atomically. So, instead of adding tens of patches, often reworking the same parts, it was easier to merge all my changes in a uniq patch. Here are all changes made on the DNS. First, the DNS initialization has been refactored. The DNS configuration parsing remains untouched, in cfgparse.c. But all checks have been moved in a post-check callback. In the function dns_finalize_config, for each resolvers, the nameservers configuration is tested and the task used to manage DNS resolutions is created. The links between the backend's servers and the resolvers are also created at this step. Here no connection are kept alive. So there is no needs anymore to reopen them after HAProxy fork. Connections used to send DNS queries will be opened on demand. Then, the way DNS requesters are linked to a DNS resolution has been reworked. The resolution used by a requester is now referenced into the dns_requester structure and the resolution pointers in server and dns_srvrq structures have been removed. wait and curr list of requesters, for a DNS resolution, have been replaced by a uniq list. And Finally, the way a requester is removed from a DNS resolution has been simplified. Now everything is done in dns_unlink_resolution. srv_set_fqdn function has been simplified. Now, there is only 1 way to set the server's FQDN, independently it is done by the CLI or when a SRV record is resolved. The static DNS resolutions pool has been replaced by a dynamoc pool. The part has been modified by Baptiste Assmann. The way the DNS resolutions are triggered by the task or by a health-check has been totally refactored. Now, all timeouts are respected. Especially hold.valid. The default frequency to wake up a resolvers is now configurable using "timeout resolve" parameter. Now, as documented, as long as invalid repsonses are received, we really wait all name servers responses before retrying. As far as possible, resources allocated during DNS configuration parsing are releases when HAProxy is shutdown. Beside all these changes, the code has been cleaned to ease code review and the doc has been updated.
2017-09-27 05:00:59 -04:00
free(s->hostname_dn);
free((char*)s->conf.file);
free(s->idle_conns);
free(s->priv_conns);
free(s->safe_conns);
free(s->idle_orphan_conns);
free(s->curr_idle_thr);
if (s->use_ssl || s->check.use_ssl) {
if (xprt_get(XPRT_SSL) && xprt_get(XPRT_SSL)->destroy_srv)
xprt_get(XPRT_SSL)->destroy_srv(s);
}
HA_SPIN_DESTROY(&s->lock);
free(s);
s = s_next;
}/* end while(s) */
[MEDIUM] Fix memory freeing at exit New functions implemented: - deinit_pollers: called at the end of deinit()) - prune_acl: called via list_for_each_entry_safe Add missing pool_destroy2 calls: - p->hdr_idx_pool - pool2_tree64 Implement all task stopping: - health-check: needs new "struct task" in the struct server - queue processing: queue_mgt - appsess_refresh: appsession_refresh before (idle system): ==6079== LEAK SUMMARY: ==6079== definitely lost: 1,112 bytes in 75 blocks. ==6079== indirectly lost: 53,356 bytes in 2,090 blocks. ==6079== possibly lost: 52 bytes in 1 blocks. ==6079== still reachable: 150,996 bytes in 504 blocks. ==6079== suppressed: 0 bytes in 0 blocks. after (idle system): ==6945== LEAK SUMMARY: ==6945== definitely lost: 7,644 bytes in 137 blocks. ==6945== indirectly lost: 9,913 bytes in 587 blocks. ==6945== possibly lost: 0 bytes in 0 blocks. ==6945== still reachable: 0 bytes in 0 blocks. ==6945== suppressed: 0 bytes in 0 blocks. before (running system for ~2m): ==9343== LEAK SUMMARY: ==9343== definitely lost: 1,112 bytes in 75 blocks. ==9343== indirectly lost: 54,199 bytes in 2,122 blocks. ==9343== possibly lost: 52 bytes in 1 blocks. ==9343== still reachable: 151,128 bytes in 509 blocks. ==9343== suppressed: 0 bytes in 0 blocks. after (running system for ~2m): ==11616== LEAK SUMMARY: ==11616== definitely lost: 7,644 bytes in 137 blocks. ==11616== indirectly lost: 9,981 bytes in 591 blocks. ==11616== possibly lost: 0 bytes in 0 blocks. ==11616== still reachable: 4 bytes in 1 blocks. ==11616== suppressed: 0 bytes in 0 blocks. Still not perfect but significant improvement.
2008-05-29 17:53:44 -04:00
list_for_each_entry_safe(l, l_next, &p->conf.listeners, by_fe) {
/*
* Zombie proxy, the listener just pretend to be up
* because they still hold an opened fd.
* Close it and give the listener its real state.
*/
if (p->state == PR_STSTOPPED && l->state >= LI_ZOMBIE) {
close(l->fd);
l->state = LI_INIT;
}
unbind_listener(l);
delete_listener(l);
LIST_DEL(&l->by_fe);
LIST_DEL(&l->by_bind);
free(l->name);
free(l->counters);
free(l);
}
[MEDIUM] Fix memory freeing at exit New functions implemented: - deinit_pollers: called at the end of deinit()) - prune_acl: called via list_for_each_entry_safe Add missing pool_destroy2 calls: - p->hdr_idx_pool - pool2_tree64 Implement all task stopping: - health-check: needs new "struct task" in the struct server - queue processing: queue_mgt - appsess_refresh: appsession_refresh before (idle system): ==6079== LEAK SUMMARY: ==6079== definitely lost: 1,112 bytes in 75 blocks. ==6079== indirectly lost: 53,356 bytes in 2,090 blocks. ==6079== possibly lost: 52 bytes in 1 blocks. ==6079== still reachable: 150,996 bytes in 504 blocks. ==6079== suppressed: 0 bytes in 0 blocks. after (idle system): ==6945== LEAK SUMMARY: ==6945== definitely lost: 7,644 bytes in 137 blocks. ==6945== indirectly lost: 9,913 bytes in 587 blocks. ==6945== possibly lost: 0 bytes in 0 blocks. ==6945== still reachable: 0 bytes in 0 blocks. ==6945== suppressed: 0 bytes in 0 blocks. before (running system for ~2m): ==9343== LEAK SUMMARY: ==9343== definitely lost: 1,112 bytes in 75 blocks. ==9343== indirectly lost: 54,199 bytes in 2,122 blocks. ==9343== possibly lost: 52 bytes in 1 blocks. ==9343== still reachable: 151,128 bytes in 509 blocks. ==9343== suppressed: 0 bytes in 0 blocks. after (running system for ~2m): ==11616== LEAK SUMMARY: ==11616== definitely lost: 7,644 bytes in 137 blocks. ==11616== indirectly lost: 9,981 bytes in 591 blocks. ==11616== possibly lost: 0 bytes in 0 blocks. ==11616== still reachable: 4 bytes in 1 blocks. ==11616== suppressed: 0 bytes in 0 blocks. Still not perfect but significant improvement.
2008-05-29 17:53:44 -04:00
/* Release unused SSL configs. */
list_for_each_entry_safe(bind_conf, bind_back, &p->conf.bind, by_fe) {
if (bind_conf->xprt->destroy_bind_conf)
bind_conf->xprt->destroy_bind_conf(bind_conf);
free(bind_conf->file);
free(bind_conf->arg);
LIST_DEL(&bind_conf->by_fe);
free(bind_conf);
}
MAJOR: filters: Add filters support This patch adds the support of filters in HAProxy. The main idea is to have a way to "easely" extend HAProxy by adding some "modules", called filters, that will be able to change HAProxy behavior in a programmatic way. To do so, many entry points has been added in code to let filters to hook up to different steps of the processing. A filter must define a flt_ops sutrctures (see include/types/filters.h for details). This structure contains all available callbacks that a filter can define: struct flt_ops { /* * Callbacks to manage the filter lifecycle */ int (*init) (struct proxy *p); void (*deinit)(struct proxy *p); int (*check) (struct proxy *p); /* * Stream callbacks */ void (*stream_start) (struct stream *s); void (*stream_accept) (struct stream *s); void (*session_establish)(struct stream *s); void (*stream_stop) (struct stream *s); /* * HTTP callbacks */ int (*http_start) (struct stream *s, struct http_msg *msg); int (*http_start_body) (struct stream *s, struct http_msg *msg); int (*http_start_chunk) (struct stream *s, struct http_msg *msg); int (*http_data) (struct stream *s, struct http_msg *msg); int (*http_last_chunk) (struct stream *s, struct http_msg *msg); int (*http_end_chunk) (struct stream *s, struct http_msg *msg); int (*http_chunk_trailers)(struct stream *s, struct http_msg *msg); int (*http_end_body) (struct stream *s, struct http_msg *msg); void (*http_end) (struct stream *s, struct http_msg *msg); void (*http_reset) (struct stream *s, struct http_msg *msg); int (*http_pre_process) (struct stream *s, struct http_msg *msg); int (*http_post_process) (struct stream *s, struct http_msg *msg); void (*http_reply) (struct stream *s, short status, const struct chunk *msg); }; To declare and use a filter, in the configuration, the "filter" keyword must be used in a listener/frontend section: frontend test ... filter <FILTER-NAME> [OPTIONS...] The filter referenced by the <FILTER-NAME> must declare a configuration parser on its own name to fill flt_ops and filter_conf field in the proxy's structure. An exemple will be provided later to make it perfectly clear. For now, filters cannot be used in backend section. But this is only a matter of time. Documentation will also be added later. This is the first commit of a long list about filters. It is possible to have several filters on the same listener/frontend. These filters are stored in an array of at most MAX_FILTERS elements (define in include/types/filters.h). Again, this will be replaced later by a list of filters. The filter API has been highly refactored. Main changes are: * Now, HA supports an infinite number of filters per proxy. To do so, filters are stored in list. * Because filters are stored in list, filters state has been moved from the channel structure to the filter structure. This is cleaner because there is no more info about filters in channel structure. * It is possible to defined filters on backends only. For such filters, stream_start/stream_stop callbacks are not called. Of course, it is possible to mix frontend and backend filters. * Now, TCP streams are also filtered. All callbacks without the 'http_' prefix are called for all kind of streams. In addition, 2 new callbacks were added to filter data exchanged through a TCP stream: - tcp_data: it is called when new data are available or when old unprocessed data are still waiting. - tcp_forward_data: it is called when some data can be consumed. * New callbacks attached to channel were added: - channel_start_analyze: it is called when a filter is ready to process data exchanged through a channel. 2 new analyzers (a frontend and a backend) are attached to channels to call this callback. For a frontend filter, it is called before any other analyzer. For a backend filter, it is called when a backend is attached to a stream. So some processing cannot be filtered in that case. - channel_analyze: it is called before each analyzer attached to a channel, expects analyzers responsible for data sending. - channel_end_analyze: it is called when all other analyzers have finished their processing. A new analyzers is attached to channels to call this callback. For a TCP stream, this is always the last one called. For a HTTP one, the callback is called when a request/response ends, so it is called one time for each request/response. * 'session_established' callback has been removed. Everything that is done in this callback can be handled by 'channel_start_analyze' on the response channel. * 'http_pre_process' and 'http_post_process' callbacks have been replaced by 'channel_analyze'. * 'http_start' callback has been replaced by 'http_headers'. This new one is called just before headers sending and parsing of the body. * 'http_end' callback has been replaced by 'channel_end_analyze'. * It is possible to set a forwarder for TCP channels. It was already possible to do it for HTTP ones. * Forwarders can partially consumed forwardable data. For this reason a new HTTP message state was added before HTTP_MSG_DONE : HTTP_MSG_ENDING. Now all filters can define corresponding callbacks (http_forward_data and tcp_forward_data). Each filter owns 2 offsets relative to buf->p, next and forward, to track, respectively, input data already parsed but not forwarded yet by the filter and parsed data considered as forwarded by the filter. A any time, we have the warranty that a filter cannot parse or forward more input than previous ones. And, of course, it cannot forward more input than it has parsed. 2 macros has been added to retrieve these offets: FLT_NXT and FLT_FWD. In addition, 2 functions has been added to change the 'next size' and the 'forward size' of a filter. When a filter parses input data, it can alter these data, so the size of these data can vary. This action has an effet on all previous filters that must be handled. To do so, the function 'filter_change_next_size' must be called, passing the size variation. In the same spirit, if a filter alter forwarded data, it must call the function 'filter_change_forward_size'. 'filter_change_next_size' can be called in 'http_data' and 'tcp_data' callbacks and only these ones. And 'filter_change_forward_size' can be called in 'http_forward_data' and 'tcp_forward_data' callbacks and only these ones. The data changes are the filter responsability, but with some limitation. It must not change already parsed/forwarded data or data that previous filters have not parsed/forwarded yet. Because filters can be used on backends, when we the backend is set for a stream, we add filters defined for this backend in the filter list of the stream. But we must only do that when the backend and the frontend of the stream are not the same. Else same filters are added a second time leading to undefined behavior. The HTTP compression code had to be moved. So it simplifies http_response_forward_body function. To do so, the way the data are forwarded has changed. Now, a filter (and only one) can forward data. In a commit to come, this limitation will be removed to let all filters take part to data forwarding. There are 2 new functions that filters should use to deal with this feature: * flt_set_http_data_forwarder: This function sets the filter (using its id) that will forward data for the specified HTTP message. It is possible if it was not already set by another filter _AND_ if no data was yet forwarded (msg->msg_state <= HTTP_MSG_BODY). It returns -1 if an error occurs. * flt_http_data_forwarder: This function returns the filter id that will forward data for the specified HTTP message. If there is no forwarder set, it returns -1. When an HTTP data forwarder is set for the response, the HTTP compression is disabled. Of course, this is not definitive.
2015-04-30 05:48:27 -04:00
flt_deinit(p);
free(p->desc);
free(p->fwdfor_hdr_name);
free_http_req_rules(&p->http_req_rules);
free_http_res_rules(&p->http_res_rules);
task_destroy(p->task);
pool_destroy(p->req_cap_pool);
pool_destroy(p->rsp_cap_pool);
if (p->table)
pool_destroy(p->table->pool);
p0 = p;
p = p->next;
HA_SPIN_DESTROY(&p0->lbprm.lock);
HA_SPIN_DESTROY(&p0->lock);
free(p0);
}/* end while(p) */
while (ua) {
uap = ua;
ua = ua->next;
free(uap->uri_prefix);
free(uap->auth_realm);
free(uap->node);
free(uap->desc);
userlist_free(uap->userlist);
free_http_req_rules(&uap->http_req_rules);
free(uap);
}
userlist_free(userlist);
cfg_unregister_sections();
deinit_log_buffers();
protocol_unbind_all();
list_for_each_entry(pdf, &post_deinit_list, list)
pdf->fct();
free(global.log_send_hostname); global.log_send_hostname = NULL;
chunk_destroy(&global.log_tag);
free(global.chroot); global.chroot = NULL;
free(global.pidfile); global.pidfile = NULL;
free(global.node); global.node = NULL;
free(global.desc); global.desc = NULL;
free(oldpids); oldpids = NULL;
task_destroy(global_listener_queue_task); global_listener_queue_task = NULL;
task_destroy(idle_conn_task);
idle_conn_task = NULL;
list_for_each_entry_safe(log, logb, &global.logsrvs, list) {
LIST_DEL(&log->list);
free(log);
}
list_for_each_entry_safe(wl, wlb, &cfg_cfgfiles, list) {
free(wl->s);
LIST_DEL(&wl->list);
free(wl);
}
list_for_each_entry_safe(bol, bolb, &build_opts_list, list) {
if (bol->must_free)
free((void *)bol->str);
LIST_DEL(&bol->list);
free(bol);
}
vars_prune(&global.vars, NULL, NULL);
pool_destroy_all();
[MEDIUM] Fix memory freeing at exit New functions implemented: - deinit_pollers: called at the end of deinit()) - prune_acl: called via list_for_each_entry_safe Add missing pool_destroy2 calls: - p->hdr_idx_pool - pool2_tree64 Implement all task stopping: - health-check: needs new "struct task" in the struct server - queue processing: queue_mgt - appsess_refresh: appsession_refresh before (idle system): ==6079== LEAK SUMMARY: ==6079== definitely lost: 1,112 bytes in 75 blocks. ==6079== indirectly lost: 53,356 bytes in 2,090 blocks. ==6079== possibly lost: 52 bytes in 1 blocks. ==6079== still reachable: 150,996 bytes in 504 blocks. ==6079== suppressed: 0 bytes in 0 blocks. after (idle system): ==6945== LEAK SUMMARY: ==6945== definitely lost: 7,644 bytes in 137 blocks. ==6945== indirectly lost: 9,913 bytes in 587 blocks. ==6945== possibly lost: 0 bytes in 0 blocks. ==6945== still reachable: 0 bytes in 0 blocks. ==6945== suppressed: 0 bytes in 0 blocks. before (running system for ~2m): ==9343== LEAK SUMMARY: ==9343== definitely lost: 1,112 bytes in 75 blocks. ==9343== indirectly lost: 54,199 bytes in 2,122 blocks. ==9343== possibly lost: 52 bytes in 1 blocks. ==9343== still reachable: 151,128 bytes in 509 blocks. ==9343== suppressed: 0 bytes in 0 blocks. after (running system for ~2m): ==11616== LEAK SUMMARY: ==11616== definitely lost: 7,644 bytes in 137 blocks. ==11616== indirectly lost: 9,981 bytes in 591 blocks. ==11616== possibly lost: 0 bytes in 0 blocks. ==11616== still reachable: 4 bytes in 1 blocks. ==11616== suppressed: 0 bytes in 0 blocks. Still not perfect but significant improvement.
2008-05-29 17:53:44 -04:00
deinit_pollers();
} /* end deinit() */
/* Runs the polling loop */
static void run_poll_loop()
{
int next, wake;
tv_update_date(0,1);
while (1) {
/* Process a few tasks */
process_runnable_tasks();
/* check if we caught some signals and process them in the
first thread */
if (tid == 0)
signal_process_queue();
/* Check if we can expire some tasks */
next = wake_expired_tasks();
/* stop when there's nothing left to do */
if ((jobs - unstoppable_jobs) == 0)
break;
/* also stop if we failed to cleanly stop all tasks */
if (killed > 1)
break;
/* expire immediately if events are pending */
wake = 1;
if (thread_has_tasks())
activity[tid].wake_tasks++;
else if (signal_queue_len && tid == 0)
activity[tid].wake_signal++;
else {
_HA_ATOMIC_OR(&sleeping_thread_mask, tid_bit);
__ha_barrier_atomic_store();
if (global_tasks_mask & tid_bit) {
activity[tid].wake_tasks++;
_HA_ATOMIC_AND(&sleeping_thread_mask, ~tid_bit);
} else
wake = 0;
}
/* The poller will ensure it returns around <next> */
cur_poller.poll(&cur_poller, next, wake);
activity[tid].loops++;
}
}
static void *run_thread_poll_loop(void *data)
{
struct per_thread_alloc_fct *ptaf;
struct per_thread_init_fct *ptif;
struct per_thread_deinit_fct *ptdf;
struct per_thread_free_fct *ptff;
static int init_left = 0;
__decl_hathreads(static pthread_mutex_t init_mutex = PTHREAD_MUTEX_INITIALIZER);
__decl_hathreads(static pthread_cond_t init_cond = PTHREAD_COND_INITIALIZER);
ha_set_tid((unsigned long)data);
#if (_POSIX_TIMERS > 0) && defined(_POSIX_THREAD_CPUTIME)
#ifdef USE_THREAD
pthread_getcpuclockid(pthread_self(), &ti->clock_id);
#else
ti->clock_id = CLOCK_THREAD_CPUTIME_ID;
#endif
#endif
/* Now, initialize one thread init at a time. This is better since
* some init code is a bit tricky and may release global resources
* after reallocating them locally. This will also ensure there is
* no race on file descriptors allocation.
*/
#ifdef USE_THREAD
pthread_mutex_lock(&init_mutex);
#endif
/* The first thread must set the number of threads left */
if (!init_left)
init_left = global.nbthread;
init_left--;
tv_update_date(-1,-1);
/* per-thread alloc calls performed here are not allowed to snoop on
* other threads, so they are free to initialize at their own rhythm
* as long as they act as if they were alone. None of them may rely
* on resources initialized by the other ones.
*/
list_for_each_entry(ptaf, &per_thread_alloc_list, list) {
if (!ptaf->fct()) {
ha_alert("failed to allocate resources for thread %u.\n", tid);
exit(1);
}
}
/* per-thread init calls performed here are not allowed to snoop on
* other threads, so they are free to initialize at their own rhythm
* as long as they act as if they were alone.
*/
list_for_each_entry(ptif, &per_thread_init_list, list) {
if (!ptif->fct()) {
ha_alert("failed to initialize thread %u.\n", tid);
exit(1);
}
}
BUG/MEDIUM: init/threads: prevent initialized threads from starting before others Since commit 6ec902a ("MINOR: threads: serialize threads initialization") we now serialize threads initialization. But doing so has emphasized another race which is that some threads may actually start the loop before others are done initializing. As soon as all threads enter the first thread_release() call, their rdv bit is cleared and they're all waiting for all others' rdv to be cleared as well, with their harmless bit set. The first one to notice the cleared mask will progress through thread_isolate(), take rdv again preventing most others from noticing its short pass to zero, and this first one will be able to run all the way through the initialization till the last call to thread_release() which it happily crosses, being the only one with the rdv bit, leaving the room for one or a few others to do the same. This results in some threads entering the loop before others are done with their initialization, which is particularly bad. PiBa-NL reported that some regtests fail for him due to this (which was impossible to reproduce here, but races are racy by definition). However placing some printf() in the initialization code definitely shows this unsychronized startup. This patch takes a different approach in three steps : - first, we don't start with thread_release() anymore and we don't set the rdv mask anymore in the main call. This was initially done to let all threads start toghether, which we don't want. Instead we just start with thread_isolate(). Since all threads are harmful by default, they all wait for each other's readiness before starting. - second, we don't release with thread_release() but with thread_sync_release(), meaning that we don't leave the function until other ones have reached the point in the function where they decide to leave it as well. - third, it makes sure we don't start the listeners using protocol_enable_all() before all threads have allocated their local FD tables or have initialized their pollers, otherwise startup could be racy as well. It's worth noting that it is even possible to limit this call to thread #0 as it only needs to be performed once. This now guarantees that all thread init calls start only after all threads are ready, and that no thread enters the polling loop before all others have completed their initialization. Please check GH issues #111 and #117 for more context. No backport is needed, though if some new init races are reported in 1.9 (or even 1.8) which do not affect 2.0, then it may make sense to carefully backport this small series.
2019-06-10 03:51:04 -04:00
/* enabling protocols will result in fd_insert() calls to be performed,
* we want all threads to have already allocated their local fd tables
* before doing so, thus only the last thread does it.
BUG/MEDIUM: init/threads: prevent initialized threads from starting before others Since commit 6ec902a ("MINOR: threads: serialize threads initialization") we now serialize threads initialization. But doing so has emphasized another race which is that some threads may actually start the loop before others are done initializing. As soon as all threads enter the first thread_release() call, their rdv bit is cleared and they're all waiting for all others' rdv to be cleared as well, with their harmless bit set. The first one to notice the cleared mask will progress through thread_isolate(), take rdv again preventing most others from noticing its short pass to zero, and this first one will be able to run all the way through the initialization till the last call to thread_release() which it happily crosses, being the only one with the rdv bit, leaving the room for one or a few others to do the same. This results in some threads entering the loop before others are done with their initialization, which is particularly bad. PiBa-NL reported that some regtests fail for him due to this (which was impossible to reproduce here, but races are racy by definition). However placing some printf() in the initialization code definitely shows this unsychronized startup. This patch takes a different approach in three steps : - first, we don't start with thread_release() anymore and we don't set the rdv mask anymore in the main call. This was initially done to let all threads start toghether, which we don't want. Instead we just start with thread_isolate(). Since all threads are harmful by default, they all wait for each other's readiness before starting. - second, we don't release with thread_release() but with thread_sync_release(), meaning that we don't leave the function until other ones have reached the point in the function where they decide to leave it as well. - third, it makes sure we don't start the listeners using protocol_enable_all() before all threads have allocated their local FD tables or have initialized their pollers, otherwise startup could be racy as well. It's worth noting that it is even possible to limit this call to thread #0 as it only needs to be performed once. This now guarantees that all thread init calls start only after all threads are ready, and that no thread enters the polling loop before all others have completed their initialization. Please check GH issues #111 and #117 for more context. No backport is needed, though if some new init races are reported in 1.9 (or even 1.8) which do not affect 2.0, then it may make sense to carefully backport this small series.
2019-06-10 03:51:04 -04:00
*/
if (init_left == 0)
protocol_enable_all();
#ifdef USE_THREAD
pthread_cond_broadcast(&init_cond);
pthread_mutex_unlock(&init_mutex);
/* now wait for other threads to finish starting */
pthread_mutex_lock(&init_mutex);
while (init_left)
pthread_cond_wait(&init_cond, &init_mutex);
pthread_mutex_unlock(&init_mutex);
#endif
run_poll_loop();
list_for_each_entry(ptdf, &per_thread_deinit_list, list)
ptdf->fct();
list_for_each_entry(ptff, &per_thread_free_list, list)
ptff->fct();
#ifdef USE_THREAD
_HA_ATOMIC_AND(&all_threads_mask, ~tid_bit);
if (tid > 0)
pthread_exit(NULL);
#endif
return NULL;
}
/* This is the global management task for listeners. It enables listeners waiting
* for global resources when there are enough free resource, or at least once in
* a while. It is designed to be called as a task.
*/
static struct task *manage_global_listener_queue(struct task *t, void *context, unsigned short state)
{
int next = TICK_ETERNITY;
/* queue is empty, nothing to do */
if (LIST_ISEMPTY(&global_listener_queue))
goto out;
/* If there are still too many concurrent connections, let's wait for
* some of them to go away. We don't need to re-arm the timer because
* each of them will scan the queue anyway.
*/
if (unlikely(actconn >= global.maxconn))
goto out;
/* We should periodically try to enable listeners waiting for a global
* resource here, because it is possible, though very unlikely, that
* they have been blocked by a temporary lack of global resource such
* as a file descriptor or memory and that the temporary condition has
* disappeared.
*/
dequeue_all_listeners(&global_listener_queue);
out:
t->expire = next;
task_queue(t);
return t;
}
int main(int argc, char **argv)
{
int err, retry;
struct rlimit limit;
char errmsg[100];
int pidfd = -1;
setvbuf(stdout, NULL, _IONBF, 0);
/* this can only safely be done here, though it's optimized away by
* the compiler.
*/
if (MAX_PROCS < 1 || MAX_PROCS > LONGBITS) {
ha_alert("MAX_PROCS value must be between 1 and %d inclusive; "
"HAProxy was built with value %d, please fix it and rebuild.\n",
LONGBITS, MAX_PROCS);
exit(1);
}
/* take a copy of initial limits before we possibly change them */
getrlimit(RLIMIT_NOFILE, &limit);
rlim_fd_cur_at_boot = limit.rlim_cur;
rlim_fd_max_at_boot = limit.rlim_max;
/* process all initcalls in order of potential dependency */
RUN_INITCALLS(STG_PREPARE);
RUN_INITCALLS(STG_LOCK);
RUN_INITCALLS(STG_ALLOC);
RUN_INITCALLS(STG_POOL);
RUN_INITCALLS(STG_REGISTER);
RUN_INITCALLS(STG_INIT);
init(argc, argv);
signal_register_fct(SIGQUIT, dump, SIGQUIT);
signal_register_fct(SIGUSR1, sig_soft_stop, SIGUSR1);
signal_register_fct(SIGHUP, sig_dump_state, SIGHUP);
signal_register_fct(SIGUSR2, NULL, 0);
/* Always catch SIGPIPE even on platforms which define MSG_NOSIGNAL.
* Some recent FreeBSD setups report broken pipes, and MSG_NOSIGNAL
* was defined there, so let's stay on the safe side.
*/
signal_register_fct(SIGPIPE, NULL, 0);
/* ulimits */
if (!global.rlimit_nofile)
global.rlimit_nofile = global.maxsock;
if (global.rlimit_nofile) {
limit.rlim_cur = global.rlimit_nofile;
limit.rlim_max = MAX(rlim_fd_max_at_boot, limit.rlim_cur);
if (setrlimit(RLIMIT_NOFILE, &limit) == -1) {
/* try to set it to the max possible at least */
getrlimit(RLIMIT_NOFILE, &limit);
limit.rlim_cur = limit.rlim_max;
if (setrlimit(RLIMIT_NOFILE, &limit) != -1)
getrlimit(RLIMIT_NOFILE, &limit);
ha_warning("[%s.main()] Cannot raise FD limit to %d, limit is %d.\n", argv[0], global.rlimit_nofile, (int)limit.rlim_cur);
global.rlimit_nofile = limit.rlim_cur;
}
}
if (global.rlimit_memmax) {
limit.rlim_cur = limit.rlim_max =
global.rlimit_memmax * 1048576ULL;
#ifdef RLIMIT_AS
if (setrlimit(RLIMIT_AS, &limit) == -1) {
ha_warning("[%s.main()] Cannot fix MEM limit to %d megs.\n",
argv[0], global.rlimit_memmax);
}
#else
if (setrlimit(RLIMIT_DATA, &limit) == -1) {
ha_warning("[%s.main()] Cannot fix MEM limit to %d megs.\n",
argv[0], global.rlimit_memmax);
}
#endif
}
if (old_unixsocket) {
if (strcmp("/dev/null", old_unixsocket) != 0) {
if (get_old_sockets(old_unixsocket) != 0) {
ha_alert("Failed to get the sockets from the old process!\n");
if (!(global.mode & MODE_MWORKER))
exit(1);
}
}
}
get_cur_unixsocket();
/* We will loop at most 100 times with 10 ms delay each time.
* That's at most 1 second. We only send a signal to old pids
* if we cannot grab at least one port.
*/
retry = MAX_START_RETRIES;
err = ERR_NONE;
while (retry >= 0) {
struct timeval w;
err = start_proxies(retry == 0 || nb_oldpids == 0);
/* exit the loop on no error or fatal error */
if ((err & (ERR_RETRYABLE|ERR_FATAL)) != ERR_RETRYABLE)
break;
if (nb_oldpids == 0 || retry == 0)
break;
/* FIXME-20060514: Solaris and OpenBSD do not support shutdown() on
* listening sockets. So on those platforms, it would be wiser to
* simply send SIGUSR1, which will not be undoable.
*/
if (tell_old_pids(SIGTTOU) == 0) {
/* no need to wait if we can't contact old pids */
retry = 0;
continue;
}
/* give some time to old processes to stop listening */
w.tv_sec = 0;
w.tv_usec = 10*1000;
select(0, NULL, NULL, NULL, &w);
retry--;
}
/* Note: start_proxies() sends an alert when it fails. */
if ((err & ~ERR_WARN) != ERR_NONE) {
if (retry != MAX_START_RETRIES && nb_oldpids) {
protocol_unbind_all(); /* cleanup everything we can */
tell_old_pids(SIGTTIN);
}
exit(1);
}
if (!(global.mode & MODE_MWORKER_WAIT) && listeners == 0) {
ha_alert("[%s.main()] No enabled listener found (check for 'bind' directives) ! Exiting.\n", argv[0]);
/* Note: we don't have to send anything to the old pids because we
* never stopped them. */
exit(1);
}
err = protocol_bind_all(errmsg, sizeof(errmsg));
if ((err & ~ERR_WARN) != ERR_NONE) {
if ((err & ERR_ALERT) || (err & ERR_WARN))
ha_alert("[%s.main()] %s.\n", argv[0], errmsg);
ha_alert("[%s.main()] Some protocols failed to start their listeners! Exiting.\n", argv[0]);
protocol_unbind_all(); /* cleanup everything we can */
if (nb_oldpids)
tell_old_pids(SIGTTIN);
exit(1);
} else if (err & ERR_WARN) {
ha_alert("[%s.main()] %s.\n", argv[0], errmsg);
}
/* Ok, all listener should now be bound, close any leftover sockets
* the previous process gave us, we don't need them anymore
*/
while (xfer_sock_list != NULL) {
struct xfer_sock_list *tmpxfer = xfer_sock_list->next;
close(xfer_sock_list->fd);
free(xfer_sock_list->iface);
free(xfer_sock_list->namespace);
free(xfer_sock_list);
xfer_sock_list = tmpxfer;
}
/* prepare pause/play signals */
signal_register_fct(SIGTTOU, sig_pause, SIGTTOU);
signal_register_fct(SIGTTIN, sig_listen, SIGTTIN);
/* MODE_QUIET can inhibit alerts and warnings below this line */
if (getenv("HAPROXY_MWORKER_REEXEC") != NULL) {
/* either stdin/out/err are already closed or should stay as they are. */
if ((global.mode & MODE_DAEMON)) {
/* daemon mode re-executing, stdin/stdout/stderr are already closed so keep quiet */
global.mode &= ~MODE_VERBOSE;
global.mode |= MODE_QUIET; /* ensure that we won't say anything from now */
}
} else {
if ((global.mode & MODE_QUIET) && !(global.mode & MODE_VERBOSE)) {
/* detach from the tty */
stdio_quiet(-1);
}
}
/* open log & pid files before the chroot */
if ((global.mode & MODE_DAEMON || global.mode & MODE_MWORKER) && global.pidfile != NULL) {
unlink(global.pidfile);
pidfd = open(global.pidfile, O_CREAT | O_WRONLY | O_TRUNC, 0644);
if (pidfd < 0) {
ha_alert("[%s.main()] Cannot create pidfile %s\n", argv[0], global.pidfile);
if (nb_oldpids)
tell_old_pids(SIGTTIN);
protocol_unbind_all();
exit(1);
}
}
if ((global.last_checks & LSTCHK_NETADM) && global.uid) {
ha_alert("[%s.main()] Some configuration options require full privileges, so global.uid cannot be changed.\n"
"", argv[0]);
protocol_unbind_all();
exit(1);
}
/* If the user is not root, we'll still let him try the configuration
* but we inform him that unexpected behaviour may occur.
*/
if ((global.last_checks & LSTCHK_NETADM) && getuid())
ha_warning("[%s.main()] Some options which require full privileges"
" might not work well.\n"
"", argv[0]);
if ((global.mode & (MODE_MWORKER|MODE_DAEMON)) == 0) {
/* chroot if needed */
if (global.chroot != NULL) {
if (chroot(global.chroot) == -1 || chdir("/") == -1) {
ha_alert("[%s.main()] Cannot chroot(%s).\n", argv[0], global.chroot);
if (nb_oldpids)
tell_old_pids(SIGTTIN);
protocol_unbind_all();
exit(1);
}
}
}
if (nb_oldpids && !(global.mode & MODE_MWORKER_WAIT))
nb_oldpids = tell_old_pids(oldpids_sig);
/* send a SIGTERM to workers who have a too high reloads number */
if ((global.mode & MODE_MWORKER) && !(global.mode & MODE_MWORKER_WAIT))
mworker_kill_max_reloads(SIGTERM);
if ((getenv("HAPROXY_MWORKER_REEXEC") == NULL)) {
nb_oldpids = 0;
free(oldpids);
oldpids = NULL;
}
/* Note that any error at this stage will be fatal because we will not
* be able to restart the old pids.
*/
if ((global.mode & (MODE_MWORKER|MODE_DAEMON)) == 0) {
/* setgid / setuid */
if (global.gid) {
if (getgroups(0, NULL) > 0 && setgroups(0, NULL) == -1)
ha_warning("[%s.main()] Failed to drop supplementary groups. Using 'gid'/'group'"
" without 'uid'/'user' is generally useless.\n", argv[0]);
if (setgid(global.gid) == -1) {
ha_alert("[%s.main()] Cannot set gid %d.\n", argv[0], global.gid);
protocol_unbind_all();
exit(1);
}
}
if (global.uid && setuid(global.uid) == -1) {
ha_alert("[%s.main()] Cannot set uid %d.\n", argv[0], global.uid);
protocol_unbind_all();
exit(1);
}
}
/* try our best to re-enable core dumps depending on system capabilities.
* What is addressed here :
* - remove file size limits
* - remove core size limits
* - mark the process dumpable again if it lost it due to user/group
*/
if (global.tune.options & GTUNE_SET_DUMPABLE) {
limit.rlim_cur = limit.rlim_max = RLIM_INFINITY;
#if defined(RLIMIT_FSIZE)
if (setrlimit(RLIMIT_FSIZE, &limit) == -1)
ha_warning("[%s.main()] Failed to set the raise the maximum file size.\n", argv[0]);
#endif
#if defined(RLIMIT_CORE)
if (setrlimit(RLIMIT_CORE, &limit) == -1)
ha_warning("[%s.main()] Failed to set the raise the core dump size.\n", argv[0]);
#endif
#if defined(USE_PRCTL)
if (prctl(PR_SET_DUMPABLE, 1, 0, 0, 0) == -1)
ha_warning("[%s.main()] Failed to set the dumpable flag, no core will be dumped.\n", argv[0]);
#endif
}
/* check ulimits */
limit.rlim_cur = limit.rlim_max = 0;
getrlimit(RLIMIT_NOFILE, &limit);
if (limit.rlim_cur < global.maxsock) {
ha_warning("[%s.main()] FD limit (%d) too low for maxconn=%d/maxsock=%d. Please raise 'ulimit-n' to %d or more to avoid any trouble.\n",
argv[0], (int)limit.rlim_cur, global.maxconn, global.maxsock, global.maxsock);
}
if (global.mode & (MODE_DAEMON | MODE_MWORKER | MODE_MWORKER_WAIT)) {
struct proxy *px;
struct peers *curpeers;
int ret = 0;
int proc;
int devnullfd = -1;
/*
* if daemon + mworker: must fork here to let a master
* process live in background before forking children
*/
if ((getenv("HAPROXY_MWORKER_REEXEC") == NULL)
&& (global.mode & MODE_MWORKER)
&& (global.mode & MODE_DAEMON)) {
ret = fork();
if (ret < 0) {
ha_alert("[%s.main()] Cannot fork.\n", argv[0]);
protocol_unbind_all();
exit(1); /* there has been an error */
} else if (ret > 0) { /* parent leave to daemonize */
exit(0);
} else /* change the process group ID in the child (master process) */
setsid();
}
/* if in master-worker mode, write the PID of the father */
if (global.mode & MODE_MWORKER) {
char pidstr[100];
snprintf(pidstr, sizeof(pidstr), "%d\n", (int)getpid());
if (pidfd >= 0)
shut_your_big_mouth_gcc(write(pidfd, pidstr, strlen(pidstr)));
}
/* the father launches the required number of processes */
if (!(global.mode & MODE_MWORKER_WAIT)) {
if (global.mode & MODE_MWORKER)
mworker_ext_launch_all();
for (proc = 0; proc < global.nbproc; proc++) {
ret = fork();
if (ret < 0) {
ha_alert("[%s.main()] Cannot fork.\n", argv[0]);
protocol_unbind_all();
exit(1); /* there has been an error */
}
else if (ret == 0) /* child breaks here */
break;
if (pidfd >= 0 && !(global.mode & MODE_MWORKER)) {
char pidstr[100];
snprintf(pidstr, sizeof(pidstr), "%d\n", ret);
shut_your_big_mouth_gcc(write(pidfd, pidstr, strlen(pidstr)));
}
if (global.mode & MODE_MWORKER) {
struct mworker_proc *child;
ha_notice("New worker #%d (%d) forked\n", relative_pid, ret);
/* find the right mworker_proc */
list_for_each_entry(child, &proc_list, list) {
if (child->relative_pid == relative_pid &&
child->reloads == 0 && child->options & PROC_O_TYPE_WORKER) {
child->timestamp = now.tv_sec;
child->pid = ret;
child->version = strdup(haproxy_version);
break;
}
}
}
relative_pid++; /* each child will get a different one */
pid_bit <<= 1;
}
} else {
/* wait mode */
global.nbproc = 1;
proc = 1;
}
#ifdef USE_CPU_AFFINITY
if (proc < global.nbproc && /* child */
proc < MAX_PROCS && /* only the first 32/64 processes may be pinned */
global.cpu_map.proc[proc]) /* only do this if the process has a CPU map */
#ifdef __FreeBSD__
{
cpuset_t cpuset;
int i;
unsigned long cpu_map = global.cpu_map.proc[proc];
CPU_ZERO(&cpuset);
while ((i = ffsl(cpu_map)) > 0) {
CPU_SET(i - 1, &cpuset);
cpu_map &= ~(1UL << (i - 1));
}
ret = cpuset_setaffinity(CPU_LEVEL_WHICH, CPU_WHICH_PID, -1, sizeof(cpuset), &cpuset);
}
#else
sched_setaffinity(0, sizeof(unsigned long), (void *)&global.cpu_map.proc[proc]);
#endif
#endif
/* close the pidfile both in children and father */
if (pidfd >= 0) {
//lseek(pidfd, 0, SEEK_SET); /* debug: emulate eglibc bug */
close(pidfd);
}
/* We won't ever use this anymore */
free(global.pidfile); global.pidfile = NULL;
if (proc == global.nbproc) {
if (global.mode & (MODE_MWORKER|MODE_MWORKER_WAIT)) {
if ((!(global.mode & MODE_QUIET) || (global.mode & MODE_VERBOSE)) &&
(global.mode & MODE_DAEMON)) {
/* detach from the tty, this is required to properly daemonize. */
if ((getenv("HAPROXY_MWORKER_REEXEC") == NULL))
stdio_quiet(-1);
global.mode &= ~MODE_VERBOSE;
global.mode |= MODE_QUIET; /* ensure that we won't say anything from now */
}
mworker_loop();
/* should never get there */
exit(EXIT_FAILURE);
}
#if defined(USE_OPENSSL) && !defined(OPENSSL_NO_DH)
ssl_free_dh();
#endif
exit(0); /* parent must leave */
}
/* child must never use the atexit function */
atexit_flag = 0;
/* close useless master sockets */
if (global.mode & MODE_MWORKER) {
struct mworker_proc *child, *it;
master = 0;
mworker_cli_proxy_stop();
/* free proc struct of other processes */
list_for_each_entry_safe(child, it, &proc_list, list) {
/* close the FD of the master side for all
* workers, we don't need to close the worker
* side of other workers since it's done with
* the bind_proc */
if (child->ipc_fd[0] >= 0)
close(child->ipc_fd[0]);
if (child->relative_pid == relative_pid &&
child->reloads == 0) {
/* keep this struct if this is our pid */
proc_self = child;
continue;
}
LIST_DEL(&child->list);
mworker_free_child(child);
child = NULL;
}
}
BUG/MEDIUM: threads/mworker: fix a race on startup Marc Fournier reported an interesting case when using threads with the master-worker mode : sometimes, a listener would have its FD closed during startup. Sometimes it could even be health checks seeing this. What happens is that after the threads are created, and the pollers enabled on each threads, the master-worker pipe is registered, and at the same time a close() is performed on the write side of this pipe since the children must not use it. But since this is replicated in every thread, what happens is that the first thread closes the pipe, thus releases the FD, and the next thread starting a listener in parallel gets this FD reassigned. Then another thread closes the FD again, which this time corresponds to the listener. It can also happen with the health check sockets if they're started early enough. This patch splits the mworker_pipe_register() function in two, so that the close() of the write side of the FD is performed very early after the fork() and long before threads are created (we don't need to delay it anyway). Only the pipe registration is done in the threaded code since it is important that the pollers are properly allocated for this. The mworker_pipe_register() function now takes care of registering the pipe only once, and this is guaranteed by a new surrounding lock. The call to protocol_enable_all() looks fragile in theory since it scans the list of proxies and their listeners, though in practice all threads scan the same list and take the same locks for each listener so it's not possible that any of them escapes the process and finishes before all listeners are started. And the operation is idempotent. This fix must be backported to 1.8. Thanks to Marc for providing very detailed traces clearly showing the problem.
2018-01-23 13:01:49 -05:00
if (!(global.mode & MODE_QUIET) || (global.mode & MODE_VERBOSE)) {
devnullfd = open("/dev/null", O_RDWR, 0);
if (devnullfd < 0) {
ha_alert("Cannot open /dev/null\n");
exit(EXIT_FAILURE);
}
}
/* Must chroot and setgid/setuid in the children */
/* chroot if needed */
if (global.chroot != NULL) {
if (chroot(global.chroot) == -1 || chdir("/") == -1) {
ha_alert("[%s.main()] Cannot chroot1(%s).\n", argv[0], global.chroot);
if (nb_oldpids)
tell_old_pids(SIGTTIN);
protocol_unbind_all();
exit(1);
}
}
free(global.chroot);
global.chroot = NULL;
/* setgid / setuid */
if (global.gid) {
if (getgroups(0, NULL) > 0 && setgroups(0, NULL) == -1)
ha_warning("[%s.main()] Failed to drop supplementary groups. Using 'gid'/'group'"
" without 'uid'/'user' is generally useless.\n", argv[0]);
if (setgid(global.gid) == -1) {
ha_alert("[%s.main()] Cannot set gid %d.\n", argv[0], global.gid);
protocol_unbind_all();
exit(1);
}
}
if (global.uid && setuid(global.uid) == -1) {
ha_alert("[%s.main()] Cannot set uid %d.\n", argv[0], global.uid);
protocol_unbind_all();
exit(1);
}
/* pass through every cli socket, and check if it's bound to
* the current process and if it exposes listeners sockets.
* Caution: the GTUNE_SOCKET_TRANSFER is now set after the fork.
* */
if (global.stats_fe) {
struct bind_conf *bind_conf;
list_for_each_entry(bind_conf, &global.stats_fe->conf.bind, by_fe) {
if (bind_conf->level & ACCESS_FD_LISTENERS) {
if (!bind_conf->bind_proc || bind_conf->bind_proc & (1UL << proc)) {
global.tune.options |= GTUNE_SOCKET_TRANSFER;
break;
}
}
}
}
/* we might have to unbind some proxies from some processes */
px = proxies_list;
while (px != NULL) {
if (px->bind_proc && px->state != PR_STSTOPPED) {
if (!(px->bind_proc & (1UL << proc))) {
if (global.tune.options & GTUNE_SOCKET_TRANSFER)
zombify_proxy(px);
else
stop_proxy(px);
}
}
px = px->next;
}
/* we might have to unbind some peers sections from some processes */
for (curpeers = cfg_peers; curpeers; curpeers = curpeers->next) {
if (!curpeers->peers_fe)
continue;
if (curpeers->peers_fe->bind_proc & (1UL << proc))
continue;
stop_proxy(curpeers->peers_fe);
/* disable this peer section so that it kills itself */
signal_unregister_handler(curpeers->sighandler);
task_destroy(curpeers->sync_task);
curpeers->sync_task = NULL;
task_destroy(curpeers->peers_fe->task);
curpeers->peers_fe->task = NULL;
curpeers->peers_fe = NULL;
}
/*
* This is only done in daemon mode because we might want the
* logs on stdout in mworker mode. If we're NOT in QUIET mode,
* we should now close the 3 first FDs to ensure that we can
* detach from the TTY. We MUST NOT do it in other cases since
* it would have already be done, and 0-2 would have been
* affected to listening sockets
*/
if ((global.mode & MODE_DAEMON) &&
(!(global.mode & MODE_QUIET) || (global.mode & MODE_VERBOSE))) {
/* detach from the tty */
stdio_quiet(devnullfd);
global.mode &= ~MODE_VERBOSE;
global.mode |= MODE_QUIET; /* ensure that we won't say anything from now */
}
pid = getpid(); /* update child's pid */
if (!(global.mode & MODE_MWORKER)) /* in mworker mode we don't want a new pgid for the children */
setsid();
fork_poller();
}
global.mode &= ~MODE_STARTING;
/*
* That's it : the central polling loop. Run until we stop.
*/
#ifdef USE_THREAD
{
sigset_t blocked_sig, old_sig;
int i;
/* ensure the signals will be blocked in every thread */
sigfillset(&blocked_sig);
sigdelset(&blocked_sig, SIGPROF);
sigdelset(&blocked_sig, SIGBUS);
sigdelset(&blocked_sig, SIGFPE);
sigdelset(&blocked_sig, SIGILL);
sigdelset(&blocked_sig, SIGSEGV);
pthread_sigmask(SIG_SETMASK, &blocked_sig, &old_sig);
/* Create nbthread-1 thread. The first thread is the current process */
thread_info[0].pthread = pthread_self();
for (i = 1; i < global.nbthread; i++)
pthread_create(&thread_info[i].pthread, NULL, &run_thread_poll_loop, (void *)(long)i);
#ifdef USE_CPU_AFFINITY
/* Now the CPU affinity for all threads */
BUG/MEDIUM: threads: cpu-map designating a single thread/process are ignored Since commit 81492c989 ("MINOR: threads: flatten the per-thread cpu-map"), we don't keep the proc*thread matrix anymore to represent the full binding possibilities, but only the proc and thread ones. The problem is that the per-process binding is not the same for each thread and for the process, and the proc[] array was assumed to store the per-proc first thread value when doing this change. Worse, the logic present there tries to deal with thread ranges and process ranges in a way which automatically exclused the other possibility (since ranges cannot be used on both) but as such fails to apply changes if neither the process nor the thread is expressed as a range. The real problem comes from the fact that specifying cpu-map 1/1 doesn't yet reveal if the per-process mask or the per-thread mask needs to be updated. In practice it's the thread one but then the current storage doesn't allow to store the binding of the first thread of each other process in nbproc>1 configurations. When removing the proc*thread matrix, what ought to have been kept was both the thread column for process 1 and the process line for threads 1, but instead only the thread column was kept. This patch reintroduces the storage of the configuration for the first thread of each process so that it is again possible to store either the per-thread or per-process configuration. As a partial workaround for existing configurations, it is possible to systematically indicate at least two processes or two threads at once and map them by pairs or more so that at least two values are present in the range. E.g : # set processes 1-4 to cpus 0-3 : cpu-map auto:1-4/1 0 1 2 3 # or: cpu-map 1-2/1 0 1 cpu-map 2-3/1 2 3 # set threads 1-4 to cpus 0-3 : cpu-map auto:1/1-4 0 1 2 3 # or : cpu-map 1/1-2 0 1 cpu-map 3/3-4 2 3 This fix must be backported to 2.0.
2019-07-16 09:10:34 -04:00
if (global.cpu_map.proc_t1[relative_pid-1])
global.cpu_map.thread[0] &= global.cpu_map.proc_t1[relative_pid-1];
for (i = 0; i < global.nbthread; i++) {
if (global.cpu_map.proc[relative_pid-1])
global.cpu_map.thread[i] &= global.cpu_map.proc[relative_pid-1];
if (i < MAX_THREADS && /* only the first 32/64 threads may be pinned */
global.cpu_map.thread[i]) {/* only do this if the thread has a THREAD map */
#if defined(__FreeBSD__) || defined(__NetBSD__)
cpuset_t cpuset;
#else
cpu_set_t cpuset;
#endif
int j;
unsigned long cpu_map = global.cpu_map.thread[i];
CPU_ZERO(&cpuset);
while ((j = ffsl(cpu_map)) > 0) {
CPU_SET(j - 1, &cpuset);
cpu_map &= ~(1UL << (j - 1));
}
pthread_setaffinity_np(thread_info[i].pthread,
sizeof(cpuset), &cpuset);
}
}
#endif /* !USE_CPU_AFFINITY */
/* when multithreading we need to let only the thread 0 handle the signals */
haproxy_unblock_signals();
/* Finally, start the poll loop for the first thread */
run_thread_poll_loop(0);
/* Wait the end of other threads */
for (i = 1; i < global.nbthread; i++)
pthread_join(thread_info[i].pthread, NULL);
#if defined(DEBUG_THREAD) || defined(DEBUG_FULL)
show_lock_stats();
#endif
}
#else /* ! USE_THREAD */
haproxy_unblock_signals();
run_thread_poll_loop(0);
#endif
/* Do some cleanup */
deinit();
exit(0);
}
/*
* Local variables:
* c-indent-level: 8
* c-basic-offset: 8
* End:
*/