haproxy/src/server.c

7675 lines
231 KiB
C
Raw Normal View History

/*
* Server management functions.
*
* Copyright 2000-2012 Willy Tarreau <w@1wt.eu>
[MEDIUM]: rework checks handling This patch adds two new variables: fastinter and downinter. When server state is: - non-transitionally UP -> inter (no change) - transitionally UP (going down), unchecked or transitionally DOWN (going up) -> fastinter - down -> downinter It allows to set something like: server sr6 127.0.51.61:80 cookie s6 check inter 10000 downinter 20000 fastinter 500 fall 3 weight 40 In the above example haproxy uses 10000ms between checks but as soon as one check fails fastinter (500ms) is used. If server is down downinter (20000) is used or fastinter (500ms) if one check pass. Fastinter is also used when haproxy starts. New "timeout.check" variable was added, if set haproxy uses it as an additional read timeout, but only after a connection has been already established. I was thinking about using "timeout.server" here but most people set this with an addition reserve but still want checks to kick out laggy servers. Please also note that in most cases check request is much simpler and faster to handle than normal requests so this timeout should be smaller. I also changed the timeout used for check connections establishing. Changes from the previous version: - use tv_isset() to check if the timeout is set, - use min("timeout connect", "inter") but only if "timeout check" is set as this min alone may be to short for full (connect + read) check, - debug code (fprintf) commented/removed - documentation Compile tested only (sorry!) as I'm currently traveling but changes are rather small and trivial.
2008-01-20 19:54:06 -05:00
* Copyright 2007-2008 Krzysztof Piotr Oledzki <ole@ans.pl>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
*/
#include <sys/types.h>
#include <netinet/tcp.h>
#include <ctype.h>
#include <errno.h>
#include <import/ceb64_tree.h>
#include <import/cebis_tree.h>
#include <import/eb64tree.h>
#include <haproxy/api.h>
#include <haproxy/applet-t.h>
#include <haproxy/backend.h>
#include <haproxy/cfgparse.h>
#include <haproxy/check.h>
#include <haproxy/cli.h>
#include <haproxy/connection.h>
#include <haproxy/counters.h>
#include <haproxy/dict-t.h>
#include <haproxy/errors.h>
#include <haproxy/global.h>
#include <haproxy/guid.h>
#include <haproxy/log.h>
#include <haproxy/mailers.h>
#include <haproxy/namespace.h>
#include <haproxy/port_range.h>
#include <haproxy/protocol.h>
#include <haproxy/proxy.h>
#include <haproxy/queue.h>
#include <haproxy/quic_tp.h>
#include <haproxy/quic_tune.h>
#include <haproxy/resolvers.h>
#include <haproxy/sample.h>
#include <haproxy/sc_strm.h>
#include <haproxy/server.h>
#include <haproxy/stats.h>
#include <haproxy/ssl_sock.h>
#include <haproxy/stconn.h>
#include <haproxy/stream.h>
#include <haproxy/stress.h>
#include <haproxy/task.h>
#include <haproxy/tcpcheck.h>
#include <haproxy/time.h>
#include <haproxy/tools.h>
#include <haproxy/xxhash.h>
#include <haproxy/event_hdl.h>
[MEDIUM] stats: report server and backend cumulated downtime Hello, This patch implements new statistics for SLA calculation by adding new field 'Dwntime' with total down time since restart (both HTTP/CSV) and extending status field (HTTP) or inserting a new one (CSV) with time showing how long each server/backend is in a current state. Additionaly, down transations are also calculated and displayed for backends, so it is possible to know how many times selected backend was down, generating "No server is available to handle this request." error. New information are presentetd in two different ways: - for HTTP: a "human redable form", one of "100000d 23h", "23h 59m" or "59m 59s" - for CSV: seconds I believe that seconds resolution is enough. As there are more columns in the status page I decided to shrink some names to make more space: - Weight -> Wght - Check -> Chk - Down -> Dwn Making described changes I also made some improvements and fixed some small bugs: - don't increment s->health above 's->rise + s->fall - 1'. Previously it was incremented an then (re)set to 's->rise + s->fall - 1'. - do not set server down if it is down already - do not set server up if it is up already - fix colspan in multiple places (mostly introduced by my previous patch) - add missing "status" header to CSV - fix order of retries/redispatches in server (CSV) - s/Tthen/Then/ - s/server/backend/ in DATA_ST_PX_BE (dumpstats.c) Changes from previous version: - deal with negative time intervales - don't relay on s->state (SRV_RUNNING) - little reworked human_time + compacted format (no spaces). If needed it can be used in the future for other purposes by optionally making "cnt" as an argument - leave set_server_down mostly unchanged - only little reworked "process_chk: 9" - additional fields in CSV are appended to the rigth - fix "SEC" macro - named arguments (human_time, be_downtime, srv_downtime) Hope it is OK. 
If there are only cosmetic changes needed please fill free to correct it, however if there are some bigger changes required I would like to discuss it first or at last to know what exactly was changed especially since I already put this patch into my production server. :) Thank you, Best regards, Krzysztof Oledzki
2007-10-22 10:21:10 -04:00
/* Forward declarations of file-local helpers (internal linkage). Their
 * definitions are not visible in this part of the file.
 */
static void srv_update_status(struct server *s, int type, int cause);
static int srv_apply_lastaddr(struct server *srv, int *err_code);
BUG/MEDIUM: connections: force connections cleanup on server changes I've been trying to understand a change of behaviour between v2.2dev5 and v2.2dev6. Indeed our probe is regularly testing to add and remove servers on a given backend such as: # echo "show servers state be_foo" | sudo socat stdio /var/lib/haproxy/stats 113 be_foo 1 srv0 10.236.139.34 2 0 1 1 263 15 3 4 6 0 0 0 - 31255 - 113 be_foo 2 srv1 0.0.0.0 0 1 256 256 0 15 3 0 14 0 0 0 - 0 - -> curl on the corresponding frontend: reply from server:31255 # echo "set server be_foo/srv1 addr 10.236.139.34 port 31257" | sudo socat stdio /var/lib/haproxy/stats IP changed from '0.0.0.0' to '10.236.139.34', port changed from '0' to '31257' by 'stats socket command' # echo "set server be_foo/srv1 weight 256" | sudo socat stdio /var/lib/haproxy/stats # echo "set server be_foo/srv1 check-port 8500" | sudo socat stdio /var/lib/haproxy/stats health check port updated. # echo "set server be_foo/srv1 state ready" | sudo socat stdio /var/lib/haproxy/stats # echo "show servers state be_foo" | sudo socat stdio /var/lib/haproxy/stats 113 be_foo 1 srv0 10.236.139.34 2 0 1 1 105 15 3 4 6 0 0 0 - 31255 - 113 be_foo 2 srv1 10.236.139.34 2 0 256 256 2319 15 3 2 6 0 0 0 - 31257 - -> curl on the corresponding frontend: reply for server:31257 (notice the difference of weight) # echo "set server be_foo/srv1 state maint" | sudo socat stdio /var/lib/haproxy/stats # echo "set server be_foo/srv1 addr 0.0.0.0 port 0" | sudo socat stdio /var/lib/haproxy/stats IP changed from '10.236.139.34' to '0.0.0.0', port changed from '31257' to '0' by 'stats socket command' # echo "show servers state be_foo" | sudo socat stdio /var/lib/haproxy/stats 113 be_foo 1 srv0 10.236.139.34 2 0 1 1 263 15 3 4 6 0 0 0 - 31255 - 113 be_foo 2 srv1 0.0.0.0 0 1 256 256 0 15 3 0 14 0 0 0 - 0 - -> curl on the corresponding frontend: reply from server:31255 # echo "set server be_foo/srv1 addr 10.236.139.34 port 31256" | sudo socat stdio /var/lib/haproxy/stats IP changed 
from '0.0.0.0' to '10.236.139.34', port changed from '0' to '31256' by 'stats socket command' # echo "set server be_foo/srv1 weight 256" | sudo socat stdio /var/lib/haproxy/stats # echo "set server be_foo/srv1 check-port 8500" | sudo socat stdio /var/lib/haproxy/stats health check port updated. # echo "set server be_foo/srv1 state ready" | sudo socat stdio /var/lib/haproxy/stats # echo "show servers state be_foo" | sudo socat stdio /var/lib/haproxy/stats 113 be_foo 1 srv0 10.236.139.34 2 0 1 1 105 15 3 4 6 0 0 0 - 31255 - 113 be_foo 2 srv1 10.236.139.34 2 0 256 256 2319 15 3 2 6 0 0 0 - 31256 - -> curl on the corresponding frontend: reply from server:31257 (!) Here we indeed would expect to get an anver from server:31256. The issue is highly linked to the usage of `pool-purge-delay`, with a value which is higher than the duration of the test, 10s in our case. a git bisect between dev5 and dev6 seems to show commit 079cb9af22da6 ("MEDIUM: connections: Revamp the way idle connections are killed") being the origin of this new behaviour. So if I understand the later correctly, it seems that it was more a matter of chance that we did not saw the issue earlier. My patch proposes to force clean idle connections in the two following cases: - we set a (still running) server to maintenance - we change the ip/port of a server This commit should be backported to 2.1, 2.0, and 1.9. Signed-off-by: William Dauchy <w.dauchy@criteo.com>
2020-05-02 15:52:36 -04:00
/* Forward declaration of a file-local helper. Judging by its name it releases
 * connections attached to <srv> -- NOTE(review): confirm against the
 * definition, which is not visible in this part of the file.
 */
static void srv_cleanup_connections(struct server *srv);
/* Additional words which are used as values for other arguments. They are
 * only listed here so that they can be proposed as suggestions when a word
 * was mistyped. The list is NULL-terminated.
 */
static const char *extra_kw_list[] = {
	"ipv4",
	"ipv6",
	"legacy",
	"octet-count",
	"fail-check",
	"sudden-death",
	"mark-down",
	NULL, /* terminator, must remain the last entry */
};
/* List head of all known server keywords.
 * The embedded list is initialised with LIST_HEAD_INIT() on its own member,
 * presumably yielding an empty list -- NOTE(review): the code attaching
 * keyword sets to it is not visible in this part of the file.
 */
struct srv_kw_list srv_keywords = {
.list = LIST_HEAD_INIT(srv_keywords.list)
};
[MEDIUM] stats: report server and backend cumulated downtime Hello, This patch implements new statistics for SLA calculation by adding new field 'Dwntime' with total down time since restart (both HTTP/CSV) and extending status field (HTTP) or inserting a new one (CSV) with time showing how long each server/backend is in a current state. Additionaly, down transations are also calculated and displayed for backends, so it is possible to know how many times selected backend was down, generating "No server is available to handle this request." error. New information are presentetd in two different ways: - for HTTP: a "human redable form", one of "100000d 23h", "23h 59m" or "59m 59s" - for CSV: seconds I believe that seconds resolution is enough. As there are more columns in the status page I decided to shrink some names to make more space: - Weight -> Wght - Check -> Chk - Down -> Dwn Making described changes I also made some improvements and fixed some small bugs: - don't increment s->health above 's->rise + s->fall - 1'. Previously it was incremented an then (re)set to 's->rise + s->fall - 1'. - do not set server down if it is down already - do not set server up if it is up already - fix colspan in multiple places (mostly introduced by my previous patch) - add missing "status" header to CSV - fix order of retries/redispatches in server (CSV) - s/Tthen/Then/ - s/server/backend/ in DATA_ST_PX_BE (dumpstats.c) Changes from previous version: - deal with negative time intervales - don't relay on s->state (SRV_RUNNING) - little reworked human_time + compacted format (no spaces). If needed it can be used in the future for other purposes by optionally making "cnt" as an argument - leave set_server_down mostly unchanged - only little reworked "process_chk: 9" - additional fields in CSV are appended to the rigth - fix "SEC" macro - named arguments (human_time, be_downtime, srv_downtime) Hope it is OK. 
If there are only cosmetic changes needed please fill free to correct it, however if there are some bigger changes required I would like to discuss it first or at last to know what exactly was changed especially since I already put this patch into my production server. :) Thank you, Best regards, Krzysztof Oledzki
2007-10-22 10:21:10 -04:00
/* Global objects related to idle connections and to the list of servers.
 * NOTE(review): the roles hinted at below are inferred from the identifiers
 * only; their users are not visible in this part of the file.
 */
/* spinlock declared through __decl_thread() */
__decl_thread(HA_SPINLOCK_T idle_conn_srv_lock);
/* tree root, statically initialised to EB_ROOT */
struct eb_root idle_conn_srv = EB_ROOT;
/* task pointer, NULL until a task gets assigned to it */
struct task *idle_conn_task __read_mostly = NULL;
/* list head, statically initialised on itself */
struct mt_list servers_list = MT_LIST_HEAD_INIT(servers_list);
BUG/MAJOR: server/addr: fix a race during server addr:svc_port updates For inet families (IP4/IP6), it is expected that server's addr/port might be updated at runtime from DNS, cli or lua for instance. Such updates were performed under the server's lock. Unfortunately, most readers such as backend.c or sink.c perform the read without taking server's lock because they can't afford slowing down their processing for a type of event which is normally rare. But this could result in bad values being read for the server addr:svc_port tuple (ie: during connection etablishment) as a result of concurrent updates from external components, which can obviously cause some undesirable effects. Instead of slowing the readers down, as we consider server's addr changes are relatively rare, we take another approach and try to update the addr:port atomically by performing changes under full thread isolation when a new change is requested. The changes are performed by a dedicated task which takes care of isolating the current thread and doesn't depend on other threads (independent code path) to protect against dead locks. As such, server's addr:port changes will now be performed atomically, but they will not be processed instantly, they will be translated to events that the dedicated task will pick up from time to time to apply the pending changes. This bug existed for a very long time and has never been reported so far. It was discovered by reading the code during the implementation of log backend ("mode log" in backends). As it involves changes in sensitive areas as well as thread isolation, it is probably not worth considering backporting it for now, unless it is proven that it will help to solve bugs that are actually encountered in the field. 
This patch depends on: - 24da4d3 ("MINOR: tools: use const for read only pointers in ip{cmp,cpy}") - c886fb5 ("MINOR: server/ip: centralize server ip updates") - event_hdl API (which was first seen on 2.8) + 683b2ae ("MINOR: server/event_hdl: add SERVER_INETADDR event") + BUG/MEDIUM: server/event_hdl: memory overrun in _srv_event_hdl_prepare_inetaddr() + "MINOR: event_hdl: add global tunables" Note that the patch may be reworked so that it doesn't depend on event_hdl API for older versions, the approach would remain the same: this would result in a larger patch due to the need to manually implement a global queue of pending updates with its dedicated task responsible for picking updates and comitting them. An alternative approach could consist in per-server, lock-protected, temporary addr:svc_port storage dedicated to "updaters" were only the most recent values would be kept. The sync task would then use them as source values to atomically update the addr:svc_port members that the runtime readers are actually using.
2023-11-13 12:14:24 -05:00
/* Task and asynchronous event queue used to synchronise server updates.
 * NOTE(review): presumably the dedicated task which applies pending
 * addr:svc_port changes under thread isolation, fed through this queue --
 * confirm where both are initialised (not visible in this part of the file).
 */
static struct task *server_atomic_sync_task = NULL;
static event_hdl_async_equeue server_atomic_sync_queue;
/* SERVER DELETE(n)->ADD global tracker:
 * This is meant to provide the srv->rid (revision id) value.
 * The revision id makes it possible to differentiate between a previously
 * existing, now deleted server and a new server reusing the deleted server's
 * name/id.
 *
 * The start value is 0 (an even value).
 * The LSB is used to record that one or several server deletions were
 * performed in a row.
 * When adding a new server, the counter is incremented by 1 if its current
 * value is odd (odd = LSB set), because adding a new server after one or
 * several deletions means we could potentially be reusing old names:
 * increasing the revision id prevents mixups between old and new names.
 *
 * srv->rid is calculated from even values of the counter only.
 * sizeof(srv_id_reuse_cnt) must be twice sizeof(srv->rid).
 *
 * Wraparound is expected and should not cause issues
 * (with the current design up to 4 billion unique revisions are allowed).
 *
 * The counter is only used under thread_isolate (cli_add/cli_del), so there
 * is no need for atomic ops.
 */
static uint64_t srv_id_reuse_cnt = 0;
/* The server names dictionary.
 * It is labelled "server keys" and its tree of values starts out as
 * EB_ROOT_UNIQUE -- presumably a root which refuses duplicate keys (TODO
 * confirm against the ebtree API).
 */
struct dict server_key_dict = {
.name = "server keys",
.values = EB_ROOT_UNIQUE,
};
/* Human-readable text for each administrative state change cause. The table
 * is indexed by the SRV_ADM_STCHGC_* values and is read through
 * srv_adm_st_chg_cause(). SRV_ADM_STCHGC_NONE maps to an empty string.
 * NOTE(review): the DNS_NOIP text ends with a space, presumably because
 * something gets appended to it by its users -- confirm before touching it.
 */
static const char *srv_adm_st_chg_cause_str[] = {
[SRV_ADM_STCHGC_NONE] = "",
[SRV_ADM_STCHGC_DNS_NOENT] = "entry removed from SRV record",
[SRV_ADM_STCHGC_DNS_NOIP] = "No IP for server ",
[SRV_ADM_STCHGC_DNS_NX] = "DNS NX status",
[SRV_ADM_STCHGC_DNS_TIMEOUT] = "DNS timeout status",
[SRV_ADM_STCHGC_DNS_REFUSED] = "DNS refused status",
[SRV_ADM_STCHGC_DNS_UNSPEC] = "unspecified DNS error",
[SRV_ADM_STCHGC_STATS_DISABLE] = "'disable' on stats page",
[SRV_ADM_STCHGC_STATS_STOP] = "'stop' on stats page"
};
/* Returns the textual description of the administrative state change cause
 * <cause>.
 *
 * <cause> is used as an index into srv_adm_st_chg_cause_str[]. A value lying
 * outside of that table yields an empty string (the same text as the one
 * associated with SRV_ADM_STCHGC_NONE) instead of reading past the end of
 * the array.
 */
const char *srv_adm_st_chg_cause(enum srv_adm_st_chg_cause cause)
{
	const size_t nb_causes = sizeof(srv_adm_st_chg_cause_str) /
	                         sizeof(srv_adm_st_chg_cause_str[0]);

	/* the cast to an unsigned type also rejects negative values should
	 * the enum be represented by a signed type.
	 */
	if ((size_t)cause >= nb_causes)
		return "";

	return srv_adm_st_chg_cause_str[cause];
}
/* Human-readable text for each operational state change cause. The table is
 * indexed by the SRV_OP_STCHGC_* values and is read through
 * srv_op_st_chg_cause(). The NONE, HEALTH and AGENT causes all map to an
 * empty string.
 */
static const char *srv_op_st_chg_cause_str[] = {
[SRV_OP_STCHGC_NONE] = "",
[SRV_OP_STCHGC_HEALTH] = "",
[SRV_OP_STCHGC_AGENT] = "",
[SRV_OP_STCHGC_CLI] = "changed from CLI",
[SRV_OP_STCHGC_LUA] = "changed from Lua script",
[SRV_OP_STCHGC_STATS_WEB] = "changed from Web interface",
[SRV_OP_STCHGC_STATEFILE] = "changed from server-state after a reload"
};
/* Resets the path parameters stored for server <s>: the <nego_alpn> string
 * (presumably the negotiated ALPN, to be confirmed) is truncated to an empty
 * string. The write is performed with the path parameters' lock held for
 * writing.
 */
static void srv_reset_path_parameters(struct server *s)
{
	HA_RWLOCK_WRLOCK(SERVER_LOCK, &s->path_params.param_lock);
	*s->path_params.nego_alpn = '\0';
	HA_RWLOCK_WRUNLOCK(SERVER_LOCK, &s->path_params.param_lock);
}
/* Returns the textual description of the operational state change cause
 * <cause>.
 *
 * <cause> is used as an index into srv_op_st_chg_cause_str[]. A value lying
 * outside of that table yields an empty string (the same text as the one
 * associated with SRV_OP_STCHGC_NONE) instead of reading past the end of the
 * array.
 */
const char *srv_op_st_chg_cause(enum srv_op_st_chg_cause cause)
{
	const size_t nb_causes = sizeof(srv_op_st_chg_cause_str) /
	                         sizeof(srv_op_st_chg_cause_str[0]);

	/* the cast to an unsigned type also rejects negative values should
	 * the enum be represented by a signed type.
	 */
	if ((size_t)cause >= nb_causes)
		return "";

	return srv_op_st_chg_cause_str[cause];
}
/* Returns the cumulated downtime of server <s>.
 *
 * The recorded <down_time> is returned as-is unless the server is currently
 * in the SRV_ST_STOPPED state, in which case the time elapsed since its last
 * change (current date converted with ns_to_sec()) is added to it.
 */
int srv_downtime(const struct server *s)
{
	/* a server which is not stopped does not accumulate anything more */
	if (s->cur_state != SRV_ST_STOPPED)
		return s->down_time;

	/* ignore negative time */
	if (s->last_change >= ns_to_sec(now_ns))
		return s->down_time;

	return ns_to_sec(now_ns) - s->last_change + s->down_time;
}
int srv_getinter(const struct check *check)
{
const struct server *s = check->server;
if ((check->state & (CHK_ST_CONFIGURED|CHK_ST_FASTINTER)) == CHK_ST_CONFIGURED &&
(check->health == check->rise + check->fall - 1))
return check->inter;
[MEDIUM]: rework checks handling This patch adds two new variables: fastinter and downinter. When server state is: - non-transitionally UP -> inter (no change) - transitionally UP (going down), unchecked or transitionally DOWN (going up) -> fastinter - down -> downinter It allows to set something like: server sr6 127.0.51.61:80 cookie s6 check inter 10000 downinter 20000 fastinter 500 fall 3 weight 40 In the above example haproxy uses 10000ms between checks but as soon as one check fails fastinter (500ms) is used. If server is down downinter (20000) is used or fastinter (500ms) if one check pass. Fastinter is also used when haproxy starts. New "timeout.check" variable was added, if set haproxy uses it as an additional read timeout, but only after a connection has been already established. I was thinking about using "timeout.server" here but most people set this with an addition reserve but still want checks to kick out laggy servers. Please also note that in most cases check request is much simpler and faster to handle than normal requests so this timeout should be smaller. I also changed the timeout used for check connections establishing. Changes from the previous version: - use tv_isset() to check if the timeout is set, - use min("timeout connect", "inter") but only if "timeout check" is set as this min alone may be to short for full (connect + read) check, - debug code (fprintf) commented/removed - documentation Compile tested only (sorry!) as I'm currently traveling but changes are rather small and trivial.
2008-01-20 19:54:06 -05:00
if ((s->next_state == SRV_ST_STOPPED) && check->health == 0)
return (check->downinter)?(check->downinter):(check->inter);
[MEDIUM]: rework checks handling This patch adds two new variables: fastinter and downinter. When server state is: - non-transitionally UP -> inter (no change) - transitionally UP (going down), unchecked or transitionally DOWN (going up) -> fastinter - down -> downinter It allows to set something like: server sr6 127.0.51.61:80 cookie s6 check inter 10000 downinter 20000 fastinter 500 fall 3 weight 40 In the above example haproxy uses 10000ms between checks but as soon as one check fails fastinter (500ms) is used. If server is down downinter (20000) is used or fastinter (500ms) if one check pass. Fastinter is also used when haproxy starts. New "timeout.check" variable was added, if set haproxy uses it as an additional read timeout, but only after a connection has been already established. I was thinking about using "timeout.server" here but most people set this with an addition reserve but still want checks to kick out laggy servers. Please also note that in most cases check request is much simpler and faster to handle than normal requests so this timeout should be smaller. I also changed the timeout used for check connections establishing. Changes from the previous version: - use tv_isset() to check if the timeout is set, - use min("timeout connect", "inter") but only if "timeout check" is set as this min alone may be to short for full (connect + read) check, - debug code (fprintf) commented/removed - documentation Compile tested only (sorry!) as I'm currently traveling but changes are rather small and trivial.
2008-01-20 19:54:06 -05:00
return (check->fastinter)?(check->fastinter):(check->inter);
[MEDIUM]: rework checks handling This patch adds two new variables: fastinter and downinter. When server state is: - non-transitionally UP -> inter (no change) - transitionally UP (going down), unchecked or transitionally DOWN (going up) -> fastinter - down -> downinter It allows to set something like: server sr6 127.0.51.61:80 cookie s6 check inter 10000 downinter 20000 fastinter 500 fall 3 weight 40 In the above example haproxy uses 10000ms between checks but as soon as one check fails fastinter (500ms) is used. If server is down downinter (20000) is used or fastinter (500ms) if one check pass. Fastinter is also used when haproxy starts. New "timeout.check" variable was added, if set haproxy uses it as an additional read timeout, but only after a connection has been already established. I was thinking about using "timeout.server" here but most people set this with an addition reserve but still want checks to kick out laggy servers. Please also note that in most cases check request is much simpler and faster to handle than normal requests so this timeout should be smaller. I also changed the timeout used for check connections establishing. Changes from the previous version: - use tv_isset() to check if the timeout is set, - use min("timeout connect", "inter") but only if "timeout check" is set as this min alone may be to short for full (connect + read) check, - debug code (fprintf) commented/removed - documentation Compile tested only (sorry!) as I'm currently traveling but changes are rather small and trivial.
2008-01-20 19:54:06 -05:00
}
/* Update server's addr:svc_port tuple in INET context.
 *
 * <addr> is copied into the server's address, <svc_port> becomes its service
 * port, and SRV_F_MAPPORTS is set in the server's flags when <mapped_port> is
 * non-zero, cleared otherwise. The proxy's update_server_eweight() callback
 * is then called if one is set, and a log target of type LOG_TARGET_DGRAM
 * attached to the server receives the same address and port.
 *
 * Must be called under thread isolation to ensure consistent readings across
 * all threads (addr:svc_port might be read without srv lock being held).
 */
static void _srv_set_inetaddr_port(struct server *srv,
const struct sockaddr_storage *addr,
unsigned int svc_port, uint8_t mapped_port)
{
/* <addr> is the source, the server's own address is the destination */
ipcpy(addr, &srv->addr);
srv->svc_port = svc_port;
if (mapped_port)
srv->flags |= SRV_F_MAPPORTS;
else
srv->flags &= ~SRV_F_MAPPORTS;
/* only called once the new address is in place */
if (srv->proxy->lbprm.update_server_eweight) {
/* some balancers (chash in particular) may use the addr in their routing decisions */
srv->proxy->lbprm.update_server_eweight(srv);
}
if (srv->log_target && srv->log_target->type == LOG_TARGET_DGRAM) {
/* server is used as a log target, manually update log target addr for DGRAM */
ipcpy(addr, srv->log_target->addr);
set_host_port(srv->log_target->addr, svc_port);
}
}
/* Same as _srv_set_inetaddr_port(), except that only the address part is
 * replaced: the server's current service port and the current state of its
 * SRV_F_MAPPORTS flag are passed back unchanged.
 */
static void _srv_set_inetaddr(struct server *srv,
                              const struct sockaddr_storage *addr)
{
	const uint8_t keep_mapped = (srv->flags & SRV_F_MAPPORTS) != 0;

	_srv_set_inetaddr_port(srv, addr, srv->svc_port, keep_mapped);
}
BUG/MAJOR: server/addr: fix a race during server addr:svc_port updates For inet families (IP4/IP6), it is expected that server's addr/port might be updated at runtime from DNS, cli or lua for instance. Such updates were performed under the server's lock. Unfortunately, most readers such as backend.c or sink.c perform the read without taking server's lock because they can't afford slowing down their processing for a type of event which is normally rare. But this could result in bad values being read for the server addr:svc_port tuple (ie: during connection establishment) as a result of concurrent updates from external components, which can obviously cause some undesirable effects. Instead of slowing the readers down, as we consider server's addr changes are relatively rare, we take another approach and try to update the addr:port atomically by performing changes under full thread isolation when a new change is requested. The changes are performed by a dedicated task which takes care of isolating the current thread and doesn't depend on other threads (independent code path) to protect against deadlocks. As such, server's addr:port changes will now be performed atomically, but they will not be processed instantly, they will be translated to events that the dedicated task will pick up from time to time to apply the pending changes. This bug existed for a very long time and has never been reported so far. It was discovered by reading the code during the implementation of log backend ("mode log" in backends). As it involves changes in sensitive areas as well as thread isolation, it is probably not worth considering backporting it for now, unless it is proven that it will help to solve bugs that are actually encountered in the field.
This patch depends on: - 24da4d3 ("MINOR: tools: use const for read only pointers in ip{cmp,cpy}") - c886fb5 ("MINOR: server/ip: centralize server ip updates") - event_hdl API (which was first seen on 2.8) + 683b2ae ("MINOR: server/event_hdl: add SERVER_INETADDR event") + BUG/MEDIUM: server/event_hdl: memory overrun in _srv_event_hdl_prepare_inetaddr() + "MINOR: event_hdl: add global tunables" Note that the patch may be reworked so that it doesn't depend on event_hdl API for older versions, the approach would remain the same: this would result in a larger patch due to the need to manually implement a global queue of pending updates with its dedicated task responsible for picking updates and committing them. An alternative approach could consist in per-server, lock-protected, temporary addr:svc_port storage dedicated to "updaters" where only the most recent values would be kept. The sync task would then use them as source values to atomically update the addr:svc_port members that the runtime readers are actually using.
2023-11-13 12:14:24 -05:00
/*
* Function executed by server_atomic_sync_task to perform atomic updates on
* compatible server struct members that are not guarded by any lock since
* they are not supposed to change often and are subject to being used in
* sensitive codepaths
*
* Some updates may require thread isolation: we start without isolation
* but as soon as we encounter an event that requires isolation, we do so.
* Once the event is processed, we keep the isolation until we've processed
* the whole batch of events and leave isolation once we're done, as it would
* be very costly to try to acquire isolation multiple times in a row.
* The task will limit itself to a number of events per run to prevent
* thread contention (see: "tune.events.max-events-at-once").
*
* TODO: if we find out that enforcing isolation is too costly, we may
* consider adding thread_isolate_try_full(timeout) or equivalent to the
* thread API so that we can do our best not to block harmless threads
* for too long if one or multiple threads are still heavily busy. This
* would mean that the task would be capable of rescheduling itself to
* start again on the current event if it failed to acquire thread
* isolation. This would also imply that the event_hdl API allows us
* to check an event without popping it from the queue first (remove the
* event once it is successfully processed).
*/
/* forward declaration: srv_set_addr_desc() is called by server_atomic_sync()
 * below but its body is not available at this point of the file
 */
static void srv_set_addr_desc(struct server *s, int reattach);
static struct task *server_atomic_sync(struct task *task, void *context, unsigned int state)
{
unsigned int remain = event_hdl_tune.max_events_at_once; // to limit max number of events per batch
struct event_hdl_async_event *event;
BUG_ON(remain == 0); // event_hdl_tune.max_events_at_once is expected to be > 0
BUG/MAJOR: server/addr: fix a race during server addr:svc_port updates For inet families (IP4/IP6), it is expected that server's addr/port might be updated at runtime from DNS, cli or lua for instance. Such updates were performed under the server's lock. Unfortunately, most readers such as backend.c or sink.c perform the read without taking server's lock because they can't afford slowing down their processing for a type of event which is normally rare. But this could result in bad values being read for the server addr:svc_port tuple (ie: during connection etablishment) as a result of concurrent updates from external components, which can obviously cause some undesirable effects. Instead of slowing the readers down, as we consider server's addr changes are relatively rare, we take another approach and try to update the addr:port atomically by performing changes under full thread isolation when a new change is requested. The changes are performed by a dedicated task which takes care of isolating the current thread and doesn't depend on other threads (independent code path) to protect against dead locks. As such, server's addr:port changes will now be performed atomically, but they will not be processed instantly, they will be translated to events that the dedicated task will pick up from time to time to apply the pending changes. This bug existed for a very long time and has never been reported so far. It was discovered by reading the code during the implementation of log backend ("mode log" in backends). As it involves changes in sensitive areas as well as thread isolation, it is probably not worth considering backporting it for now, unless it is proven that it will help to solve bugs that are actually encountered in the field. 
This patch depends on: - 24da4d3 ("MINOR: tools: use const for read only pointers in ip{cmp,cpy}") - c886fb5 ("MINOR: server/ip: centralize server ip updates") - event_hdl API (which was first seen on 2.8) + 683b2ae ("MINOR: server/event_hdl: add SERVER_INETADDR event") + BUG/MEDIUM: server/event_hdl: memory overrun in _srv_event_hdl_prepare_inetaddr() + "MINOR: event_hdl: add global tunables" Note that the patch may be reworked so that it doesn't depend on event_hdl API for older versions, the approach would remain the same: this would result in a larger patch due to the need to manually implement a global queue of pending updates with its dedicated task responsible for picking updates and comitting them. An alternative approach could consist in per-server, lock-protected, temporary addr:svc_port storage dedicated to "updaters" were only the most recent values would be kept. The sync task would then use them as source values to atomically update the addr:svc_port members that the runtime readers are actually using.
2023-11-13 12:14:24 -05:00
/* check for new server events that we care about */
BUG/MEDIUM: server/addr: fix tune.events.max-events-at-once event miss and leak An issue has been introduced with cd99440 ("BUG/MAJOR: server/addr: fix a race during server addr:svc_port updates"). Indeed, in the above commit we implemented the atomic_sync task which is responsible for consuming pending server events to apply the changes atomically. For now only server's addr updates are concerned. To prevent the task from causing contention, a budget was assigned to it. It can be controlled with the global tunable 'tune.events.max-events-at-once': the task may not process more than this number of events at once. However, a bug was introduced with this budget logic: each time the task has to be interrupted because it runs out of budget, we reschedule the task to finish where it left off, but the current event which was already removed from the queue wasn't processed yet. This means that this pending event (each tune.events.max-events-at-once) is effectively lost. When the atomic_sync task deals with large number of concurrent events, this bug has 2 known consequences: first a server's addr/port update will be lost every 'tune.events.max-events-at-once'. This can of course cause reliability issues because if the event is not republished periodically, the server could stay in a stale state for indefinite amount of time. This is the case when the DNS server flaps for instance: some servers may not come back UP after the incident as described in GH #2666. Another issue is that the lost event was not cleaned up, resulting in a small memory leak. So in the end, it means that the bug is likely to cause more and more degradation over time until haproxy is restarted. As a workaround, 'tune.events.max-events-at-once' may be set to the maximum number of events expected per batch. Note however that this value cannot exceed 10 000, otherwise it could cause the watchdog to trigger due to the task being busy for too long and preventing other threads from making any progress. 
Setting higher values may not be optimal for common workloads so it should only be used to mitigate the bug while waiting for this fix. Since tune.events.max-events-at-once defaults to 100, this bug only affects configs that involve more than 100 servers whose addr:port properties are likely to be updated at the same time (batched updates from cli, lua, dns..) To fix the bug, we move the budget check after the current event is fully handled. For that we went from a basic 'while' to 'do..while' loop as we assume from the config that 'tune.events.max-events-at-once' cannot be 0. While at it, we reschedule the task once thread isolation ends (it was not required to perform the reschedule while under isolation) to give the hand back faster to waiting threads. This patch should be backported up to 2.9 with cd99440. It should fix GH #2666.
2024-08-06 08:29:56 -04:00
do {
event = event_hdl_async_equeue_pop(&server_atomic_sync_queue);
if (!event)
break; /* no events in queue */
BUG/MAJOR: server/addr: fix a race during server addr:svc_port updates For inet families (IP4/IP6), it is expected that server's addr/port might be updated at runtime from DNS, cli or lua for instance. Such updates were performed under the server's lock. Unfortunately, most readers such as backend.c or sink.c perform the read without taking server's lock because they can't afford slowing down their processing for a type of event which is normally rare. But this could result in bad values being read for the server addr:svc_port tuple (ie: during connection etablishment) as a result of concurrent updates from external components, which can obviously cause some undesirable effects. Instead of slowing the readers down, as we consider server's addr changes are relatively rare, we take another approach and try to update the addr:port atomically by performing changes under full thread isolation when a new change is requested. The changes are performed by a dedicated task which takes care of isolating the current thread and doesn't depend on other threads (independent code path) to protect against dead locks. As such, server's addr:port changes will now be performed atomically, but they will not be processed instantly, they will be translated to events that the dedicated task will pick up from time to time to apply the pending changes. This bug existed for a very long time and has never been reported so far. It was discovered by reading the code during the implementation of log backend ("mode log" in backends). As it involves changes in sensitive areas as well as thread isolation, it is probably not worth considering backporting it for now, unless it is proven that it will help to solve bugs that are actually encountered in the field. 
This patch depends on: - 24da4d3 ("MINOR: tools: use const for read only pointers in ip{cmp,cpy}") - c886fb5 ("MINOR: server/ip: centralize server ip updates") - event_hdl API (which was first seen on 2.8) + 683b2ae ("MINOR: server/event_hdl: add SERVER_INETADDR event") + BUG/MEDIUM: server/event_hdl: memory overrun in _srv_event_hdl_prepare_inetaddr() + "MINOR: event_hdl: add global tunables" Note that the patch may be reworked so that it doesn't depend on event_hdl API for older versions, the approach would remain the same: this would result in a larger patch due to the need to manually implement a global queue of pending updates with its dedicated task responsible for picking updates and comitting them. An alternative approach could consist in per-server, lock-protected, temporary addr:svc_port storage dedicated to "updaters" were only the most recent values would be kept. The sync task would then use them as source values to atomically update the addr:svc_port members that the runtime readers are actually using.
2023-11-13 12:14:24 -05:00
if (event_hdl_sub_type_equal(event->type, EVENT_HDL_SUB_END)) {
/* ending event: no more events to come */
event_hdl_async_free_event(event);
task_destroy(task);
task = NULL;
break;
}
/* new event to process */
if (event_hdl_sub_type_equal(event->type, EVENT_HDL_SUB_SERVER_INETADDR)) {
struct sockaddr_storage new_addr;
struct event_hdl_cb_data_server_inetaddr *data = event->data;
struct proxy *px;
struct server *srv;
/* server ip:port changed, we must atomically update data members
* to prevent invalid reads by other threads.
*/
BUG/MEDIUM: server: fix race on server_atomic_sync() The following patch fixes a race condition during server addr/port update : cd994407a9545a8d84e410dc0cc18c30966b70d8 BUG/MAJOR: server/addr: fix a race during server addr:svc_port updates The new update mechanism is implemented via an event update. It uses thread isolation to guarantee that no other thread is accessing server addr/port. Furthermore, to ensure the server instance is not deleted just before the event handler runs, the server instance is looked up via its ID in the proxy tree. However, thread isolation is only entered after server lookup. This leaves a tiny race condition as the thread will be marked as harmless and a concurrent thread can delete the server in the meantime. This causes server_atomic_sync() to manipulate a deleted server instance and reinsert it in the used_server_addr backend tree. This can cause a segfault during this operation or possibly on a future used_server_addr tree access. This issue was detected by criteo. Several backtraces were retrieved, each related to server addr_node insert or delete operation, either in srv_set_addr_desc(), or add/delete dynamic server handlers. To fix this, simply extend thread isolation section to start it before server lookup. This ensures that once retrieved the server cannot be deleted until its addr/port are updated. To ensure this issue won't happen anymore, a new BUG_ON() is added in srv_set_addr_desc(). Also note that ebpt_delete() is now called every time on delete handler as this is a safe idempotent operation. To reproduce these crashes, a script was executed to add then remove different servers every second. In parallel, the following CLI command was issued repeatedly without any delay to force multiple updates on the server's port : set server <srv> addr 0.0.0.0 port $((1024 + RANDOM % 1024)) This must be backported at least up to 3.0. If the above mentioned patch has been selected for previous versions, this commit must also be backported to them.
2024-07-02 12:14:57 -04:00
/*
* this requires thread isolation, which is safe since we're the only
* task working for the current subscription and we don't hold locks
* or resources that other threads may depend on to complete a running
* cycle. Note that we do this way because we assume that this event is
* rather rare.
*/
if (!thread_isolated())
thread_isolate_full();
BUG/MAJOR: server/addr: fix a race during server addr:svc_port updates For inet families (IP4/IP6), it is expected that server's addr/port might be updated at runtime from DNS, cli or lua for instance. Such updates were performed under the server's lock. Unfortunately, most readers such as backend.c or sink.c perform the read without taking server's lock because they can't afford slowing down their processing for a type of event which is normally rare. But this could result in bad values being read for the server addr:svc_port tuple (ie: during connection etablishment) as a result of concurrent updates from external components, which can obviously cause some undesirable effects. Instead of slowing the readers down, as we consider server's addr changes are relatively rare, we take another approach and try to update the addr:port atomically by performing changes under full thread isolation when a new change is requested. The changes are performed by a dedicated task which takes care of isolating the current thread and doesn't depend on other threads (independent code path) to protect against dead locks. As such, server's addr:port changes will now be performed atomically, but they will not be processed instantly, they will be translated to events that the dedicated task will pick up from time to time to apply the pending changes. This bug existed for a very long time and has never been reported so far. It was discovered by reading the code during the implementation of log backend ("mode log" in backends). As it involves changes in sensitive areas as well as thread isolation, it is probably not worth considering backporting it for now, unless it is proven that it will help to solve bugs that are actually encountered in the field. 
This patch depends on: - 24da4d3 ("MINOR: tools: use const for read only pointers in ip{cmp,cpy}") - c886fb5 ("MINOR: server/ip: centralize server ip updates") - event_hdl API (which was first seen on 2.8) + 683b2ae ("MINOR: server/event_hdl: add SERVER_INETADDR event") + BUG/MEDIUM: server/event_hdl: memory overrun in _srv_event_hdl_prepare_inetaddr() + "MINOR: event_hdl: add global tunables" Note that the patch may be reworked so that it doesn't depend on event_hdl API for older versions, the approach would remain the same: this would result in a larger patch due to the need to manually implement a global queue of pending updates with its dedicated task responsible for picking updates and comitting them. An alternative approach could consist in per-server, lock-protected, temporary addr:svc_port storage dedicated to "updaters" were only the most recent values would be kept. The sync task would then use them as source values to atomically update the addr:svc_port members that the runtime readers are actually using.
2023-11-13 12:14:24 -05:00
/* check if related server still exists */
px = proxy_find_by_id(data->server.safe.proxy_uuid, PR_CAP_BE, 0);
if (!px)
continue;
srv = server_find_by_id_unique(px, data->server.safe.puid, data->server.safe.rid);
BUG/MAJOR: server/addr: fix a race during server addr:svc_port updates For inet families (IP4/IP6), it is expected that server's addr/port might be updated at runtime from DNS, cli or lua for instance. Such updates were performed under the server's lock. Unfortunately, most readers such as backend.c or sink.c perform the read without taking server's lock because they can't afford slowing down their processing for a type of event which is normally rare. But this could result in bad values being read for the server addr:svc_port tuple (ie: during connection etablishment) as a result of concurrent updates from external components, which can obviously cause some undesirable effects. Instead of slowing the readers down, as we consider server's addr changes are relatively rare, we take another approach and try to update the addr:port atomically by performing changes under full thread isolation when a new change is requested. The changes are performed by a dedicated task which takes care of isolating the current thread and doesn't depend on other threads (independent code path) to protect against dead locks. As such, server's addr:port changes will now be performed atomically, but they will not be processed instantly, they will be translated to events that the dedicated task will pick up from time to time to apply the pending changes. This bug existed for a very long time and has never been reported so far. It was discovered by reading the code during the implementation of log backend ("mode log" in backends). As it involves changes in sensitive areas as well as thread isolation, it is probably not worth considering backporting it for now, unless it is proven that it will help to solve bugs that are actually encountered in the field. 
This patch depends on: - 24da4d3 ("MINOR: tools: use const for read only pointers in ip{cmp,cpy}") - c886fb5 ("MINOR: server/ip: centralize server ip updates") - event_hdl API (which was first seen on 2.8) + 683b2ae ("MINOR: server/event_hdl: add SERVER_INETADDR event") + BUG/MEDIUM: server/event_hdl: memory overrun in _srv_event_hdl_prepare_inetaddr() + "MINOR: event_hdl: add global tunables" Note that the patch may be reworked so that it doesn't depend on event_hdl API for older versions, the approach would remain the same: this would result in a larger patch due to the need to manually implement a global queue of pending updates with its dedicated task responsible for picking updates and comitting them. An alternative approach could consist in per-server, lock-protected, temporary addr:svc_port storage dedicated to "updaters" were only the most recent values would be kept. The sync task would then use them as source values to atomically update the addr:svc_port members that the runtime readers are actually using.
2023-11-13 12:14:24 -05:00
if (!srv)
continue;
/* prepare new addr based on event cb data */
memset(&new_addr, 0, sizeof(new_addr));
new_addr.ss_family = data->safe.next.family;
switch (new_addr.ss_family) {
case AF_INET:
((struct sockaddr_in *)&new_addr)->sin_addr.s_addr =
data->safe.next.addr.v4.s_addr;
break;
case AF_INET6:
memcpy(&((struct sockaddr_in6 *)&new_addr)->sin6_addr,
&data->safe.next.addr.v6,
sizeof(struct in6_addr));
break;
case AF_UNSPEC:
/* addr reset, nothing to do */
break;
BUG/MAJOR: server/addr: fix a race during server addr:svc_port updates For inet families (IP4/IP6), it is expected that server's addr/port might be updated at runtime from DNS, cli or lua for instance. Such updates were performed under the server's lock. Unfortunately, most readers such as backend.c or sink.c perform the read without taking server's lock because they can't afford slowing down their processing for a type of event which is normally rare. But this could result in bad values being read for the server addr:svc_port tuple (ie: during connection etablishment) as a result of concurrent updates from external components, which can obviously cause some undesirable effects. Instead of slowing the readers down, as we consider server's addr changes are relatively rare, we take another approach and try to update the addr:port atomically by performing changes under full thread isolation when a new change is requested. The changes are performed by a dedicated task which takes care of isolating the current thread and doesn't depend on other threads (independent code path) to protect against dead locks. As such, server's addr:port changes will now be performed atomically, but they will not be processed instantly, they will be translated to events that the dedicated task will pick up from time to time to apply the pending changes. This bug existed for a very long time and has never been reported so far. It was discovered by reading the code during the implementation of log backend ("mode log" in backends). As it involves changes in sensitive areas as well as thread isolation, it is probably not worth considering backporting it for now, unless it is proven that it will help to solve bugs that are actually encountered in the field. 
This patch depends on: - 24da4d3 ("MINOR: tools: use const for read only pointers in ip{cmp,cpy}") - c886fb5 ("MINOR: server/ip: centralize server ip updates") - event_hdl API (which was first seen on 2.8) + 683b2ae ("MINOR: server/event_hdl: add SERVER_INETADDR event") + BUG/MEDIUM: server/event_hdl: memory overrun in _srv_event_hdl_prepare_inetaddr() + "MINOR: event_hdl: add global tunables" Note that the patch may be reworked so that it doesn't depend on event_hdl API for older versions, the approach would remain the same: this would result in a larger patch due to the need to manually implement a global queue of pending updates with its dedicated task responsible for picking updates and comitting them. An alternative approach could consist in per-server, lock-protected, temporary addr:svc_port storage dedicated to "updaters" were only the most recent values would be kept. The sync task would then use them as source values to atomically update the addr:svc_port members that the runtime readers are actually using.
2023-11-13 12:14:24 -05:00
default:
/* should not happen */
break;
}
/* apply new addr:port combination */
_srv_set_inetaddr_port(srv, &new_addr,
data->safe.next.port.svc, data->safe.next.port.map);
BUG/MAJOR: server/addr: fix a race during server addr:svc_port updates For inet families (IP4/IP6), it is expected that server's addr/port might be updated at runtime from DNS, cli or lua for instance. Such updates were performed under the server's lock. Unfortunately, most readers such as backend.c or sink.c perform the read without taking server's lock because they can't afford slowing down their processing for a type of event which is normally rare. But this could result in bad values being read for the server addr:svc_port tuple (ie: during connection etablishment) as a result of concurrent updates from external components, which can obviously cause some undesirable effects. Instead of slowing the readers down, as we consider server's addr changes are relatively rare, we take another approach and try to update the addr:port atomically by performing changes under full thread isolation when a new change is requested. The changes are performed by a dedicated task which takes care of isolating the current thread and doesn't depend on other threads (independent code path) to protect against dead locks. As such, server's addr:port changes will now be performed atomically, but they will not be processed instantly, they will be translated to events that the dedicated task will pick up from time to time to apply the pending changes. This bug existed for a very long time and has never been reported so far. It was discovered by reading the code during the implementation of log backend ("mode log" in backends). As it involves changes in sensitive areas as well as thread isolation, it is probably not worth considering backporting it for now, unless it is proven that it will help to solve bugs that are actually encountered in the field. 
This patch depends on: - 24da4d3 ("MINOR: tools: use const for read only pointers in ip{cmp,cpy}") - c886fb5 ("MINOR: server/ip: centralize server ip updates") - event_hdl API (which was first seen on 2.8) + 683b2ae ("MINOR: server/event_hdl: add SERVER_INETADDR event") + BUG/MEDIUM: server/event_hdl: memory overrun in _srv_event_hdl_prepare_inetaddr() + "MINOR: event_hdl: add global tunables" Note that the patch may be reworked so that it doesn't depend on event_hdl API for older versions, the approach would remain the same: this would result in a larger patch due to the need to manually implement a global queue of pending updates with its dedicated task responsible for picking updates and comitting them. An alternative approach could consist in per-server, lock-protected, temporary addr:svc_port storage dedicated to "updaters" were only the most recent values would be kept. The sync task would then use them as source values to atomically update the addr:svc_port members that the runtime readers are actually using.
2023-11-13 12:14:24 -05:00
/* propagate the changes, force connection cleanup */
if (new_addr.ss_family != AF_UNSPEC &&
(srv->next_admin & SRV_ADMF_RMAINT)) {
/* server was previously put under DNS maintenance due
* to DNS error, but addr resolves again, so we must
* put it out of maintenance
*/
srv_clr_admin_flag(srv, SRV_ADMF_RMAINT);
/* thanks to valid DNS resolution? */
if (data->safe.updater.dns) {
chunk_reset(&trash);
chunk_printf(&trash, "Server %s/%s administratively READY thanks to valid DNS answer", srv->proxy->id, srv->id);
ha_warning("%s.\n", trash.area);
send_log(srv->proxy, LOG_NOTICE, "%s.\n", trash.area);
}
}
srv_cleanup_connections(srv);
BUG/MAJOR: server/addr: fix a race during server addr:svc_port updates For inet families (IP4/IP6), it is expected that server's addr/port might be updated at runtime from DNS, cli or lua for instance. Such updates were performed under the server's lock. Unfortunately, most readers such as backend.c or sink.c perform the read without taking server's lock because they can't afford slowing down their processing for a type of event which is normally rare. But this could result in bad values being read for the server addr:svc_port tuple (ie: during connection etablishment) as a result of concurrent updates from external components, which can obviously cause some undesirable effects. Instead of slowing the readers down, as we consider server's addr changes are relatively rare, we take another approach and try to update the addr:port atomically by performing changes under full thread isolation when a new change is requested. The changes are performed by a dedicated task which takes care of isolating the current thread and doesn't depend on other threads (independent code path) to protect against dead locks. As such, server's addr:port changes will now be performed atomically, but they will not be processed instantly, they will be translated to events that the dedicated task will pick up from time to time to apply the pending changes. This bug existed for a very long time and has never been reported so far. It was discovered by reading the code during the implementation of log backend ("mode log" in backends). As it involves changes in sensitive areas as well as thread isolation, it is probably not worth considering backporting it for now, unless it is proven that it will help to solve bugs that are actually encountered in the field. 
This patch depends on: - 24da4d3 ("MINOR: tools: use const for read only pointers in ip{cmp,cpy}") - c886fb5 ("MINOR: server/ip: centralize server ip updates") - event_hdl API (which was first seen on 2.8) + 683b2ae ("MINOR: server/event_hdl: add SERVER_INETADDR event") + BUG/MEDIUM: server/event_hdl: memory overrun in _srv_event_hdl_prepare_inetaddr() + "MINOR: event_hdl: add global tunables" Note that the patch may be reworked so that it doesn't depend on event_hdl API for older versions, the approach would remain the same: this would result in a larger patch due to the need to manually implement a global queue of pending updates with its dedicated task responsible for picking updates and comitting them. An alternative approach could consist in per-server, lock-protected, temporary addr:svc_port storage dedicated to "updaters" were only the most recent values would be kept. The sync task would then use them as source values to atomically update the addr:svc_port members that the runtime readers are actually using.
2023-11-13 12:14:24 -05:00
srv_set_dyncookie(srv);
srv_set_addr_desc(srv, 1);
}
event_hdl_async_free_event(event);
} while (--remain);
BUG/MAJOR: server/addr: fix a race during server addr:svc_port updates For inet families (IP4/IP6), it is expected that server's addr/port might be updated at runtime from DNS, cli or lua for instance. Such updates were performed under the server's lock. Unfortunately, most readers such as backend.c or sink.c perform the read without taking server's lock because they can't afford slowing down their processing for a type of event which is normally rare. But this could result in bad values being read for the server addr:svc_port tuple (ie: during connection etablishment) as a result of concurrent updates from external components, which can obviously cause some undesirable effects. Instead of slowing the readers down, as we consider server's addr changes are relatively rare, we take another approach and try to update the addr:port atomically by performing changes under full thread isolation when a new change is requested. The changes are performed by a dedicated task which takes care of isolating the current thread and doesn't depend on other threads (independent code path) to protect against dead locks. As such, server's addr:port changes will now be performed atomically, but they will not be processed instantly, they will be translated to events that the dedicated task will pick up from time to time to apply the pending changes. This bug existed for a very long time and has never been reported so far. It was discovered by reading the code during the implementation of log backend ("mode log" in backends). As it involves changes in sensitive areas as well as thread isolation, it is probably not worth considering backporting it for now, unless it is proven that it will help to solve bugs that are actually encountered in the field. 
This patch depends on: - 24da4d3 ("MINOR: tools: use const for read only pointers in ip{cmp,cpy}") - c886fb5 ("MINOR: server/ip: centralize server ip updates") - event_hdl API (which was first seen on 2.8) + 683b2ae ("MINOR: server/event_hdl: add SERVER_INETADDR event") + BUG/MEDIUM: server/event_hdl: memory overrun in _srv_event_hdl_prepare_inetaddr() + "MINOR: event_hdl: add global tunables" Note that the patch may be reworked so that it doesn't depend on event_hdl API for older versions, the approach would remain the same: this would result in a larger patch due to the need to manually implement a global queue of pending updates with its dedicated task responsible for picking updates and comitting them. An alternative approach could consist in per-server, lock-protected, temporary addr:svc_port storage dedicated to "updaters" were only the most recent values would be kept. The sync task would then use them as source values to atomically update the addr:svc_port members that the runtime readers are actually using.
2023-11-13 12:14:24 -05:00
/* some events possibly required thread_isolation:
* now that we are done, we must leave thread isolation before
* returning
*/
if (thread_isolated())
thread_release();
BUG/MEDIUM: server/addr: fix tune.events.max-events-at-once event miss and leak An issue has been introduced with cd99440 ("BUG/MAJOR: server/addr: fix a race during server addr:svc_port updates"). Indeed, in the above commit we implemented the atomic_sync task which is responsible for consuming pending server events to apply the changes atomically. For now only server's addr updates are concerned. To prevent the task from causing contention, a budget was assigned to it. It can be controlled with the global tunable 'tune.events.max-events-at-once': the task may not process more than this number of events at once. However, a bug was introduced with this budget logic: each time the task has to be interrupted because it runs out of budget, we reschedule the task to finish where it left off, but the current event which was already removed from the queue wasn't processed yet. This means that this pending event (each tune.events.max-events-at-once) is effectively lost. When the atomic_sync task deals with large number of concurrent events, this bug has 2 known consequences: first a server's addr/port update will be lost every 'tune.events.max-events-at-once'. This can of course cause reliability issues because if the event is not republished periodically, the server could stay in a stale state for indefinite amount of time. This is the case when the DNS server flaps for instance: some servers may not come back UP after the incident as described in GH #2666. Another issue is that the lost event was not cleaned up, resulting in a small memory leak. So in the end, it means that the bug is likely to cause more and more degradation over time until haproxy is restarted. As a workaround, 'tune.events.max-events-at-once' may be set to the maximum number of events expected per batch. Note however that this value cannot exceed 10 000, otherwise it could cause the watchdog to trigger due to the task being busy for too long and preventing other threads from making any progress. 
Setting higher values may not be optimal for common workloads so it should only be used to mitigate the bug while waiting for this fix. Since tune.events.max-events-at-once defaults to 100, this bug only affects configs that involve more than 100 servers whose addr:port properties are likely to be updated at the same time (batched updates from cli, lua, dns..) To fix the bug, we move the budget check after the current event is fully handled. For that we went from a basic 'while' to 'do..while' loop as we assume from the config that 'tune.events.max-events-at-once' cannot be 0. While at it, we reschedule the task once thread isolation ends (it was not required to perform the reschedule while under isolation) to give the hand back faster to waiting threads. This patch should be backported up to 2.9 with cd99440. It should fix GH #2666.
2024-08-06 08:29:56 -04:00
if (!remain) {
/* we stopped because we've already spent all our budget here,
* and considering we possibly were under isolation, we cannot
* keep blocking other threads any longer.
*
* Reschedule the task to finish where we left off if
* there are remaining events in the queue.
*/
BUG_ON(task == NULL); // ending event doesn't decrement remain
BUG/MEDIUM: server/addr: fix tune.events.max-events-at-once event miss and leak An issue has been introduced with cd99440 ("BUG/MAJOR: server/addr: fix a race during server addr:svc_port updates"). Indeed, in the above commit we implemented the atomic_sync task which is responsible for consuming pending server events to apply the changes atomically. For now only server's addr updates are concerned. To prevent the task from causing contention, a budget was assigned to it. It can be controlled with the global tunable 'tune.events.max-events-at-once': the task may not process more than this number of events at once. However, a bug was introduced with this budget logic: each time the task has to be interrupted because it runs out of budget, we reschedule the task to finish where it left off, but the current event which was already removed from the queue wasn't processed yet. This means that this pending event (each tune.events.max-events-at-once) is effectively lost. When the atomic_sync task deals with large number of concurrent events, this bug has 2 known consequences: first a server's addr/port update will be lost every 'tune.events.max-events-at-once'. This can of course cause reliability issues because if the event is not republished periodically, the server could stay in a stale state for indefinite amount of time. This is the case when the DNS server flaps for instance: some servers may not come back UP after the incident as described in GH #2666. Another issue is that the lost event was not cleaned up, resulting in a small memory leak. So in the end, it means that the bug is likely to cause more and more degradation over time until haproxy is restarted. As a workaround, 'tune.events.max-events-at-once' may be set to the maximum number of events expected per batch. Note however that this value cannot exceed 10 000, otherwise it could cause the watchdog to trigger due to the task being busy for too long and preventing other threads from making any progress. 
Setting higher values may not be optimal for common workloads so it should only be used to mitigate the bug while waiting for this fix. Since tune.events.max-events-at-once defaults to 100, this bug only affects configs that involve more than 100 servers whose addr:port properties are likely to be updated at the same time (batched updates from cli, lua, dns..) To fix the bug, we move the budget check after the current event is fully handled. For that we went from a basic 'while' to 'do..while' loop as we assume from the config that 'tune.events.max-events-at-once' cannot be 0. While at it, we reschedule the task once thread isolation ends (it was not required to perform the reschedule while under isolation) to give the hand back faster to waiting threads. This patch should be backported up to 2.9 with cd99440. It should fix GH #2666.
2024-08-06 08:29:56 -04:00
if (!event_hdl_async_equeue_isempty(&server_atomic_sync_queue))
task_wakeup(task, TASK_WOKEN_OTHER);
}
BUG/MAJOR: server/addr: fix a race during server addr:svc_port updates For inet families (IP4/IP6), it is expected that server's addr/port might be updated at runtime from DNS, cli or lua for instance. Such updates were performed under the server's lock. Unfortunately, most readers such as backend.c or sink.c perform the read without taking server's lock because they can't afford slowing down their processing for a type of event which is normally rare. But this could result in bad values being read for the server addr:svc_port tuple (ie: during connection etablishment) as a result of concurrent updates from external components, which can obviously cause some undesirable effects. Instead of slowing the readers down, as we consider server's addr changes are relatively rare, we take another approach and try to update the addr:port atomically by performing changes under full thread isolation when a new change is requested. The changes are performed by a dedicated task which takes care of isolating the current thread and doesn't depend on other threads (independent code path) to protect against dead locks. As such, server's addr:port changes will now be performed atomically, but they will not be processed instantly, they will be translated to events that the dedicated task will pick up from time to time to apply the pending changes. This bug existed for a very long time and has never been reported so far. It was discovered by reading the code during the implementation of log backend ("mode log" in backends). As it involves changes in sensitive areas as well as thread isolation, it is probably not worth considering backporting it for now, unless it is proven that it will help to solve bugs that are actually encountered in the field. 
This patch depends on: - 24da4d3 ("MINOR: tools: use const for read only pointers in ip{cmp,cpy}") - c886fb5 ("MINOR: server/ip: centralize server ip updates") - event_hdl API (which was first seen on 2.8) + 683b2ae ("MINOR: server/event_hdl: add SERVER_INETADDR event") + BUG/MEDIUM: server/event_hdl: memory overrun in _srv_event_hdl_prepare_inetaddr() + "MINOR: event_hdl: add global tunables" Note that the patch may be reworked so that it doesn't depend on event_hdl API for older versions, the approach would remain the same: this would result in a larger patch due to the need to manually implement a global queue of pending updates with its dedicated task responsible for picking updates and comitting them. An alternative approach could consist in per-server, lock-protected, temporary addr:svc_port storage dedicated to "updaters" were only the most recent values would be kept. The sync task would then use them as source values to atomically update the addr:svc_port members that the runtime readers are actually using.
2023-11-13 12:14:24 -05:00
return task;
}
/* Try to start the atomic server sync task.
*
* Returns ERR_NONE on success and a combination of ERR_CODE on failure
*/
static int server_atomic_sync_start()
{
struct event_hdl_sub_type subscriptions = EVENT_HDL_SUB_NONE;
if (server_atomic_sync_task)
return ERR_NONE; // nothing to do
server_atomic_sync_task = task_new_anywhere();
if (!server_atomic_sync_task)
goto fail;
server_atomic_sync_task->process = server_atomic_sync;
event_hdl_async_equeue_init(&server_atomic_sync_queue);
/* task created, now subscribe to relevant server events in the global list */
subscriptions = event_hdl_sub_type_add(subscriptions, EVENT_HDL_SUB_SERVER_INETADDR);
if (!event_hdl_subscribe(NULL, subscriptions,
EVENT_HDL_ASYNC_TASK(&server_atomic_sync_queue,
server_atomic_sync_task,
NULL,
NULL)))
goto fail;
return ERR_NONE;
fail:
task_destroy(server_atomic_sync_task);
server_atomic_sync_task = NULL;
return ERR_ALERT | ERR_FATAL;
}
REGISTER_POST_CHECK(server_atomic_sync_start);
/* Fill the members shared by all server event payloads.
 *
 * <cb_data> receives a snapshot of <srv> identifiers (puid, rid, flags, name
 * and, when the server is attached to a proxy, the proxy name and uuid) in
 * its "safe" part, and the server pointer plus the locking context in its
 * "unsafe" part. <thread_isolate> tells whether the caller runs under thread
 * isolation; when it is zero the event is flagged as published under the
 * server lock.
 *
 * Must be called with server lock or under thread isolate.
 */
static inline void _srv_event_hdl_prepare(struct event_hdl_cb_data_server *cb_data,
                                          struct server *srv, uint8_t thread_isolate)
{
	/* unsafe part: live pointer and the context it may be used in */
	cb_data->unsafe.ptr = srv;
	cb_data->unsafe.srv_lock = !thread_isolate;
	cb_data->unsafe.thread_isolate = thread_isolate;

	/* safe part: plain copies of the server identifiers */
	snprintf(cb_data->safe.name, sizeof(cb_data->safe.name), "%s", srv->id);
	cb_data->safe.flags = srv->flags;
	cb_data->safe.rid = srv->rid;
	cb_data->safe.puid = srv->puid;

	if (srv->proxy) {
		snprintf(cb_data->safe.proxy_name, sizeof(cb_data->safe.proxy_name), "%s", srv->proxy->id);
		cb_data->safe.proxy_uuid = srv->proxy->uuid;
	}
	else {
		/* no proxy: empty name and default uuid */
		cb_data->safe.proxy_name[0] = '\0';
		cb_data->safe.proxy_uuid = -1;
	}
}
/* Copy the current result of live check <check> into the event snapshot
 * <checkres>: agent/health kind, result, duration, status and code, and the
 * health counters (current, rise, fall).
 */
void _srv_event_hdl_prepare_checkres(struct event_hdl_cb_data_server_checkres *checkres,
                                     struct check *check)
{
	/* health counters */
	checkres->health.fall = check->fall;
	checkres->health.rise = check->rise;
	checkres->health.cur = check->health;

	/* why the check ended the way it did */
	checkres->reason.code = check->code;
	checkres->reason.status = check->status;

	/* outcome */
	checkres->duration = check->duration;
	checkres->result = check->result;
	checkres->agent = (check->state & CHK_ST_AGENT) ? 1 : 0;
}
/* Prepare SERVER_STATE event
 *
 * Fills <cb_data> with hints about the state change of <srv>: the change
 * <type> (non-zero for administrative, zero for operational), its <cause>,
 * the previous state <prev_state>, the current state read from the server and
 * the <requeued> value. For operational changes caused by a health or agent
 * check, a snapshot of the matching check result is added.
 *
 * Must be called with server lock held
 */
void _srv_event_hdl_prepare_state(struct event_hdl_cb_data_server_state *cb_data,
                                  struct server *srv, int type, int cause,
                                  enum srv_state prev_state, int requeued)
{
	/* hints common to both kinds of state change */
	cb_data->safe.requeued = requeued;
	cb_data->safe.old_state = prev_state;
	cb_data->safe.new_state = srv->cur_state;
	cb_data->safe.type = type;

	if (type) {
		/* administrative change: only the cause is reported */
		cb_data->safe.adm_st_chg.cause = cause;
		return;
	}

	/* operational change */
	cb_data->safe.op_st_chg.cause = cause;

	/* check-driven changes also carry the result of the involved check */
	if (cause == SRV_OP_STCHGC_HEALTH)
		_srv_event_hdl_prepare_checkres(&cb_data->safe.op_st_chg.check, &srv->check);
	else if (cause == SRV_OP_STCHGC_AGENT)
		_srv_event_hdl_prepare_checkres(&cb_data->safe.op_st_chg.check, &srv->agent);
}
/* Prepare SERVER_INETADDR event, prev data is learned from the current
* server settings.
*
* This special event will contain extra hints related to the addr change
*
* Must be called with the server lock held.
*/
static void _srv_event_hdl_prepare_inetaddr(struct event_hdl_cb_data_server_inetaddr *cb_data,
struct server *srv,
const struct server_inetaddr *next_inetaddr,
struct server_inetaddr_updater updater)
{
struct server_inetaddr prev_inetaddr;
server_get_inetaddr(srv, &prev_inetaddr);
/* only INET families are supported */
BUG_ON((next_inetaddr->family != AF_UNSPEC &&
next_inetaddr->family != AF_INET && next_inetaddr->family != AF_INET6));
/* prev */
cb_data->safe.prev = prev_inetaddr;
/* next */
cb_data->safe.next = *next_inetaddr;
/* updater */
cb_data->safe.updater = updater;
}
/* server event publishing helper: publish event <e> carrying data <d> for
 * server <s> in both the server dedicated subscription list and the global
 * subscription list.
 *
 * <d> is the data object itself (not a pointer), <s> is a server pointer.
 * <e> and <d> are expanded twice (once per publication), so they must be
 * free of side effects. All parameters are parenthesized in the expansion so
 * that non-trivial expressions may safely be passed.
 *
 * The statement expression evaluates to the result of the publication in the
 * global list.
 */
#define _srv_event_hdl_publish(e, d, s)                                        \
	({                                                                     \
		/* publish in server dedicated sub list */                     \
		event_hdl_publish(&(s)->e_subs, (e), EVENT_HDL_CB_DATA(&(d))); \
		/* publish in global subscription list */                      \
		event_hdl_publish(NULL, (e), EVENT_HDL_CB_DATA(&(d)));         \
	})
/* General server event publishing:
* Use this to publish EVENT_HDL_SUB_SERVER family type event
* from srv facility.
*
* server ptr must be valid.
* Must be called with srv lock or under thread_isolate.
*/
static void srv_event_hdl_publish(struct event_hdl_sub_type event,
struct server *srv, uint8_t thread_isolate)
{
struct event_hdl_cb_data_server cb_data;
/* prepare event data */
_srv_event_hdl_prepare(&cb_data, srv, thread_isolate);
_srv_event_hdl_publish(event, cb_data, srv);
}
/* Publish SERVER_CHECK event
*
* This special event will contain extra hints related to the check itself
*
* Must be called with server lock held
*/
void srv_event_hdl_publish_check(struct server *srv, struct check *check)
{
struct event_hdl_cb_data_server_check cb_data;
/* check event provides additional info about the server check */
_srv_event_hdl_prepare_checkres(&cb_data.safe.res, check);
cb_data.unsafe.ptr = check;
/* prepare event data (common server data) */
_srv_event_hdl_prepare((struct event_hdl_cb_data_server *)&cb_data, srv, 0);
_srv_event_hdl_publish(EVENT_HDL_SUB_SERVER_CHECK, cb_data, srv);
}
/*
 * Look for a hash collision: walk all the servers of <s>'s proxy and emit a
 * warning for each other server, not in forced maintenance, whose cookie is
 * equal to the one of <s>.
 * Unlikely, but it can happen. The server's proxy must be at least
 * read-locked.
 */
static inline void srv_check_for_dup_dyncookie(struct server *s)
{
	struct server *other = s->proxy->srv;

	while (other != NULL) {
		if (other != s &&
		    !(other->next_admin & SRV_ADMF_FMAINT) &&
		    other->cookie &&
		    strcmp(other->cookie, s->cookie) == 0) {
			ha_warning("We generated two equal cookies for two different servers.\n"
			           "Please change the secret key for '%s'.\n",
			           s->proxy->id);
		}
		other = other->next;
	}
}
/*
* Must be called with the server lock held, and will read-lock the proxy.
*/
void srv_set_dyncookie(struct server *s)
{
struct proxy *p = s->proxy;
char *tmpbuf;
unsigned long long hash_value;
size_t key_len;
size_t buffer_len;
int addr_len;
int port;
HA_RWLOCK_RDLOCK(PROXY_LOCK, &p->lock);
BUG/MAJOR: queue/threads: avoid an AB/BA locking issue in process_srv_queue() A problem involving server slowstart was reported by @max2k1 in issue #197. The problem is that pendconn_grab_from_px() takes the proxy lock while already under the server's lock while process_srv_queue() first takes the proxy's lock then the server's lock. While the latter seems more natural, it is fundamentally incompatible with mayn other operations performed on servers, namely state change propagation, where the proxy is only known after the server and cannot be locked around the servers. Howwever reversing the lock in process_srv_queue() is trivial and only the few functions related to dynamic cookies need to be adjusted for this so that the proxy's lock is taken for each server operation. This is possible because the proxy's server list is built once at boot time and remains stable. So this is what this patch does. The comments in the proxy and server structs were updated to mention this rule that the server's lock may not be taken under the proxy's lock but may enclose it. Another approach could consist in using a second lock for the proxy's queue which would be different from the regular proxy's lock, but given that the operations above are rare and operate on small servers list, there is no reason for overdesigning a solution. This fix was successfully tested with 10000 servers in a backend where adjusting the dyncookies in loops over the CLI didn't have a measurable impact on the traffic. The only workaround without the fix is to disable any occurrence of "slowstart" on server lines, or to disable threads using "nbthread 1". This must be backported as far as 1.8.
2019-07-30 05:59:34 -04:00
if ((s->flags & SRV_F_COOKIESET) ||
!(s->proxy->ck_opts & PR_CK_DYNAMIC) ||
s->proxy->dyncookie_key == NULL)
BUG/MAJOR: queue/threads: avoid an AB/BA locking issue in process_srv_queue() A problem involving server slowstart was reported by @max2k1 in issue #197. The problem is that pendconn_grab_from_px() takes the proxy lock while already under the server's lock while process_srv_queue() first takes the proxy's lock then the server's lock. While the latter seems more natural, it is fundamentally incompatible with mayn other operations performed on servers, namely state change propagation, where the proxy is only known after the server and cannot be locked around the servers. Howwever reversing the lock in process_srv_queue() is trivial and only the few functions related to dynamic cookies need to be adjusted for this so that the proxy's lock is taken for each server operation. This is possible because the proxy's server list is built once at boot time and remains stable. So this is what this patch does. The comments in the proxy and server structs were updated to mention this rule that the server's lock may not be taken under the proxy's lock but may enclose it. Another approach could consist in using a second lock for the proxy's queue which would be different from the regular proxy's lock, but given that the operations above are rare and operate on small servers list, there is no reason for overdesigning a solution. This fix was successfully tested with 10000 servers in a backend where adjusting the dyncookies in loops over the CLI didn't have a measurable impact on the traffic. The only workaround without the fix is to disable any occurrence of "slowstart" on server lines, or to disable threads using "nbthread 1". This must be backported as far as 1.8.
2019-07-30 05:59:34 -04:00
goto out;
key_len = strlen(p->dyncookie_key);
if (s->addr.ss_family != AF_INET &&
s->addr.ss_family != AF_INET6)
BUG/MAJOR: queue/threads: avoid an AB/BA locking issue in process_srv_queue() A problem involving server slowstart was reported by @max2k1 in issue #197. The problem is that pendconn_grab_from_px() takes the proxy lock while already under the server's lock while process_srv_queue() first takes the proxy's lock then the server's lock. While the latter seems more natural, it is fundamentally incompatible with mayn other operations performed on servers, namely state change propagation, where the proxy is only known after the server and cannot be locked around the servers. Howwever reversing the lock in process_srv_queue() is trivial and only the few functions related to dynamic cookies need to be adjusted for this so that the proxy's lock is taken for each server operation. This is possible because the proxy's server list is built once at boot time and remains stable. So this is what this patch does. The comments in the proxy and server structs were updated to mention this rule that the server's lock may not be taken under the proxy's lock but may enclose it. Another approach could consist in using a second lock for the proxy's queue which would be different from the regular proxy's lock, but given that the operations above are rare and operate on small servers list, there is no reason for overdesigning a solution. This fix was successfully tested with 10000 servers in a backend where adjusting the dyncookies in loops over the CLI didn't have a measurable impact on the traffic. The only workaround without the fix is to disable any occurrence of "slowstart" on server lines, or to disable threads using "nbthread 1". This must be backported as far as 1.8.
2019-07-30 05:59:34 -04:00
goto out;
/*
* Buffer to calculate the cookie value.
* The buffer contains the secret key + the server IP address
* + the TCP port.
*/
addr_len = (s->addr.ss_family == AF_INET) ? 4 : 16;
/*
* The TCP port should use only 2 bytes, but is stored in
* an unsigned int in struct server, so let's use 4, to be
* on the safe side.
*/
buffer_len = key_len + addr_len + 4;
tmpbuf = trash.area;
memcpy(tmpbuf, p->dyncookie_key, key_len);
memcpy(&(tmpbuf[key_len]),
s->addr.ss_family == AF_INET ?
(void *)&((struct sockaddr_in *)&s->addr)->sin_addr.s_addr :
(void *)&(((struct sockaddr_in6 *)&s->addr)->sin6_addr.s6_addr),
addr_len);
/*
* Make sure it's the same across all the load balancers,
* no matter their endianness.
*/
port = htonl(s->svc_port);
memcpy(&tmpbuf[key_len + addr_len], &port, 4);
hash_value = XXH64(tmpbuf, buffer_len, 0);
memprintf(&s->cookie, "%016llx", hash_value);
if (!s->cookie)
BUG/MAJOR: queue/threads: avoid an AB/BA locking issue in process_srv_queue() A problem involving server slowstart was reported by @max2k1 in issue #197. The problem is that pendconn_grab_from_px() takes the proxy lock while already under the server's lock while process_srv_queue() first takes the proxy's lock then the server's lock. While the latter seems more natural, it is fundamentally incompatible with mayn other operations performed on servers, namely state change propagation, where the proxy is only known after the server and cannot be locked around the servers. Howwever reversing the lock in process_srv_queue() is trivial and only the few functions related to dynamic cookies need to be adjusted for this so that the proxy's lock is taken for each server operation. This is possible because the proxy's server list is built once at boot time and remains stable. So this is what this patch does. The comments in the proxy and server structs were updated to mention this rule that the server's lock may not be taken under the proxy's lock but may enclose it. Another approach could consist in using a second lock for the proxy's queue which would be different from the regular proxy's lock, but given that the operations above are rare and operate on small servers list, there is no reason for overdesigning a solution. This fix was successfully tested with 10000 servers in a backend where adjusting the dyncookies in loops over the CLI didn't have a measurable impact on the traffic. The only workaround without the fix is to disable any occurrence of "slowstart" on server lines, or to disable threads using "nbthread 1". This must be backported as far as 1.8.
2019-07-30 05:59:34 -04:00
goto out;
s->cklen = 16;
/* Don't bother checking if the dyncookie is duplicated if
* the server is marked as "disabled", maybe it doesn't have
* its real IP yet, but just a place holder.
*/
if (!(s->next_admin & SRV_ADMF_FMAINT))
srv_check_for_dup_dyncookie(s);
BUG/MAJOR: queue/threads: avoid an AB/BA locking issue in process_srv_queue() A problem involving server slowstart was reported by @max2k1 in issue #197. The problem is that pendconn_grab_from_px() takes the proxy lock while already under the server's lock while process_srv_queue() first takes the proxy's lock then the server's lock. While the latter seems more natural, it is fundamentally incompatible with mayn other operations performed on servers, namely state change propagation, where the proxy is only known after the server and cannot be locked around the servers. Howwever reversing the lock in process_srv_queue() is trivial and only the few functions related to dynamic cookies need to be adjusted for this so that the proxy's lock is taken for each server operation. This is possible because the proxy's server list is built once at boot time and remains stable. So this is what this patch does. The comments in the proxy and server structs were updated to mention this rule that the server's lock may not be taken under the proxy's lock but may enclose it. Another approach could consist in using a second lock for the proxy's queue which would be different from the regular proxy's lock, but given that the operations above are rare and operate on small servers list, there is no reason for overdesigning a solution. This fix was successfully tested with 10000 servers in a backend where adjusting the dyncookies in loops over the CLI didn't have a measurable impact on the traffic. The only workaround without the fix is to disable any occurrence of "slowstart" on server lines, or to disable threads using "nbthread 1". This must be backported as far as 1.8.
2019-07-30 05:59:34 -04:00
out:
HA_RWLOCK_RDUNLOCK(PROXY_LOCK, &p->lock);
}
/* Returns true if it's possible to reuse an idle connection from server <srv>
* for a websocket stream. This is the case if server is configured to use the
* same protocol for both HTTP and websocket streams. This depends on the value
* of "proto", "alpn" and "ws" keywords.
*/
int srv_check_reuse_ws(struct server *srv)
{
if (srv->mux_proto || srv->use_ssl != 1 || !srv->ssl_ctx.alpn_str) {
/* explicit srv.mux_proto or no ALPN : srv.mux_proto is used
* for mux selection.
*/
const struct ist srv_mux = srv->mux_proto ?
srv->mux_proto->token : IST_NULL;
switch (srv->ws) {
/* "auto" means use the same protocol : reuse is possible. */
case SRV_WS_AUTO:
return 1;
/* "h2" means use h2 for websocket : reuse is possible if
* server mux is h2.
*/
case SRV_WS_H2:
if (srv->mux_proto && isteq(srv_mux, ist("h2")))
return 1;
break;
/* "h1" means use h1 for websocket : reuse is possible if
* server mux is h1.
*/
case SRV_WS_H1:
if (!srv->mux_proto || isteq(srv_mux, ist("h1")))
return 1;
break;
}
}
else {
/* ALPN selection.
* Based on the assumption that only "h2" and "http/1.1" token
* are used on server ALPN.
*/
const struct ist alpn = ist2(srv->ssl_ctx.alpn_str,
srv->ssl_ctx.alpn_len);
switch (srv->ws) {
case SRV_WS_AUTO:
/* for auto mode, consider reuse as possible if the
* server uses a single protocol ALPN
*/
if (!istchr(alpn, ','))
return 1;
break;
case SRV_WS_H2:
return isteq(alpn, ist("\x02h2"));
case SRV_WS_H1:
return isteq(alpn, ist("\x08http/1.1"));
}
}
return 0;
}
/* Return the mux to use for a websocket stream on <srv> when ALPN is not
 * involved: the server's own mux proto in "auto" mode, or the one registered
 * for "h1" or "h2" when the websocket protocol is forced. NULL is a valid
 * value indicating to use the fallback mux.
 */
const struct mux_ops *srv_get_ws_proto(struct server *srv)
{
	const struct mux_proto_list *proto = NULL;

	switch (srv->ws) {
	case SRV_WS_H2:
		proto = get_mux_proto(ist("h2"));
		break;

	case SRV_WS_H1:
		proto = get_mux_proto(ist("h1"));
		break;

	case SRV_WS_AUTO:
		proto = srv->mux_proto;
		break;
	}

	if (!proto)
		return NULL;

	return proto->mux;
}
/*
* Must be called with the server lock held. The server is first removed from
* the proxy tree if it was already attached. If <reattach> is true, the server
* will then be attached in the proxy tree. The proxy lock is held to
* manipulate the tree.
*/
static void srv_set_addr_desc(struct server *s, int reattach)
{
struct proxy *p = s->proxy;
char *key;
BUG/MEDIUM: server: fix race on server_atomic_sync() The following patch fixes a race condition during server addr/port update : cd994407a9545a8d84e410dc0cc18c30966b70d8 BUG/MAJOR: server/addr: fix a race during server addr:svc_port updates The new update mechanism is implemented via an event update. It uses thread isolation to guarantee that no other thread is accessing server addr/port. Furthermore, to ensure server instance is not deleted just before the event handler, server instance is lookup via its ID in proxy tree. However, thread isolation is only entered after server lookup. This leaves a tiny race condition as the thread will be marked as harmless and a concurrent thread can delete the server in the meantime. This causes server_atomic_sync() to manipulated a deleted server instance to reinsert it in used_server_addr backend tree. This can cause a segfault during this operation or possibly on a future used_server_addr tree access. This issue was detected by criteo. Several backtraces were retrieved, each related to server addr_node insert or delete operation, either in srv_set_addr_desc(), or add/delete dynamic server handlers. To fix this, simply extend thread isolation section to start it before server lookup. This ensures that once retrieved the server cannot be deleted until its addr/port are updated. To ensure this issue won't happen anymore, a new BUG_ON() is added in srv_set_addr_desc(). Also note that ebpt_delete() is now called every time on delete handler as this is a safe idempotent operation. To reproduce these crashes, a script was executed to add then remove different servers every second. In parallel, the following CLI command was issued repeatdly without any delay to force multiple update on servers port : set server <srv> addr 0.0.0.0 port $((1024 + RANDOM % 1024)) This must be backported at least up to 3.0. If above mentionned patch has been selected for previous version, this commit must also be backported on them.
2024-07-02 12:14:57 -04:00
/* Risk of used_server_addr tree corruption if server is already deleted. */
BUG_ON(s->flags & SRV_F_DELETED);
key = sa2str(&s->addr, s->svc_port, s->flags & SRV_F_MAPPORTS);
if (s->addr_key) {
if (key && strcmp(key, s->addr_key) == 0) {
free(key);
return;
}
HA_RWLOCK_WRLOCK(PROXY_LOCK, &p->lock);
cebuis_item_delete(&p->used_server_addr, addr_node, addr_key, s);
HA_RWLOCK_WRUNLOCK(PROXY_LOCK, &p->lock);
free(s->addr_key);
}
s->addr_key = key;
if (reattach) {
if (s->addr_key) {
HA_RWLOCK_WRLOCK(PROXY_LOCK, &p->lock);
cebuis_item_insert(&p->used_server_addr, addr_node, addr_key, s);
HA_RWLOCK_WRUNLOCK(PROXY_LOCK, &p->lock);
}
}
}
/*
 * Appends the server keyword list <kwl> to the list of registered server
 * keyword lists, making its keywords valid for next parsing sessions.
 */
void srv_register_keywords(struct srv_kw_list *kwl)
{
	struct list *registered = &srv_keywords.list;

	LIST_APPEND(registered, &kwl->list);
}
/* Look up the server keyword <kw> in all registered keyword lists and return
 * a pointer to it, or NULL if not found. An entry with a valid ->parse()
 * function is returned as soon as it is met; an entry with a NULL ->parse()
 * is only returned when no such entry exists (the last one met wins). This
 * way it is possible to declare platform-dependant, known keywords as NULL,
 * then only declare them as valid if some options are met. Note that if the
 * requested keyword contains an opening parenthesis, everything from this
 * point is ignored.
 */
struct srv_kw *srv_find_kw(const char *kw)
{
	struct srv_kw *fallback = NULL;
	struct srv_kw_list *kwl;
	const char *paren;
	size_t len;
	int i;

	/* only the part before the first '(' takes part in the comparison */
	paren = strchr(kw, '(');
	len = paren ? (size_t)(paren - kw) : strlen(kw);

	list_for_each_entry(kwl, &srv_keywords.list, list) {
		for (i = 0; kwl->kw[i].kw != NULL; i++) {
			struct srv_kw *cur = &kwl->kw[i];

			/* exact match required: same prefix and same length */
			if (strncmp(cur->kw, kw, len) != 0 || cur->kw[len] != 0)
				continue;

			if (cur->parse)
				return cur; /* found it !*/

			fallback = cur; /* may be OK */
		}
	}
	return fallback;
}
/* Appends one line per registered "server" keyword to the string pointed to
 * by <out>, which is first reset to NULL and then grown with memprintf().
 * Nothing is done when <out> itself is NULL. Each line shows the scope of the
 * keyword list, the keyword name, " <arg>" when ->skip is non-zero,
 * " [dflt_ok]" when ->default_ok is set and " (not supported)" when the
 * keyword has no ->parse() function. An unsupported entry is only dumped when
 * srv_find_kw() resolves its name to that very entry, i.e. when no supported
 * form of the same keyword was found.
 */
void srv_dump_kws(char **out)
{
	struct srv_kw_list *kwl;
	int i;

	if (!out)
		return;

	*out = NULL;
	list_for_each_entry(kwl, &srv_keywords.list, list) {
		for (i = 0; kwl->kw[i].kw != NULL; i++) {
			struct srv_kw *cur = &kwl->kw[i];

			/* skip unsupported entries that another entry supersedes */
			if (!cur->parse && srv_find_kw(cur->kw) != cur)
				continue;

			memprintf(out, "%s[%4s] %s%s%s%s\n", *out ? *out : "",
			          kwl->scope,
			          cur->kw,
			          cur->skip ? " <arg>" : "",
			          cur->default_ok ? " [dflt_ok]" : "",
			          cur->parse ? "" : " (not supported)");
		}
	}
}
[MEDIUM]: rework checks handling This patch adds two new variables: fastinter and downinter. When server state is: - non-transitionally UP -> inter (no change) - transitionally UP (going down), unchecked or transitionally DOWN (going up) -> fastinter - down -> downinter It allows to set something like: server sr6 127.0.51.61:80 cookie s6 check inter 10000 downinter 20000 fastinter 500 fall 3 weight 40 In the above example haproxy uses 10000ms between checks but as soon as one check fails fastinter (500ms) is used. If server is down downinter (20000) is used or fastinter (500ms) if one check pass. Fastinter is also used when haproxy starts. New "timeout.check" variable was added, if set haproxy uses it as an additional read timeout, but only after a connection has been already established. I was thinking about using "timeout.server" here but most people set this with an addition reserve but still want checks to kick out laggy servers. Please also note that in most cases check request is much simpler and faster to handle than normal requests so this timeout should be smaller. I also changed the timeout used for check connections establishing. Changes from the previous version: - use tv_isset() to check if the timeout is set, - use min("timeout connect", "inter") but only if "timeout check" is set as this min alone may be to short for full (connect + read) check, - debug code (fprintf) commented/removed - documentation Compile tested only (sorry!) as I'm currently traveling but changes are rather small and trivial.
2008-01-20 19:54:06 -05:00
/* Try to find in srv_keyword the word that looks closest to <word> by counting
* transitions between letters, digits and other characters. Will return the
* best matching word if found, otherwise NULL. An optional array of extra
* words to compare may be passed in <extra>, but it must then be terminated
* by a NULL entry. If unused it may be NULL.
*/
static const char *srv_find_best_kw(const char *word)
{
uint8_t word_sig[1024];
uint8_t list_sig[1024];
const struct srv_kw_list *kwl;
const char *best_ptr = NULL;
int dist, best_dist = INT_MAX;
const char **extra;
int index;
make_word_fingerprint(word_sig, word);
list_for_each_entry(kwl, &srv_keywords.list, list) {
for (index = 0; kwl->kw[index].kw != NULL; index++) {
make_word_fingerprint(list_sig, kwl->kw[index].kw);
dist = word_fingerprint_distance(word_sig, list_sig);
if (dist < best_dist) {
best_dist = dist;
best_ptr = kwl->kw[index].kw;
}
}
}
for (extra = extra_kw_list; *extra; extra++) {
make_word_fingerprint(list_sig, *extra);
dist = word_fingerprint_distance(word_sig, list_sig);
if (dist < best_dist) {
best_dist = dist;
best_ptr = *extra;
}
}
if (best_dist > 2 * strlen(word) || (best_ptr && best_dist > 2 * strlen(best_ptr)))
best_ptr = NULL;
return best_ptr;
}
/* Returns the first unused server ID greater than or equal to <from> in the
 * proxy <px>, based on the IDs indexed in px->conf.used_server_id. Zero is
 * returned when <from> wraps around without finding a spare ID (should never
 * happen).
 */
uint server_get_next_id(const struct proxy *px, uint from)
{
	const struct server *sv;

	for (;;) {
		sv = ceb32_item_lookup_ge(&px->conf.used_server_id, conf.puid_node, puid, from, struct server);

		/* no server at or above <from>, or a gap right at <from> */
		if (!sv || sv->puid > from)
			break;

		/* <from> is taken, try the next one; stop on wrap-around */
		if (++from == 0)
			break;
	}
	return from;
}
/* Parse the "backup" server keyword: sets the SRV_F_BACKUP flag in
 * <newsrv>->flags. No argument is read and <err> is never filled. Always
 * returns 0.
 */
static int srv_parse_backup(char **args, int *cur_arg,
struct proxy *curproxy, struct server *newsrv, char **err)
{
newsrv->flags |= SRV_F_BACKUP;
return 0;
}
/* Parse the "cookie" server keyword. The mandatory value following the keyword
 * is duplicated into <newsrv>->cookie (releasing any previous value), its
 * length is stored into <newsrv>->cklen and SRV_F_COOKIESET is set in
 * <newsrv>->flags. Returns 0 on success. Returns ERR_ALERT | ERR_FATAL with
 * <err> filled when the value is missing or when the allocation fails; in both
 * cases <newsrv> is left unchanged.
 */
static int srv_parse_cookie(char **args, int *cur_arg,
                            struct proxy *curproxy, struct server *newsrv, char **err)
{
	char *arg;
	char *cookie;

	arg = args[*cur_arg + 1];
	if (!*arg) {
		memprintf(err, "'%s' expects <value> as argument.\n", args[*cur_arg]);
		return ERR_ALERT | ERR_FATAL;
	}

	/* Duplicate before touching the server so that a failed allocation
	 * neither drops the previous cookie nor leaves <cklen> describing a
	 * NULL string.
	 */
	cookie = strdup(arg);
	if (!cookie) {
		memprintf(err, "'%s' : out of memory", args[*cur_arg]);
		return ERR_ALERT | ERR_FATAL;
	}

	free(newsrv->cookie);
	newsrv->cookie = cookie;
	newsrv->cklen = strlen(arg);
	newsrv->flags |= SRV_F_COOKIESET;
	return 0;
}
/* Parse the "disabled" server keyword. Sets both SRV_ADMF_CMAINT and
 * SRV_ADMF_FMAINT in <newsrv>->next_admin, sets ->next_state to
 * SRV_ST_STOPPED, flags the check as CHK_ST_PAUSED and resets the check's
 * health counter to 0. No argument is read. Always returns 0.
 */
static int srv_parse_disabled(char **args, int *cur_arg,
struct proxy *curproxy, struct server *newsrv, char **err)
{
newsrv->next_admin |= SRV_ADMF_CMAINT | SRV_ADMF_FMAINT;
newsrv->next_state = SRV_ST_STOPPED;
newsrv->check.state |= CHK_ST_PAUSED;
newsrv->check.health = 0;
return 0;
}
/* Parse the "enabled" server keyword. This is the exact opposite of what
 * srv_parse_disabled() does: both SRV_ADMF_CMAINT and SRV_ADMF_FMAINT are
 * cleared from <newsrv>->next_admin, ->next_state becomes SRV_ST_RUNNING, the
 * CHK_ST_PAUSED flag is removed from the check and the check's health counter
 * is set to its <rise> value. No argument is read. Always returns 0.
 */
static int srv_parse_enabled(char **args, int *cur_arg,
struct proxy *curproxy, struct server *newsrv, char **err)
{
/* clears the two maintenance flags at once */
newsrv->next_admin &= ~SRV_ADMF_CMAINT & ~SRV_ADMF_FMAINT;
newsrv->next_state = SRV_ST_RUNNING;
newsrv->check.state &= ~CHK_ST_PAUSED;
newsrv->check.health = newsrv->check.rise;
return 0;
}
/* Parse the "error-limit" server keyword. The argument following the keyword
 * is converted with atoi() and stored into
 * <newsrv>->consecutive_errors_limit. Returns 0 on success, or
 * ERR_ALERT | ERR_FATAL with <err> filled when the argument is missing or when
 * the resulting value is not strictly positive.
 */
static int srv_parse_error_limit(char **args, int *cur_arg,
                                 struct proxy *curproxy, struct server *newsrv, char **err)
{
	const char *kw = args[*cur_arg];
	const char *value = args[*cur_arg + 1];

	if (*value == '\0') {
		memprintf(err, "'%s' expects an integer argument.", kw);
		return ERR_ALERT | ERR_FATAL;
	}

	newsrv->consecutive_errors_limit = atoi(value);
	if (newsrv->consecutive_errors_limit > 0)
		return 0;

	memprintf(err, "%s has to be > 0.", kw);
	return ERR_ALERT | ERR_FATAL;
}
/* Parse the "guid" keyword. The argument following the keyword is handed to
 * guid_insert() together with the server's object type. Returns 0 on success,
 * or ERR_ALERT | ERR_FATAL with <err> filled when the argument is missing or
 * when guid_insert() reports a failure, in which case the message it produced
 * is embedded into <err> and then released.
 */
static int srv_parse_guid(char **args, int *cur_arg,
                          struct proxy *curproxy, struct server *newsrv, char **err)
{
	const char *kw = args[*cur_arg];
	const char *value = args[*cur_arg + 1];
	char *reason = NULL;

	if (*value == '\0') {
		memprintf(err, "'%s' : expects an argument", kw);
		return ERR_ALERT | ERR_FATAL;
	}

	if (!guid_insert(&newsrv->obj_type, value, &reason))
		return 0;

	/* insertion refused: report why, then drop the temporary message */
	memprintf(err, "'%s': %s", kw, reason);
	ha_free(&reason);
	return ERR_ALERT | ERR_FATAL;
}
/* Parse the "ws" keyword. The argument following the keyword must be one of
 * "h1", "h2" or "auto" and selects SRV_WS_H1, SRV_WS_H2 or SRV_WS_AUTO for
 * <newsrv>->ws. Returns 0 on success, or ERR_ALERT | ERR_FATAL with <err>
 * filled when the value is missing or is not one of the supported ones.
 */
static int srv_parse_ws(char **args, int *cur_arg,
                        struct proxy *curproxy, struct server *newsrv, char **err)
{
	const char *mode = args[*cur_arg + 1];

	/* The sibling parsers detect a missing argument as an empty string, so
	 * test for that in addition to the NULL pointer, otherwise the
	 * "expects" diagnostic below is never emitted for an empty value.
	 */
	if (!mode || !*mode) {
		memprintf(err, "'%s' expects 'auto', 'h1' or 'h2' value", args[*cur_arg]);
		return ERR_ALERT | ERR_FATAL;
	}

	if (strcmp(mode, "h1") == 0) {
		newsrv->ws = SRV_WS_H1;
	}
	else if (strcmp(mode, "h2") == 0) {
		newsrv->ws = SRV_WS_H2;
	}
	else if (strcmp(mode, "auto") == 0) {
		newsrv->ws = SRV_WS_AUTO;
	}
	else {
		memprintf(err, "'%s' has to be 'auto', 'h1' or 'h2'", args[*cur_arg]);
		return ERR_ALERT | ERR_FATAL;
	}

	return 0;
}
/* Parse the "hash-key" server keyword. The argument following the keyword
 * must be one of "id", "addr" or "addr-port" and selects SRV_HASH_KEY_ID,
 * SRV_HASH_KEY_ADDR or SRV_HASH_KEY_ADDR_PORT for <newsrv>->hash_key. Returns
 * 0 on success, or ERR_ALERT | ERR_FATAL with <err> filled when the value is
 * missing or is not one of the supported ones.
 */
static int srv_parse_hash_key(char **args, int *cur_arg,
                              struct proxy *curproxy, struct server *newsrv, char **err)
{
	const char *key = args[*cur_arg + 1];

	/* The sibling parsers detect a missing argument as an empty string, so
	 * test for that in addition to the NULL pointer, otherwise the
	 * "expects" diagnostic below is never emitted for an empty value.
	 */
	if (!key || !*key) {
		memprintf(err, "'%s' expects 'id', 'addr', or 'addr-port' value", args[*cur_arg]);
		return ERR_ALERT | ERR_FATAL;
	}

	if (strcmp(key, "id") == 0) {
		newsrv->hash_key = SRV_HASH_KEY_ID;
	}
	else if (strcmp(key, "addr") == 0) {
		newsrv->hash_key = SRV_HASH_KEY_ADDR;
	}
	else if (strcmp(key, "addr-port") == 0) {
		newsrv->hash_key = SRV_HASH_KEY_ADDR_PORT;
	}
	else {
		memprintf(err, "'%s' has to be 'id', 'addr', or 'addr-port'", args[*cur_arg]);
		return ERR_ALERT | ERR_FATAL;
	}

	return 0;
}
/* Parse the "idle-ping" server keyword. The argument following the keyword is
 * parsed as a time expressed in milliseconds by default and stored into
 * <newsrv>->idle_ping. Returns 0 on success, or ERR_ALERT | ERR_FATAL with
 * <err> filled when the argument is missing, overflows, underflows or contains
 * an unexpected character.
 */
static int srv_parse_idle_ping(char **args, int *cur_arg,
                               struct proxy *curproxy, struct server *newsrv, char **err)
{
	const char *kw = args[*cur_arg];
	const char *arg = args[*cur_arg + 1];
	unsigned int delay;
	const char *res;

	if (*arg == '\0') {
		memprintf(err, "'%s' expects an argument.", kw);
		return ERR_ALERT | ERR_FATAL;
	}

	res = parse_time_err(arg, &delay, TIME_UNIT_MS);
	if (res == PARSE_TIME_OVER) {
		memprintf(err, "timer overflow in argument <%s> to <%s> of server %s, maximum value is 2147483647 ms (~24.8 days).",
		          arg, kw, newsrv->id);
		return ERR_ALERT | ERR_FATAL;
	}

	if (res == PARSE_TIME_UNDER) {
		memprintf(err, "timer underflow in argument <%s> to <%s> of server %s, minimum non-null value is 1 ms.",
		          arg, kw, newsrv->id);
		return ERR_ALERT | ERR_FATAL;
	}

	if (res) {
		/* <res> points to the first character that could not be parsed */
		memprintf(err, "unexpected character '%c' in '%s' argument of server %s.",
		          *res, kw, newsrv->id);
		return ERR_ALERT | ERR_FATAL;
	}

	newsrv->idle_ping = delay;
	return 0;
}
/* Parse the "init-addr" server keyword. The argument following the keyword is
 * a comma-delimited list of methods, each being "libc", "last", "none" or
 * anything str2ip2() accepts as an address. Any previously configured method
 * list and initial address of <newsrv> are reset first, then each method is
 * appended in order with srv_append_initaddr(). At most one address may be
 * given; it is stored into <newsrv>->init_addr. Note that the argument string
 * is modified in place: each comma is overwritten with a NUL byte. Returns 0
 * on success, or ERR_ALERT | ERR_FATAL with <err> filled when a method is
 * unknown, when a second address is met, or when srv_append_initaddr()
 * refuses to add one more method.
 */
static int srv_parse_init_addr(char **args, int *cur_arg,
struct proxy *curproxy, struct server *newsrv, char **err)
{
char *p, *end;
int done;
struct sockaddr_storage sa;
/* start again from an empty configuration */
newsrv->init_addr_methods = 0;
memset(&newsrv->init_addr, 0, sizeof(newsrv->init_addr));
for (p = args[*cur_arg + 1]; *p; p = end) {
/* cut on next comma: <end> is left on the start of the next method,
 * or on the trailing NUL when this one was the last.
 */
for (end = p; *end && *end != ','; end++);
if (*end)
*(end++) = 0;
memset(&sa, 0, sizeof(sa));
if (strcmp(p, "libc") == 0) {
done = srv_append_initaddr(&newsrv->init_addr_methods, SRV_IADDR_LIBC);
}
else if (strcmp(p, "last") == 0) {
done = srv_append_initaddr(&newsrv->init_addr_methods, SRV_IADDR_LAST);
}
else if (strcmp(p, "none") == 0) {
done = srv_append_initaddr(&newsrv->init_addr_methods, SRV_IADDR_NONE);
}
else if (str2ip2(p, &sa, 0)) {
/* an address: only one may be set for a given server */
if (is_addr(&newsrv->init_addr)) {
memprintf(err, "'%s' : initial address already specified, cannot add '%s'.",
args[*cur_arg], p);
return ERR_ALERT | ERR_FATAL;
}
newsrv->init_addr = sa;
done = srv_append_initaddr(&newsrv->init_addr_methods, SRV_IADDR_IP);
}
else {
/* NOTE(review): the message does not mention that an address is
 * accepted as well by the branch above.
 */
memprintf(err, "'%s' : unknown init-addr method '%s', supported methods are 'libc', 'last', 'none'.",
args[*cur_arg], p);
return ERR_ALERT | ERR_FATAL;
}
/* srv_append_initaddr() returned zero: the method could not be added */
if (!done) {
memprintf(err, "'%s' : too many init-addr methods when trying to add '%s'",
args[*cur_arg], p);
return ERR_ALERT | ERR_FATAL;
}
}
return 0;
}
/* Parse the "init-state" server keyword. The argument following the keyword
 * must be one of "fully-up", "up", "down" or "fully-down" and selects the
 * matching SRV_INIT_STATE_* value for <newsrv>->init_state. Returns 0 on
 * success, or ERR_ALERT | ERR_FATAL with <err> filled for any other value.
 */
static int srv_parse_init_state(char **args, int *cur_arg,
                                struct proxy *curproxy, struct server *newsrv, char **err)
{
	const char *state = args[*cur_arg + 1];

	if (strcmp(state, "fully-up") == 0) {
		newsrv->init_state = SRV_INIT_STATE_FULLY_UP;
		return 0;
	}

	if (strcmp(state, "up") == 0) {
		newsrv->init_state = SRV_INIT_STATE_UP;
		return 0;
	}

	if (strcmp(state, "down") == 0) {
		newsrv->init_state = SRV_INIT_STATE_DOWN;
		return 0;
	}

	if (strcmp(state, "fully-down") == 0) {
		newsrv->init_state = SRV_INIT_STATE_FULLY_DOWN;
		return 0;
	}

	memprintf(err, "'%s' expects one of 'fully-up', 'up', 'down', or 'fully-down' but got '%s'",
	          args[*cur_arg], state);
	return ERR_ALERT | ERR_FATAL;
}
/* Parse the "log-bufsize" server keyword. The argument following the keyword
 * is converted with atoi() and stored into <newsrv>->log_bufsize. Returns 0 on
 * success, or ERR_ALERT | ERR_FATAL with <err> filled when the argument is
 * missing or when the resulting value is not strictly positive.
 */
static int srv_parse_log_bufsize(char **args, int *cur_arg,
                                 struct proxy *curproxy, struct server *newsrv, char **err)
{
	const char *kw = args[*cur_arg];
	const char *value = args[*cur_arg + 1];

	if (*value == '\0') {
		memprintf(err, "'%s' expects an integer argument.", kw);
		return ERR_ALERT | ERR_FATAL;
	}

	newsrv->log_bufsize = atoi(value);
	if (newsrv->log_bufsize > 0)
		return 0;

	memprintf(err, "%s has to be > 0.", kw);
	return ERR_ALERT | ERR_FATAL;
}
/* Parse the "log-proto" server keyword. The argument following the keyword
 * must be either "legacy" or "octet-count" and selects SRV_LOG_PROTO_LEGACY
 * or SRV_LOG_PROTO_OCTET_COUNTING for <newsrv>->log_proto. Returns 0 on
 * success, or ERR_ALERT | ERR_FATAL with <err> filled for any other value.
 */
static int srv_parse_log_proto(char **args, int *cur_arg,
                               struct proxy *curproxy, struct server *newsrv, char **err)
{
	const char *proto = args[*cur_arg + 1];

	if (strcmp(proto, "legacy") == 0) {
		newsrv->log_proto = SRV_LOG_PROTO_LEGACY;
		return 0;
	}

	if (strcmp(proto, "octet-count") == 0) {
		newsrv->log_proto = SRV_LOG_PROTO_OCTET_COUNTING;
		return 0;
	}

	memprintf(err, "'%s' expects one of 'legacy' or 'octet-count' but got '%s'",
	          args[*cur_arg], proto);
	return ERR_ALERT | ERR_FATAL;
}
/* Parse the "maxconn" server keyword. The argument following the keyword is
 * converted with atol() and stored into <newsrv>->maxconn. No validation is
 * performed: a missing or non-numeric argument yields 0. <err> is never
 * filled. Always returns 0.
 */
static int srv_parse_maxconn(char **args, int *cur_arg,
struct proxy *curproxy, struct server *newsrv, char **err)
{
newsrv->maxconn = atol(args[*cur_arg + 1]);
return 0;
}
/* Parse the "maxqueue" server keyword. The argument following the keyword is
 * converted with atol() and stored into <newsrv>->maxqueue. No validation is
 * performed: a missing or non-numeric argument yields 0. <err> is never
 * filled. Always returns 0.
 */
static int srv_parse_maxqueue(char **args, int *cur_arg,
struct proxy *curproxy, struct server *newsrv, char **err)
{
newsrv->maxqueue = atol(args[*cur_arg + 1]);
return 0;
}
/* Parse the "minconn" server keyword. The argument following the keyword is
 * converted with atol() and stored into <newsrv>->minconn. No validation is
 * performed: a missing or non-numeric argument yields 0. <err> is never
 * filled. Always returns 0.
 */
static int srv_parse_minconn(char **args, int *cur_arg,
struct proxy *curproxy, struct server *newsrv, char **err)
{
newsrv->minconn = atol(args[*cur_arg + 1]);
return 0;
}
/* Parse the server keyword setting <newsrv>->max_reuse (presumably
 * "max-reuse", the keyword table is not visible from here). The argument
 * following the keyword is converted with atoi() and stored as is. Returns 0
 * on success, or ERR_ALERT | ERR_FATAL with <err> filled when the argument is
 * missing.
 */
static int srv_parse_max_reuse(char **args, int *cur_arg, struct proxy *curproxy, struct server *newsrv, char **err)
{
	const char *value = args[*cur_arg + 1];

	if (*value == '\0') {
		memprintf(err, "'%s' expects <value> as argument.\n", args[*cur_arg]);
		return ERR_ALERT | ERR_FATAL;
	}

	newsrv->max_reuse = atoi(value);
	return 0;
}
/* Parse the server keyword setting <newsrv>->pool_purge_delay (presumably
 * "pool-purge-delay", the keyword table is not visible from here). The
 * argument following the keyword is parsed as a time expressed in
 * milliseconds by default. Returns 0 on success, or ERR_ALERT | ERR_FATAL
 * with <err> filled when the argument is missing, overflows, underflows or
 * contains an unexpected character.
 */
static int srv_parse_pool_purge_delay(char **args, int *cur_arg, struct proxy *curproxy, struct server *newsrv, char **err)
{
	const char *kw = args[*cur_arg];
	const char *value = args[*cur_arg + 1];
	unsigned int delay;
	const char *res;

	if (*value == '\0') {
		memprintf(err, "'%s' expects <value> as argument.\n", kw);
		return ERR_ALERT | ERR_FATAL;
	}

	res = parse_time_err(value, &delay, TIME_UNIT_MS);
	if (!res) {
		/* fully parsed */
		newsrv->pool_purge_delay = delay;
		return 0;
	}

	if (res == PARSE_TIME_OVER)
		memprintf(err, "timer overflow in argument '%s' to '%s' (maximum value is 2147483647 ms or ~24.8 days)",
		          value, kw);
	else if (res == PARSE_TIME_UNDER)
		memprintf(err, "timer underflow in argument '%s' to '%s' (minimum non-null value is 1 ms)",
		          value, kw);
	else
		/* <res> points to the first character that could not be parsed */
		memprintf(err, "unexpected character '%c' in argument to <%s>.\n",
		          *res, kw);

	return ERR_ALERT | ERR_FATAL;
}
/* Parse the server keyword setting <newsrv>->pool_conn_name (presumably
 * "pool-conn-name", the keyword table is not visible from here). Any previous
 * value is released, then the argument following the keyword is duplicated.
 * Returns 0 on success, or ERR_ALERT | ERR_FATAL with <err> filled when the
 * argument is missing or when the allocation fails.
 */
static int srv_parse_pool_conn_name(char **args, int *cur_arg, struct proxy *curproxy, struct server *newsrv, char **err)
{
	const char *kw = args[*cur_arg];
	const char *value = args[*cur_arg + 1];

	if (*value == '\0') {
		memprintf(err, "'%s' expects <value> as argument", kw);
		return ERR_ALERT | ERR_FATAL;
	}

	/* the previous name is dropped before the new one is allocated */
	ha_free(&newsrv->pool_conn_name);
	newsrv->pool_conn_name = strdup(value);
	if (newsrv->pool_conn_name)
		return 0;

	memprintf(err, "'%s' : out of memory", kw);
	return ERR_ALERT | ERR_FATAL;
}
MEDIUM: server: add a new pool-low-conn server setting The problem with the way idle connections currently work is that it's easy for a thread to steal all of its siblings' connections, then release them, then it's done by another one, etc. This happens even more easily due to scheduling latencies, or merged events inside the same pool loop, which, when dealing with a fast server responding in sub-millisecond delays, can really result in one thread being fully at work at a time. In such a case, we perform a huge amount of takeover() which consumes CPU and requires quite some locking, sometimes resulting in lower performance than expected. In order to fight against this problem, this patch introduces a new server setting "pool-low-conn", whose purpose is to dictate when it is allowed to steal connections from a sibling. As long as the number of idle connections remains at least as high as this value, it is permitted to take over another connection. When the idle connection count becomes lower, a thread may only use its own connections or create a new one. By proceeding like this even with a low number (typically 2*nbthreads), we quickly end up in a situation where all active threads have a few connections. It then becomes possible to connect to a server without bothering other threads the vast majority of the time, while still being able to use these connections when the number of available FDs becomes low. We also use this threshold instead of global.nbthread in the connection release logic, allowing to keep more extra connections if needed. A test performed with 10000 concurrent HTTP/1 connections, 16 threads and 210 servers with 1 millisecond of server response time showed the following numbers: haproxy 2.1.7: 185000 requests per second haproxy 2.2: 314000 requests per second haproxy 2.2 lowconn 32: 352000 requests per second The takeover rate goes down from 300k/s to 13k/s. The difference is further amplified as the response time shrinks.
2020-07-01 01:43:51 -04:00
/* Parse the server keyword setting <newsrv>->low_idle_conns (presumably
 * "pool-low-conn", the keyword table is not visible from here). The argument
 * following the keyword is converted with atoi() and stored as is. Returns 0
 * on success, or ERR_ALERT | ERR_FATAL with <err> filled when the argument is
 * missing.
 */
static int srv_parse_pool_low_conn(char **args, int *cur_arg, struct proxy *curproxy, struct server *newsrv, char **err)
{
	const char *value = args[*cur_arg + 1];

	if (*value == '\0') {
		memprintf(err, "'%s' expects <value> as argument.\n", args[*cur_arg]);
		return ERR_ALERT | ERR_FATAL;
	}

	newsrv->low_idle_conns = atoi(value);
	return 0;
}
/* Parse the server keyword setting <newsrv>->max_idle_conns (presumably
 * "pool-max-conn", the keyword table is not visible from here). The argument
 * following the keyword is converted with atoi() and stored; once read back
 * as a signed int it must not be lower than -1. Returns 0 on success, or
 * ERR_ALERT | ERR_FATAL with <err> filled when the argument is missing or out
 * of range.
 */
static int srv_parse_pool_max_conn(char **args, int *cur_arg, struct proxy *curproxy, struct server *newsrv, char **err)
{
	const char *kw = args[*cur_arg];
	const char *value = args[*cur_arg + 1];

	if (*value == '\0') {
		memprintf(err, "'%s' expects <value> as argument.\n", kw);
		return ERR_ALERT | ERR_FATAL;
	}

	newsrv->max_idle_conns = atoi(value);
	if ((int)newsrv->max_idle_conns >= -1)
		return 0;

	memprintf(err, "'%s' must be >= -1", kw);
	return ERR_ALERT | ERR_FATAL;
}
/* Parse the "id" server keyword. The argument following the keyword is
 * converted with atol() and stored into <newsrv>->puid, then the proxy
 * <curproxy> is searched for another server already using this ID. On success
 * SRV_F_FORCED_ID is set in <newsrv>->flags and 0 is returned. Returns
 * ERR_ALERT | ERR_FATAL with <err> filled when the argument is missing, when
 * the stored ID is not strictly positive, or when the ID is already in use.
 */
static int srv_parse_id(char **args, int *cur_arg, struct proxy *curproxy, struct server *newsrv, char **err)
{
	const char *kw = args[*cur_arg];
	struct server *owner;

	if (*args[*cur_arg + 1] == '\0') {
		memprintf(err, "'%s' : expects an integer argument", kw);
		return ERR_ALERT | ERR_FATAL;
	}

	newsrv->puid = atol(args[*cur_arg + 1]);
	if (newsrv->puid <= 0) {
		memprintf(err, "'%s' : custom id has to be > 0", kw);
		return ERR_ALERT | ERR_FATAL;
	}

	owner = server_find_by_id(curproxy, newsrv->puid);
	if (!owner) {
		/* the ID is free: remember that it was explicitly chosen */
		newsrv->flags |= SRV_F_FORCED_ID;
		return 0;
	}

	memprintf(err, "'%s' : custom id %d already used at %s:%d ('server %s')",
	          kw, newsrv->puid, owner->conf.file, owner->conf.line,
	          owner->id);
	return ERR_ALERT | ERR_FATAL;
}
/* Parse the "namespace" server keyword. When built with USE_NS, the argument
 * following the keyword is either "*", in which case SRV_F_USE_NS_FROM_PP is
 * set in <newsrv>->flags, or a namespace name which is looked up in the
 * namespace store (and inserted there if not found yet) and stored into
 * <newsrv>->netns. In both cases LSTCHK_SYSADM is added to
 * global.last_checks. Returns 0 on success, or ERR_ALERT | ERR_FATAL with
 * <err> filled when the argument is missing or when the namespace cannot be
 * opened. Without USE_NS the keyword is always rejected with
 * ERR_ALERT | ERR_FATAL.
 */
static int srv_parse_namespace(char **args, int *cur_arg,
                               struct proxy *curproxy, struct server *newsrv, char **err)
{
#ifdef USE_NS
	char *arg;

	arg = args[*cur_arg + 1];
	if (!*arg) {
		memprintf(err, "'%s' : expects <name> as argument", args[*cur_arg]);
		return ERR_ALERT | ERR_FATAL;
	}

	if (strcmp(arg, "*") == 0) {
		/* Use the namespace associated with the connection (if present). */
		newsrv->flags |= SRV_F_USE_NS_FROM_PP;
		global.last_checks |= LSTCHK_SYSADM;
		return 0;
	}

	/*
	 * As this parser may be called several times for the same 'default-server'
	 * object, or for a new 'server' instance deriving from a 'default-server'
	 * one with SRV_F_USE_NS_FROM_PP flag enabled, let's reset it.
	 */
	newsrv->flags &= ~SRV_F_USE_NS_FROM_PP;

	/* reuse an already known namespace, otherwise try to register it */
	newsrv->netns = netns_store_lookup(arg, strlen(arg));
	if (!newsrv->netns)
		newsrv->netns = netns_store_insert(arg);

	if (!newsrv->netns) {
		memprintf(err, "Cannot open namespace '%s'", arg);
		return ERR_ALERT | ERR_FATAL;
	}

	global.last_checks |= LSTCHK_SYSADM;
	return 0;
#else
	memprintf(err, "'%s': '%s' option not implemented", args[0], args[*cur_arg]);
	return ERR_ALERT | ERR_FATAL;
#endif
}
/* Parse the "no-backup" server keyword: clears the SRV_F_BACKUP flag from
 * <newsrv>->flags. No argument is read and <err> is never filled. Always
 * returns 0.
 */
static int srv_parse_no_backup(char **args, int *cur_arg,
struct proxy *curproxy, struct server *newsrv, char **err)
{
newsrv->flags &= ~SRV_F_BACKUP;
return 0;
}
/* Disable server PROXY protocol flags: every bit set in <flags> is cleared
 * from <srv>->pp_opts. Always returns 0 so that keyword parsers may directly
 * return the result.
 */
static inline int srv_disable_pp_flags(struct server *srv, unsigned int flags)
{
srv->pp_opts &= ~flags;
return 0;
}
/* Parse the "no-send-proxy" server keyword: clears SRV_PP_V1 from
 * <newsrv>->pp_opts through srv_disable_pp_flags(). No argument is read.
 * Returns what srv_disable_pp_flags() returns, i.e. 0.
 */
static int srv_parse_no_send_proxy(char **args, int *cur_arg,
struct proxy *curproxy, struct server *newsrv, char **err)
{
return srv_disable_pp_flags(newsrv, SRV_PP_V1);
}
/* Parse the "no-send-proxy-v2" server keyword: clears SRV_PP_V2 from
 * <newsrv>->pp_opts through srv_disable_pp_flags(). No argument is read.
 * Returns what srv_disable_pp_flags() returns, i.e. 0.
 */
static int srv_parse_no_send_proxy_v2(char **args, int *cur_arg,
struct proxy *curproxy, struct server *newsrv, char **err)
{
return srv_disable_pp_flags(newsrv, SRV_PP_V2);
}
/* Parse the "shard" server keyword. The argument following the keyword is
 * converted with atol() and stored into <newsrv>->shard. No validation is
 * performed: a missing or non-numeric argument yields 0. <err> is never
 * filled. Always returns 0.
 */
static int srv_parse_shard(char **args, int *cur_arg,
struct proxy *curproxy, struct server *newsrv, char **err)
{
newsrv->shard = atol(args[*cur_arg + 1]);
return 0;
}
/* Parse the "no-tfo" server keyword: clears the SRV_F_FASTOPEN flag from
 * <newsrv>->flags. No argument is read and <err> is never filled. Always
 * returns 0.
 */
static int srv_parse_no_tfo(char **args, int *cur_arg,
struct proxy *curproxy, struct server *newsrv, char **err)
{
newsrv->flags &= ~SRV_F_FASTOPEN;
return 0;
}
/* Parse the "non-stick" server keyword: sets the SRV_F_NON_STICK flag in
 * <newsrv>->flags. No argument is read and <err> is never filled. Always
 * returns 0.
 */
static int srv_parse_non_stick(char **args, int *cur_arg,
struct proxy *curproxy, struct server *newsrv, char **err)
{
newsrv->flags |= SRV_F_NON_STICK;
return 0;
}
/* Enable server PROXY protocol flags: every bit set in <flags> is set in
 * <srv>->pp_opts, the other bits being left untouched. Always returns 0 so
 * that keyword parsers may directly return the result.
 */
static inline int srv_enable_pp_flags(struct server *srv, unsigned int flags)
{
srv->pp_opts |= flags;
return 0;
}
/* Parse the "proto" server keyword. The argument following the keyword is
 * resolved with get_mux_proto() and the result is stored into
 * <newsrv>->mux_proto. Returns 0 on success, or ERR_ALERT | ERR_FATAL with
 * <err> filled when the argument is missing or when no MUX protocol is known
 * under that name.
 */
static int srv_parse_proto(char **args, int *cur_arg,
                           struct proxy *px, struct server *newsrv, char **err)
{
	const char *kw = args[*cur_arg];
	const char *name = args[*cur_arg + 1];

	if (*name == '\0') {
		memprintf(err, "'%s' : missing value", kw);
		return ERR_ALERT | ERR_FATAL;
	}

	newsrv->mux_proto = get_mux_proto(ist(name));
	if (newsrv->mux_proto)
		return 0;

	memprintf(err, "'%s' : unknown MUX protocol '%s'", kw, name);
	return ERR_ALERT | ERR_FATAL;
}
/* parse the "proxy-v2-options" */
static int srv_parse_proxy_v2_options(char **args, int *cur_arg,
struct proxy *px, struct server *newsrv, char **err)
{
char *p, *n;
for (p = args[*cur_arg+1]; p; p = n) {
n = strchr(p, ',');
if (n)
*n++ = '\0';
CLEANUP: Compare the return value of `XXXcmp()` functions with zero According to coding-style.txt it is recommended to use: `strcmp(a, b) == 0` instead of `!strcmp(a, b)` So let's do this. The change was performed by running the following (very long) coccinelle patch on src/: @@ statement S; expression E; expression F; @@ if ( ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 ) ( S | { ... } ) @@ statement S; expression E; expression F; @@ if ( - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 ) ( S | { ... } ) @@ expression E; expression F; expression G; @@ ( G && ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 ) @@ expression E; expression F; expression G; @@ ( G || ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 ) @@ expression E; expression F; expression G; @@ ( ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 && G ) @@ expression E; expression F; expression G; @@ ( ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 || G ) @@ expression E; expression F; expression G; @@ ( G && - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 ) @@ expression E; expression F; expression G; @@ ( G || - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 ) @@ expression E; expression F; expression G; @@ ( - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 && G ) @@ expression E; expression F; expression G; @@ ( - ! 
( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 || G ) @@ expression E; expression F; expression G; @@ ( - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 )
2021-01-02 16:31:53 -05:00
if (strcmp(p, "ssl") == 0) {
newsrv->pp_opts |= SRV_PP_V2_SSL;
CLEANUP: Compare the return value of `XXXcmp()` functions with zero According to coding-style.txt it is recommended to use: `strcmp(a, b) == 0` instead of `!strcmp(a, b)` So let's do this. The change was performed by running the following (very long) coccinelle patch on src/: @@ statement S; expression E; expression F; @@ if ( ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 ) ( S | { ... } ) @@ statement S; expression E; expression F; @@ if ( - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 ) ( S | { ... } ) @@ expression E; expression F; expression G; @@ ( G && ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 ) @@ expression E; expression F; expression G; @@ ( G || ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 ) @@ expression E; expression F; expression G; @@ ( ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 && G ) @@ expression E; expression F; expression G; @@ ( ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 || G ) @@ expression E; expression F; expression G; @@ ( G && - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 ) @@ expression E; expression F; expression G; @@ ( G || - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 ) @@ expression E; expression F; expression G; @@ ( - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 && G ) @@ expression E; expression F; expression G; @@ ( - ! 
( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 || G ) @@ expression E; expression F; expression G; @@ ( - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 )
2021-01-02 16:31:53 -05:00
} else if (strcmp(p, "cert-cn") == 0) {
newsrv->pp_opts |= SRV_PP_V2_SSL;
newsrv->pp_opts |= SRV_PP_V2_SSL_CN;
CLEANUP: Compare the return value of `XXXcmp()` functions with zero According to coding-style.txt it is recommended to use: `strcmp(a, b) == 0` instead of `!strcmp(a, b)` So let's do this. The change was performed by running the following (very long) coccinelle patch on src/: @@ statement S; expression E; expression F; @@ if ( ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 ) ( S | { ... } ) @@ statement S; expression E; expression F; @@ if ( - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 ) ( S | { ... } ) @@ expression E; expression F; expression G; @@ ( G && ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 ) @@ expression E; expression F; expression G; @@ ( G || ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 ) @@ expression E; expression F; expression G; @@ ( ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 && G ) @@ expression E; expression F; expression G; @@ ( ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 || G ) @@ expression E; expression F; expression G; @@ ( G && - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 ) @@ expression E; expression F; expression G; @@ ( G || - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 ) @@ expression E; expression F; expression G; @@ ( - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 && G ) @@ expression E; expression F; expression G; @@ ( - ! 
( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 || G ) @@ expression E; expression F; expression G; @@ ( - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 )
2021-01-02 16:31:53 -05:00
} else if (strcmp(p, "cert-key") == 0) {
newsrv->pp_opts |= SRV_PP_V2_SSL;
newsrv->pp_opts |= SRV_PP_V2_SSL_KEY_ALG;
CLEANUP: Compare the return value of `XXXcmp()` functions with zero According to coding-style.txt it is recommended to use: `strcmp(a, b) == 0` instead of `!strcmp(a, b)` So let's do this. The change was performed by running the following (very long) coccinelle patch on src/: @@ statement S; expression E; expression F; @@ if ( ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 ) ( S | { ... } ) @@ statement S; expression E; expression F; @@ if ( - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 ) ( S | { ... } ) @@ expression E; expression F; expression G; @@ ( G && ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 ) @@ expression E; expression F; expression G; @@ ( G || ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 ) @@ expression E; expression F; expression G; @@ ( ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 && G ) @@ expression E; expression F; expression G; @@ ( ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 || G ) @@ expression E; expression F; expression G; @@ ( G && - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 ) @@ expression E; expression F; expression G; @@ ( G || - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 ) @@ expression E; expression F; expression G; @@ ( - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 && G ) @@ expression E; expression F; expression G; @@ ( - ! 
( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 || G ) @@ expression E; expression F; expression G; @@ ( - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 )
2021-01-02 16:31:53 -05:00
} else if (strcmp(p, "cert-sig") == 0) {
newsrv->pp_opts |= SRV_PP_V2_SSL;
newsrv->pp_opts |= SRV_PP_V2_SSL_SIG_ALG;
CLEANUP: Compare the return value of `XXXcmp()` functions with zero According to coding-style.txt it is recommended to use: `strcmp(a, b) == 0` instead of `!strcmp(a, b)` So let's do this. The change was performed by running the following (very long) coccinelle patch on src/: @@ statement S; expression E; expression F; @@ if ( ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 ) ( S | { ... } ) @@ statement S; expression E; expression F; @@ if ( - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 ) ( S | { ... } ) @@ expression E; expression F; expression G; @@ ( G && ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 ) @@ expression E; expression F; expression G; @@ ( G || ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 ) @@ expression E; expression F; expression G; @@ ( ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 && G ) @@ expression E; expression F; expression G; @@ ( ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 || G ) @@ expression E; expression F; expression G; @@ ( G && - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 ) @@ expression E; expression F; expression G; @@ ( G || - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 ) @@ expression E; expression F; expression G; @@ ( - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 && G ) @@ expression E; expression F; expression G; @@ ( - ! 
( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 || G ) @@ expression E; expression F; expression G; @@ ( - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 )
2021-01-02 16:31:53 -05:00
} else if (strcmp(p, "ssl-cipher") == 0) {
newsrv->pp_opts |= SRV_PP_V2_SSL;
newsrv->pp_opts |= SRV_PP_V2_SSL_CIPHER;
CLEANUP: Compare the return value of `XXXcmp()` functions with zero According to coding-style.txt it is recommended to use: `strcmp(a, b) == 0` instead of `!strcmp(a, b)` So let's do this. The change was performed by running the following (very long) coccinelle patch on src/: @@ statement S; expression E; expression F; @@ if ( ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 ) ( S | { ... } ) @@ statement S; expression E; expression F; @@ if ( - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 ) ( S | { ... } ) @@ expression E; expression F; expression G; @@ ( G && ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 ) @@ expression E; expression F; expression G; @@ ( G || ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 ) @@ expression E; expression F; expression G; @@ ( ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 && G ) @@ expression E; expression F; expression G; @@ ( ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 || G ) @@ expression E; expression F; expression G; @@ ( G && - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 ) @@ expression E; expression F; expression G; @@ ( G || - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 ) @@ expression E; expression F; expression G; @@ ( - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 && G ) @@ expression E; expression F; expression G; @@ ( - ! 
( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 || G ) @@ expression E; expression F; expression G; @@ ( - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 )
2021-01-02 16:31:53 -05:00
} else if (strcmp(p, "authority") == 0) {
newsrv->pp_opts |= SRV_PP_V2_AUTHORITY;
CLEANUP: Compare the return value of `XXXcmp()` functions with zero According to coding-style.txt it is recommended to use: `strcmp(a, b) == 0` instead of `!strcmp(a, b)` So let's do this. The change was performed by running the following (very long) coccinelle patch on src/: @@ statement S; expression E; expression F; @@ if ( ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 ) ( S | { ... } ) @@ statement S; expression E; expression F; @@ if ( - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 ) ( S | { ... } ) @@ expression E; expression F; expression G; @@ ( G && ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 ) @@ expression E; expression F; expression G; @@ ( G || ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 ) @@ expression E; expression F; expression G; @@ ( ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 && G ) @@ expression E; expression F; expression G; @@ ( ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 || G ) @@ expression E; expression F; expression G; @@ ( G && - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 ) @@ expression E; expression F; expression G; @@ ( G || - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 ) @@ expression E; expression F; expression G; @@ ( - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 && G ) @@ expression E; expression F; expression G; @@ ( - ! 
( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 || G ) @@ expression E; expression F; expression G; @@ ( - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 )
2021-01-02 16:31:53 -05:00
} else if (strcmp(p, "crc32c") == 0) {
newsrv->pp_opts |= SRV_PP_V2_CRC32C;
CLEANUP: Compare the return value of `XXXcmp()` functions with zero According to coding-style.txt it is recommended to use: `strcmp(a, b) == 0` instead of `!strcmp(a, b)` So let's do this. The change was performed by running the following (very long) coccinelle patch on src/: @@ statement S; expression E; expression F; @@ if ( ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 ) ( S | { ... } ) @@ statement S; expression E; expression F; @@ if ( - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 ) ( S | { ... } ) @@ expression E; expression F; expression G; @@ ( G && ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 ) @@ expression E; expression F; expression G; @@ ( G || ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 ) @@ expression E; expression F; expression G; @@ ( ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 && G ) @@ expression E; expression F; expression G; @@ ( ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 || G ) @@ expression E; expression F; expression G; @@ ( G && - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 ) @@ expression E; expression F; expression G; @@ ( G || - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 ) @@ expression E; expression F; expression G; @@ ( - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 && G ) @@ expression E; expression F; expression G; @@ ( - ! 
( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 || G ) @@ expression E; expression F; expression G; @@ ( - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 )
2021-01-02 16:31:53 -05:00
} else if (strcmp(p, "unique-id") == 0) {
newsrv->pp_opts |= SRV_PP_V2_UNIQUE_ID;
} else
goto fail;
}
return 0;
fail:
if (err)
memprintf(err, "'%s' : proxy v2 option not implemented", p);
return ERR_ALERT | ERR_FATAL;
}
/* Parse the "observe" server keyword */
static int srv_parse_observe(char **args, int *cur_arg,
struct proxy *curproxy, struct server *newsrv, char **err)
{
char *arg;
arg = args[*cur_arg + 1];
if (!*arg) {
memprintf(err, "'%s' expects <mode> as argument.\n", args[*cur_arg]);
return ERR_ALERT | ERR_FATAL;
}
CLEANUP: Compare the return value of `XXXcmp()` functions with zero According to coding-style.txt it is recommended to use: `strcmp(a, b) == 0` instead of `!strcmp(a, b)` So let's do this. The change was performed by running the following (very long) coccinelle patch on src/: @@ statement S; expression E; expression F; @@ if ( ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 ) ( S | { ... } ) @@ statement S; expression E; expression F; @@ if ( - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 ) ( S | { ... } ) @@ expression E; expression F; expression G; @@ ( G && ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 ) @@ expression E; expression F; expression G; @@ ( G || ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 ) @@ expression E; expression F; expression G; @@ ( ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 && G ) @@ expression E; expression F; expression G; @@ ( ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 || G ) @@ expression E; expression F; expression G; @@ ( G && - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 ) @@ expression E; expression F; expression G; @@ ( G || - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 ) @@ expression E; expression F; expression G; @@ ( - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 && G ) @@ expression E; expression F; expression G; @@ ( - ! 
( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 || G ) @@ expression E; expression F; expression G; @@ ( - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 )
2021-01-02 16:31:53 -05:00
if (strcmp(arg, "none") == 0) {
newsrv->observe = HANA_OBS_NONE;
}
CLEANUP: Compare the return value of `XXXcmp()` functions with zero According to coding-style.txt it is recommended to use: `strcmp(a, b) == 0` instead of `!strcmp(a, b)` So let's do this. The change was performed by running the following (very long) coccinelle patch on src/: @@ statement S; expression E; expression F; @@ if ( ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 ) ( S | { ... } ) @@ statement S; expression E; expression F; @@ if ( - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 ) ( S | { ... } ) @@ expression E; expression F; expression G; @@ ( G && ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 ) @@ expression E; expression F; expression G; @@ ( G || ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 ) @@ expression E; expression F; expression G; @@ ( ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 && G ) @@ expression E; expression F; expression G; @@ ( ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 || G ) @@ expression E; expression F; expression G; @@ ( G && - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 ) @@ expression E; expression F; expression G; @@ ( G || - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 ) @@ expression E; expression F; expression G; @@ ( - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 && G ) @@ expression E; expression F; expression G; @@ ( - ! 
( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 || G ) @@ expression E; expression F; expression G; @@ ( - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 )
2021-01-02 16:31:53 -05:00
else if (strcmp(arg, "layer4") == 0) {
newsrv->observe = HANA_OBS_LAYER4;
}
CLEANUP: Compare the return value of `XXXcmp()` functions with zero According to coding-style.txt it is recommended to use: `strcmp(a, b) == 0` instead of `!strcmp(a, b)` So let's do this. The change was performed by running the following (very long) coccinelle patch on src/: @@ statement S; expression E; expression F; @@ if ( ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 ) ( S | { ... } ) @@ statement S; expression E; expression F; @@ if ( - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 ) ( S | { ... } ) @@ expression E; expression F; expression G; @@ ( G && ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 ) @@ expression E; expression F; expression G; @@ ( G || ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 ) @@ expression E; expression F; expression G; @@ ( ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 && G ) @@ expression E; expression F; expression G; @@ ( ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 || G ) @@ expression E; expression F; expression G; @@ ( G && - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 ) @@ expression E; expression F; expression G; @@ ( G || - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 ) @@ expression E; expression F; expression G; @@ ( - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 && G ) @@ expression E; expression F; expression G; @@ ( - ! 
( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 || G ) @@ expression E; expression F; expression G; @@ ( - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 )
2021-01-02 16:31:53 -05:00
else if (strcmp(arg, "layer7") == 0) {
if (curproxy->mode != PR_MODE_HTTP) {
memprintf(err, "'%s' can only be used in http proxies.\n", arg);
return ERR_ALERT;
}
newsrv->observe = HANA_OBS_LAYER7;
}
else {
memprintf(err, "'%s' expects one of 'none', 'layer4', 'layer7' "
"but got '%s'\n", args[*cur_arg], arg);
return ERR_ALERT | ERR_FATAL;
}
return 0;
}
/* Handler for the "on-error" server keyword.
 *
 * The argument following the keyword selects the value stored in
 * <newsrv>->onerror: "fastinter", "fail-check", "sudden-death" or
 * "mark-down". Returns 0 on success; any other argument fills <err> and
 * returns ERR_ALERT | ERR_FATAL.
 */
static int srv_parse_on_error(char **args, int *cur_arg,
                              struct proxy *curproxy, struct server *newsrv, char **err)
{
	const char *action = args[*cur_arg + 1];

	if (strcmp(action, "fastinter") == 0) {
		newsrv->onerror = HANA_ONERR_FASTINTER;
		return 0;
	}
	if (strcmp(action, "fail-check") == 0) {
		newsrv->onerror = HANA_ONERR_FAILCHK;
		return 0;
	}
	if (strcmp(action, "sudden-death") == 0) {
		newsrv->onerror = HANA_ONERR_SUDDTH;
		return 0;
	}
	if (strcmp(action, "mark-down") == 0) {
		newsrv->onerror = HANA_ONERR_MARKDWN;
		return 0;
	}

	memprintf(err, "'%s' expects one of 'fastinter', "
	          "'fail-check', 'sudden-death' or 'mark-down' but got '%s'",
	          args[*cur_arg], action);
	return ERR_ALERT | ERR_FATAL;
}
/* Handler for the "on-marked-down" server keyword.
 *
 * The only accepted argument is "shutdown-sessions", which stores
 * HANA_ONMARKEDDOWN_SHUTDOWNSESSIONS in <newsrv>->onmarkeddown and returns 0.
 * Anything else fills <err> and returns ERR_ALERT | ERR_FATAL.
 */
static int srv_parse_on_marked_down(char **args, int *cur_arg,
                                    struct proxy *curproxy, struct server *newsrv, char **err)
{
	const char *action = args[*cur_arg + 1];

	if (strcmp(action, "shutdown-sessions") != 0) {
		memprintf(err, "'%s' expects 'shutdown-sessions' but got '%s'",
		          args[*cur_arg], action);
		return ERR_ALERT | ERR_FATAL;
	}

	newsrv->onmarkeddown = HANA_ONMARKEDDOWN_SHUTDOWNSESSIONS;
	return 0;
}
/* Handler for the "on-marked-up" server keyword.
 *
 * The only accepted argument is "shutdown-backup-sessions", which stores
 * HANA_ONMARKEDUP_SHUTDOWNBACKUPSESSIONS in <newsrv>->onmarkedup and
 * returns 0. Anything else fills <err> and returns ERR_ALERT | ERR_FATAL.
 */
static int srv_parse_on_marked_up(char **args, int *cur_arg,
                                  struct proxy *curproxy, struct server *newsrv, char **err)
{
	const char *action = args[*cur_arg + 1];

	if (strcmp(action, "shutdown-backup-sessions") != 0) {
		memprintf(err, "'%s' expects 'shutdown-backup-sessions' but got '%s'",
		          args[*cur_arg], action);
		return ERR_ALERT | ERR_FATAL;
	}

	newsrv->onmarkedup = HANA_ONMARKEDUP_SHUTDOWNBACKUPSESSIONS;
	return 0;
}
/* Parse the "redir" server keyword.
 *
 * The argument following the keyword is duplicated into <newsrv>->rdr_pfx
 * (any previous value is released) and its length is stored in
 * <newsrv>->rdr_len.
 *
 * Returns 0 on success. When the argument is empty or the copy cannot be
 * allocated, <err> is filled and ERR_ALERT | ERR_FATAL is returned; in both
 * cases the previous prefix and length are left untouched.
 */
static int srv_parse_redir(char **args, int *cur_arg,
                           struct proxy *curproxy, struct server *newsrv, char **err)
{
	char *arg, *pfx;

	arg = args[*cur_arg + 1];
	if (!*arg) {
		memprintf(err, "'%s' expects <prefix> as argument.\n", args[*cur_arg]);
		return ERR_ALERT | ERR_FATAL;
	}

	/* Duplicate first so that an allocation failure neither loses the old
	 * prefix nor leaves a NULL prefix paired with a non-zero length.
	 */
	pfx = strdup(arg);
	if (!pfx) {
		memprintf(err, "'%s' : out of memory", args[*cur_arg]);
		return ERR_ALERT | ERR_FATAL;
	}

	free(newsrv->rdr_pfx);
	newsrv->rdr_pfx = pfx;
	newsrv->rdr_len = strlen(arg);
	return 0;
}
/* Parse the "resolvers" server keyword */
static int srv_parse_resolvers(char **args, int *cur_arg,
struct proxy *curproxy, struct server *newsrv, char **err)
{
free(newsrv->resolvers_id);
newsrv->resolvers_id = strdup(args[*cur_arg + 1]);
return 0;
}
/* Handler for the "resolve-net" server keyword.
 *
 * The argument following the keyword is a comma-separated list of networks.
 * The list is split in place (each ',' is overwritten with a NUL byte) and
 * every entry is appended to <newsrv>->resolv_opts.pref_net[], with
 * pref_net_nb counting the stored entries. Each entry is first handed to
 * str2net() and stored as AF_INET on success; otherwise it is handed to
 * str62net() and stored as AF_INET6, the mask being expanded by len2mask6().
 *
 * Returns 0 on success. <err> is filled and ERR_ALERT | ERR_FATAL returned
 * when the list is missing or empty, when SRV_MAX_PREF_NET entries are
 * already stored, or when an entry is accepted by neither converter. Entries
 * stored before the failure are kept.
 */
static int srv_parse_resolve_net(char **args, int *cur_arg,
                                 struct proxy *curproxy, struct server *newsrv, char **err)
{
	struct resolv_options *opt = &newsrv->resolv_opts;
	unsigned char mask;
	char *cur, *next;

	if (!args[*cur_arg + 1] || args[*cur_arg + 1][0] == '\0') {
		memprintf(err, "'%s' expects a list of networks.",
		          args[*cur_arg]);
		return ERR_ALERT | ERR_FATAL;
	}

	for (cur = next = args[*cur_arg + 1]; *cur != '\0'; cur = next) {
		/* refuse to go past the end of the preferred networks table */
		if (opt->pref_net_nb >= SRV_MAX_PREF_NET) {
			memprintf(err, "'%s' exceed %d networks.",
			          args[*cur_arg], SRV_MAX_PREF_NET);
			return ERR_ALERT | ERR_FATAL;
		}

		/* isolate the current entry: stop on the next ',' or on the end
		 * of the string, and cut the string there.
		 */
		while (*next != '\0' && *next != ',')
			next++;
		if (*next == ',') {
			*next = '\0';
			next++;
		}

		if (str2net(cur, 0, &opt->pref_net[opt->pref_net_nb].addr.in4,
		            &opt->pref_net[opt->pref_net_nb].mask.in4)) {
			/* accepted as an IPv4 network */
			opt->pref_net[opt->pref_net_nb].family = AF_INET;
		}
		else if (str62net(cur, &opt->pref_net[opt->pref_net_nb].addr.in6,
		                  &mask)) {
			/* accepted as an IPv6 network: turn the length into a mask */
			len2mask6(mask, &opt->pref_net[opt->pref_net_nb].mask.in6);
			opt->pref_net[opt->pref_net_nb].family = AF_INET6;
		}
		else {
			/* neither conversion worked */
			memprintf(err, "'%s' invalid network '%s'.",
			          args[*cur_arg], cur);
			return ERR_ALERT | ERR_FATAL;
		}

		opt->pref_net_nb++;
	}

	return 0;
}
/* Handler for the "resolve-opts" server keyword.
 *
 * The argument following the keyword is a comma-separated list of options,
 * split in place (each ',' is overwritten with a NUL byte):
 *   - "allow-dup-ip"   sets resolv_opts.accept_duplicate_ip to 1
 *   - "prevent-dup-ip" sets resolv_opts.accept_duplicate_ip to 0
 *   - "ignore-weight"  sets resolv_opts.ignore_weight to 1
 *
 * Returns 0 on success (including for an empty list). The first unknown
 * option fills <err> and returns ERR_ALERT | ERR_FATAL; options processed
 * before it remain applied.
 */
static int srv_parse_resolve_opts(char **args, int *cur_arg,
                                  struct proxy *curproxy, struct server *newsrv, char **err)
{
	char *opt, *next;

	for (opt = args[*cur_arg + 1]; *opt != '\0'; opt = next) {
		/* find the end of the current option and cut the string there */
		next = opt;
		while (*next != '\0' && *next != ',')
			next++;
		if (*next != '\0') {
			*next = 0;
			next++;
		}

		if (strcmp(opt, "allow-dup-ip") == 0) {
			newsrv->resolv_opts.accept_duplicate_ip = 1;
			continue;
		}
		if (strcmp(opt, "ignore-weight") == 0) {
			newsrv->resolv_opts.ignore_weight = 1;
			continue;
		}
		if (strcmp(opt, "prevent-dup-ip") == 0) {
			newsrv->resolv_opts.accept_duplicate_ip = 0;
			continue;
		}

		memprintf(err, "'%s' : unknown resolve-opts option '%s', supported methods are 'allow-dup-ip', 'ignore-weight', and 'prevent-dup-ip'.",
		          args[*cur_arg], opt);
		return ERR_ALERT | ERR_FATAL;
	}

	return 0;
}
/* Handler for the "resolve-prefer" server keyword.
 *
 * The argument following the keyword must be "ipv4" or "ipv6"; it stores
 * AF_INET or AF_INET6 respectively in <newsrv>->resolv_opts.family_prio and
 * returns 0. Any other argument fills <err> and returns
 * ERR_ALERT | ERR_FATAL.
 */
static int srv_parse_resolve_prefer(char **args, int *cur_arg,
                                    struct proxy *curproxy, struct server *newsrv, char **err)
{
	const char *family = args[*cur_arg + 1];

	if (strcmp(family, "ipv4") == 0) {
		newsrv->resolv_opts.family_prio = AF_INET;
		return 0;
	}

	if (strcmp(family, "ipv6") == 0) {
		newsrv->resolv_opts.family_prio = AF_INET6;
		return 0;
	}

	memprintf(err, "'%s' expects either ipv4 or ipv6 as argument.",
	          args[*cur_arg]);
	return ERR_ALERT | ERR_FATAL;
}
/* Handler for the "send-proxy" server keyword. It takes no argument and sets
 * the SRV_PP_V1 flag in <newsrv>'s PROXY protocol options. The result of
 * srv_enable_pp_flags() is returned unchanged.
 */
static int srv_parse_send_proxy(char **args, int *cur_arg,
                                struct proxy *curproxy, struct server *newsrv, char **err)
{
	const unsigned int to_set = SRV_PP_V1;

	return srv_enable_pp_flags(newsrv, to_set);
}
/* Handler for the "send-proxy-v2" server keyword. It takes no argument and
 * sets the SRV_PP_V2 flag in <newsrv>'s PROXY protocol options. The result
 * of srv_enable_pp_flags() is returned unchanged.
 */
static int srv_parse_send_proxy_v2(char **args, int *cur_arg,
                                   struct proxy *curproxy, struct server *newsrv, char **err)
{
	const unsigned int to_set = SRV_PP_V2;

	return srv_enable_pp_flags(newsrv, to_set);
}
/* Parse the "set-proxy-v2-tlv-fmt" server keyword.
 *
 * The keyword itself may carry a TLV ID between parentheses, i.e.
 * "set-proxy-v2-tlv-fmt(<TLV ID>)". The ID is converted with strtoul() using
 * base 0 and must not exceed 0xFF. When no '(' follows the keyword name, the
 * ID defaults to 0. The argument following the keyword is the format string;
 * it is duplicated as-is into the new entry, which is appended to
 * <newsrv>->pp_tlvs.
 *
 * On success, *cur_arg is advanced by one to account for the consumed format
 * string and 0 is returned. On failure, <err> is filled, nothing is added to
 * the list, errno is reset to 0 and ERR_ALERT | ERR_FATAL is returned.
 */
static int srv_parse_set_proxy_v2_tlv_fmt(char **args, int *cur_arg,
                                          struct proxy *px, struct server *newsrv, char **err)
{
	struct srv_pp_tlv_list *srv_tlv = NULL;
	unsigned long tlv_id = 0;
	char *end = NULL;
	char *cmd = args[*cur_arg];

	if (!*cmd) {
		memprintf(err, "'%s' : could not read set-proxy-v2-tlv-fmt command", args[*cur_arg]);
		goto fail;
	}

	/* skip the keyword name to reach the optional "(<TLV ID>)" part */
	cmd += strlen("set-proxy-v2-tlv-fmt");

	if (*cmd == '(') {
		cmd++; /* skip the '(' */
		errno = 0;
		/* The result is kept in an unsigned long so that the range check
		 * below sees the full value; narrowing it first would let IDs
		 * larger than UINT_MAX wrap into the valid range.
		 */
		tlv_id = strtoul(cmd, &end, 0); /* convert TLV ID */
		if (unlikely((cmd == end) || (errno != 0))) {
			memprintf(err, "'%s' : could not convert TLV ID", args[*cur_arg]);
			goto fail;
		}
		if (*end != ')') {
			memprintf(err, "'%s' : expects set-proxy-v2-tlv-fmt(<TLV ID>)", args[*cur_arg]);
			goto fail;
		}
		if (tlv_id > 0xFF) {
			memprintf(err, "'%s' : the maximum allowed TLV ID is %d", args[*cur_arg], 0xFF);
			goto fail;
		}
	}

	srv_tlv = malloc(sizeof(*srv_tlv));
	if (unlikely(!srv_tlv)) {
		memprintf(err, "'%s' : failed to allocate TLV entry", args[*cur_arg]);
		goto fail;
	}

	srv_tlv->type = tlv_id;
	lf_expr_init(&srv_tlv->fmt);
	srv_tlv->fmt_string = strdup(args[*cur_arg + 1]);
	if (unlikely(!srv_tlv->fmt_string)) {
		memprintf(err, "'%s' : failed to save format string for parsing", args[*cur_arg]);
		goto fail;
	}

	LIST_APPEND(&newsrv->pp_tlvs, &srv_tlv->list);

	/* the format string argument has been consumed */
	(*cur_arg)++;

	return 0;

 fail:
	/* srv_tlv is either NULL or not yet linked into the list */
	free(srv_tlv);
	errno = 0;
	return ERR_ALERT | ERR_FATAL;
}
/* Parse the "slowstart" server keyword.
 *
 * The argument is handed to parse_time_err() with TIME_UNIT_MS; the resulting
 * value is then rounded up to the next multiple of 1000 and divided by 1000
 * before being stored in <newsrv>->slowstart. Returns 0 on success, or
 * ERR_ALERT | ERR_FATAL with <err> filled on overflow, underflow or when an
 * unexpected character is reported by the parser.
 */
static int srv_parse_slowstart(char **args, int *cur_arg,
                               struct proxy *curproxy, struct server *newsrv, char **err)
{
	unsigned int delay;
	const char *res;

	res = parse_time_err(args[*cur_arg + 1], &delay, TIME_UNIT_MS);

	if (res == PARSE_TIME_OVER) {
		memprintf(err, "overflow in argument <%s> to <%s> of server %s, maximum value is 2147483647 ms (~24.8 days).",
		          args[*cur_arg+1], args[*cur_arg], newsrv->id);
		return ERR_ALERT | ERR_FATAL;
	}

	if (res == PARSE_TIME_UNDER) {
		memprintf(err, "underflow in argument <%s> to <%s> of server %s, minimum non-null value is 1 ms.",
		          args[*cur_arg+1], args[*cur_arg], newsrv->id);
		return ERR_ALERT | ERR_FATAL;
	}

	if (res) {
		/* any other non-NULL result points to the offending character */
		memprintf(err, "unexpected character '%c' in 'slowstart' argument of server %s.",
		          *res, newsrv->id);
		return ERR_ALERT | ERR_FATAL;
	}

	/* slowstart is stored in seconds, rounded up */
	newsrv->slowstart = (delay + 999) / 1000;
	return 0;
}
/* Parse the "source" server keyword */
static int srv_parse_source(char **args, int *cur_arg,
struct proxy *curproxy, struct server *newsrv, char **err)
{
char *errmsg;
int port_low, port_high;
struct sockaddr_storage *sk;
errmsg = NULL;
if (!*args[*cur_arg + 1]) {
memprintf(err, "'%s' expects <addr>[:<port>[-<port>]], and optionally '%s' <addr>, "
"and '%s' <name> as argument.\n", args[*cur_arg], "usesrc", "interface");
goto err;
}
/* 'sk' is statically allocated (no need to be freed). */
sk = str2sa_range(args[*cur_arg + 1], NULL, &port_low, &port_high, NULL, NULL, NULL,
&errmsg, NULL, NULL, NULL,
PA_O_RESOLVE | PA_O_PORT_OK | PA_O_PORT_RANGE | PA_O_STREAM | PA_O_CONNECT);
if (!sk) {
memprintf(err, "'%s %s' : %s\n", args[*cur_arg], args[*cur_arg + 1], errmsg);
goto err;
}
newsrv->conn_src.opts |= CO_SRC_BIND;
newsrv->conn_src.source_addr = *sk;
if (port_low != port_high) {
int i;
newsrv->conn_src.sport_range = port_range_alloc_range(port_high - port_low + 1);
if (!newsrv->conn_src.sport_range) {
ha_alert("Server '%s': Out of memory (sport_range)\n", args[0]);
goto err;
}
for (i = 0; i < newsrv->conn_src.sport_range->size; i++)
newsrv->conn_src.sport_range->ports[i] = port_low + i;
}
*cur_arg += 2;
while (*(args[*cur_arg])) {
CLEANUP: Compare the return value of `XXXcmp()` functions with zero According to coding-style.txt it is recommended to use: `strcmp(a, b) == 0` instead of `!strcmp(a, b)` So let's do this. The change was performed by running the following (very long) coccinelle patch on src/: @@ statement S; expression E; expression F; @@ if ( ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 ) ( S | { ... } ) @@ statement S; expression E; expression F; @@ if ( - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 ) ( S | { ... } ) @@ expression E; expression F; expression G; @@ ( G && ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 ) @@ expression E; expression F; expression G; @@ ( G || ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 ) @@ expression E; expression F; expression G; @@ ( ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 && G ) @@ expression E; expression F; expression G; @@ ( ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 || G ) @@ expression E; expression F; expression G; @@ ( G && - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 ) @@ expression E; expression F; expression G; @@ ( G || - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 ) @@ expression E; expression F; expression G; @@ ( - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 && G ) @@ expression E; expression F; expression G; @@ ( - ! 
( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 || G ) @@ expression E; expression F; expression G; @@ ( - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 )
2021-01-02 16:31:53 -05:00
if (strcmp(args[*cur_arg], "usesrc") == 0) { /* address to use outside */
#if defined(CONFIG_HAP_TRANSPARENT)
if (!*args[*cur_arg + 1]) {
ha_alert("'usesrc' expects <addr>[:<port>], 'client', 'clientip', "
"or 'hdr_ip(name,#)' as argument.\n");
goto err;
}
CLEANUP: Compare the return value of `XXXcmp()` functions with zero According to coding-style.txt it is recommended to use: `strcmp(a, b) == 0` instead of `!strcmp(a, b)` So let's do this. The change was performed by running the following (very long) coccinelle patch on src/: @@ statement S; expression E; expression F; @@ if ( ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 ) ( S | { ... } ) @@ statement S; expression E; expression F; @@ if ( - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 ) ( S | { ... } ) @@ expression E; expression F; expression G; @@ ( G && ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 ) @@ expression E; expression F; expression G; @@ ( G || ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 ) @@ expression E; expression F; expression G; @@ ( ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 && G ) @@ expression E; expression F; expression G; @@ ( ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 || G ) @@ expression E; expression F; expression G; @@ ( G && - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 ) @@ expression E; expression F; expression G; @@ ( G || - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 ) @@ expression E; expression F; expression G; @@ ( - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 && G ) @@ expression E; expression F; expression G; @@ ( - ! 
( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 || G ) @@ expression E; expression F; expression G; @@ ( - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 )
2021-01-02 16:31:53 -05:00
if (strcmp(args[*cur_arg + 1], "client") == 0) {
newsrv->conn_src.opts &= ~CO_SRC_TPROXY_MASK;
newsrv->conn_src.opts |= CO_SRC_TPROXY_CLI;
}
CLEANUP: Compare the return value of `XXXcmp()` functions with zero According to coding-style.txt it is recommended to use: `strcmp(a, b) == 0` instead of `!strcmp(a, b)` So let's do this. The change was performed by running the following (very long) coccinelle patch on src/: @@ statement S; expression E; expression F; @@ if ( ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 ) ( S | { ... } ) @@ statement S; expression E; expression F; @@ if ( - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 ) ( S | { ... } ) @@ expression E; expression F; expression G; @@ ( G && ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 ) @@ expression E; expression F; expression G; @@ ( G || ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 ) @@ expression E; expression F; expression G; @@ ( ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 && G ) @@ expression E; expression F; expression G; @@ ( ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 || G ) @@ expression E; expression F; expression G; @@ ( G && - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 ) @@ expression E; expression F; expression G; @@ ( G || - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 ) @@ expression E; expression F; expression G; @@ ( - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 && G ) @@ expression E; expression F; expression G; @@ ( - ! 
( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 || G ) @@ expression E; expression F; expression G; @@ ( - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 )
2021-01-02 16:31:53 -05:00
else if (strcmp(args[*cur_arg + 1], "clientip") == 0) {
newsrv->conn_src.opts &= ~CO_SRC_TPROXY_MASK;
newsrv->conn_src.opts |= CO_SRC_TPROXY_CIP;
}
else if (!strncmp(args[*cur_arg + 1], "hdr_ip(", 7)) {
char *name, *end;
name = args[*cur_arg + 1] + 7;
while (isspace((unsigned char)*name))
name++;
end = name;
while (*end && !isspace((unsigned char)*end) && *end != ',' && *end != ')')
end++;
newsrv->conn_src.opts &= ~CO_SRC_TPROXY_MASK;
newsrv->conn_src.opts |= CO_SRC_TPROXY_DYN;
free(newsrv->conn_src.bind_hdr_name);
newsrv->conn_src.bind_hdr_name = calloc(1, end - name + 1);
if (!newsrv->conn_src.bind_hdr_name) {
ha_alert("Server '%s': Out of memory (bind_hdr_name)\n", args[0]);
goto err;
}
newsrv->conn_src.bind_hdr_len = end - name;
memcpy(newsrv->conn_src.bind_hdr_name, name, end - name);
newsrv->conn_src.bind_hdr_name[end - name] = '\0';
newsrv->conn_src.bind_hdr_occ = -1;
/* now look for an occurrence number */
while (isspace((unsigned char)*end))
end++;
if (*end == ',') {
end++;
name = end;
if (*end == '-')
end++;
while (isdigit((unsigned char)*end))
end++;
newsrv->conn_src.bind_hdr_occ = strl2ic(name, end - name);
}
if (newsrv->conn_src.bind_hdr_occ < -MAX_HDR_HISTORY) {
ha_alert("usesrc hdr_ip(name,num) does not support negative"
" occurrences values smaller than %d.\n", MAX_HDR_HISTORY);
goto err;
}
}
else {
struct sockaddr_storage *sk;
int port1, port2;
/* 'sk' is statically allocated (no need to be freed). */
sk = str2sa_range(args[*cur_arg + 1], NULL, &port1, &port2, NULL, NULL, NULL,
&errmsg, NULL, NULL, NULL,
PA_O_RESOLVE | PA_O_PORT_OK | PA_O_STREAM | PA_O_CONNECT);
if (!sk) {
ha_alert("'%s %s' : %s\n", args[*cur_arg], args[*cur_arg + 1], errmsg);
goto err;
}
newsrv->conn_src.tproxy_addr = *sk;
newsrv->conn_src.opts |= CO_SRC_TPROXY_ADDR;
}
global.last_checks |= LSTCHK_NETADM;
*cur_arg += 2;
continue;
#else /* no TPROXY support */
ha_alert("'usesrc' not allowed here because support for TPROXY was not compiled in.\n");
goto err;
#endif /* defined(CONFIG_HAP_TRANSPARENT) */
} /* "usesrc" */
CLEANUP: Compare the return value of `XXXcmp()` functions with zero According to coding-style.txt it is recommended to use: `strcmp(a, b) == 0` instead of `!strcmp(a, b)` So let's do this. The change was performed by running the following (very long) coccinelle patch on src/: @@ statement S; expression E; expression F; @@ if ( ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 ) ( S | { ... } ) @@ statement S; expression E; expression F; @@ if ( - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 ) ( S | { ... } ) @@ expression E; expression F; expression G; @@ ( G && ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 ) @@ expression E; expression F; expression G; @@ ( G || ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 ) @@ expression E; expression F; expression G; @@ ( ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 && G ) @@ expression E; expression F; expression G; @@ ( ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 || G ) @@ expression E; expression F; expression G; @@ ( G && - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 ) @@ expression E; expression F; expression G; @@ ( G || - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 ) @@ expression E; expression F; expression G; @@ ( - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 && G ) @@ expression E; expression F; expression G; @@ ( - ! 
( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 || G ) @@ expression E; expression F; expression G; @@ ( - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 )
2021-01-02 16:31:53 -05:00
if (strcmp(args[*cur_arg], "interface") == 0) { /* specifically bind to this interface */
#ifdef SO_BINDTODEVICE
if (!*args[*cur_arg + 1]) {
ha_alert("'%s' : missing interface name.\n", args[0]);
goto err;
}
free(newsrv->conn_src.iface_name);
newsrv->conn_src.iface_name = strdup(args[*cur_arg + 1]);
newsrv->conn_src.iface_len = strlen(newsrv->conn_src.iface_name);
global.last_checks |= LSTCHK_NETADM;
#else
ha_alert("'%s' : '%s' option not implemented.\n", args[0], args[*cur_arg]);
goto err;
#endif
*cur_arg += 2;
continue;
}
/* this keyword in not an option of "source" */
break;
} /* while */
return 0;
err:
free(errmsg);
return ERR_ALERT | ERR_FATAL;
}
/* Parse the "stick" server keyword: clears SRV_F_NON_STICK from the flags of
 * <newsrv>. Takes no argument and always returns 0.
 */
static int srv_parse_stick(char **args, int *cur_arg,
                           struct proxy *curproxy, struct server *newsrv, char **err)
{
	newsrv->flags = newsrv->flags & ~SRV_F_NON_STICK;

	return 0;
}
/* Parse the "track" server keyword.
 *
 * The argument is the designation of the server to track, in the form
 * [<proxy>/]<server>. It is only duplicated into <newsrv>->trackit here,
 * replacing any previously stored value. Returns 0 on success, or
 * ERR_ALERT | ERR_FATAL with <err> filled when the argument is missing or when
 * the copy cannot be allocated, in which case the previous value is kept.
 */
static int srv_parse_track(char **args, int *cur_arg,
                           struct proxy *curproxy, struct server *newsrv, char **err)
{
	char *arg;
	char *copy;

	arg = args[*cur_arg + 1];
	if (!*arg) {
		memprintf(err, "'track' expects [<proxy>/]<server> as argument.\n");
		return ERR_ALERT | ERR_FATAL;
	}

	/* duplicate first so that an allocation failure is reported instead of
	 * silently leaving the server without any tracking designation.
	 */
	copy = strdup(arg);
	if (!copy) {
		memprintf(err, "'%s' : out of memory.", args[*cur_arg]);
		return ERR_ALERT | ERR_FATAL;
	}

	free(newsrv->trackit);
	newsrv->trackit = copy;

	return 0;
}
/* Parse the "socks4" server keyword.
 *
 * The argument is the <addr>:<port> of the proxy, resolved through
 * str2sa_range() with a mandatory port. On success the address is copied into
 * <newsrv>->socks4_addr, SRV_F_SOCKS4_PROXY is set and 0 is returned.
 * Otherwise <err> is filled and ERR_ALERT | ERR_FATAL is returned.
 */
static int srv_parse_socks4(char **args, int *cur_arg,
                            struct proxy *curproxy, struct server *newsrv, char **err)
{
	char *errmsg = NULL;
	int port_low, port_high;
	struct sockaddr_storage *sk;

	if (!*args[*cur_arg + 1]) {
		/* nothing was allocated yet, so there is nothing to release */
		memprintf(err, "'%s' expects <addr>:<port> as argument.\n", args[*cur_arg]);
		return ERR_ALERT | ERR_FATAL;
	}

	/* 'sk' is statically allocated (no need to be freed). */
	sk = str2sa_range(args[*cur_arg + 1], NULL, &port_low, &port_high, NULL, NULL, NULL,
	                  &errmsg, NULL, NULL, NULL,
	                  PA_O_RESOLVE | PA_O_PORT_OK | PA_O_PORT_MAND | PA_O_STREAM | PA_O_CONNECT);
	if (!sk) {
		memprintf(err, "'%s %s' : %s\n", args[*cur_arg], args[*cur_arg + 1], errmsg);
		free(errmsg);
		return ERR_ALERT | ERR_FATAL;
	}

	newsrv->socks4_addr = *sk;
	newsrv->flags |= SRV_F_SOCKS4_PROXY;

	return 0;
}
/* Parse the "tfo" server keyword: sets SRV_F_FASTOPEN in the flags of
 * <newsrv>. Takes no argument and always returns 0.
 */
static int srv_parse_tfo(char **args, int *cur_arg, struct proxy *px, struct server *newsrv, char **err)
{
	newsrv->flags = newsrv->flags | SRV_F_FASTOPEN;

	return 0;
}
/* Parse the "usesrc" server keyword when it is met on its own. This handler
 * never accepts it: it fills <err> with a message explaining that the keyword
 * is only allowed after a "source" statement and always returns
 * ERR_ALERT | ERR_FATAL.
 */
static int srv_parse_usesrc(char **args, int *cur_arg, struct proxy *px, struct server *newsrv, char **err)
{
	const char *keyword = "usesrc";
	const char *parent = "source";

	memprintf(err, "'%s' only allowed after a '%s' statement.",
	          keyword, parent);

	return ERR_ALERT | ERR_FATAL;
}
/* Parse the "weight" server keyword.
 *
 * The argument is converted as a base-10 integer and must lie within
 * 0..SRV_UWGHT_MAX; it is then stored as both the user and the initial weight
 * of <newsrv>. Returns 0 on success, or ERR_ALERT | ERR_FATAL with <err>
 * filled when the value is out of range.
 *
 * As before, the conversion is lenient: leading digits are used and a word
 * holding no digit at all yields 0.
 */
static int srv_parse_weight(char **args, int *cur_arg, struct proxy *px, struct server *newsrv, char **err)
{
	long w;

	/* The value is kept in a long until it has been range-checked:
	 * narrowing it to an int first would let huge values such as
	 * 4294967297 wrap into the valid range. strtol() is also well defined
	 * on overflow (it saturates), contrary to atol().
	 */
	w = strtol(args[*cur_arg + 1], NULL, 10);
	if (w < 0 || w > SRV_UWGHT_MAX) {
		memprintf(err, "weight of server %s is not within 0 and %d (%ld).",
		          newsrv->id, SRV_UWGHT_MAX, w);
		return ERR_ALERT | ERR_FATAL;
	}
	newsrv->uweight = newsrv->iweight = w;

	return 0;
}
/* Parse the server keyword handled by this function (presumably
 * "strict-maxconn", going by its name): sets SRV_F_STRICT_MAXCONN in the
 * flags of <newsrv>. Takes no argument and always returns 0.
 */
static int srv_parse_strict_maxconn(char **args, int *cur_arg, struct proxy *px, struct server *newsrv, char **err)
{
	newsrv->flags = newsrv->flags | SRV_F_STRICT_MAXCONN;

	return 0;
}
/* Tells whether server <srv> currently has streams pointing to it, based on an
 * atomic load of its <served> counter. Returns 1 when the counter is non-zero
 * and 0 otherwise.
 */
static int srv_has_streams(struct server *srv)
{
	return _HA_ATOMIC_LOAD(&srv->served) != 0;
}
/* Shutdown all connections of a server. The caller must pass a termination
* code in <why>, which must be one of SF_ERR_* indicating the reason for the
* shutdown.
*
* Must be called with the server lock held.
*/
REORG/MAJOR: session: rename the "session" entity to "stream" With HTTP/2, we'll have to support multiplexed streams. A stream is in fact the largest part of what we currently call a session, it has buffers, logs, etc. In order to catch any error, this commit removes any reference to the struct session and tries to rename most "session" occurrences in function names to "stream" and "sess" to "strm" when that's related to a session. The files stream.{c,h} were added and session.{c,h} removed. The session will be reintroduced later and a few parts of the stream will progressively be moved overthere. It will more or less contain only what we need in an embryonic session. Sample fetch functions and converters will have to change a bit so that they'll use an L5 (session) instead of what's currently called "L4" which is in fact L6 for now. Once all changes are completed, we should see approximately this : L7 - http_txn L6 - stream L5 - session L4 - connection | applet There will be at most one http_txn per stream, and a same session will possibly be referenced by multiple streams. A connection will point to a session and to a stream. The session will hold all the information we need to keep even when we don't yet have a stream. Some more cleanup is needed because some code was already far from being clean. The server queue management still refers to sessions at many places while comments talk about connections. This will have to be cleaned up once we have a server-side connection pool manager. Stream flags "SN_*" still need to be renamed, it doesn't seem like any of them will need to move to the session.
2015-04-02 18:22:06 -04:00
void srv_shutdown_streams(struct server *srv, int why)
{
struct stream *stream;
MAJOR: import: update mt_list to support exponential back-off (try #2) This is the second attempt at importing the updated mt_list code (commit 59459ea3). The previous one was attempted with commit c618ed5ff4 ("MAJOR: import: update mt_list to support exponential back-off") but revealed problems with QUIC connections and was reverted. The problem that was faced was that elements deleted inside an iterator were no longer reset, and that if they were to be recycled in this form, they could appear as busy to the next user. This was trivially reproduced with this: $ cat quic-repro.cfg global stats socket /tmp/sock1 level admin stats timeout 1h limited-quic frontend stats mode http bind quic4@:8443 ssl crt rsa+dh2048.pem alpn h3 timeout client 5s stats uri / $ ./haproxy -db -f quic-repro.cfg & $ h2load -c 10 -n 100000 --npn h3 https://127.0.0.1:8443/ => hang This was purely an API issue caused by the simplified usage of the macros for the iterator. The original version had two backups (one full element and one pointer) that the user had to take care of, while the new one only uses one that is transparent for the user. But during removal, the element still has to be unlocked if it's going to be reused. All of this sparked discussions with Fred and Aurlien regarding the still unclear state of locking. It was found that the lock API does too much at once and is lacking granularity. The new version offers a much more fine- grained control allowing to selectively lock/unlock an element, a link, the rest of the list etc. It was also found that plenty of places just want to free the current element, or delete it to do anything with it, hence don't need to reset its pointers (e.g. event_hdl). Finally it appeared obvious that the root cause of the problem was the unclear usage of the list iterators themselves because one does not necessarily expect the element to be presented locked when not needed, which makes the unlock easy to overlook during reviews. 
The updated version of the list presents explicit lock status in the macro name (_LOCKED or _UNLOCKED suffixes). When using the _LOCKED suffix, the caller is expected to unlock the element if it intends to reuse it. At least the status is advertised. The _UNLOCKED variant, instead, always unlocks it before starting the loop block. This means it's not necessary to think about unlocking it, though it's obviously not usable with everything. A few _UNLOCKED were used at obvious places (i.e. where the element is deleted and freed without any prior check). Interestingly, the tests performed last year on QUIC forwarding, that resulted in limited traffic for the original version and higher bit rate for the new one couldn't be reproduced because since then the QUIC stack has gaind in efficiency, and the 100 Gbps barrier is now reached with or without the mt_list update. However the unit tests definitely show a huge difference, particularly on EPYC platforms where the EBO provides tremendous CPU savings. Overall, the following changes are visible from the application code: - mt_list_for_each_entry_safe() + 1 back elem + 1 back ptr => MT_LIST_FOR_EACH_ENTRY_LOCKED() or MT_LIST_FOR_EACH_ENTRY_UNLOCKED() + 1 back elem - MT_LIST_DELETE_SAFE() no longer needed in MT_LIST_FOR_EACH_ENTRY_UNLOCKED() => just manually set iterator to NULL however. For MT_LIST_FOR_EACH_ENTRY_LOCKED() => mt_list_unlock_self() (if element going to be reused) + NULL - MT_LIST_LOCK_ELT => mt_list_lock_full() - MT_LIST_UNLOCK_ELT => mt_list_unlock_full() - l = MT_LIST_APPEND_LOCKED(h, e); MT_LIST_UNLOCK_ELT(); => l=mt_list_lock_prev(h); mt_list_lock_elem(e); mt_list_unlock_full(e, l)
2024-05-30 05:27:32 -04:00
struct mt_list back;
int thr;
for (thr = 0; thr < global.nbthread; thr++)
MAJOR: import: update mt_list to support exponential back-off (try #2) This is the second attempt at importing the updated mt_list code (commit 59459ea3). The previous one was attempted with commit c618ed5ff4 ("MAJOR: import: update mt_list to support exponential back-off") but revealed problems with QUIC connections and was reverted. The problem that was faced was that elements deleted inside an iterator were no longer reset, and that if they were to be recycled in this form, they could appear as busy to the next user. This was trivially reproduced with this: $ cat quic-repro.cfg global stats socket /tmp/sock1 level admin stats timeout 1h limited-quic frontend stats mode http bind quic4@:8443 ssl crt rsa+dh2048.pem alpn h3 timeout client 5s stats uri / $ ./haproxy -db -f quic-repro.cfg & $ h2load -c 10 -n 100000 --npn h3 https://127.0.0.1:8443/ => hang This was purely an API issue caused by the simplified usage of the macros for the iterator. The original version had two backups (one full element and one pointer) that the user had to take care of, while the new one only uses one that is transparent for the user. But during removal, the element still has to be unlocked if it's going to be reused. All of this sparked discussions with Fred and Aurlien regarding the still unclear state of locking. It was found that the lock API does too much at once and is lacking granularity. The new version offers a much more fine- grained control allowing to selectively lock/unlock an element, a link, the rest of the list etc. It was also found that plenty of places just want to free the current element, or delete it to do anything with it, hence don't need to reset its pointers (e.g. event_hdl). Finally it appeared obvious that the root cause of the problem was the unclear usage of the list iterators themselves because one does not necessarily expect the element to be presented locked when not needed, which makes the unlock easy to overlook during reviews. 
The updated version of the list presents explicit lock status in the macro name (_LOCKED or _UNLOCKED suffixes). When using the _LOCKED suffix, the caller is expected to unlock the element if it intends to reuse it. At least the status is advertised. The _UNLOCKED variant, instead, always unlocks it before starting the loop block. This means it's not necessary to think about unlocking it, though it's obviously not usable with everything. A few _UNLOCKED were used at obvious places (i.e. where the element is deleted and freed without any prior check). Interestingly, the tests performed last year on QUIC forwarding, that resulted in limited traffic for the original version and higher bit rate for the new one couldn't be reproduced because since then the QUIC stack has gaind in efficiency, and the 100 Gbps barrier is now reached with or without the mt_list update. However the unit tests definitely show a huge difference, particularly on EPYC platforms where the EBO provides tremendous CPU savings. Overall, the following changes are visible from the application code: - mt_list_for_each_entry_safe() + 1 back elem + 1 back ptr => MT_LIST_FOR_EACH_ENTRY_LOCKED() or MT_LIST_FOR_EACH_ENTRY_UNLOCKED() + 1 back elem - MT_LIST_DELETE_SAFE() no longer needed in MT_LIST_FOR_EACH_ENTRY_UNLOCKED() => just manually set iterator to NULL however. For MT_LIST_FOR_EACH_ENTRY_LOCKED() => mt_list_unlock_self() (if element going to be reused) + NULL - MT_LIST_LOCK_ELT => mt_list_lock_full() - MT_LIST_UNLOCK_ELT => mt_list_unlock_full() - l = MT_LIST_APPEND_LOCKED(h, e); MT_LIST_UNLOCK_ELT(); => l=mt_list_lock_prev(h); mt_list_lock_elem(e); mt_list_unlock_full(e, l)
2024-05-30 05:27:32 -04:00
MT_LIST_FOR_EACH_ENTRY_LOCKED(stream, &srv->per_thr[thr].streams, by_srv, back)
if (stream->srv_conn == srv)
stream_shutdown(stream, why);
/* also kill the possibly pending streams in the queue */
pendconn_redistribute(srv);
}
/* Shutdown all connections of all backup servers of a proxy. The caller must
* pass a termination code in <why>, which must be one of SF_ERR_* indicating
* the reason for the shutdown.
*
* Must be called with the server lock held.
*/
REORG/MAJOR: session: rename the "session" entity to "stream" With HTTP/2, we'll have to support multiplexed streams. A stream is in fact the largest part of what we currently call a session, it has buffers, logs, etc. In order to catch any error, this commit removes any reference to the struct session and tries to rename most "session" occurrences in function names to "stream" and "sess" to "strm" when that's related to a session. The files stream.{c,h} were added and session.{c,h} removed. The session will be reintroduced later and a few parts of the stream will progressively be moved overthere. It will more or less contain only what we need in an embryonic session. Sample fetch functions and converters will have to change a bit so that they'll use an L5 (session) instead of what's currently called "L4" which is in fact L6 for now. Once all changes are completed, we should see approximately this : L7 - http_txn L6 - stream L5 - session L4 - connection | applet There will be at most one http_txn per stream, and a same session will possibly be referenced by multiple streams. A connection will point to a session and to a stream. The session will hold all the information we need to keep even when we don't yet have a stream. Some more cleanup is needed because some code was already far from being clean. The server queue management still refers to sessions at many places while comments talk about connections. This will have to be cleaned up once we have a server-side connection pool manager. Stream flags "SN_*" still need to be renamed, it doesn't seem like any of them will need to move to the session.
2015-04-02 18:22:06 -04:00
void srv_shutdown_backup_streams(struct proxy *px, int why)
{
struct server *srv;
for (srv = px->srv; srv != NULL; srv = srv->next)
if (srv->flags & SRV_F_BACKUP)
REORG/MAJOR: session: rename the "session" entity to "stream" With HTTP/2, we'll have to support multiplexed streams. A stream is in fact the largest part of what we currently call a session, it has buffers, logs, etc. In order to catch any error, this commit removes any reference to the struct session and tries to rename most "session" occurrences in function names to "stream" and "sess" to "strm" when that's related to a session. The files stream.{c,h} were added and session.{c,h} removed. The session will be reintroduced later and a few parts of the stream will progressively be moved overthere. It will more or less contain only what we need in an embryonic session. Sample fetch functions and converters will have to change a bit so that they'll use an L5 (session) instead of what's currently called "L4" which is in fact L6 for now. Once all changes are completed, we should see approximately this : L7 - http_txn L6 - stream L5 - session L4 - connection | applet There will be at most one http_txn per stream, and a same session will possibly be referenced by multiple streams. A connection will point to a session and to a stream. The session will hold all the information we need to keep even when we don't yet have a stream. Some more cleanup is needed because some code was already far from being clean. The server queue management still refers to sessions at many places while comments talk about connections. This will have to be cleaned up once we have a server-side connection pool manager. Stream flags "SN_*" still need to be renamed, it doesn't seem like any of them will need to move to the session.
2015-04-02 18:22:06 -04:00
srv_shutdown_streams(srv, why);
}
/* Appends to <msg> the details matching the operational state change <cause>
 * of server <s>: nothing for SRV_OP_STCHGC_NONE, the output of
 * check_append_info() on the server's health check or agent check for
 * SRV_OP_STCHGC_HEALTH and SRV_OP_STCHGC_AGENT respectively, and for any
 * other cause the string returned by srv_op_st_chg_cause(), preceded by a
 * comma and a space.
 */
static void srv_append_op_chg_cause(struct buffer *msg, struct server *s, enum srv_op_st_chg_cause cause)
{
	if (cause == SRV_OP_STCHGC_NONE)
		return; /* do nothing */

	if (cause == SRV_OP_STCHGC_HEALTH)
		check_append_info(msg, &s->check);
	else if (cause == SRV_OP_STCHGC_AGENT)
		check_append_info(msg, &s->agent);
	else
		chunk_appendf(msg, ", %s", srv_op_st_chg_cause(cause));
}
/* Appends " (<reason>)" to <msg> when administrative state change <cause> is
 * non-zero, <reason> being the string returned by srv_adm_st_chg_cause().
 * Nothing is appended otherwise. <s> is not used by this function.
 */
static void srv_append_adm_chg_cause(struct buffer *msg, struct server *s, enum srv_adm_st_chg_cause cause)
{
	if (!cause)
		return;

	chunk_appendf(msg, " (%s)", srv_adm_st_chg_cause(cause));
}
/* Appends to message <msg> extra details about server <s>: the server it
 * tracks and/or statistics about requeued connections.
 *
 * If <forced> is null and the server tracks another one, a " via
 * <proxy>/<server>" suffix naming the tracked server is appended.
 * If <xferred> is non-negative, the proxy's active and backup server counts
 * are appended together with the requeued and queued session counters; the
 * wording depends on whether the server's next state is SRV_ST_STOPPED.
 *
 * Must be called with the server lock held.
 */
static void srv_append_more(struct buffer *msg, struct server *s,
                            int xferred, int forced)
{
	const char *on_backup;

	if (s->track && !forced)
		chunk_appendf(msg, " via %s/%s", s->track->proxy->id, s->track->id);

	/* a negative <xferred> means there are no queue statistics to report */
	if (xferred < 0)
		return;

	/* only mention backups when they are the sole servers left */
	on_backup = (s->proxy->srv_bck && !s->proxy->srv_act) ? " Running on backup." : "";

	if (s->next_state == SRV_ST_STOPPED) {
		chunk_appendf(msg, ". %d active and %d backup servers left.%s"
		              " %d sessions active, %d requeued, %d remaining in queue",
		              s->proxy->srv_act, s->proxy->srv_bck,
		              on_backup,
		              s->cur_sess, xferred, s->queueslength);
		return;
	}

	chunk_appendf(msg, ". %d active and %d backup servers online.%s"
	              " %d sessions requeued, %d total in queue",
	              s->proxy->srv_act, s->proxy->srv_bck,
	              on_backup,
	              xferred, s->queueslength);
}
/* Forces server <s> into the SRV_ST_STOPPED state whatever its checks report,
 * then applies the same change to every server tracking it. <cause> is passed
 * to srv_update_status() for <s>; trackers are updated with
 * SRV_OP_STCHGC_NONE. Nothing is done when <s> is in maintenance or is
 * already going to the stopped state.
 *
 * Must be called with the server lock held. Each tracker's lock is taken
 * around its own update.
 */
void srv_set_stopped(struct server *s, enum srv_op_st_chg_cause cause)
{
	struct server *tracker;

	if (s->cur_admin & SRV_ADMF_MAINT)
		return;

	if (s->next_state == SRV_ST_STOPPED)
		return;

	s->next_state = SRV_ST_STOPPED;

	/* commit the new state on this server first */
	srv_update_status(s, 0, cause);

	/* then walk down the chain of servers tracking this one */
	tracker = s->trackers;
	while (tracker) {
		HA_SPIN_LOCK(SERVER_LOCK, &tracker->lock);
		srv_set_stopped(tracker, SRV_OP_STCHGC_NONE);
		HA_SPIN_UNLOCK(SERVER_LOCK, &tracker->lock);
		tracker = tracker->tracknext;
	}
}
/* Brings server <s> up whatever its checks report, then applies the same
 * change to every server tracking it. The next state is SRV_ST_STARTING when
 * a positive slowstart is configured and SRV_ST_RUNNING otherwise. <cause> is
 * passed to srv_update_status() for <s>; trackers are updated with
 * SRV_OP_STCHGC_NONE. Nothing is done when <s> is in maintenance or is
 * already starting or running.
 *
 * Must be called with the server lock held. Each tracker's lock is taken
 * around its own update.
 */
void srv_set_running(struct server *s, enum srv_op_st_chg_cause cause)
{
	struct server *tracker;

	if (s->cur_admin & SRV_ADMF_MAINT)
		return;

	if (s->next_state == SRV_ST_RUNNING || s->next_state == SRV_ST_STARTING)
		return;

	/* go through the warm-up state only when a slowstart delay is set */
	s->next_state = (s->slowstart > 0) ? SRV_ST_STARTING : SRV_ST_RUNNING;

	/* commit the new state on this server first */
	srv_update_status(s, 0, cause);

	/* then walk down the chain of servers tracking this one */
	tracker = s->trackers;
	while (tracker) {
		HA_SPIN_LOCK(SERVER_LOCK, &tracker->lock);
		srv_set_running(tracker, SRV_OP_STCHGC_NONE);
		HA_SPIN_UNLOCK(SERVER_LOCK, &tracker->lock);
		tracker = tracker->tracknext;
	}
}
/* Forces server <s> into the SRV_ST_STOPPING state whatever its checks
 * report, then applies the same change to every server tracking it. <cause>
 * is passed to srv_update_status() for <s>; trackers are updated with
 * SRV_OP_STCHGC_NONE. Nothing is done when <s> is in maintenance or is
 * already going to the stopping state.
 *
 * Must be called with the server lock held. Each tracker's lock is taken
 * around its own update.
 */
void srv_set_stopping(struct server *s, enum srv_op_st_chg_cause cause)
{
	struct server *tracker;

	if (s->cur_admin & SRV_ADMF_MAINT)
		return;

	if (s->next_state == SRV_ST_STOPPING)
		return;

	s->next_state = SRV_ST_STOPPING;

	/* commit the new state on this server first */
	srv_update_status(s, 0, cause);

	/* then walk down the chain of servers tracking this one */
	tracker = s->trackers;
	while (tracker) {
		HA_SPIN_LOCK(SERVER_LOCK, &tracker->lock);
		srv_set_stopping(tracker, SRV_OP_STCHGC_NONE);
		HA_SPIN_UNLOCK(SERVER_LOCK, &tracker->lock);
		tracker = tracker->tracknext;
	}
}
/* Sets admin flag <mode> (one of SRV_ADMF_*) on server <s> in order to
 * enforce maintenance or drain mode. Only one flag may be passed at a time.
 * Nothing is done when no flag is passed or when the flag is already set.
 * Otherwise the flag is recorded in <s>->next_admin, srv_update_status() is
 * called with <cause>, and the equivalent "inherited" flag (SRV_ADMF_IMAINT
 * or SRV_ADMF_IDRAIN) is set on every server tracking <s>, unless <s> already
 * carried another flag of the same family. <cause> is forwarded unchanged to
 * the trackers.
 *
 * Must be called with the server lock held. Each tracker's lock is taken
 * around its own update.
 */
void srv_set_admin_flag(struct server *s, enum srv_admin mode, enum srv_adm_st_chg_cause cause)
{
	struct server *tracker;

	/* no flag requested, or this server already has it: the walk stops here */
	if (!mode || (s->next_admin & mode))
		return;

	s->next_admin |= mode;

	/* commit the new admin state on this server first */
	srv_update_status(s, 1, cause);

	/* if another maint flag (forced or inherited) was already present,
	 * the trackers already inherited it and must not be visited again.
	 */
	if ((mode & SRV_ADMF_MAINT) && (s->next_admin & ~mode & SRV_ADMF_MAINT))
		return;

	/* same for drain */
	if ((mode & SRV_ADMF_DRAIN) && (s->next_admin & ~mode & SRV_ADMF_DRAIN))
		return;

	/* trackers receive the inherited variant of the flag */
	if (mode & SRV_ADMF_MAINT)
		mode = SRV_ADMF_IMAINT;
	else if (mode & SRV_ADMF_DRAIN)
		mode = SRV_ADMF_IDRAIN;

	tracker = s->trackers;
	while (tracker) {
		HA_SPIN_LOCK(SERVER_LOCK, &tracker->lock);
		srv_set_admin_flag(tracker, mode, cause);
		HA_SPIN_UNLOCK(SERVER_LOCK, &tracker->lock);
		tracker = tracker->tracknext;
	}
}
/* Clears admin flag <mode> (one of SRV_ADMF_*) on server <s> in order to stop
 * enforcing maintenance or drain mode. Only one flag may be passed at a time.
 * Nothing is done when no flag is passed or when the flag is already cleared.
 * Otherwise the flag is removed from <s>->next_admin, srv_update_status() is
 * called with SRV_ADM_STCHGC_NONE, and the equivalent "inherited" flag
 * (SRV_ADMF_IMAINT or SRV_ADMF_IDRAIN) is cleared on every server tracking
 * <s>, unless <s> still carries another flag of the same family.
 *
 * Must be called with the server lock held. Each tracker's lock is taken
 * around its own update.
 */
void srv_clr_admin_flag(struct server *s, enum srv_admin mode)
{
	struct server *tracker;

	/* no flag requested, or this server no longer has it: the walk stops here */
	if (!mode || !(s->next_admin & mode))
		return;

	s->next_admin &= ~mode;

	/* commit the new admin state on this server first */
	srv_update_status(s, 1, SRV_ADM_STCHGC_NONE);

	/* if a maint flag (forced or inherited) is still present, the
	 * trackers must keep their inherited flag.
	 */
	if ((mode & SRV_ADMF_MAINT) && (s->next_admin & SRV_ADMF_MAINT))
		return;

	/* same for drain */
	if ((mode & SRV_ADMF_DRAIN) && (s->next_admin & SRV_ADMF_DRAIN))
		return;

	/* trackers lose the inherited variant of the flag */
	if (mode & SRV_ADMF_MAINT)
		mode = SRV_ADMF_IMAINT;
	else if (mode & SRV_ADMF_DRAIN)
		mode = SRV_ADMF_IDRAIN;

	tracker = s->trackers;
	while (tracker) {
		HA_SPIN_LOCK(SERVER_LOCK, &tracker->lock);
		srv_clr_admin_flag(tracker, mode);
		HA_SPIN_UNLOCK(SERVER_LOCK, &tracker->lock);
		tracker = tracker->tracknext;
	}
}
BUG/MEDIUM: servers: properly propagate the maintenance states during startup Right now there is an issue with the way the maintenance flags are propagated upon startup. They are not propagate, just copied from the tracked server. This implies that depending on the server's order, some tracking servers may not be marked down. For example this configuration does not work as expected : server s1 1.1.1.1:8000 track s2 server s2 1.1.1.1:8000 track s3 server s3 1.1.1.1:8000 track s4 server s4 wtap:8000 check inter 1s disabled It results in s1/s2 being up, and s3/s4 being down, while all of them should be down. The only clean way to process this is to run through all "root" servers (those not tracking any other server), and to propagate their state down to all their trackers. This is the same algorithm used to propagate the state changes. It has to be done both to compute the IDRAIN flag and the IMAINT flag. However, doing so requires that tracking servers are not marked as inherited maintenance anymore while parsing the configuration (and given that it is wrong, better drop it). This fix also addresses another side effect of the bug above which is that the IDRAIN/IMAINT flags are stored in the state files, and if restored while the tracked server doesn't have the equivalent flag, the servers may end up in a situation where it's impossible to remove these flags. For example in the configuration above, after removing "disabled" on server s4, the other servers would have remained down, and not anymore with this fix. Similarly, the combination of IMAINT or IDRAIN with their respective forced modes was not accepted on reload, which is wrong as well. This bug has been present at least since 1.5, maybe even 1.4 (it came with tracking support). The fix needs to be backported there, though the srv-state parts are irrelevant. This commit relies on previous patch to silence warnings on startup.
2016-11-03 14:22:19 -04:00
/* principle: propagate maint and drain to tracking servers. This is useful
* upon startup so that inherited states are correct.
*/
static void srv_propagate_admin_state(struct server *srv)
{
struct server *srv2;
if (!srv->trackers)
return;
for (srv2 = srv->trackers; srv2; srv2 = srv2->tracknext) {
HA_SPIN_LOCK(SERVER_LOCK, &srv2->lock);
if (srv->next_admin & (SRV_ADMF_MAINT | SRV_ADMF_CMAINT))
srv_set_admin_flag(srv2, SRV_ADMF_IMAINT, SRV_ADM_STCHGC_NONE);
BUG/MEDIUM: servers: properly propagate the maintenance states during startup Right now there is an issue with the way the maintenance flags are propagated upon startup. They are not propagate, just copied from the tracked server. This implies that depending on the server's order, some tracking servers may not be marked down. For example this configuration does not work as expected : server s1 1.1.1.1:8000 track s2 server s2 1.1.1.1:8000 track s3 server s3 1.1.1.1:8000 track s4 server s4 wtap:8000 check inter 1s disabled It results in s1/s2 being up, and s3/s4 being down, while all of them should be down. The only clean way to process this is to run through all "root" servers (those not tracking any other server), and to propagate their state down to all their trackers. This is the same algorithm used to propagate the state changes. It has to be done both to compute the IDRAIN flag and the IMAINT flag. However, doing so requires that tracking servers are not marked as inherited maintenance anymore while parsing the configuration (and given that it is wrong, better drop it). This fix also addresses another side effect of the bug above which is that the IDRAIN/IMAINT flags are stored in the state files, and if restored while the tracked server doesn't have the equivalent flag, the servers may end up in a situation where it's impossible to remove these flags. For example in the configuration above, after removing "disabled" on server s4, the other servers would have remained down, and not anymore with this fix. Similarly, the combination of IMAINT or IDRAIN with their respective forced modes was not accepted on reload, which is wrong as well. This bug has been present at least since 1.5, maybe even 1.4 (it came with tracking support). The fix needs to be backported there, though the srv-state parts are irrelevant. This commit relies on previous patch to silence warnings on startup.
2016-11-03 14:22:19 -04:00
if (srv->next_admin & SRV_ADMF_DRAIN)
srv_set_admin_flag(srv2, SRV_ADMF_IDRAIN, SRV_ADM_STCHGC_NONE);
HA_SPIN_UNLOCK(SERVER_LOCK, &srv2->lock);
BUG/MEDIUM: servers: properly propagate the maintenance states during startup Right now there is an issue with the way the maintenance flags are propagated upon startup. They are not propagate, just copied from the tracked server. This implies that depending on the server's order, some tracking servers may not be marked down. For example this configuration does not work as expected : server s1 1.1.1.1:8000 track s2 server s2 1.1.1.1:8000 track s3 server s3 1.1.1.1:8000 track s4 server s4 wtap:8000 check inter 1s disabled It results in s1/s2 being up, and s3/s4 being down, while all of them should be down. The only clean way to process this is to run through all "root" servers (those not tracking any other server), and to propagate their state down to all their trackers. This is the same algorithm used to propagate the state changes. It has to be done both to compute the IDRAIN flag and the IMAINT flag. However, doing so requires that tracking servers are not marked as inherited maintenance anymore while parsing the configuration (and given that it is wrong, better drop it). This fix also addresses another side effect of the bug above which is that the IDRAIN/IMAINT flags are stored in the state files, and if restored while the tracked server doesn't have the equivalent flag, the servers may end up in a situation where it's impossible to remove these flags. For example in the configuration above, after removing "disabled" on server s4, the other servers would have remained down, and not anymore with this fix. Similarly, the combination of IMAINT or IDRAIN with their respective forced modes was not accepted on reload, which is wrong as well. This bug has been present at least since 1.5, maybe even 1.4 (it came with tracking support). The fix needs to be backported there, though the srv-state parts are irrelevant. This commit relies on previous patch to silence warnings on startup.
2016-11-03 14:22:19 -04:00
}
}
/* Computes and propagates the admin states of all servers of proxy <px>.
 * Only "root" servers, i.e. those which do not track another server, are used
 * as starting points: the servers tracking them are reached from there by
 * srv_propagate_admin_state().
 */
void srv_compute_all_admin_states(struct proxy *px)
{
	struct server *root;

	for (root = px->srv; root; root = root->next) {
		if (!root->track)
			srv_propagate_admin_state(root);
	}
}
/* Note: must not be declared <const> as its list will be overwritten.
*
*** P L E A S E R E A D B E L O W B E F O R E T O U C H I N G !!! ***
*
* Some mistakes are commonly repeated when touching this table, so please
* read the following rules before changing / adding an entry, and better
* ask on the mailing list in case of doubt.
*
* - this list is alphabetically ordered, doing so helps all code contributors
* spot how to name a keyword, which helps users thanks to a form of naming
* consistency. Please insert new entries at the right position so as not
* to break alphabetical ordering. If in doubt, sorting the lines in your
* editor should not change anything (or should fix your addition).
*
* - the fields for each entry in the array are, from left to right:
* - the keyword itself (a string, all characters lower case, no special
* chars, no space/dot/underscore, use-dash-to-delimit-multiple-words)
* - the parsing function (edit or copy one close to your needs, parsers
* can easily support multiple keywords if adapted to check args[0]).
* - the number of arguments the keyword takes. Please do not add new
* keywords taking other than exactly 1 argument, they're hard to adapt
* to for external parsers. The special value -1 indicates a variable
* number, used by "source" only. Never do this.
* - whether or not the keyword is supported on default-server lines
* (0 = not supported, 1 = supported). Please do not add unsupported
* keywords without a prior discussion with maintainers on the list,
* as usually it hides a deeper problem.
* - whether or not the keyword is supported for dynamic servers added at
* run time on the CLI (0 = not supported, 1 = supported). Please do not
* add unsupported keywords without a prior discussion with maintainers
* on the list, as usually it hides a deeper problem.
*
* - please also add a short comment reminding what the keyword does.
*
* - please test your changes with default-server and dynamic servers on the
* CLI (see "add server" in the management guide).
*
*** P L E A S E R E A D A B O V E B E F O R E T O U C H I N G !!! ***
*
* Optional keywords are also declared with a NULL ->parse() function so that
* the config parser can report an appropriate error when a known keyword was
* not enabled.
*/
static struct srv_kw_list srv_kws = { "ALL", { }, {
	/* { "keyword",           parsing_function,               args, def, dyn }, */
	{ "backup",               srv_parse_backup,               0,  1,  1 }, /* Flag as backup server */
	{ "cookie",               srv_parse_cookie,               1,  1,  1 }, /* Assign a cookie to the server */
	{ "disabled",             srv_parse_disabled,             0,  1,  1 }, /* Start the server in 'disabled' state */
	{ "enabled",              srv_parse_enabled,              0,  1,  0 }, /* Start the server in 'enabled' state */
	{ "error-limit",          srv_parse_error_limit,          1,  1,  1 }, /* Configure the consecutive count of check failures to consider a server on error */
	{ "guid",                 srv_parse_guid,                 1,  0,  1 }, /* Set global unique ID of the server */
	{ "hash-key",             srv_parse_hash_key,             1,  1,  1 }, /* Configure how chash keys are computed */
	{ "id",                   srv_parse_id,                   1,  0,  1 }, /* set id# of server */
	{ "idle-ping",            srv_parse_idle_ping,            1,  1,  1 }, /* Activate idle ping if mux support it */
	{ "init-addr",            srv_parse_init_addr,            1,  1,  0 }, /* Set the address resolution methods to use at startup */
	{ "init-state",           srv_parse_init_state,           1,  1,  1 }, /* Set the initial state of the server */
	{ "log-bufsize",          srv_parse_log_bufsize,          1,  1,  0 }, /* Set the ring bufsize for log server (only for log backends) */
	{ "log-proto",            srv_parse_log_proto,            1,  1,  0 }, /* Set the protocol for event messages, only relevant in a log or ring section */
	{ "max-reuse",            srv_parse_max_reuse,            1,  1,  0 }, /* Set the max number of requests on a connection, -1 means unlimited */
	{ "maxconn",              srv_parse_maxconn,              1,  1,  1 }, /* Set the max number of concurrent connection */
	{ "maxqueue",             srv_parse_maxqueue,             1,  1,  1 }, /* Set the max number of connection to put in queue */
	{ "minconn",              srv_parse_minconn,              1,  1,  1 }, /* Enable a dynamic maxconn limit */
	{ "namespace",            srv_parse_namespace,            1,  1,  0 }, /* Namespace the server socket belongs to (if supported) */
	{ "no-backup",            srv_parse_no_backup,            0,  1,  1 }, /* Flag as non-backup server */
	{ "no-send-proxy",        srv_parse_no_send_proxy,        0,  1,  1 }, /* Disable use of PROXY V1 protocol */
	{ "no-send-proxy-v2",     srv_parse_no_send_proxy_v2,     0,  1,  1 }, /* Disable use of PROXY V2 protocol */
	{ "no-tfo",               srv_parse_no_tfo,               0,  1,  1 }, /* Disable use of TCP Fast Open */
	{ "non-stick",            srv_parse_non_stick,            0,  1,  0 }, /* Disable stick-table persistence */
	{ "observe",              srv_parse_observe,              1,  1,  1 }, /* Enables health adjusting based on observing communication with the server */
	{ "on-error",             srv_parse_on_error,             1,  1,  1 }, /* Configure the action on check failure */
	{ "on-marked-down",       srv_parse_on_marked_down,       1,  1,  1 }, /* Configure the action when a server is marked down */
	{ "on-marked-up",         srv_parse_on_marked_up,         1,  1,  1 }, /* Configure the action when a server is marked up */
	{ "pool-conn-name",       srv_parse_pool_conn_name,       1,  1,  1 }, /* Define expression to identify connections in idle pool */
	{ "pool-low-conn",        srv_parse_pool_low_conn,        1,  1,  1 }, /* Set the min number of orphan idle connections before being allowed to pick from other threads */
	{ "pool-max-conn",        srv_parse_pool_max_conn,        1,  1,  1 }, /* Set the max number of orphan idle connections, -1 means unlimited */
	{ "pool-purge-delay",     srv_parse_pool_purge_delay,     1,  1,  1 }, /* Set the time before we destroy orphan idle connections, defaults to 1s */
	{ "proto",                srv_parse_proto,                1,  1,  1 }, /* Set the proto to use for all outgoing connections */
	{ "proxy-v2-options",     srv_parse_proxy_v2_options,     1,  1,  1 }, /* options for send-proxy-v2 */
	{ "redir",                srv_parse_redir,                1,  1,  0 }, /* Enable redirection mode */
	{ "resolve-net",          srv_parse_resolve_net,          1,  1,  0 }, /* Set the preferred network range for name resolution */
	{ "resolve-opts",         srv_parse_resolve_opts,         1,  1,  0 }, /* Set options for name resolution */
	{ "resolve-prefer",       srv_parse_resolve_prefer,       1,  1,  0 }, /* Set the preferred family for name resolution */
	{ "resolvers",            srv_parse_resolvers,            1,  1,  0 }, /* Configure the resolver to use for name resolution */
	{ "send-proxy",           srv_parse_send_proxy,           0,  1,  1 }, /* Enforce use of PROXY V1 protocol */
	{ "send-proxy-v2",        srv_parse_send_proxy_v2,        0,  1,  1 }, /* Enforce use of PROXY V2 protocol */
	{ "set-proxy-v2-tlv-fmt", srv_parse_set_proxy_v2_tlv_fmt, 0,  1,  1 }, /* Set TLV of PROXY V2 protocol */
	{ "shard",                srv_parse_shard,                1,  1,  1 }, /* Server shard (only in peers protocol context) */
	{ "slowstart",            srv_parse_slowstart,            1,  1,  1 }, /* Set the warm-up timer for a previously failed server */
	{ "socks4",               srv_parse_socks4,               1,  1,  0 }, /* Set the socks4 proxy of the server */
	{ "source",               srv_parse_source,              -1,  1,  1 }, /* Set the source address to be used to connect to the server */
	{ "stick",                srv_parse_stick,                0,  1,  0 }, /* Enable stick-table persistence */
	{ "strict-maxconn",       srv_parse_strict_maxconn,       0,  1,  1 }, /* Strictly enforces maxconn */
	{ "tfo",                  srv_parse_tfo,                  0,  1,  1 }, /* enable TCP Fast Open of server */
	{ "track",                srv_parse_track,                1,  1,  1 }, /* Set the current state of the server, tracking another one */
	{ "usesrc",               srv_parse_usesrc,               0,  1,  1 }, /* safe-guard against usesrc without preceding <source> keyword */
	{ "weight",               srv_parse_weight,               1,  1,  1 }, /* Set the load-balancing weight */
	{ "ws",                   srv_parse_ws,                   1,  1,  1 }, /* websocket protocol */
	{ NULL, NULL, 0 },
}};

INITCALL1(STG_REGISTER, srv_register_keywords, &srv_kws);
BUG/MAJOR: server: weight calculation fails for map-based algorithms A crash was reported by Igor at owind when changing a server's weight on the CLI. Lukas Tribus could reproduce a related bug where setting a server's weight would result in the new weight being multiplied by the initial one. The two bugs are the same. The incorrect weight calculation results in the total farm weight being larger than what was initially allocated, causing the map index to be out of bounds on some hashes. It's easy to reproduce using "balance url_param" with a variable param, or with "balance static-rr". It appears that the calculation is made at many places and is not always right and not always wrong the same way. Thus, this patch introduces a new function "server_recalc_eweight()" which is dedicated to this task of computing ->eweight from many other elements including uweight and current time (for slowstart), and all users now switch to use this function. The patch is a bit large but the code was not trivially fixable in a way that could guarantee this situation would not occur anymore. The fix is much more readable and has been verified to work with all algorithms, with both consistent and map-based hashes, and even with static-rr. Slowstart was tested as well, just like enable/disable server. The same bug is very likely present in 1.4 as well, so the patch will probably need to be backported eventhough it will not apply as-is. Thanks to Lukas and Igor for the information they provided to reproduce it.
2013-11-21 05:22:01 -05:00
/* Recomputes the server's eweight based on its state, uweight, the current time,
* and the proxy's algorithm. To be used after updating sv->uweight. The warmup
* state is automatically disabled if the time is elapsed. If <must_update> is
* not zero, the update will be propagated immediately.
*
* Must be called with the server lock held.
BUG/MAJOR: server: weight calculation fails for map-based algorithms A crash was reported by Igor at owind when changing a server's weight on the CLI. Lukas Tribus could reproduce a related bug where setting a server's weight would result in the new weight being multiplied by the initial one. The two bugs are the same. The incorrect weight calculation results in the total farm weight being larger than what was initially allocated, causing the map index to be out of bounds on some hashes. It's easy to reproduce using "balance url_param" with a variable param, or with "balance static-rr". It appears that the calculation is made at many places and is not always right and not always wrong the same way. Thus, this patch introduces a new function "server_recalc_eweight()" which is dedicated to this task of computing ->eweight from many other elements including uweight and current time (for slowstart), and all users now switch to use this function. The patch is a bit large but the code was not trivially fixable in a way that could guarantee this situation would not occur anymore. The fix is much more readable and has been verified to work with all algorithms, with both consistent and map-based hashes, and even with static-rr. Slowstart was tested as well, just like enable/disable server. The same bug is very likely present in 1.4 as well, so the patch will probably need to be backported eventhough it will not apply as-is. Thanks to Lukas and Igor for the information they provided to reproduce it.
2013-11-21 05:22:01 -05:00
*/
void server_recalc_eweight(struct server *sv, int must_update)
BUG/MAJOR: server: weight calculation fails for map-based algorithms A crash was reported by Igor at owind when changing a server's weight on the CLI. Lukas Tribus could reproduce a related bug where setting a server's weight would result in the new weight being multiplied by the initial one. The two bugs are the same. The incorrect weight calculation results in the total farm weight being larger than what was initially allocated, causing the map index to be out of bounds on some hashes. It's easy to reproduce using "balance url_param" with a variable param, or with "balance static-rr". It appears that the calculation is made at many places and is not always right and not always wrong the same way. Thus, this patch introduces a new function "server_recalc_eweight()" which is dedicated to this task of computing ->eweight from many other elements including uweight and current time (for slowstart), and all users now switch to use this function. The patch is a bit large but the code was not trivially fixable in a way that could guarantee this situation would not occur anymore. The fix is much more readable and has been verified to work with all algorithms, with both consistent and map-based hashes, and even with static-rr. Slowstart was tested as well, just like enable/disable server. The same bug is very likely present in 1.4 as well, so the patch will probably need to be backported eventhough it will not apply as-is. Thanks to Lukas and Igor for the information they provided to reproduce it.
2013-11-21 05:22:01 -05:00
{
struct proxy *px = sv->proxy;
unsigned w;
if (ns_to_sec(now_ns) < sv->last_change || ns_to_sec(now_ns) >= sv->last_change + sv->slowstart) {
BUG/MINOR: server: fix slowstart behavior We observed that a dynamic server which health check is down for longer than slowstart delay at startup doesn't trigger the warmup phase, it receives full traffic immediately. This has been confirmed by checking haproxy UI, weight is immediately the full one (e.g. 75/75), without any throttle applied. Further tests showed that it was similar if it was in maintenance, and even when entering a down or maintenance state after being up. Another issue is that if the server is down for less time than slowstart, when it comes back up, it briefly has a much higher weight than expected for a slowstart. An easy way to reproduce is to do the following: - Add a server with e.g. a 20s slowstart and a weight of 10 in config file - Put it in maintenance using CLI (set server be1/srv1 state maint) - Wait more than 20s, enable it again (set server be1/srv1 state ready) - Observe UI, weight will show 10/10 immediately. If server was down for less than 20s, you'd briefly see a weight and throttle value that is inconsistent, e.g. 50% throttle value and a weight of 5 if server comes back up after 10s before going back to 6% after a second or two. Code analysis shows that the logic in server_recalc_eweight stops the warmup task by setting server's next state to SRV_ST_RUNNING if it didn't change state for longer than the slowstart duration, regardless of its current state. As a consequence, a server being down or disabled for longer than the slowstart duration will never enter the warmup phase when it will be up again. Regarding the weight when server comes back up, issue is that even if the server is down, we still compute its next weight as if it was up, hence when it comes back up, it can briefly have a much higher weight than expected during slowstart, until the warmup task is called again after last_change is updated. This patch aims to fix both issues.
2024-04-09 11:37:07 -04:00
/* go to full throttle if the slowstart interval is reached unless server is currently down */
if ((sv->cur_state != SRV_ST_STOPPED) && (sv->next_state == SRV_ST_STARTING))
sv->next_state = SRV_ST_RUNNING;
BUG/MAJOR: server: weight calculation fails for map-based algorithms A crash was reported by Igor at owind when changing a server's weight on the CLI. Lukas Tribus could reproduce a related bug where setting a server's weight would result in the new weight being multiplied by the initial one. The two bugs are the same. The incorrect weight calculation results in the total farm weight being larger than what was initially allocated, causing the map index to be out of bounds on some hashes. It's easy to reproduce using "balance url_param" with a variable param, or with "balance static-rr". It appears that the calculation is made at many places and is not always right and not always wrong the same way. Thus, this patch introduces a new function "server_recalc_eweight()" which is dedicated to this task of computing ->eweight from many other elements including uweight and current time (for slowstart), and all users now switch to use this function. The patch is a bit large but the code was not trivially fixable in a way that could guarantee this situation would not occur anymore. The fix is much more readable and has been verified to work with all algorithms, with both consistent and map-based hashes, and even with static-rr. Slowstart was tested as well, just like enable/disable server. The same bug is very likely present in 1.4 as well, so the patch will probably need to be backported eventhough it will not apply as-is. Thanks to Lukas and Igor for the information they provided to reproduce it.
2013-11-21 05:22:01 -05:00
}
/* We must take care of not pushing the server to full throttle during slow starts.
* It must also start immediately, at least at the minimal step when leaving maintenance.
*/
BUG/MINOR: server: fix slowstart behavior We observed that a dynamic server which health check is down for longer than slowstart delay at startup doesn't trigger the warmup phase, it receives full traffic immediately. This has been confirmed by checking haproxy UI, weight is immediately the full one (e.g. 75/75), without any throttle applied. Further tests showed that it was similar if it was in maintenance, and even when entering a down or maintenance state after being up. Another issue is that if the server is down for less time than slowstart, when it comes back up, it briefly has a much higher weight than expected for a slowstart. An easy way to reproduce is to do the following: - Add a server with e.g. a 20s slowstart and a weight of 10 in config file - Put it in maintenance using CLI (set server be1/srv1 state maint) - Wait more than 20s, enable it again (set server be1/srv1 state ready) - Observe UI, weight will show 10/10 immediately. If server was down for less than 20s, you'd briefly see a weight and throttle value that is inconsistent, e.g. 50% throttle value and a weight of 5 if server comes back up after 10s before going back to 6% after a second or two. Code analysis shows that the logic in server_recalc_eweight stops the warmup task by setting server's next state to SRV_ST_RUNNING if it didn't change state for longer than the slowstart duration, regardless of its current state. As a consequence, a server being down or disabled for longer than the slowstart duration will never enter the warmup phase when it will be up again. Regarding the weight when server comes back up, issue is that even if the server is down, we still compute its next weight as if it was up, hence when it comes back up, it can briefly have a much higher weight than expected during slowstart, until the warmup task is called again after last_change is updated. This patch aims to fix both issues.
2024-04-09 11:37:07 -04:00
if ((sv->cur_state == SRV_ST_STOPPED) && (sv->next_state == SRV_ST_STARTING) && (px->lbprm.algo & BE_LB_PROP_DYN))
w = 1;
else if ((sv->next_state == SRV_ST_STARTING) && (px->lbprm.algo & BE_LB_PROP_DYN))
w = (px->lbprm.wdiv * (ns_to_sec(now_ns) - sv->last_change) + sv->slowstart) / sv->slowstart;
BUG/MAJOR: server: weight calculation fails for map-based algorithms A crash was reported by Igor at owind when changing a server's weight on the CLI. Lukas Tribus could reproduce a related bug where setting a server's weight would result in the new weight being multiplied by the initial one. The two bugs are the same. The incorrect weight calculation results in the total farm weight being larger than what was initially allocated, causing the map index to be out of bounds on some hashes. It's easy to reproduce using "balance url_param" with a variable param, or with "balance static-rr". It appears that the calculation is made at many places and is not always right and not always wrong the same way. Thus, this patch introduces a new function "server_recalc_eweight()" which is dedicated to this task of computing ->eweight from many other elements including uweight and current time (for slowstart), and all users now switch to use this function. The patch is a bit large but the code was not trivially fixable in a way that could guarantee this situation would not occur anymore. The fix is much more readable and has been verified to work with all algorithms, with both consistent and map-based hashes, and even with static-rr. Slowstart was tested as well, just like enable/disable server. The same bug is very likely present in 1.4 as well, so the patch will probably need to be backported eventhough it will not apply as-is. Thanks to Lukas and Igor for the information they provided to reproduce it.
2013-11-21 05:22:01 -05:00
else
w = px->lbprm.wdiv;
sv->next_eweight = (sv->uweight * w + px->lbprm.wmult - 1) / px->lbprm.wmult;
BUG/MAJOR: server: weight calculation fails for map-based algorithms A crash was reported by Igor at owind when changing a server's weight on the CLI. Lukas Tribus could reproduce a related bug where setting a server's weight would result in the new weight being multiplied by the initial one. The two bugs are the same. The incorrect weight calculation results in the total farm weight being larger than what was initially allocated, causing the map index to be out of bounds on some hashes. It's easy to reproduce using "balance url_param" with a variable param, or with "balance static-rr". It appears that the calculation is made at many places and is not always right and not always wrong the same way. Thus, this patch introduces a new function "server_recalc_eweight()" which is dedicated to this task of computing ->eweight from many other elements including uweight and current time (for slowstart), and all users now switch to use this function. The patch is a bit large but the code was not trivially fixable in a way that could guarantee this situation would not occur anymore. The fix is much more readable and has been verified to work with all algorithms, with both consistent and map-based hashes, and even with static-rr. Slowstart was tested as well, just like enable/disable server. The same bug is very likely present in 1.4 as well, so the patch will probably need to be backported eventhough it will not apply as-is. Thanks to Lukas and Igor for the information they provided to reproduce it.
2013-11-21 05:22:01 -05:00
/* propagate changes only if needed (i.e. not recursively) */
if (must_update)
srv_update_status(sv, 0, SRV_OP_STCHGC_NONE);
BUG/MAJOR: server: weight calculation fails for map-based algorithms A crash was reported by Igor at owind when changing a server's weight on the CLI. Lukas Tribus could reproduce a related bug where setting a server's weight would result in the new weight being multiplied by the initial one. The two bugs are the same. The incorrect weight calculation results in the total farm weight being larger than what was initially allocated, causing the map index to be out of bounds on some hashes. It's easy to reproduce using "balance url_param" with a variable param, or with "balance static-rr". It appears that the calculation is made at many places and is not always right and not always wrong the same way. Thus, this patch introduces a new function "server_recalc_eweight()" which is dedicated to this task of computing ->eweight from many other elements including uweight and current time (for slowstart), and all users now switch to use this function. The patch is a bit large but the code was not trivially fixable in a way that could guarantee this situation would not occur anymore. The fix is much more readable and has been verified to work with all algorithms, with both consistent and map-based hashes, and even with static-rr. Slowstart was tested as well, just like enable/disable server. The same bug is very likely present in 1.4 as well, so the patch will probably need to be backported eventhough it will not apply as-is. Thanks to Lukas and Igor for the information they provided to reproduce it.
2013-11-21 05:22:01 -05:00
}
/* requeuing tasklet used to asynchronously queue the server into its tree in
* case of extreme contention. It is woken up by the code that failed to grab
* an important lock.
*/
struct task *server_requeue(struct task *t, void *context, unsigned int state)
{
struct server *srv = context;
/* let's call the LB's requeue function. If it fails, it will itself
* wake us up.
*/
if (srv->proxy->lbprm.server_requeue)
srv->proxy->lbprm.server_requeue(srv);
return t;
}
/*
 * Parses <weight_str> and configures <sv> accordingly.
 *
 * Two syntaxes are accepted:
 *   - "<n>%" : relative weight, computed as a percentage of the server's
 *              initial weight (sv->iweight) and capped to 256 ;
 *   - "<n>"  : absolute weight, which must lie between 0 and 256 inclusive.
 *
 * When the backend does not use a dynamic LB algorithm (BE_LB_PROP_DYN not
 * set), only a null weight or the initial weight are accepted.
 *
 * On success, sv->uweight is updated and the effective weight is recomputed
 * via server_recalc_eweight().
 *
 * Returns NULL on success, error message string otherwise.
 *
 * Must be called with the server lock held.
 */
const char *server_parse_weight_change_request(struct server *sv,
					       const char *weight_str)
{
	struct proxy *px;
	long int w;
	char *end;

	px = sv->proxy;

	/* if the weight is terminated with '%', it is set relative to
	 * the initial weight, otherwise it is absolute.
	 */
	if (!*weight_str)
		return "Require <weight> or <weight%>.\n";

	w = strtol(weight_str, &end, 10);
	if (end == weight_str) {
		/* no digit could be consumed at all */
		return "Weight string empty or preceded by garbage\n";
	}
	else if (end[0] == '%' && end[1] == '\0') {
		if (w < 0)
			return "Relative weight must be positive.\n";
		/* Avoid integer overflow in the multiplication below */
		if (w > 25600)
			w = 25600;
		w = sv->iweight * w / 100;
		if (w > 256)
			w = 256;
	}
	else if (w < 0 || w > 256)
		return "Absolute weight can only be between 0 and 256 inclusive.\n";
	else if (end[0] != '\0')
		return "Trailing garbage in weight string\n";

	/* static algorithms can only toggle between 0 and the initial weight */
	if (w && w != sv->iweight && !(px->lbprm.algo & BE_LB_PROP_DYN))
		return "Backend is using a static LB algorithm and only accepts weights '0%' and '100%'.\n";

	sv->uweight = w;
	server_recalc_eweight(sv, 1);

	return NULL;
}
/*
 * Parses <maxconn_str> as a base-10 integer and applies it as the new maxconn
 * of server <sv>. When maxconn and minconn are currently equal (static
 * maxconn), both are updated; otherwise only maxconn is changed. Pending
 * connections are then dequeued if may_dequeue_tasks() allows it.
 *
 * Returns NULL on success, error message string otherwise.
 *
 * NOTE(review): no range check is performed on the parsed value, so negative
 * or very large numbers are stored as-is -- confirm this is intended.
 *
 * Must be called with the server lock held.
 */
const char *server_parse_maxconn_change_request(struct server *sv,
						const char *maxconn_str)
{
	char *stop;
	long int value;

	if (*maxconn_str == '\0')
		return "Require <maxconn>.\n";

	value = strtol(maxconn_str, &stop, 10);
	if (stop == maxconn_str)
		return "maxconn string empty or preceded by garbage\n";
	if (*stop != '\0')
		return "Trailing garbage in maxconn string\n";

	if (sv->maxconn != sv->minconn) {
		/* dynamic maxconn: only the upper bound moves */
		sv->maxconn = value;
	}
	else {
		/* static maxconn: keep both bounds equal */
		sv->maxconn = sv->minconn = value;
	}

	/* the new limit may allow queued requests to be served */
	if (may_dequeue_tasks(sv, sv->proxy))
		process_srv_queue(sv);

	return NULL;
}
/* Parse <expr> as a sample expression in the server context (ARGC_SRV is set
 * on <args_px>). This function is reserved for internal server allocation; on
 * configuration parsing use parse_srv_expr() instead, which additionally
 * checks the fetch scope validity. <file> and <linenum> are passed as-is to
 * the sample expression parser, as well as <err>.
 *
 * Returns the allocated sample expression on success or NULL on error.
 */
struct sample_expr *_parse_srv_expr(char *expr, struct arg_list *args_px,
                                    const char *file, int linenum, char **err)
{
	/* the parser expects a NULL-terminated word list and a start index */
	const char *words[2] = { expr, NULL };
	int pos = 0;

	args_px->ctx = ARGC_SRV;

	return sample_parse_expr((char **)words, &pos, file, linenum, err, args_px, NULL);
}
/* Parse <str>, if not NULL, as a sample expression and store the result into
 * <out>, releasing any expression previously stored there. Contrary to
 * _parse_srv_expr(), the fetch scope is checked to ensure it is usable on a
 * server line context (SMP_VAL_BE_SRV_CON). <px> HTTP mode requirement is
 * also updated depending on the fetch method used. On error, a message is
 * reported into <err>.
 *
 * Returns 0 on success (including when <str> is NULL), else a combination of
 * ERR_ALERT | ERR_FATAL.
 */
static int parse_srv_expr(char *str, struct sample_expr **out, struct proxy *px,
                          char **err)
{
	struct sample_expr *parsed;

	if (str == NULL)
		return 0;

	parsed = _parse_srv_expr(str, &px->conf.args, px->conf.file, px->conf.line, err);
	if (parsed == NULL)
		return ERR_ALERT | ERR_FATAL;

	if ((parsed->fetch->val & SMP_VAL_BE_SRV_CON) == 0) {
		/* NOTE(review): <parsed> is not released on this path --
		 * confirm whether this is intentional.
		 */
		memprintf(err, "fetch method '%s' extracts information from '%s', "
		          "none of which is available here.",
		          str, sample_src_names(parsed->fetch->use));
		return ERR_ALERT | ERR_FATAL;
	}

	/* remember that the proxy needs HTTP processing if the fetch does */
	if (parsed->fetch->use & SMP_USE_HTTP_ANY)
		px->http_needed |= 1;

	/* replace the previous expression by the new one */
	release_sample_expr(*out);
	*out = parsed;

	return 0;
}
/* Report a keyword parsing error for the word at position <cur_arg> in
 * <args>. When <err> holds a message, it is indented and displayed alone;
 * otherwise a generic message quoting the offending word is emitted. The
 * message is sent as a warning when <err_code> has ERR_WARN set without
 * ERR_ALERT, and as an alert in all other cases. <file> and <linenum> are
 * not used by this function.
 */
static void display_parser_err(const char *file, int linenum, char **args, int cur_arg, int err_code, char **err)
{
	char *prefix = "error encountered while processing ";
	char *delim = "'";
	char *word = args[cur_arg];
	int warn_only;

	if (err && *err) {
		/* a detailed message exists: show it without the quoted word */
		indent_msg(err, 2);
		prefix = *err;
		delim = "";
		word = "";
	}

	warn_only = (err_code & ERR_WARN) && !(err_code & ERR_ALERT);

	if (!warn_only)
		ha_alert("%s%s%s%s.\n", prefix, delim, word, delim);
	else
		ha_warning("%s%s%s%s.\n", prefix, delim, word, delim);
}
/* Duplicate the source port range of <src> into <srv>. The caller must ensure
 * that src->conn_src.sport_range is not NULL since it is dereferenced
 * unconditionally. Nothing is copied when the range is empty. If the
 * allocation fails, srv->conn_src.sport_range is left NULL and no error is
 * reported.
 */
static void srv_conn_src_sport_range_cpy(struct server *srv, const struct server *src)
{
	int count = src->conn_src.sport_range->size;
	int idx;

	if (count <= 0)
		return;

	srv->conn_src.sport_range = port_range_alloc_range(count);
	if (srv->conn_src.sport_range == NULL)
		return;

	for (idx = 0; idx < count; idx++)
		srv->conn_src.sport_range->ports[idx] = src->conn_src.sport_range->ports[idx];
}
/*
 * Copy <src> server connection source settings to <srv> server, allocating
 * everything needed (source port range, header name and interface name are
 * duplicated). The results of strdup() are not checked here.
 */
static void srv_conn_src_cpy(struct server *srv, const struct server *src)
{
	/* plain values first */
	srv->conn_src.source_addr = src->conn_src.source_addr;
	srv->conn_src.opts = src->conn_src.opts;

	/* source port range, if any */
	if (src->conn_src.sport_range)
		srv_conn_src_sport_range_cpy(srv, src);

#ifdef CONFIG_HAP_TRANSPARENT
	srv->conn_src.tproxy_addr = src->conn_src.tproxy_addr;
	srv->conn_src.bind_hdr_occ = src->conn_src.bind_hdr_occ;
	if (src->conn_src.bind_hdr_name) {
		srv->conn_src.bind_hdr_len = strlen(src->conn_src.bind_hdr_name);
		srv->conn_src.bind_hdr_name = strdup(src->conn_src.bind_hdr_name);
	}
#endif
	if (src->conn_src.iface_name) {
		srv->conn_src.iface_len = src->conn_src.iface_len;
		srv->conn_src.iface_name = strdup(src->conn_src.iface_name);
	}
}
/*
* Copy <src> server SSL settings to <srv> server allocating
* everything needed.
*/
#if defined(USE_OPENSSL)
static void srv_ssl_settings_cpy(struct server *srv, const struct server *src)
{
	/* the SSL_CTX must never be initialized in a default-server */
	BUG_ON(src->ssl_ctx.ctx != NULL);

	/* <src> is the current proxy's default server and SSL is enabled */
	if (srv->proxy && src == srv->proxy->defsrv && src->use_ssl == 1)
		srv->flags |= SRV_F_DEFSRV_USE_SSL;

	/* scalar settings which are always copied */
	srv->ssl_ctx.verify = src->ssl_ctx.verify;
	srv->ssl_ctx.renegotiate = src->ssl_ctx.renegotiate;

	/* scalar settings which are only copied when set in <src> */
	if (src->ssl_ctx.options)
		srv->ssl_ctx.options = src->ssl_ctx.options;
	if (src->ssl_ctx.methods.flags)
		srv->ssl_ctx.methods.flags = src->ssl_ctx.methods.flags;
	if (src->ssl_ctx.methods.min)
		srv->ssl_ctx.methods.min = src->ssl_ctx.methods.min;
	if (src->ssl_ctx.methods.max)
		srv->ssl_ctx.methods.max = src->ssl_ctx.methods.max;

	/* strings are duplicated so that <srv> owns its own copies; the
	 * results of strdup() are not checked here.
	 */
	if (src->ssl_ctx.ca_file)
		srv->ssl_ctx.ca_file = strdup(src->ssl_ctx.ca_file);
	if (src->ssl_ctx.crl_file)
		srv->ssl_ctx.crl_file = strdup(src->ssl_ctx.crl_file);
	if (src->ssl_ctx.client_crt)
		srv->ssl_ctx.client_crt = strdup(src->ssl_ctx.client_crt);
	if (src->ssl_ctx.verify_host)
		srv->ssl_ctx.verify_host = strdup(src->ssl_ctx.verify_host);
	if (src->ssl_ctx.ciphers)
		srv->ssl_ctx.ciphers = strdup(src->ssl_ctx.ciphers);
	if (src->ssl_ctx.ciphersuites)
		srv->ssl_ctx.ciphersuites = strdup(src->ssl_ctx.ciphersuites);
	if (src->sni_expr)
		srv->sni_expr = strdup(src->sni_expr);

	/* ALPN and NPN strings are copied along with their explicit length;
	 * the length is only set when the allocation succeeded.
	 */
	if (src->ssl_ctx.alpn_str) {
		srv->ssl_ctx.alpn_str = malloc(src->ssl_ctx.alpn_len);
		if (srv->ssl_ctx.alpn_str) {
			srv->ssl_ctx.alpn_len = src->ssl_ctx.alpn_len;
			memcpy(srv->ssl_ctx.alpn_str, src->ssl_ctx.alpn_str,
			       src->ssl_ctx.alpn_len);
		}
	}

	if (src->ssl_ctx.npn_str) {
		srv->ssl_ctx.npn_str = malloc(src->ssl_ctx.npn_len);
		if (srv->ssl_ctx.npn_str) {
			srv->ssl_ctx.npn_len = src->ssl_ctx.npn_len;
			memcpy(srv->ssl_ctx.npn_str, src->ssl_ctx.npn_str,
			       src->ssl_ctx.npn_len);
		}
	}
}
/* Activate ssl on server <s>.
* do nothing if there is no change to apply
*
* Must be called with the server lock held.
*/
int srv_set_ssl(struct server *s, int use_ssl)
{
if (s->use_ssl == use_ssl)
return 0;
s->use_ssl = use_ssl;
if (s->use_ssl) {
if (server_parse_exprs(s, s->proxy, NULL))
return -1;
s->xprt = xprt_get(XPRT_SSL);
}
else {
if (s->sni_expr && s->pool_conn_name && strcmp(s->sni_expr, s->pool_conn_name) == 0) {
release_sample_expr(s->pool_conn_name_expr);
s->pool_conn_name_expr = NULL;
}
BUG/MEDIUM: server: avoid changing healthcheck ctx with set server ssl While giving a fresh try to `set server ssl` (which I wrote), I realised the behavior is a bit inconsistent. Indeed when using this command over a server with ssl enabled for the data path but also for the health check path we have: - data and health check done using tls - emit `set server be_foo/srv0 ssl off` - data path and health check path becomes plain text - emit `set server be_foo/srv0 ssl on` - data path becomes tls and health check path remains plain text while I thought the end result would be: - data path and health check path comes back in tls In the current code we indeed erase all connections while deactivating, but restore only the data path while activating. I made this mistake in the past because I was testing with a case where the health check plain text by default. There are several ways to solve this issue. The cleanest one would probably be to avoid changing the health check connection when we use `set server ssl` command, and create a new command `set server ssl-check` to change this. For now I assumed this would be ok to simply avoid changing the health check path and be more consistent. This patch tries to address that and also update the documentation. It should not break the existing usage with health check on plain text, as in this case they should have `no-check-ssl` in defaults. Without this patch, it makes the command unusable in an env where you have a list of server to add along the way with initial `server-template`, and all using tls for data and healthcheck path. For 2.6 we should probably reconsider and add `set server ssl-check` command for better granularity of cases. If this solution is accepted, this patch should be backported up to >= 2.4. The alternative solution was to restore the previous state, but I believe this will create even more confusion in the future. Signed-off-by: William Dauchy <wdauchy@gmail.com>
2022-01-06 10:57:15 -05:00
s->xprt = xprt_get(XPRT_RAW);
}
/* Check if we must rely on the server XPRT for the health-check */
if (!s->check.port && !is_addr(&s->check.addr) && !s->check.use_ssl)
s->check.xprt = s->xprt;
return 0;
}
#endif /* USE_OPENSSL */
/*
MAJOR/REORG: dns: DNS resolution task and requester queues This patch is a major upgrade of the internal run-time DNS resolver in HAProxy and it brings the following 2 main changes: 1. DNS resolution task Up to now, DNS resolution was triggered by the health check task. From now, DNS resolution task is autonomous. It is started by HAProxy right after the scheduler is available and it is woken either when a network IO occurs for one of its nameserver or when a timeout is matched. From now, this means we can enable DNS resolution for a server without enabling health checking. 2. Introduction of a dns_requester structure Up to now, DNS resolution was purposely made for resolving server hostnames. The idea, is to ensure that any HAProxy internal object should be able to trigger a DNS resolution. For this purpose, 2 things has to be done: - clean up the DNS code from the server structure (this was already quite clean actually) and clean up the server's callbacks from manipulating too much DNS resolution - create an agnostic structure which allows linking a DNS resolution and a requester of any type (using obj_type enum) 3. Manage requesters through queues Up to now, there was an uniq relationship between a resolution and it's owner (aka the requester now). It's a shame, because in some cases, multiple objects may share the same hostname and may benefit from a resolution being performed by a third party. This patch introduces the notion of queues, which are basically lists of either currently running resolution or waiting ones. The resolutions are now available as a pool, which belongs to the resolvers. The pool has has a default size of 64 resolutions per resolvers and is allocated at configuration parsing.
2017-05-22 09:17:15 -04:00
* Prepare <srv> for hostname resolution.
* May be safely called with a NULL <hostname> (e.g. a default server without hostname): nothing is done and 0 is returned.
* Returns -1 in case of any allocation failure, 0 if not.
*/
int srv_prepare_for_resolution(struct server *srv, const char *hostname)
{
MAJOR: dns: Refactor the DNS code This is a huge patch with many changes, all about the DNS. Initially, the idea was to update the DNS part to ease the threads support integration. But quickly, I started to refactor some parts. And after several iterations, it was impossible for me to commit the different parts atomically. So, instead of adding tens of patches, often reworking the same parts, it was easier to merge all my changes in a uniq patch. Here are all changes made on the DNS. First, the DNS initialization has been refactored. The DNS configuration parsing remains untouched, in cfgparse.c. But all checks have been moved in a post-check callback. In the function dns_finalize_config, for each resolvers, the nameservers configuration is tested and the task used to manage DNS resolutions is created. The links between the backend's servers and the resolvers are also created at this step. Here no connection are kept alive. So there is no needs anymore to reopen them after HAProxy fork. Connections used to send DNS queries will be opened on demand. Then, the way DNS requesters are linked to a DNS resolution has been reworked. The resolution used by a requester is now referenced into the dns_requester structure and the resolution pointers in server and dns_srvrq structures have been removed. wait and curr list of requesters, for a DNS resolution, have been replaced by a uniq list. And Finally, the way a requester is removed from a DNS resolution has been simplified. Now everything is done in dns_unlink_resolution. srv_set_fqdn function has been simplified. Now, there is only 1 way to set the server's FQDN, independently it is done by the CLI or when a SRV record is resolved. The static DNS resolutions pool has been replaced by a dynamoc pool. The part has been modified by Baptiste Assmann. The way the DNS resolutions are triggered by the task or by a health-check has been totally refactored. Now, all timeouts are respected. Especially hold.valid. 
The default frequency to wake up a resolvers is now configurable using "timeout resolve" parameter. Now, as documented, as long as invalid repsonses are received, we really wait all name servers responses before retrying. As far as possible, resources allocated during DNS configuration parsing are releases when HAProxy is shutdown. Beside all these changes, the code has been cleaned to ease code review and the doc has been updated.
2017-09-27 05:00:59 -04:00
char *hostname_dn;
int hostname_len, hostname_dn_len;
if (!hostname)
return 0;
MAJOR: dns: Refactor the DNS code This is a huge patch with many changes, all about the DNS. Initially, the idea was to update the DNS part to ease the threads support integration. But quickly, I started to refactor some parts. And after several iterations, it was impossible for me to commit the different parts atomically. So, instead of adding tens of patches, often reworking the same parts, it was easier to merge all my changes in a uniq patch. Here are all changes made on the DNS. First, the DNS initialization has been refactored. The DNS configuration parsing remains untouched, in cfgparse.c. But all checks have been moved in a post-check callback. In the function dns_finalize_config, for each resolvers, the nameservers configuration is tested and the task used to manage DNS resolutions is created. The links between the backend's servers and the resolvers are also created at this step. Here no connection are kept alive. So there is no needs anymore to reopen them after HAProxy fork. Connections used to send DNS queries will be opened on demand. Then, the way DNS requesters are linked to a DNS resolution has been reworked. The resolution used by a requester is now referenced into the dns_requester structure and the resolution pointers in server and dns_srvrq structures have been removed. wait and curr list of requesters, for a DNS resolution, have been replaced by a uniq list. And Finally, the way a requester is removed from a DNS resolution has been simplified. Now everything is done in dns_unlink_resolution. srv_set_fqdn function has been simplified. Now, there is only 1 way to set the server's FQDN, independently it is done by the CLI or when a SRV record is resolved. The static DNS resolutions pool has been replaced by a dynamoc pool. The part has been modified by Baptiste Assmann. The way the DNS resolutions are triggered by the task or by a health-check has been totally refactored. Now, all timeouts are respected. Especially hold.valid. 
The default frequency to wake up a resolvers is now configurable using "timeout resolve" parameter. Now, as documented, as long as invalid repsonses are received, we really wait all name servers responses before retrying. As far as possible, resources allocated during DNS configuration parsing are releases when HAProxy is shutdown. Beside all these changes, the code has been cleaned to ease code review and the doc has been updated.
2017-09-27 05:00:59 -04:00
hostname_len = strlen(hostname);
hostname_dn = trash.area;
hostname_dn_len = resolv_str_to_dn_label(hostname, hostname_len,
hostname_dn, trash.size);
MAJOR: dns: Refactor the DNS code This is a huge patch with many changes, all about the DNS. Initially, the idea was to update the DNS part to ease the threads support integration. But quickly, I started to refactor some parts. And after several iterations, it was impossible for me to commit the different parts atomically. So, instead of adding tens of patches, often reworking the same parts, it was easier to merge all my changes in a uniq patch. Here are all changes made on the DNS. First, the DNS initialization has been refactored. The DNS configuration parsing remains untouched, in cfgparse.c. But all checks have been moved in a post-check callback. In the function dns_finalize_config, for each resolvers, the nameservers configuration is tested and the task used to manage DNS resolutions is created. The links between the backend's servers and the resolvers are also created at this step. Here no connection are kept alive. So there is no needs anymore to reopen them after HAProxy fork. Connections used to send DNS queries will be opened on demand. Then, the way DNS requesters are linked to a DNS resolution has been reworked. The resolution used by a requester is now referenced into the dns_requester structure and the resolution pointers in server and dns_srvrq structures have been removed. wait and curr list of requesters, for a DNS resolution, have been replaced by a uniq list. And Finally, the way a requester is removed from a DNS resolution has been simplified. Now everything is done in dns_unlink_resolution. srv_set_fqdn function has been simplified. Now, there is only 1 way to set the server's FQDN, independently it is done by the CLI or when a SRV record is resolved. The static DNS resolutions pool has been replaced by a dynamoc pool. The part has been modified by Baptiste Assmann. The way the DNS resolutions are triggered by the task or by a health-check has been totally refactored. Now, all timeouts are respected. Especially hold.valid. 
The default frequency to wake up a resolvers is now configurable using "timeout resolve" parameter. Now, as documented, as long as invalid repsonses are received, we really wait all name servers responses before retrying. As far as possible, resources allocated during DNS configuration parsing are releases when HAProxy is shutdown. Beside all these changes, the code has been cleaned to ease code review and the doc has been updated.
2017-09-27 05:00:59 -04:00
if (hostname_dn_len == -1)
goto err;
MAJOR: dns: Refactor the DNS code This is a huge patch with many changes, all about the DNS. Initially, the idea was to update the DNS part to ease the threads support integration. But quickly, I started to refactor some parts. And after several iterations, it was impossible for me to commit the different parts atomically. So, instead of adding tens of patches, often reworking the same parts, it was easier to merge all my changes in a uniq patch. Here are all changes made on the DNS. First, the DNS initialization has been refactored. The DNS configuration parsing remains untouched, in cfgparse.c. But all checks have been moved in a post-check callback. In the function dns_finalize_config, for each resolvers, the nameservers configuration is tested and the task used to manage DNS resolutions is created. The links between the backend's servers and the resolvers are also created at this step. Here no connection are kept alive. So there is no needs anymore to reopen them after HAProxy fork. Connections used to send DNS queries will be opened on demand. Then, the way DNS requesters are linked to a DNS resolution has been reworked. The resolution used by a requester is now referenced into the dns_requester structure and the resolution pointers in server and dns_srvrq structures have been removed. wait and curr list of requesters, for a DNS resolution, have been replaced by a uniq list. And Finally, the way a requester is removed from a DNS resolution has been simplified. Now everything is done in dns_unlink_resolution. srv_set_fqdn function has been simplified. Now, there is only 1 way to set the server's FQDN, independently it is done by the CLI or when a SRV record is resolved. The static DNS resolutions pool has been replaced by a dynamoc pool. The part has been modified by Baptiste Assmann. The way the DNS resolutions are triggered by the task or by a health-check has been totally refactored. Now, all timeouts are respected. Especially hold.valid. 
The default frequency to wake up a resolvers is now configurable using "timeout resolve" parameter. Now, as documented, as long as invalid repsonses are received, we really wait all name servers responses before retrying. As far as possible, resources allocated during DNS configuration parsing are releases when HAProxy is shutdown. Beside all these changes, the code has been cleaned to ease code review and the doc has been updated.
2017-09-27 05:00:59 -04:00
free(srv->hostname);
free(srv->hostname_dn);
srv->hostname = strdup(hostname);
srv->hostname_dn = strdup(hostname_dn);
srv->hostname_dn_len = hostname_dn_len;
MAJOR/REORG: dns: DNS resolution task and requester queues This patch is a major upgrade of the internal run-time DNS resolver in HAProxy and it brings the following 2 main changes: 1. DNS resolution task Up to now, DNS resolution was triggered by the health check task. From now, DNS resolution task is autonomous. It is started by HAProxy right after the scheduler is available and it is woken either when a network IO occurs for one of its nameserver or when a timeout is matched. From now, this means we can enable DNS resolution for a server without enabling health checking. 2. Introduction of a dns_requester structure Up to now, DNS resolution was purposely made for resolving server hostnames. The idea, is to ensure that any HAProxy internal object should be able to trigger a DNS resolution. For this purpose, 2 things has to be done: - clean up the DNS code from the server structure (this was already quite clean actually) and clean up the server's callbacks from manipulating too much DNS resolution - create an agnostic structure which allows linking a DNS resolution and a requester of any type (using obj_type enum) 3. Manage requesters through queues Up to now, there was an uniq relationship between a resolution and it's owner (aka the requester now). It's a shame, because in some cases, multiple objects may share the same hostname and may benefit from a resolution being performed by a third party. This patch introduces the notion of queues, which are basically lists of either currently running resolution or waiting ones. The resolutions are now available as a pool, which belongs to the resolvers. The pool has has a default size of 64 resolutions per resolvers and is allocated at configuration parsing.
2017-05-22 09:17:15 -04:00
if (!srv->hostname || !srv->hostname_dn)
goto err;
return 0;
err:
ha_free(&srv->hostname);
ha_free(&srv->hostname_dn);
return -1;
}
/* Set the default values of <srv>. Used both for dynamic servers and for
 * default servers. The latter are not initialized via new_server(), hence
 * this function's purpose. For static servers, srv_settings_cpy() is used
 * instead, reusing their default server instance.
 */
void srv_settings_init(struct server *srv)
{
	/* health checks */
	srv->check.port = 0;
	srv->check.inter = DEF_CHKINTR;
	srv->check.fastinter = 0;
	srv->check.downinter = 0;
	srv->check.rise = DEF_RISETIME;
	srv->check.fall = DEF_FALLTIME;

	/* agent checks */
	srv->agent.port = 0;
	srv->agent.inter = DEF_CHKINTR;
	srv->agent.fastinter = 0;
	srv->agent.downinter = 0;
	srv->agent.rise = DEF_AGENT_RISETIME;
	srv->agent.fall = DEF_AGENT_FALLTIME;

	/* initial state and weights */
	srv->init_state = SRV_INIT_STATE_UP;
	srv->slowstart = 0;
	srv->iweight = 1;
	srv->uweight = 1;

	/* connection and queue limits */
	srv->minconn = 0;
	srv->maxconn = 0;
	srv->maxqueue = 0;

	/* connection reuse and idle pool */
	srv->max_reuse = -1;
	srv->max_idle_conns = -1;
	srv->pool_purge_delay = 5000;

	/* reaction to observed errors */
	srv->onerror = DEF_HANA_ONERR;
	srv->consecutive_errors_limit = DEF_HANA_ERRLIMIT;

#ifdef USE_QUIC
	srv->quic_max_cwnd = quic_tune.be.cc_max_win_size;
#endif

	LIST_INIT(&srv->pp_tlvs);
}
/*
* Copy <src> server settings to <srv> server allocating
* everything needed. This is used to pre-initialize a server from
* default-server settings. If the source is NULL (i.e. no defsrv)
* then we fall back to srv_settings_init() to pre-initialize a
* clean new server.
*
* This function is not supposed to be called at any time, but only
* during server settings parsing or during server allocations from
* a server template, and just after having calloc()'ed a new server.
* So, <src> may only be a default server (when parsing server settings)
* or a server template (during server allocations from a server template).
* <srv_tmpl> distinguishes these two cases (must be 1 if <srv> is a template,
* 0 if not).
*/
void srv_settings_cpy(struct server *srv, const struct server *src, int srv_tmpl)
{
struct srv_pp_tlv_list *srv_tlv = NULL, *new_srv_tlv = NULL;
if (!src) {
srv_settings_init(srv);
return;
}
/* Connection source settings copy */
srv_conn_src_cpy(srv, src);
if (srv_tmpl) {
srv->addr = src->addr;
srv->addr_type = src->addr_type;
srv->svc_port = src->svc_port;
}
srv->pp_opts = src->pp_opts;
if (src->rdr_pfx != NULL) {
srv->rdr_pfx = strdup(src->rdr_pfx);
srv->rdr_len = src->rdr_len;
}
if (src->cookie != NULL) {
srv->cookie = strdup(src->cookie);
srv->cklen = src->cklen;
}
srv->use_ssl = src->use_ssl;
srv->check.addr = src->check.addr;
srv->agent.addr = src->agent.addr;
srv->check.use_ssl = src->check.use_ssl;
srv->check.port = src->check.port;
if (src->check.sni != NULL)
srv->check.sni = strdup(src->check.sni);
if (src->check.alpn_str) {
srv->check.alpn_str = malloc(src->check.alpn_len);
if (srv->check.alpn_str) {
memcpy(srv->check.alpn_str, src->check.alpn_str,
src->check.alpn_len);
srv->check.alpn_len = src->check.alpn_len;
}
}
if (!(srv->flags & SRV_F_RHTTP))
srv->check.reuse_pool = src->check.reuse_pool;
if (src->check.pool_conn_name)
srv->check.pool_conn_name = strdup(src->check.pool_conn_name);
/* Note: 'flags' field has potentially been already initialized. */
srv->flags |= src->flags;
srv->do_check = src->do_check;
srv->do_agent = src->do_agent;
srv->check.inter = src->check.inter;
srv->check.fastinter = src->check.fastinter;
srv->check.downinter = src->check.downinter;
srv->agent.use_ssl = src->agent.use_ssl;
srv->agent.port = src->agent.port;
if (src->agent.tcpcheck_rules) {
srv->agent.tcpcheck_rules = calloc(1, sizeof(*srv->agent.tcpcheck_rules));
if (srv->agent.tcpcheck_rules) {
srv->agent.tcpcheck_rules->flags = src->agent.tcpcheck_rules->flags;
srv->agent.tcpcheck_rules->list = src->agent.tcpcheck_rules->list;
LIST_INIT(&srv->agent.tcpcheck_rules->preset_vars);
dup_tcpcheck_vars(&srv->agent.tcpcheck_rules->preset_vars,
&src->agent.tcpcheck_rules->preset_vars);
}
}
srv->agent.inter = src->agent.inter;
srv->agent.fastinter = src->agent.fastinter;
srv->agent.downinter = src->agent.downinter;
srv->maxqueue = src->maxqueue;
srv->ws = src->ws;
srv->minconn = src->minconn;
srv->maxconn = src->maxconn;
srv->slowstart = src->slowstart;
srv->hash_key = src->hash_key;
srv->observe = src->observe;
srv->onerror = src->onerror;
srv->onmarkeddown = src->onmarkeddown;
srv->onmarkedup = src->onmarkedup;
if (src->trackit != NULL)
srv->trackit = strdup(src->trackit);
srv->consecutive_errors_limit = src->consecutive_errors_limit;
srv->uweight = srv->iweight = src->iweight;
srv->check.send_proxy = src->check.send_proxy;
/* health: up, but will fall down at first failure */
srv->check.rise = srv->check.health = src->check.rise;
srv->check.fall = src->check.fall;
/* Here we check if 'disabled' is the default server state */
if (src->next_admin & (SRV_ADMF_CMAINT | SRV_ADMF_FMAINT)) {
srv->next_admin |= SRV_ADMF_CMAINT | SRV_ADMF_FMAINT;
srv->next_state = SRV_ST_STOPPED;
srv->check.state |= CHK_ST_PAUSED;
srv->check.health = 0;
}
/* health: up but will fall down at first failure */
srv->agent.rise = srv->agent.health = src->agent.rise;
srv->agent.fall = src->agent.fall;
if (src->resolvers_id != NULL)
srv->resolvers_id = strdup(src->resolvers_id);
srv->resolv_opts.family_prio = src->resolv_opts.family_prio;
srv->resolv_opts.accept_duplicate_ip = src->resolv_opts.accept_duplicate_ip;
srv->resolv_opts.ignore_weight = src->resolv_opts.ignore_weight;
if (srv->resolv_opts.family_prio == AF_UNSPEC)
srv->resolv_opts.family_prio = AF_INET6;
memcpy(srv->resolv_opts.pref_net,
src->resolv_opts.pref_net,
sizeof srv->resolv_opts.pref_net);
srv->resolv_opts.pref_net_nb = src->resolv_opts.pref_net_nb;
srv->init_addr_methods = src->init_addr_methods;
srv->init_addr = src->init_addr;
srv->init_state = src->init_state;
#if defined(USE_OPENSSL)
srv_ssl_settings_cpy(srv, src);
#endif
#ifdef TCP_MD5SIG
if (src->tcp_md5sig != NULL)
srv->tcp_md5sig = strdup(src->tcp_md5sig);
#endif
if (src->cc_algo != NULL)
srv->cc_algo = strdup(src->cc_algo);
#ifdef TCP_USER_TIMEOUT
srv->tcp_ut = src->tcp_ut;
#endif
srv->mux_proto = src->mux_proto;
if (srv->pool_conn_name)
srv->pool_conn_name = strdup(srv->pool_conn_name);
srv->pool_purge_delay = src->pool_purge_delay;
MEDIUM: server: add a new pool-low-conn server setting The problem with the way idle connections currently work is that it's easy for a thread to steal all of its siblings' connections, then release them, then it's done by another one, etc. This happens even more easily due to scheduling latencies, or merged events inside the same pool loop, which, when dealing with a fast server responding in sub-millisecond delays, can really result in one thread being fully at work at a time. In such a case, we perform a huge amount of takeover() which consumes CPU and requires quite some locking, sometimes resulting in lower performance than expected. In order to fight against this problem, this patch introduces a new server setting "pool-low-conn", whose purpose is to dictate when it is allowed to steal connections from a sibling. As long as the number of idle connections remains at least as high as this value, it is permitted to take over another connection. When the idle connection count becomes lower, a thread may only use its own connections or create a new one. By proceeding like this even with a low number (typically 2*nbthreads), we quickly end up in a situation where all active threads have a few connections. It then becomes possible to connect to a server without bothering other threads the vast majority of the time, while still being able to use these connections when the number of available FDs becomes low. We also use this threshold instead of global.nbthread in the connection release logic, allowing to keep more extra connections if needed. A test performed with 10000 concurrent HTTP/1 connections, 16 threads and 210 servers with 1 millisecond of server response time showed the following numbers: haproxy 2.1.7: 185000 requests per second haproxy 2.2: 314000 requests per second haproxy 2.2 lowconn 32: 352000 requests per second The takeover rate goes down from 300k/s to 13k/s. The difference is further amplified as the response time shrinks.
2020-07-01 01:43:51 -04:00
srv->low_idle_conns = src->low_idle_conns;
srv->max_idle_conns = src->max_idle_conns;
srv->max_reuse = src->max_reuse;
#ifdef USE_QUIC
srv->quic_cc_algo = src->quic_cc_algo;
srv->quic_max_cwnd = src->quic_max_cwnd;
#endif
if (srv_tmpl)
srv->srvrq = src->srvrq;
srv->netns = src->netns;
srv->check.via_socks4 = src->check.via_socks4;
srv->socks4_addr = src->socks4_addr;
srv->log_bufsize = src->log_bufsize;
LIST_INIT(&srv->pp_tlvs);
list_for_each_entry(srv_tlv, &src->pp_tlvs, list) {
new_srv_tlv = malloc(sizeof(*new_srv_tlv));
if (unlikely(!new_srv_tlv)) {
break;
}
new_srv_tlv->fmt_string = strdup(srv_tlv->fmt_string);
if (unlikely(!new_srv_tlv->fmt_string)) {
free(new_srv_tlv);
break;
}
new_srv_tlv->type = srv_tlv->type;
lf_expr_init(&new_srv_tlv->fmt);
if (srv_tmpl) {
if (new_srv_tlv->fmt_string && unlikely(!parse_logformat_string(new_srv_tlv->fmt_string,
srv->proxy, &new_srv_tlv->fmt, 0, SMP_VAL_BE_SRV_CON, NULL))) {
free(new_srv_tlv->fmt_string);
free(new_srv_tlv);
break;
}
}
LIST_APPEND(&srv->pp_tlvs, &new_srv_tlv->list);
}
}
/* Allocates a server, attaches it to the global servers_list
MEDIUM: server: automatically add server to proxy list in new_server() while new_server() takes the parent proxy as argument and even assigns srv->proxy to the parent proxy, it didn't actually inserted the server to the parent proxy server list on success. The result is that sometimes we add the server to the list after new_server() is called, and sometimes we don't. This is really error-prone and because of that hooks such as REGISTER_POST_SERVER_CHECK() which as run for all servers listed in all proxies may not be relied upon for servers which are not actually inserted in their parent proxy server list. Plus it feels very strange to have a server that points to a proxy, but then the proxy doesn't know about it because it cannot find it in its server list. To prevent errors and make proxy->srv list reliable, we move the insertion logic directly under new_server(). This requires to know if we are called during parsing or during runtime to either insert or append the server to the parent proxy list. For that we use PR_FL_CHECKED flag from the parent proxy (if the flag is set, then the proxy was checked so we are past the init phase, thus we assume we are called during runtime) This implies that during startup if new_server() has to be cancelled on error paths we need to call srv_detach() (which is now exposed in server.h) before srv_drop(). The consequence of this commit is that REGISTER_POST_SERVER_CHECK() should not run reliably on all servers created using new_server() (without having to manually loop on global servers_list)
2025-05-09 13:24:55 -04:00
* and adds it to <proxy> server list. Before deleting the server with
* srv_drop(), srv_detach() must be called to remove it from the parent
* proxy list
*
* Returns the server on success, otherwise NULL.
*/
struct server *new_server(struct proxy *proxy)
{
struct server *srv;
srv = srv_alloc();
if (!srv)
return NULL;
srv_take(srv);
srv->obj_type = OBJ_TYPE_SERVER;
srv->proxy = proxy;
MT_LIST_APPEND(&servers_list, &srv->global_list);
LIST_INIT(&srv->srv_rec_item);
LIST_INIT(&srv->ip_rec_item);
LIST_INIT(&srv->pp_tlvs);
event_hdl_sub_list_init(&srv->e_subs);
srv->rid = 0; /* rid defaults to 0 */
srv->next_state = SRV_ST_RUNNING; /* early server setup */
srv->last_change = ns_to_sec(now_ns);
srv->check.obj_type = OBJ_TYPE_CHECK;
srv->check.status = HCHK_STATUS_INI;
srv->check.server = srv;
srv->check.proxy = proxy;
srv->check.tcpcheck_rules = &proxy->tcpcheck_rules;
srv->agent.obj_type = OBJ_TYPE_CHECK;
srv->agent.status = HCHK_STATUS_INI;
srv->agent.server = srv;
srv->agent.proxy = proxy;
srv->xprt = srv->check.xprt = srv->agent.xprt = xprt_get(XPRT_RAW);
guid_init(&srv->guid);
2024-10-23 05:33:34 -04:00
MT_LIST_INIT(&srv->watcher_list);
srv->extra_counters = NULL;
#ifdef USE_OPENSSL
HA_RWLOCK_INIT(&srv->ssl_ctx.lock);
#endif
MEDIUM: server: automatically add server to proxy list in new_server() while new_server() takes the parent proxy as argument and even assigns srv->proxy to the parent proxy, it didn't actually inserted the server to the parent proxy server list on success. The result is that sometimes we add the server to the list after new_server() is called, and sometimes we don't. This is really error-prone and because of that hooks such as REGISTER_POST_SERVER_CHECK() which as run for all servers listed in all proxies may not be relied upon for servers which are not actually inserted in their parent proxy server list. Plus it feels very strange to have a server that points to a proxy, but then the proxy doesn't know about it because it cannot find it in its server list. To prevent errors and make proxy->srv list reliable, we move the insertion logic directly under new_server(). This requires to know if we are called during parsing or during runtime to either insert or append the server to the parent proxy list. For that we use PR_FL_CHECKED flag from the parent proxy (if the flag is set, then the proxy was checked so we are past the init phase, thus we assume we are called during runtime) This implies that during startup if new_server() has to be cancelled on error paths we need to call srv_detach() (which is now exposed in server.h) before srv_drop(). The consequence of this commit is that REGISTER_POST_SERVER_CHECK() should not run reliably on all servers created using new_server() (without having to manually loop on global servers_list)
2025-05-09 13:24:55 -04:00
// add server to proxy list:
/* TODO use a double-linked list for px->srv */
if (!(proxy->flags & PR_FL_CHECKED) || !proxy->srv) {
/* they are linked backwards first during parsing
* This will be restablished after parsing.
*/
srv->next = proxy->srv;
proxy->srv = srv;
}
else {
struct server *sv = proxy->srv;
// runtime, add the server at the end of the list
while (sv && sv->next)
sv = sv->next;
sv->next = srv;
}
HA_RWLOCK_INIT(&srv->path_params.param_lock);
return srv;
}
/* Take one more reference on <srv> by atomically incrementing its refcount.
 * <srv> is dereferenced unconditionally and thus must not be NULL.
 */
void srv_take(struct server *srv)
{
HA_ATOMIC_INC(&srv->refcount);
}
/* deallocate common server parameters (may be used by default-servers) */
void srv_free_params(struct server *srv)
{
struct srv_pp_tlv_list *srv_tlv = NULL;
free(srv->cookie);
free(srv->rdr_pfx);
free(srv->hostname);
free(srv->hostname_dn);
free((char*)srv->conf.file);
ha_aligned_free(srv->per_thr);
ha_aligned_free(srv->per_tgrp);
ha_aligned_free(srv->curr_idle_thr);
free(srv->pool_conn_name);
release_sample_expr(srv->pool_conn_name_expr);
free(srv->resolvers_id);
free(srv->cc_algo);
free(srv->tcp_md5sig);
free(srv->addr_key);
MEDIUM: stats: avoid 1 indirection by storing the shared stats directly in counters struct Between 3.2 and 3.3-dev we noticed a noticeable performance regression due to stats handling. After bisecting, Willy found out that recent work to split stats computing accross multiple thread groups (stats sharding) was responsible for that performance regression. We're looking at roughly 20% performance loss. More precisely, it is the added indirections, multiplied by the number of statistics that are updated for each request, which in the end causes a significant amount of time being spent resolving pointers. We noticed that the fe_counters_shared and be_counters_shared structures which are currently allocated in dedicated memory since a0dcab5c ("MAJOR: counters: add shared counters base infrastructure") are no longer huge since 16eb0fab31 ("MAJOR: counters: dispatch counters over thread groups") because they now essentially hold flags plus the per-thread group id pointer mapping, not the counters themselves. As such we decided to try merging fe_counters_shared and be_counters_shared in their parent structures. The cost is slight memory overhead for the parent structure, but it allows to get rid of one pointer indirection. This patch alone yields visible performance gains and almost restores 3.2 stats performance. counters_fe_shared_get() was renamed to counters_fe_shared_prepare() and now returns either failure or success instead of a pointer because we don't need to retrieve a shared pointer anymore, the function takes care of initializing existing pointer.
2025-07-22 11:15:02 -04:00
counters_be_shared_drop(&srv->counters.shared);
if (srv->log_target) {
deinit_log_target(srv->log_target);
free(srv->log_target);
}
free(srv->tmpl_info.prefix);
if (xprt_get(XPRT_SSL) && xprt_get(XPRT_SSL)->destroy_srv)
xprt_get(XPRT_SSL)->destroy_srv(srv);
else if (xprt_get(XPRT_QUIC) && xprt_get(XPRT_QUIC)->destroy_srv)
xprt_get(XPRT_QUIC)->destroy_srv(srv);
while (!LIST_ISEMPTY(&srv->pp_tlvs)) {
srv_tlv = LIST_ELEM(srv->pp_tlvs.n, struct srv_pp_tlv_list *, list);
LIST_DEL_INIT(&srv_tlv->list);
lf_expr_deinit(&srv_tlv->fmt);
ha_free(&srv_tlv->fmt_string);
ha_free(&srv_tlv);
}
}
/* Drop one reference on server <srv> and release it along with its members
 * once the reference count reaches zero. <srv> may be NULL, in which case
 * nothing is done.
 *
 * A general rule is to assume that proxy may already be freed, so cleanup
 * checks must not depend on the proxy.
 *
 * As a convenience, <srv.next> is returned if srv is not NULL (NULL
 * otherwise). It may be useful when calling srv_drop on the list of servers.
 */
struct server *srv_drop(struct server *srv)
{
	struct server *next;
	struct proxy *px;
	int i __maybe_unused;

	if (!srv)
		return NULL;

	/* both are read before the reference is released */
	next = srv->next;

	/* If srv was deleted, a proxy refcount must be dropped. */
	px = (srv->flags & SRV_F_DELETED) ? srv->proxy : NULL;

	/* Decrement the reference counter. Only free the server when
	 * reaching zero.
	 */
	if (HA_ATOMIC_SUB_FETCH(&srv->refcount, 1) != 0)
		return next;

	/* The following BUG_ON() is invalid for now as server released on
	 * deinit will trigger it as they are not properly removed from their
	 * tree:
	 *
	 *   BUG_ON(ceb_intree(&srv->addr_node) ||
	 *          srv->idle_node.node.leaf_p ||
	 *          srv->conf.id.node.leaf_p ||
	 *          ceb_intree(&srv->name_node));
	 */

	guid_remove(&srv->guid);

	if (srv->requeue_tasklet)
		tasklet_kill(srv->requeue_tasklet);
	task_destroy(srv->warmup);
	task_destroy(srv->srvrq_check);

	free(srv->id);
#ifdef USE_QUIC
	if (srv->per_thr) {
		for (i = 0; i < global.nbthread; i++)
			istfree(&srv->per_thr[i].quic_retry_token);
	}
#endif
	EXTRA_COUNTERS_FREE(srv->extra_counters);
	srv_free_params(srv);

	HA_SPIN_DESTROY(&srv->lock);

	MT_LIST_DELETE(&srv->global_list);
	event_hdl_sub_list_destroy(&srv->e_subs);

	srv_free(&srv);

	/* px is NULL unless the server was flagged SRV_F_DELETED */
	proxy_drop(px);

	return next;
}
/* Unlink server <srv> from the list of trackers of the server it tracks, if
 * any. Nothing is done when <srv> tracks no server. No special care is taken
 * if <srv> is tracked itself by another one: this situation should be
 * avoided by the caller.
 *
 * Not thread-safe.
 */
static void release_server_track(struct server *srv)
{
	struct server **link;

	if (!srv->track)
		return;

	/* walk the chain of "next" pointers so that the matching entry can be
	 * bypassed in place, whether it is the head or not.
	 */
	link = &srv->track->trackers;
	while (*link) {
		if (*link == srv) {
			*link = srv->tracknext;
			return;
		}
		link = &(*link)->tracknext;
	}

	/* srv not found on the tracking list, this should never happen */
	BUG_ON(!*link);
}
/*
 * Parse as much as possible a server-template range argument <arg> of the
 * form "low[-high]" or a single number "nb", the latter being handled as the
 * range 1-nb. <nb_low> and <nb_high> are set so that they may be reused by
 * this loop: for (int i = nb_low; i <= nb_high; i++)...
 *
 * The argument is first copied into the trash chunk, which is modified.
 * <srv> is not used. Numbers are converted with atoi(), so trailing garbage
 * is silently ignored.
 *
 * Fails if the resulting 'low' is negative or if 'high' is lower than 'low'.
 * Returns 0 if succeeded, -1 if not.
 */
static int _srv_parse_tmpl_range(struct server *srv, const char *arg,
                                 int *nb_low, int *nb_high)
{
	char *dash;

	*nb_high = 0;

	/* work on a private copy since the separator gets overwritten */
	chunk_printf(&trash, "%s", arg);
	*nb_low = atoi(trash.area);

	dash = strchr(trash.area, '-');
	if (!dash) {
		/* a single number: it is the upper bound, counting from 1 */
		*nb_high = *nb_low;
		*nb_low = 1;
	}
	else {
		*dash = '\0';
		*nb_high = atoi(dash + 1);
	}

	return (*nb_low < 0 || *nb_high < *nb_low) ? -1 : 0;
}
/* Set the ID of server <srv> to the concatenation of <prefix> and the
 * decimal representation of <nb>. The name is built in the trash chunk and
 * then duplicated; the previous ID, if any, is freed.
 *
 * This function is first intended to be used through parse_server to
 * initialize a new server on startup.
 *
 * Nothing is returned: if the duplication fails, <srv>->id is left NULL.
 */
static inline void _srv_parse_set_id_from_prefix(struct server *srv,
                                                 const char *prefix, int nb)
{
	char *new_id;

	chunk_printf(&trash, "%s%d", prefix, nb);
	new_id = strdup(trash.area);

	free(srv->id);
	srv->id = new_id;
}
/* Parse the sni and pool-conn-name expressions of server <srv> in the context
 * of proxy <px>. Expressions which were already parsed are left untouched.
 * Returns 0 on success and non-zero on error, in which case the error is
 * reported through <errmsg>.
 */
int server_parse_exprs(struct server *srv, struct proxy *px, char **errmsg)
{
	int ret;

	/* Use sni as fallback if pool_conn_name isn't set, but only if
	 * the server is configured to use SSL
	 */
	if (srv->use_ssl == 1 && !srv->pool_conn_name && srv->sni_expr) {
		srv->pool_conn_name = strdup(srv->sni_expr);
		if (!srv->pool_conn_name) {
			memprintf(errmsg, "cannot duplicate sni expression (out of memory)");
			return ERR_ALERT | ERR_FATAL;
		}
	}

	if (srv->sni_expr && !srv->ssl_ctx.sni) {
		ret = parse_srv_expr(srv->sni_expr, &srv->ssl_ctx.sni, px, errmsg);
		if (ret)
			return ret;
	}

	if (srv->pool_conn_name && !srv->pool_conn_name_expr) {
		ret = parse_srv_expr(srv->pool_conn_name, &srv->pool_conn_name_expr, px, errmsg);
		if (ret)
			return ret;
	}

	return 0;
}
/* Set the SNI expression of server <srv> to one which extracts the first
 * colon-delimited field of the request's host header, then parse the server
 * expressions.
 *
 * Returns 0 on success else non-zero. On error, <err_code> and <err> message
 * are both set: ERR_ABORT is reported when the expression cannot be
 * allocated and ERR_FATAL when it cannot be parsed.
 */
int srv_configure_auto_sni(struct server *srv, int *err_code, char **err)
{
	srv->sni_expr = strdup("req.hdr(host),field(1,:)");

	if (srv->sni_expr) {
		if (!server_parse_exprs(srv, srv->proxy, err))
			return 0;

		/* prepend some context to the parser's own message */
		memprintf(err, "failed to parse auto SNI expression: %s", *err);
		*err_code |= ERR_ALERT | ERR_FATAL;
	}
	else {
		memprintf(err, "out of memory while generating server auto SNI expression");
		*err_code |= ERR_ALERT | ERR_ABORT;
	}

	return 1;
}
/* Initialize as much as possible servers from <srv> server template.
* Note that a server template is a special server with
* a few different parameters than a server which has
* been parsed mostly the same way as a server.
*
* This function is first intended to be used through parse_server to
* initialize a new server on startup.
*
* Returns the number of servers successfully allocated,
* 'srv' template included.
*/
static int _srv_parse_tmpl_init(struct server *srv, struct proxy *px)
{
int i;
struct server *newsrv;
/* Set the first server's ID. */
_srv_parse_set_id_from_prefix(srv, srv->tmpl_info.prefix, srv->tmpl_info.nb_low);
cebis_item_insert(&curproxy->conf.used_server_name, conf.name_node, id, srv);
/* then create other servers from this one */
for (i = srv->tmpl_info.nb_low + 1; i <= srv->tmpl_info.nb_high; i++) {
newsrv = new_server(px);
if (!newsrv)
goto err;
newsrv->conf.file = strdup(srv->conf.file);
newsrv->conf.line = srv->conf.line;
srv_settings_cpy(newsrv, srv, 1);
MAJOR/REORG: dns: DNS resolution task and requester queues This patch is a major upgrade of the internal run-time DNS resolver in HAProxy and it brings the following 2 main changes: 1. DNS resolution task Up to now, DNS resolution was triggered by the health check task. From now, DNS resolution task is autonomous. It is started by HAProxy right after the scheduler is available and it is woken either when a network IO occurs for one of its nameserver or when a timeout is matched. From now, this means we can enable DNS resolution for a server without enabling health checking. 2. Introduction of a dns_requester structure Up to now, DNS resolution was purposely made for resolving server hostnames. The idea, is to ensure that any HAProxy internal object should be able to trigger a DNS resolution. For this purpose, 2 things has to be done: - clean up the DNS code from the server structure (this was already quite clean actually) and clean up the server's callbacks from manipulating too much DNS resolution - create an agnostic structure which allows linking a DNS resolution and a requester of any type (using obj_type enum) 3. Manage requesters through queues Up to now, there was an uniq relationship between a resolution and it's owner (aka the requester now). It's a shame, because in some cases, multiple objects may share the same hostname and may benefit from a resolution being performed by a third party. This patch introduces the notion of queues, which are basically lists of either currently running resolution or waiting ones. The resolutions are now available as a pool, which belongs to the resolvers. The pool has has a default size of 64 resolutions per resolvers and is allocated at configuration parsing.
2017-05-22 09:17:15 -04:00
srv_prepare_for_resolution(newsrv, srv->hostname);
if (server_parse_exprs(newsrv, px, NULL))
goto err;
/* append to list of servers available to receive an hostname */
if (newsrv->srvrq)
LIST_APPEND(&newsrv->srvrq->attached_servers, &newsrv->srv_rec_item);
/* Set this new server ID. */
_srv_parse_set_id_from_prefix(newsrv, srv->tmpl_info.prefix, i);
cebis_item_insert(&curproxy->conf.used_server_name, conf.name_node, id, newsrv);
}
return i - srv->tmpl_info.nb_low;
err:
if (newsrv) {
release_sample_expr(newsrv->ssl_ctx.sni);
free_check(&newsrv->agent);
free_check(&newsrv->check);
MT_LIST_DELETE(&newsrv->global_list);
srv_detach(newsrv);
}
srv_drop(newsrv);
return i - srv->tmpl_info.nb_low;
}
/* Ensure server config will work with effective proxy mode
 *
 * This function is expected to be called after _srv_parse_init() initialization
 * but only when the effective server's proxy mode is known, which is not always
 * the case during parsing time, in which case the function will be called during
 * postparsing thanks to the srv_postinit() below.
 *
 * <postparse> must be non-zero when called after parsing: in this case
 * nothing is done for servers whose proxy lacks the PR_CAP_LB capability.
 * A server without proxy is never checked since the mode is not known.
 *
 * Returns ERR_NONE on success else a combination of ERR_CODE.
 */
static int _srv_check_proxy_mode(struct server *srv, char postparse)
{
	int err_code = ERR_NONE;

	/* <srv>->proxy is tested first so that it is never dereferenced when
	 * NULL; this case is then handled by the dedicated test below.
	 */
	if (postparse && srv->proxy && !(srv->proxy->cap & PR_CAP_LB))
		return ERR_NONE; /* nothing to do, the check was already performed during parsing */

	/* make the messages below refer to the server's config location */
	if (srv->conf.file)
		set_usermsgs_ctx(srv->conf.file, srv->conf.line, NULL);

	if (!srv->proxy) {
		/* proxy mode not known, cannot perform checks (ie: defaults section) */
		goto out;
	}

	if (srv->proxy->mode == PR_MODE_SYSLOG) {
		/* log backend server (belongs to proxy with mode log enabled):
		 * perform some compatibility checks
		 */

		/* supported address family types are:
		 *   - ipv4
		 *   - ipv6
		 * (UNSPEC is supported because it means it will be resolved later)
		 */
		if (srv->addr.ss_family != AF_UNSPEC &&
		    srv->addr.ss_family != AF_INET && srv->addr.ss_family != AF_INET6) {
			ha_alert("log server address family not supported for log backend server.\n");
			err_code |= ERR_ALERT | ERR_FATAL;
			goto out;
		}

		/* only @tcp or @udp address forms (or equivalent) are supported */
		if (!(srv->addr_type.xprt_type == PROTO_TYPE_DGRAM && srv->addr_type.proto_type == PROTO_TYPE_DGRAM) &&
		    !(srv->addr_type.xprt_type == PROTO_TYPE_STREAM && srv->addr_type.proto_type == PROTO_TYPE_STREAM)) {
			ha_alert("log server address type not supported for log backend server.\n");
			err_code |= ERR_ALERT | ERR_FATAL;
		}
	}
	else {
		/* for all other proxy modes: only TCP expected as srv's transport type for now */
		if (srv->addr_type.xprt_type != PROTO_TYPE_STREAM) {
			ha_alert("unsupported transport for server address in '%s' backend.\n", proxy_mode_str(srv->proxy->mode));
			err_code |= ERR_ALERT | ERR_FATAL;
		}
	}

	/* 'send-proxy*' options are dropped with a warning outside of TCP and
	 * HTTP modes.
	 */
	if (srv->proxy->mode != PR_MODE_TCP && srv->proxy->mode != PR_MODE_HTTP &&
	    srv->pp_opts) {
		srv->pp_opts = 0;
		ha_warning("'send-proxy*' server option is unsupported there, ignoring it\n");
		err_code |= ERR_WARN;
	}

 out:
	if (srv->conf.file)
		reset_usermsgs_ctx();

	return err_code;
}
/* Finish initializing the server after parsing and before config checks.
 * This currently consists in running the per-thread initialization of <srv>.
 *
 * Returns ERR_NONE on success else a combination of ERR_CODE.
 */
static int srv_init_per_thr(struct server *srv);
int srv_preinit(struct server *srv)
{
	if (srv_init_per_thr(srv) != -1)
		return ERR_NONE;

	ha_alert("error during per-thread init for %s/%s server\n", srv->proxy->id, srv->id);
	return ERR_NONE | ERR_ALERT | ERR_FATAL;
}
/* Finish initializing the server after parsing and config checks
 *
 * The steps are run in order and the function stops at the first one which
 * reports an error: proxy mode compatibility check, shared counters setup,
 * forced maintenance for dynamic servers, requeue and slowstart
 * initialization, then allocation of the per-thread idle connection
 * counters when idle connections are enabled.
 *
 * We must be careful that checks / postinits performed within this function
 * don't depend or conflict with other postcheck functions that are registered
 * using REGISTER_POST_SERVER_CHECK() hook.
 *
 * Returns ERR_NONE on success else a combination of ERR_CODE.
 */
static int init_srv_requeue(struct server *srv);
static int init_srv_slowstart(struct server *srv);
int srv_postinit(struct server *srv)
{
	int err_code = ERR_NONE;
	char *errmsg = NULL;

	err_code |= _srv_check_proxy_mode(srv, 1);
	if (err_code & ERR_CODE)
		return err_code;

	if (!counters_be_shared_prepare(&srv->counters.shared, &srv->guid, &errmsg)) {
		ha_alert("memory error while setting up shared counters for %s/%s server : %s\n", srv->proxy->id, srv->id, errmsg);
		ha_free(&errmsg);
		return err_code | ERR_ALERT | ERR_FATAL;
	}

	if (srv->flags & SRV_F_DYNAMIC) {
		/* A dynamic server is disabled on startup */
		srv->next_admin = SRV_ADMF_FMAINT;
		srv->next_state = SRV_ST_STOPPED;
		server_recalc_eweight(srv, 0); // relies on srv counters
		srv_lb_commit_status(srv);
	}

	err_code |= init_srv_requeue(srv);
	if (err_code & ERR_CODE)
		return err_code;

	err_code |= init_srv_slowstart(srv);
	if (err_code & ERR_CODE)
		return err_code;

	/* initialize idle conns lists, only needed when idle conns are enabled */
	if (srv->max_idle_conns == 0)
		return err_code;

	srv->curr_idle_thr = ha_aligned_zalloc(64, global.nbthread * sizeof(*srv->curr_idle_thr));
	if (srv->curr_idle_thr)
		return err_code;

	ha_alert("memory error during idle conn list init for %s/%s server\n",
	         srv->proxy->id, srv->id);
	return err_code | ERR_ALERT | ERR_FATAL;
}
REGISTER_POST_SERVER_CHECK(srv_postinit);
/* Allocate a new server pointed by <srv> and try to parse the first arguments
 * in <args> as an address for a server or an address-range for a template or
 * nothing for a default-server. <cur_arg> is set to the index of the first
 * argument following the ones consumed here.
 *
 * <parse_flags> is a mask of SRV_PARSE_* flags telling which kind of line is
 * being parsed (server, server-template, default-server, dynamic server,
 * peers section) and whether an address is expected.
 *
 * This function is first intended to be used through parse_server to
 * initialize a new server on startup.
 *
 * A mask of errors is returned. On a parsing error, ERR_FATAL is set. In case
 * of memory exhaustion, ERR_ABORT is set. If the server cannot be allocated,
 * <srv> will be set to NULL. When an error is met after the allocation, <srv>
 * keeps pointing to the partially initialized server, which is not released
 * here.
 */
static int _srv_parse_init(struct server **srv, char **args, int *cur_arg,
                           struct proxy *curproxy,
                           int parse_flags)
{
	struct server *newsrv = NULL;
	const char *err = NULL;
	int err_code = 0;
	char *fqdn = NULL;
	int alt_proto = 0;
	int tmpl_range_low = 0, tmpl_range_high = 0;
	char *errmsg = NULL;

	*srv = NULL;

	/* There is no mandatory first arguments for default server. */
	if (parse_flags & SRV_PARSE_PARSE_ADDR) {
		if (parse_flags & SRV_PARSE_TEMPLATE) {
			if (!*args[3]) {
				/* 'server-template' line number of argument check. */
				ha_alert("'%s' expects <prefix> <nb | range> <addr>[:<port>] as arguments.\n",
				         args[0]);
				err_code |= ERR_ALERT | ERR_FATAL;
				goto out;
			}

			err = invalid_prefix_char(args[1]);
		}
		else {
			if (!*args[2]) {
				/* 'server' line number of argument check. */
				ha_alert("'%s' expects <name> and <addr>[:<port>] as arguments.\n",
				         args[0]);
				err_code |= ERR_ALERT | ERR_FATAL;
				goto out;
			}

			err = invalid_char(args[1]);
		}

		if (err) {
			ha_alert("character '%c' is not permitted in %s %s '%s'.\n",
			         *err, args[0], !(parse_flags & SRV_PARSE_TEMPLATE) ? "name" : "prefix", args[1]);
			err_code |= ERR_ALERT | ERR_FATAL;
			goto out;
		}
	}

	*cur_arg = 2;
	if (parse_flags & SRV_PARSE_TEMPLATE) {
		/* Parse server-template <nb | range> arg. */
		if (_srv_parse_tmpl_range(newsrv, args[*cur_arg], &tmpl_range_low, &tmpl_range_high) < 0) {
			ha_alert("Wrong %s number or range arg '%s'.\n",
			         args[0], args[*cur_arg]);
			err_code |= ERR_ALERT | ERR_FATAL;
			goto out;
		}
		(*cur_arg)++;
	}

	if (!(parse_flags & SRV_PARSE_DEFAULT_SERVER)) {
		struct sockaddr_storage *sk;
		int port1, port2, port;
		char *name_copy;

		*srv = newsrv = new_server(curproxy);
		if (!newsrv) {
			ha_alert("out of memory.\n");
			err_code |= ERR_ALERT | ERR_ABORT;
			goto out;
		}
		register_parsing_obj(&newsrv->obj_type);

		if (parse_flags & SRV_PARSE_TEMPLATE) {
			newsrv->tmpl_info.nb_low = tmpl_range_low;
			newsrv->tmpl_info.nb_high = tmpl_range_high;
		}

		if (parse_flags & SRV_PARSE_DYNAMIC)
			newsrv->flags |= SRV_F_DYNAMIC;

		/* Note: for a server template, its id is its prefix.
		 * This is a temporary id which will be used for server allocations to come
		 * after parsing.
		 *
		 * The copy is checked before being stored: the name is
		 * dereferenced later on (e.g. by the alerts below), so a failed
		 * allocation must be reported instead of leaving a NULL name.
		 */
		name_copy = strdup(args[1]);
		if (!name_copy) {
			ha_alert("out of memory.\n");
			err_code |= ERR_ALERT | ERR_ABORT;
			goto out;
		}

		if (!(parse_flags & SRV_PARSE_TEMPLATE))
			newsrv->id = name_copy;
		else
			newsrv->tmpl_info.prefix = name_copy;

		/* several ways to check the port component :
		 *  - IP    => port=+0, relative (IPv4 only)
		 *  - IP:   => port=+0, relative
		 *  - IP:N  => port=N, absolute
		 *  - IP:+N => port=+N, relative
		 *  - IP:-N => port=-N, relative
		 */
		if (!(parse_flags & SRV_PARSE_PARSE_ADDR))
			goto skip_addr;

		sk = str2sa_range(args[*cur_arg], &port, &port1, &port2, NULL, NULL, &newsrv->addr_type,
		                  &errmsg, NULL, &fqdn, &alt_proto,
		                  (parse_flags & SRV_PARSE_INITIAL_RESOLVE ? PA_O_RESOLVE : 0) | PA_O_PORT_OK |
		                  (parse_flags & SRV_PARSE_IN_PEER_SECTION ? PA_O_PORT_MAND : PA_O_PORT_OFS) |
		                  PA_O_STREAM | PA_O_DGRAM | PA_O_XPRT);
		if (!sk) {
			ha_alert("%s\n", errmsg);
			err_code |= ERR_ALERT | ERR_FATAL;
			ha_free(&errmsg);
			goto out;
		}

#ifdef USE_QUIC
#ifdef HAVE_OPENSSL_QUIC_CLIENT_SUPPORT
		if (srv_is_quic(newsrv)) {
			/* TODO QUIC is currently incompatible with dynamic
			 * backends deletion. Please fix this before removing
			 * QUIC BE experimental status.
			 */
			if (!experimental_directives_allowed) {
				ha_alert("QUIC is experimental for server '%s',"
				         " must be allowed via a global 'expose-experimental-directives'\n",
				         newsrv->id);
				err_code |= ERR_ALERT | ERR_FATAL;
				goto out;
			}
			mark_tainted(TAINTED_CONFIG_EXP_KW_DECLARED);

			newsrv->xprt = xprt_get(XPRT_QUIC);
			quic_transport_params_init(&newsrv->quic_params, 0);
		}
#else
		if (srv_is_quic(newsrv)) {
			ha_alert("The SSL stack does not provide a support for QUIC server '%s'\n",
			         newsrv->id);
			err_code |= ERR_ALERT | ERR_FATAL;
			goto out;
		}
#endif
#endif

		if (!port1 || !port2) {
			if (sk->ss_family != AF_CUST_RHTTP_SRV) {
				/* no port specified, +offset, -offset */
				newsrv->flags |= SRV_F_MAPPORTS;
			}
			else {
				newsrv->flags |= SRV_F_RHTTP;
				/* Automatically activate check-reuse-pool for rhttp@ servers. */
				newsrv->check.reuse_pool = 1;
			}
		}

		/* save hostname and create associated name resolution */
		if (fqdn) {
			if (fqdn[0] == '_') { /* SRV record */
				/* Check if a SRV request already exists, and if not, create it */
				if ((newsrv->srvrq = find_srvrq_by_name(fqdn, curproxy)) == NULL)
					newsrv->srvrq = new_resolv_srvrq(newsrv, fqdn);
				if (newsrv->srvrq == NULL) {
					err_code |= ERR_ALERT | ERR_FATAL;
					goto out;
				}
				LIST_APPEND(&newsrv->srvrq->attached_servers, &newsrv->srv_rec_item);
			}
			else if (srv_prepare_for_resolution(newsrv, fqdn) == -1) {
				ha_alert("Can't create DNS resolution for server '%s'\n",
				         newsrv->id);
				err_code |= ERR_ALERT | ERR_FATAL;
				goto out;
			}
		}

		newsrv->addr = *sk;
		newsrv->svc_port = port;
		newsrv->alt_proto = alt_proto;
		/*
		 * we don't need to lock the server here, because
		 * we are in the process of initializing.
		 *
		 * Note that the server is not attached into the proxy tree if
		 * this is a dynamic server.
		 */
		srv_set_addr_desc(newsrv, !(parse_flags & SRV_PARSE_DYNAMIC));

		if (!newsrv->srvrq && !newsrv->hostname &&
		    !protocol_lookup(newsrv->addr.ss_family, PROTO_TYPE_STREAM, 0)) {
			ha_alert("Unknown protocol family %d '%s'\n",
			         newsrv->addr.ss_family, args[*cur_arg]);
			err_code |= ERR_ALERT | ERR_FATAL;
			goto out;
		}

		(*cur_arg)++;
 skip_addr:
		if (!(parse_flags & SRV_PARSE_DYNAMIC)) {
			/* Copy default server settings to new server */
			srv_settings_cpy(newsrv, curproxy->defsrv, 0);
		} else
			srv_settings_init(newsrv);
		HA_SPIN_INIT(&newsrv->lock);
	}
	else {
		/* This is a "default-server" line. Let's make certain the
		 * current proxy's default server exists, otherwise it's
		 * time to allocate it now.
		 */
		newsrv = curproxy->defsrv;
		if (!newsrv) {
			newsrv = srv_alloc();
			if (!newsrv) {
				ha_alert("out of memory.\n");
				err_code |= ERR_ALERT | ERR_ABORT;
				goto out;
			}
			newsrv->id = "default-server";
			srv_settings_init(newsrv);
			curproxy->defsrv = newsrv;
		}
		*srv = newsrv;
		*cur_arg = 1;
	}

	free(fqdn);
	return 0;

 out:
	free(fqdn);
	return err_code;
}
/* Parse the single server keyword found at args[*cur_arg] and apply it to
 * <srv>.
 *
 * Unless the keyword is unknown, <cur_arg> is advanced past the keyword and
 * its <skip> declared values, except when the keyword declares a skip of -1,
 * in which case <cur_arg> is left as the keyword parser set it.
 *
 * This function is first intended to be used through parse_server to
 * initialize a new server on startup.
 *
 * A mask of errors is returned. ERR_FATAL is set if the parsing should be
 * interrupted. A keyword refused for a default-server or a dynamic server only
 * yields ERR_ALERT.
 */
static int _srv_parse_kw(struct server *srv, char **args, int *cur_arg,
                         struct proxy *curproxy,
                         int parse_flags)
{
	/* appended to "unknown keyword" alerts when no address was parsed */
	const char *no_addr_hint = (parse_flags & SRV_PARSE_PARSE_ADDR) ? "" :
		" Hint: no address was expected for this server.";
	struct srv_kw *keyword = srv_find_kw(args[*cur_arg]);
	char *errmsg = NULL;
	int err_code = 0;

	if (!keyword) {
		const char *suggestion = srv_find_best_kw(args[*cur_arg]);

		if (suggestion)
			ha_alert("unknown keyword '%s'; did you mean '%s' maybe ?%s\n",
			         args[*cur_arg], suggestion, no_addr_hint);
		else
			ha_alert("unknown keyword '%s'.%s\n", args[*cur_arg], no_addr_hint);

		return ERR_ALERT | ERR_FATAL;
	}

	if (!keyword->parse) {
		ha_alert("'%s' option is not implemented in this version (check build options)\n",
		         args[*cur_arg]);
		err_code = ERR_ALERT | ERR_FATAL;
	}
	else if ((parse_flags & SRV_PARSE_DEFAULT_SERVER) && !keyword->default_ok) {
		ha_alert("'%s' option is not accepted in default-server sections\n",
		         args[*cur_arg]);
		err_code = ERR_ALERT;
	}
	else if ((parse_flags & SRV_PARSE_DYNAMIC) && !keyword->dynamic_ok) {
		ha_alert("'%s' option is not accepted for dynamic server\n",
		         args[*cur_arg]);
		err_code |= ERR_ALERT;
	}
	else {
		err_code = keyword->parse(args, cur_arg, curproxy, srv, &errmsg);
		if (err_code) {
			display_parser_err(NULL, 0, args, *cur_arg, err_code, &errmsg);
			free(errmsg);
		}
	}

	/* jump over the keyword and its values, even after a rejection */
	if (keyword->skip != -1)
		*cur_arg += 1 + keyword->skip;

	return err_code;
}
/* Last step of a server line parsing, once all keywords were processed.
 *
 * It rejects inconsistent settings (checks together with tracking, agent
 * check without agent port), parses the server's expressions through
 * server_parse_exprs(), accounts the server in the proxy's active/backup
 * counters unless it is dynamic, compiles the format strings of the configured
 * PROXY protocol TLVs and applies the QUIC specific defaults.
 *
 * Must not be called for a default server instance.
 *
 * This function is first intended to be used through parse_server to
 * initialize a new server on startup.
 *
 * Returns 0 on success, otherwise a mask of ERR_* flags. <args> and <cur_arg>
 * are not used here.
 */
static int _srv_parse_finalize(char **args, int cur_arg,
                               struct server *srv, struct proxy *px,
                               int parse_flags)
{
	struct srv_pp_tlv_list *tlv = NULL;
	char *errmsg = NULL;
	int status;

	if (srv->do_check && srv->trackit) {
		ha_alert("unable to enable checks and tracking at the same time!\n");
		return ERR_ALERT | ERR_FATAL;
	}

	if (srv->do_agent && !srv->agent.port) {
		ha_alert("server %s does not have agent port. Agent check has been disabled.\n",
		         srv->id);
		return ERR_ALERT | ERR_FATAL;
	}

	status = server_parse_exprs(srv, px, &errmsg);
	if (status) {
		if (errmsg) {
			ha_alert("error detected while parsing sni or pool-conn-name expressions : %s.\n", errmsg);
			free(errmsg);
		}
		return status;
	}

	/* A dynamic server is disabled on startup. It must not be counted as
	 * an active backend entry.
	 */
	if (!(parse_flags & SRV_PARSE_DYNAMIC)) {
		if (srv->flags & SRV_F_BACKUP)
			px->srv_bck++;
		else
			px->srv_act++;
	}

	list_for_each_entry(tlv, &srv->pp_tlvs, list) {
		/* only TLVs carrying a format string need to be compiled */
		if (!tlv->fmt_string)
			continue;

		if (unlikely(!parse_logformat_string(tlv->fmt_string, srv->proxy, &tlv->fmt,
		                                     0, SMP_VAL_BE_SRV_CON, &errmsg))) {
			if (errmsg) {
				ha_alert("%s\n", errmsg);
				free(errmsg);
			}
			return ERR_ALERT | ERR_FATAL;
		}
	}

	if (srv_is_quic(srv)) {
#ifdef USE_QUIC
		if (!srv->use_ssl) {
			srv->use_ssl = 1;
			ha_warning("QUIC protocol detected, enabling ssl. Use 'ssl' to shut this warning.\n");
		}

		if (!srv->ssl_ctx.alpn_str) {
			/* default to a single "h3" entry, length-prefixed */
			srv->ssl_ctx.alpn_str = strdup("\002h3");
			if (!srv->ssl_ctx.alpn_str) {
				ha_alert("out of memory while trying to allocate a default alpn string.\n");
				return ERR_ALERT | ERR_FATAL;
			}
			srv->ssl_ctx.alpn_len = strlen(srv->ssl_ctx.alpn_str);
		}

		/* Deletion of backend when QUIC servers were used is currently
		 * not implemented. This is because quic_conn instances
		 * directly references its parent proxy via <prx_counters>
		 * member.
		 *
		 * TODO lift this restriction by ensuring safe access on proxy
		 * counters or via refcount.
		 */
		srv->proxy->flags |= PR_FL_NON_PURGEABLE;
#else
		ha_alert("QUIC protocol selected but support not compiled in (check build options).\n");
		return ERR_ALERT | ERR_FATAL;
#endif
	}

	if (srv->proxy->cap & PR_CAP_LB) {
		srv_lb_commit_status(srv);
		return 0;
	}

	/* No need to wait for effective proxy mode, it is already known:
	 * Only general purpose user-declared proxies ("listen", "frontend", "backend")
	 * offer the possibility to configure the mode of the proxy. Hopefully for us,
	 * they have the PR_CAP_LB set.
	 */
	return _srv_check_proxy_mode(srv, 0);
}
/* Parse a "server", "server-template" or "default-server" line whose words are
 * in <args>, found at <file>:<linenum>, for proxy <curproxy>. <defproxy> is
 * the defaults proxy, in which only "default-server" lines are accepted.
 * <parse_flags> is a mask of SRV_PARSE_* flags describing the line.
 *
 * The line is processed in three steps: _srv_parse_init() for the name and
 * address, _srv_parse_kw() for each remaining keyword, then
 * _srv_parse_finalize() except for a default-server.
 *
 * Returns 0 when the line was processed without fatal error, otherwise the
 * mask of ERR_* flags of the failing step. Non-fatal codes collected along a
 * successful parsing are not returned.
 */
int parse_server(const char *file, int linenum, char **args,
                 struct proxy *curproxy, const struct proxy *defproxy,
                 int parse_flags)
{
	const int peer_addr_mask = SRV_PARSE_IN_PEER_SECTION | SRV_PARSE_PARSE_ADDR;
	const int is_defsrv = !!(parse_flags & SRV_PARSE_DEFAULT_SERVER);
	struct server *newsrv = NULL;
	int err_code = 0;
	int cur_arg;

	set_usermsgs_ctx(file, linenum, NULL);

	if (!is_defsrv && curproxy == defproxy) {
		ha_alert("'%s' not allowed in 'defaults' section.\n", args[0]);
		err_code |= ERR_ALERT | ERR_FATAL;
		goto out;
	}

	if (failifnotcap(curproxy, PR_CAP_BE, file, linenum, args[0], NULL)) {
		err_code |= ERR_ALERT | ERR_FATAL;
		goto out;
	}

	/* in a peers section with an address expected, a line whose args[2]
	 * is empty is left without error.
	 */
	if ((parse_flags & peer_addr_mask) == peer_addr_mask && !*args[2])
		goto out;

	err_code = _srv_parse_init(&newsrv, args, &cur_arg, curproxy,
	                           parse_flags);
	if (err_code & ERR_CODE)
		goto out;

	if (!newsrv->conf.file) // note: do it only once for default-server
		newsrv->conf.file = strdup(file);
	newsrv->conf.line = linenum;

	while (*args[cur_arg]) {
		err_code = _srv_parse_kw(newsrv, args, &cur_arg, curproxy,
		                         parse_flags);
		if (err_code & ERR_FATAL)
			goto out;
	}

	if (!is_defsrv) {
		err_code |= _srv_parse_finalize(args, cur_arg, newsrv, curproxy, parse_flags);
		if (err_code & ERR_FATAL)
			goto out;
	}

	if (parse_flags & SRV_PARSE_TEMPLATE) {
		_srv_parse_tmpl_init(newsrv, curproxy);
	}
	else if (!is_defsrv) {
		cebis_item_insert(&curproxy->conf.used_server_name, conf.name_node, id, newsrv);
	}

	/* If the server id is fixed, insert it in the proxy used_id tree.
	 * This is needed to detect a later duplicate id via srv_parse_id.
	 *
	 * If no id is specified, a dynamic one is generated in
	 * check_config_validity.
	 */
	if (newsrv->flags & SRV_F_FORCED_ID)
		server_index_id(curproxy, newsrv);

	HA_DIAG_WARNING_COND((curproxy->cap & PR_CAP_LB) && !newsrv->uweight,
	                     "configured with weight of 0 will never be selected by load balancing algorithms\n");

	/* success: whatever non-fatal code was collected is not reported */
	err_code = 0;

 out:
	reset_usermsgs_ctx();
	return err_code;
}
/*
 * Look up in backend <bk> the server whose id is <id>, and only return it if
 * its revision id equals <rid>; NULL is returned otherwise.
 *
 * Checking the revision id in addition to the id guarantees that the returned
 * server is the expected one and not a newer server which reused the id of a
 * deleted one: <id> is unique within the list at a given time, but not over
 * the process lifetime.
 */
struct server *server_find_by_id_unique(struct proxy *bk, int id, uint32_t rid)
{
	struct server *match = server_find_by_id(bk, id);

	if (match && match->rid == rid)
		return match;

	return NULL;
}
/*
 * Return the server named <name> in proxy <px>, looked up in the proxy's
 * used_server_name tree. NULL is returned when <px> is NULL or when no server
 * bears this name.
 */
struct server *server_find_by_name(struct proxy *px, const char *name)
{
	struct server *found = NULL;

	if (px)
		found = cebis_item_lookup(&px->conf.used_server_name, conf.name_node, id, name, struct server);

	return found;
}
/*
 * Return the server whose address key matches <addr> in proxy <px>, or NULL
 * if there is none. The lookup is performed with the proxy lock held for
 * reads, so that addresses cannot change during the operation. <px> must not
 * be NULL.
 */
struct server *server_find_by_addr(struct proxy *px, const char *addr)
{
	struct server *found;

	HA_RWLOCK_RDLOCK(PROXY_LOCK, &px->lock);
	found = cebuis_item_lookup(&px->used_server_addr, addr_node, addr_key, addr, struct server);
	HA_RWLOCK_RDUNLOCK(PROXY_LOCK, &px->lock);

	return found;
}
/* Look up a server in backend <bk>, either by numeric id when <name> starts
 * with a '#' (the digits following it are converted with atoi()), or by name
 * otherwise. NULL is returned when <bk> or <name> is NULL, when <bk> has no
 * backend capability, or when no server matches.
 */
struct server *server_find(struct proxy *bk, const char *name)
{
	if (!bk || !name)
		return NULL;

	/* <bk> has no backend capabilities, so it can't have a server */
	if (!(bk->cap & PR_CAP_BE))
		return NULL;

	if (*name == '#')
		return server_find_by_id(bk, atoi(name + 1));

	return server_find_by_name(bk, name);
}
/*
 * Look up in backend <bk> the server designated by <name> (see server_find())
 * and only return it if its revision id equals <rid>; NULL is returned
 * otherwise.
 *
 * The revision id check protects against a server which was deleted and whose
 * name was reused by a newer one: in this case the newer server is not
 * returned in place of the expected one.
 *
 * This relies on <name> being unique within the list, which is the case in
 * most setups. In rare cases the user may have enforced duplicate server names
 * in the initial config (ie: if he intends to use numerical IDs for
 * identification instead); the function will then not work as expected and
 * server_find_by_id_unique() should be used to match a unique server instead.
 */
struct server *server_find_unique(struct proxy *bk, const char *name, uint32_t rid)
{
	struct server *match = server_find(bk, name);

	if (match && match->rid == rid)
		return match;

	return NULL;
}
/* Return the server of backend <bk> which best matches <name> and/or <id>.
 * <name> may be NULL and <id> may be 0 when unknown; NULL is returned if both
 * are unset or if nothing matches.
 *
 * A server found by name wins when <id> is unset or equals its puid. Otherwise
 * the server found by id is preferred over a different one found by name only
 * if its id was forced by configuration (SRV_F_FORCED_ID).
 *
 * When <diff> is not NULL it is first reset, then reports how the returned
 * server differs from the request:
 *   - bit 0 (1): the server was matched by name but its id differs
 *   - bit 1 (2): the server was matched by id but its name differs
 */
struct server *server_find_best_match(struct proxy *bk, char *name, int id, int *diff)
{
	struct server *by_name = NULL;
	struct server *by_id;

	if (!name && !id)
		return NULL;

	if (diff)
		*diff = 0;

	if (name) {
		by_name = server_find(bk, name);
		if (by_name && (!id || by_name->puid == id))
			return by_name;
	}

	/* remaining possibilities :
	 *  - name not set
	 *  - name set but not found
	 *  - name found but ID doesn't match
	 */
	if (!id)
		return NULL;

	by_id = server_find_by_id(bk, id);
	if (!by_id) {
		/* id not found: fall back to the name match, if any */
		if (by_name && diff)
			*diff |= 1;
		return by_name;
	}

	if (!by_name) {
		/* only the id matched; the name differs if one was given */
		if (name && diff)
			*diff |= 2;
		return by_id;
	}

	/* both lookups matched different servers: use id only if forced by
	 * configuration.
	 */
	if (by_id->flags & SRV_F_FORCED_ID) {
		if (diff)
			*diff |= 2;
		return by_id;
	}

	if (diff)
		*diff |= 1;
	return by_name;
}
/* Fill <inetaddr> with the current address, service port and port mapping
 * mode of server <s>.
 *
 * This may only be used under inet context: the server's address family must
 * be AF_UNSPEC, AF_INET or AF_INET6, which is enforced by a BUG_ON(). With
 * AF_UNSPEC the address part of <inetaddr> is left zeroed.
 */
void server_get_inetaddr(struct server *s, struct server_inetaddr *inetaddr)
{
	struct sockaddr_storage *addr = &s->addr;
	unsigned int svc_port = s->svc_port;
	uint8_t is_mapped = !!(s->flags & SRV_F_MAPPORTS);

	/* only INET families are supported */
	BUG_ON((addr->ss_family != AF_UNSPEC &&
	        addr->ss_family != AF_INET && addr->ss_family != AF_INET6));

	inetaddr->family = addr->ss_family;
	memset(&inetaddr->addr, 0, sizeof(inetaddr->addr));

	switch (addr->ss_family) {
	case AF_INET:
		inetaddr->addr.v4 = ((struct sockaddr_in *)addr)->sin_addr;
		break;
	case AF_INET6:
		inetaddr->addr.v6 = ((struct sockaddr_in6 *)addr)->sin6_addr;
		break;
	default:
		/* AF_UNSPEC: no address to report */
		break;
	}

	inetaddr->port.svc = svc_port;
	inetaddr->port.map = is_mapped;
}
/* Return a human readable name for the server_inetaddr_updater <by> member,
 * or NULL when the updater is unknown and should not be mentioned.
 */
const char *server_inetaddr_updater_by_to_str(enum server_inetaddr_updater_by by)
{
	const char *label = NULL;

	switch (by) {
	case SERVER_INETADDR_UPDATER_BY_CLI:
		label = "stats socket command";
		break;
	case SERVER_INETADDR_UPDATER_BY_LUA:
		label = "Lua script";
		break;
	case SERVER_INETADDR_UPDATER_BY_DNS_AR:
		label = "DNS additional record";
		break;
	case SERVER_INETADDR_UPDATER_BY_DNS_CACHE:
		label = "DNS cache";
		break;
	case SERVER_INETADDR_UPDATER_BY_DNS_RESOLVER:
		label = "DNS resolver";
		break;
	default:
		/* unknown, don't mention updater */
		break;
	}

	return label;
}
/* Append to chunk <out> a " by '...'" suffix naming who performed the address
 * update described by <updater> on server <s>.
 *
 * For an update coming from a DNS resolver, the suffix is made of the id of
 * the resolvers section attached to the server and of the id of the nameserver
 * which handled the query; both must exist. For any other updater, its generic
 * name is used, and nothing is appended if it has none.
 */
static void _srv_append_inetaddr_updater_info(struct buffer *out,
                                              struct server *s,
                                              struct server_inetaddr_updater updater)
{
	const char *label;

	if (updater.by == SERVER_INETADDR_UPDATER_BY_DNS_RESOLVER) {
		/* we already know that the update comes from the resolvers
		 * section linked to the server, but we need to find out which
		 * nameserver handled the dns query
		 */
		struct resolvers *r = s->resolvers;
		struct dns_nameserver *ns;

		BUG_ON(!r);
		ns = find_nameserver_by_resolvers_and_id(r, updater.u.dns_resolver.ns_id);
		BUG_ON(!ns);
		chunk_appendf(out, " by '%s/%s'", r->id, ns->id);
		return;
	}

	label = server_inetaddr_updater_by_to_str(updater.by);
	if (label)
		chunk_appendf(out, " by '%s'", label);
}
/* server_set_inetaddr() helper: write into <addr_str> (of size <len>) the
 * textual form of the raw address <addr> of family <family>. For any family
 * other than AF_INET and AF_INET6 the string "(none)" is written and <addr>
 * is not used. The buffer is zeroed first.
 */
static void _addr_to_str(int family, const void *addr, char *addr_str, size_t len)
{
	memset(addr_str, 0, len);

	if (family == AF_INET || family == AF_INET6)
		inet_ntop(family, addr, addr_str, len);
	else
		strlcpy2(addr_str, "(none)", len);
}
/* server_set_inetaddr() helper: compare the family and the IP address held in
 * <inetaddr> with those of socket address <addr>. Ports are not compared.
 *
 * Returns 0 when both are equivalent (including when both families are the
 * same non-inet family), 1 when they differ.
 */
static int _inetaddr_addr_cmp(const struct server_inetaddr *inetaddr, const struct sockaddr_storage *addr)
{
	if (inetaddr->family != addr->ss_family)
		return 1;

	switch (inetaddr->family) {
	case AF_INET: {
		const struct in_addr *v4 = &((const struct sockaddr_in *)addr)->sin_addr;

		return memcmp(&inetaddr->addr.v4, v4, sizeof(struct in_addr)) != 0;
	}
	case AF_INET6: {
		const struct in6_addr *v6 = &((const struct sockaddr_in6 *)addr)->sin6_addr;

		return memcmp(&inetaddr->addr.v6, v6, sizeof(struct in6_addr)) != 0;
	}
	default:
		/* same family and no address to compare */
		return 0;
	}
}
/* This function sets a server's addr and port in inet context based on new
 * inetaddr input
 *
 * The function first does the following, in that order:
 * - checks if an update is required (new IP or port is different than current
 *   one)
 * - check the update is allowed:
 *   - allow all changes if no CHECKS are configured
 *   - if CHECK is configured:
 *     - if switch to port map (SRV_F_MAPPORTS), ensure health check have their
 *       own ports
 * - applies required changes to both ADDR and PORT if both 'required' and
 *   'allowed' conditions are met.
 *
 * The server's fields are not written here: when a change is detected, an
 * EVENT_HDL_SUB_SERVER_INETADDR event carrying <inetaddr> and <updater> is
 * published, and the change is applied from there.
 *
 * Caller can pass <msg> buffer so that it gets some information about the
 * operation. It may as well provide <updater> so that messages mention that
 * the update was performed on the behalf of it. <msg> is overwritten when the
 * IP changes and only appended to otherwise, so the caller is expected to pass
 * it empty.
 *
 * <inetaddr> family may be set to UNSPEC to reset server's addr; the port
 * information is then ignored.
 *
 * Caller must set <inetaddr>->port.map to 1 if <inetaddr>->port.svc must be
 * handled as an offset
 *
 * Both <inetaddr> and the server's current address must be of family
 * AF_UNSPEC, AF_INET or AF_INET6 (enforced by a BUG_ON()).
 *
 * The function returns 1 if an update was performed and 0 if nothing was
 * changed.
 */
int server_set_inetaddr(struct server *s,
const struct server_inetaddr *inetaddr,
struct server_inetaddr_updater updater, struct buffer *msg)
{
/* event payload: both members describe the same event, the "common" view is
 * the one passed to the generic server event preparation helper.
 */
union {
struct event_hdl_cb_data_server_inetaddr addr;
struct event_hdl_cb_data_server common;
} cb_data;
char addr_str[INET6_ADDRSTRLEN];
uint16_t current_port;
uint8_t ip_change = 0;
uint8_t port_change = 0;
int ret = 0;
/* only INET families are supported */
BUG_ON((inetaddr->family != AF_UNSPEC &&
inetaddr->family != AF_INET && inetaddr->family != AF_INET6) ||
(s->addr.ss_family != AF_UNSPEC &&
s->addr.ss_family != AF_INET && s->addr.ss_family != AF_INET6));
/* same family and same IP: only the port remains to be checked */
if (!_inetaddr_addr_cmp(inetaddr, &s->addr))
goto port;
ip_change = 1;
/* update report for caller */
if (msg) {
/* stays NULL when the current family is AF_UNSPEC, in which case
 * _addr_to_str() reports "(none)" without reading it.
 */
void *from_ptr = NULL;
if (s->addr.ss_family == AF_INET)
from_ptr = &((struct sockaddr_in *)&s->addr)->sin_addr;
else if (s->addr.ss_family == AF_INET6)
from_ptr = &((struct sockaddr_in6 *)&s->addr)->sin6_addr;
_addr_to_str(s->addr.ss_family, from_ptr, addr_str, sizeof(addr_str));
chunk_printf(msg, "IP changed from '%s'", addr_str);
_addr_to_str(inetaddr->family, &inetaddr->addr, addr_str, sizeof(addr_str));
chunk_appendf(msg, " to '%s'", addr_str);
}
if (inetaddr->family == AF_UNSPEC)
goto out; // ignore port information when unsetting addr
port:
/* collect the port currently set up */
current_port = s->svc_port;
/* check if caller triggers a port mapped or offset */
if (inetaddr->port.map) {
/* check if server currently uses port map */
if (!(s->flags & SRV_F_MAPPORTS)) {
/* we're switching from a fixed port to a SRV_F_MAPPORTS
 * (mapped) port, prevent PORT change if check is enabled
 * and it doesn't have it's dedicated port while switching
 * to port mapping
 */
if ((s->check.state & CHK_ST_ENABLED) && !s->check.port) {
if (msg) {
if (ip_change)
chunk_appendf(msg, ", ");
chunk_appendf(msg, "can't change <port> to port map because it is incompatible with current health check port configuration (use 'port' statement from the 'server' directive).");
}
/* NOTE(review): if the IP changed, the event is still
 * published below with the whole <inetaddr>, refused
 * port settings included - confirm that the consumer
 * does not apply them in this case.
 */
goto out;
}
/* switch from fixed port to port map mandatorily triggers
 * a port change
 */
port_change = 1;
}
/* else we're already using port maps */
else {
port_change = current_port != inetaddr->port.svc;
}
}
/* fixed port */
else {
if ((s->flags & SRV_F_MAPPORTS))
port_change = 1; // changing from mapped to fixed
else
port_change = current_port != inetaddr->port.svc;
}
/* update response message about PORT change; mapped ports (offsets) are
 * reported with a leading '+'
 */
if (port_change && msg) {
if (ip_change)
chunk_appendf(msg, ", ");
chunk_appendf(msg, "port changed from '");
if (s->flags & SRV_F_MAPPORTS)
chunk_appendf(msg, "+");
chunk_appendf(msg, "%d' to '", s->svc_port);
if (inetaddr->port.map)
chunk_appendf(msg, "+");
chunk_appendf(msg, "%d'", inetaddr->port.svc);
}
out:
/* publish the change, if any; nothing is reported nor published when both
 * the IP and the port are unchanged
 */
if (ip_change || port_change) {
_srv_event_hdl_prepare(&cb_data.common, s, 0);
_srv_event_hdl_prepare_inetaddr(&cb_data.addr, s,
inetaddr,
updater);
/* server_atomic_sync_task will apply the changes for us */
_srv_event_hdl_publish(EVENT_HDL_SUB_SERVER_INETADDR, cb_data, s);
ret = 1;
}
/* mention who asked for the update, unless the updater is unspecified */
if (ret && msg && updater.by != SERVER_INETADDR_UPDATER_BY_NONE)
_srv_append_inetaddr_updater_info(msg, s, updater);
return ret;
}
/* Wrapper around server_set_inetaddr() which uses the trash chunk as report
 * buffer: whenever the call left a message in it, that message is emitted
 * both as a warning on stderr and as a LOG_NOTICE log for the server's proxy.
 *
 * Returns 1 if something has changed, 0 otherwise.
 * see server_set_inetaddr() for more information.
 */
int server_set_inetaddr_warn(struct server *s,
                             const struct server_inetaddr *inetaddr,
                             struct server_inetaddr_updater updater)
{
	struct buffer *report = get_trash_chunk();
	int changed;

	chunk_reset(report);
	changed = server_set_inetaddr(s, inetaddr, updater, report);

	if (!report->data)
		return changed;

	/* write the buffer on stderr */
	ha_warning("%s/%s: %s.\n", s->proxy->id, s->id, report->area);

	/* send a log */
	send_log(s->proxy, LOG_NOTICE, "%s/%s: %s.\n", s->proxy->id, s->id, report->area);

	return changed;
}
/*
 * Update a server's current IP address, leaving its port settings untouched.
 * <ip> points to the new IP address in network format, whose address family
 * <ip_sin_family> must be AF_INET (4 bytes read) or AF_INET6 (16 bytes read).
 * <updater> describes the requester of the update and is forwarded to
 * server_set_inetaddr_warn(), which emits a log line and a stderr warning
 * when it has something to report.
 *
 * Always returns 0.
 *
 * Must be called with the server lock held.
 */
int srv_update_addr(struct server *s, void *ip, int ip_sin_family, struct server_inetaddr_updater updater)
{
	struct server_inetaddr inetaddr;

	/* start from the current settings so that the port is preserved */
	server_get_inetaddr(s, &inetaddr);

	BUG_ON(ip_sin_family != AF_INET && ip_sin_family != AF_INET6);

	/* save the new IP family */
	inetaddr.family = ip_sin_family;

	/* save the new IP address */
	if (ip_sin_family == AF_INET)
		memcpy(&inetaddr.addr.v4, ip, 4);
	else if (ip_sin_family == AF_INET6)
		memcpy(&inetaddr.addr.v6, ip, 16);

	server_set_inetaddr_warn(s, &inetaddr, updater);

	return 0;
}
/* Update the agent check address and/or port of server <s>.
 * <addr> can be ip4/ip6 or a hostname, <port> must be an integer between 0
 * and 65535; either may be NULL to be left unchanged.
 * Both are validated first: if one error occurs, nothing is applied.
 *
 * Returns NULL on success, otherwise an error message stored in the trash
 * chunk.
 *
 * Must be called with the server lock held.
 */
const char *srv_update_agent_addr_port(struct server *s, const char *addr, const char *port)
{
	struct buffer *report = get_trash_chunk();
	struct sockaddr_storage agent_addr;
	int agent_port;

	chunk_reset(report);

	if (!(s->agent.state & CHK_ST_ENABLED)) {
		chunk_strcat(report, "agent checks are not enabled on this server");
		goto done;
	}

	if (addr) {
		memset(&agent_addr, 0, sizeof(agent_addr));
		if (!str2ip(addr, &agent_addr)) {
			chunk_appendf(report, "invalid addr '%s'", addr);
			goto done;
		}
	}

	if (port) {
		if (strl2irc(port, strlen(port), &agent_port) != 0) {
			chunk_appendf(report, "provided port is not an integer");
			goto done;
		}
		if (agent_port < 0 || agent_port > 65535) {
			chunk_appendf(report, "provided port is invalid");
			goto done;
		}
	}

 done:
	/* an empty report means that every validation passed */
	if (!report->data) {
		if (addr)
			set_srv_agent_addr(s, &agent_addr);
		if (port)
			set_srv_agent_port(s, agent_port);
		return NULL;
	}

	return report->area;
}
/* Update the health check address and/or port of server <s>.
 * <addr> must be ip4 or ip6, it won't be resolved. <port> must be an integer
 * between 0 and 65535, and cannot be 0 while the server uses port mapping
 * (SRV_F_MAPPORTS). Either may be NULL to be left unchanged.
 * Both are validated first: if one error occurs, nothing is applied.
 *
 * On success, the check's transport layer is also reset to the raw one unless
 * the check uses SSL.
 *
 * Returns NULL on success, otherwise an error message stored in the trash
 * chunk.
 *
 * Must be called with the server lock held.
 */
const char *srv_update_check_addr_port(struct server *s, const char *addr, const char *port)
{
	struct buffer *report = get_trash_chunk();
	struct sockaddr_storage check_addr;
	int check_port;

	chunk_reset(report);

	if (!(s->check.state & CHK_ST_ENABLED)) {
		chunk_strcat(report, "health checks are not enabled on this server");
		goto done;
	}

	if (addr) {
		memset(&check_addr, 0, sizeof(check_addr));
		if (!str2ip2(addr, &check_addr, 0)) {
			chunk_appendf(report, "invalid addr '%s'", addr);
			goto done;
		}
	}

	if (port) {
		if (strl2irc(port, strlen(port), &check_port) != 0) {
			chunk_appendf(report, "provided port is not an integer");
			goto done;
		}
		if (check_port < 0 || check_port > 65535) {
			chunk_appendf(report, "provided port is invalid");
			goto done;
		}
		/* prevent the update of port to 0 if MAPPORTS are in use */
		if ((s->flags & SRV_F_MAPPORTS) && check_port == 0) {
			chunk_appendf(report, "can't unset 'port' since MAPPORTS is in use");
			goto done;
		}
	}

 done:
	/* an empty report means that every validation passed */
	if (!report->data) {
		if (addr)
			s->check.addr = check_addr;
		if (port)
			s->check.port = check_port;

		/* Fallback to raw XPRT for the health-check */
		if (!s->check.use_ssl)
			s->check.xprt = xprt_get(XPRT_RAW);
		return NULL;
	}

	return report->area;
}
/*
 * Update the address and/or port of server <s>, for AF_INET and AF_INET6
 * families only. <addr> and <port> are the textual new values; either may be
 * NULL to keep the current one. A <port> starting with '+' or '-' is handled
 * as a port offset (port mapping).
 *
 * Caller can pass its info through <updater> to get it integrated in the
 * response message returned by the function.
 *
 * The function does the following, in that order:
 * - checks that the server's current family is AF_INET or AF_INET6
 * - validates the new addr and/or port
 * - calls server_set_inetaddr() to check and apply the change
 *
 * The returned string is always the trash chunk's area: it holds either an
 * error message, the description of what was changed, or "nothing changed".
 *
 * Must be called with the server lock held.
 */
const char *srv_update_addr_port(struct server *s, const char *addr, const char *port,
                                 struct server_inetaddr_updater updater)
{
	struct buffer *report = get_trash_chunk();
	struct server_inetaddr inetaddr;
	struct sockaddr_storage parsed;

	chunk_reset(report);

	/* even a simple port change is not supported outside of inet context, because
	 * s->svc_port is only relevant under inet context
	 */
	if ((s->addr.ss_family != AF_INET) && (s->addr.ss_family != AF_INET6)) {
		chunk_printf(report, "Update for the current server address family is only supported through configuration file.");
		return report->area;
	}

	/* start from the current settings, then override what was provided */
	server_get_inetaddr(s, &inetaddr);

	if (addr) {
		memset(&parsed, 0, sizeof(parsed));
		if (!str2ip2(addr, &parsed, 0)) {
			chunk_printf(report, "Invalid addr '%s'", addr);
			return report->area;
		}

		/* changes are allowed on AF_INET* families only */
		if ((parsed.ss_family != AF_INET) && (parsed.ss_family != AF_INET6)) {
			chunk_printf(report, "Update to families other than AF_INET and AF_INET6 supported only through configuration file");
			return report->area;
		}

		inetaddr.family = parsed.ss_family;
		if (inetaddr.family == AF_INET)
			inetaddr.addr.v4 = ((struct sockaddr_in *)&parsed)->sin_addr;
		else if (inetaddr.family == AF_INET6)
			inetaddr.addr.v6 = ((struct sockaddr_in6 *)&parsed)->sin6_addr;
	}

	if (port) {
		uint16_t new_port;
		char *endptr;

		errno = 0;
		new_port = strtol(port, &endptr, 10);
		if ((errno != 0) || (port == endptr)) {
			chunk_appendf(report, "problem converting port '%s' to an int", port);
			return report->area;
		}

		/* an explicit sign means the caller wants a mapped port (offset) */
		inetaddr.port.map = (*port == '-' || *port == '+');
		inetaddr.port.svc = new_port;

		/* note: negative offset was converted to positive offset
		 * (new_port is unsigned) to prevent later conversions errors
		 * since svc_port is handled as an unsigned int all along the
		 * chain. Unfortunately this is a one-way operation so the user
		 * could be surprised to see a negative offset reported using
		 * its equivalent positive offset in the generated message
		 * (-X = +(65535 - (X-1))), but thanks to proper wraparound it
		 * will be interpreted as a negative offset during port
		 * remapping so it will work as expected.
		 */
	}

	if (!server_set_inetaddr(s, &inetaddr, updater, report))
		chunk_printf(report, "nothing changed");

	return report->area;
}
/*
 * Put server <s> in maintenance because of a failing SRV resolution, by
 * setting its SRV_ADMF_RMAINT admin flag. Nothing is done for a server which
 * is not attached to a SRV request or which already has this flag pending.
 * returns:
 *   0 if server was put under maintenance
 *   1 if server status has not changed
 *
 * Must be called with the server lock held.
 */
int srvrq_set_srv_down(struct server *s)
{
	if (!s->srvrq || (s->next_admin & SRV_ADMF_RMAINT))
		return 1;

	srv_set_admin_flag(s, SRV_ADMF_RMAINT, SRV_ADM_STCHGC_DNS_NOENT);
	return 0;
}
/*
* put server under maintenance as a result of name resolution
* returns:
* 0 if server was put under maintenance
* 1 if server status has not changed
*
* Must be called with the server lock held.
*/
int snr_set_srv_down(struct server *s)
{
struct resolvers *resolvers = s->resolvers;
struct resolv_resolution *resolution = (s->resolv_requester ? s->resolv_requester->resolution : NULL);
MAJOR: dns: Refactor the DNS code This is a huge patch with many changes, all about the DNS. Initially, the idea was to update the DNS part to ease the threads support integration. But quickly, I started to refactor some parts. And after several iterations, it was impossible for me to commit the different parts atomically. So, instead of adding tens of patches, often reworking the same parts, it was easier to merge all my changes in a uniq patch. Here are all changes made on the DNS. First, the DNS initialization has been refactored. The DNS configuration parsing remains untouched, in cfgparse.c. But all checks have been moved in a post-check callback. In the function dns_finalize_config, for each resolvers, the nameservers configuration is tested and the task used to manage DNS resolutions is created. The links between the backend's servers and the resolvers are also created at this step. Here no connection are kept alive. So there is no needs anymore to reopen them after HAProxy fork. Connections used to send DNS queries will be opened on demand. Then, the way DNS requesters are linked to a DNS resolution has been reworked. The resolution used by a requester is now referenced into the dns_requester structure and the resolution pointers in server and dns_srvrq structures have been removed. wait and curr list of requesters, for a DNS resolution, have been replaced by a uniq list. And Finally, the way a requester is removed from a DNS resolution has been simplified. Now everything is done in dns_unlink_resolution. srv_set_fqdn function has been simplified. Now, there is only 1 way to set the server's FQDN, independently it is done by the CLI or when a SRV record is resolved. The static DNS resolutions pool has been replaced by a dynamoc pool. The part has been modified by Baptiste Assmann. The way the DNS resolutions are triggered by the task or by a health-check has been totally refactored. Now, all timeouts are respected. Especially hold.valid. 
The default frequency to wake up a resolvers is now configurable using "timeout resolve" parameter. Now, as documented, as long as invalid repsonses are received, we really wait all name servers responses before retrying. As far as possible, resources allocated during DNS configuration parsing are releases when HAProxy is shutdown. Beside all these changes, the code has been cleaned to ease code review and the doc has been updated.
2017-09-27 05:00:59 -04:00
int exp;
/* server already under maintenance */
if (s->next_admin & SRV_ADMF_RMAINT)
goto out;
/* If resolution is NULL we're dealing with SRV records Additional records */
if (resolution == NULL)
return srvrq_set_srv_down(s);
switch (resolution->status) {
case RSLV_STATUS_NONE:
MAJOR/REORG: dns: DNS resolution task and requester queues This patch is a major upgrade of the internal run-time DNS resolver in HAProxy and it brings the following 2 main changes: 1. DNS resolution task Up to now, DNS resolution was triggered by the health check task. From now, DNS resolution task is autonomous. It is started by HAProxy right after the scheduler is available and it is woken either when a network IO occurs for one of its nameserver or when a timeout is matched. From now, this means we can enable DNS resolution for a server without enabling health checking. 2. Introduction of a dns_requester structure Up to now, DNS resolution was purposely made for resolving server hostnames. The idea, is to ensure that any HAProxy internal object should be able to trigger a DNS resolution. For this purpose, 2 things has to be done: - clean up the DNS code from the server structure (this was already quite clean actually) and clean up the server's callbacks from manipulating too much DNS resolution - create an agnostic structure which allows linking a DNS resolution and a requester of any type (using obj_type enum) 3. Manage requesters through queues Up to now, there was an uniq relationship between a resolution and it's owner (aka the requester now). It's a shame, because in some cases, multiple objects may share the same hostname and may benefit from a resolution being performed by a third party. This patch introduces the notion of queues, which are basically lists of either currently running resolution or waiting ones. The resolutions are now available as a pool, which belongs to the resolvers. The pool has has a default size of 64 resolutions per resolvers and is allocated at configuration parsing.
2017-05-22 09:17:15 -04:00
/* status when HAProxy has just (re)started.
* Nothing to do, since the task is already automatically started */
goto out;
case RSLV_STATUS_VALID:
/*
* valid resolution but no usable server address
*/
srv_set_admin_flag(s, SRV_ADMF_RMAINT, SRV_ADM_STCHGC_DNS_NOIP);
return 0;
case RSLV_STATUS_NX:
/* stop server if resolution is NX for a long enough period */
MAJOR: dns: Refactor the DNS code This is a huge patch with many changes, all about the DNS. Initially, the idea was to update the DNS part to ease the threads support integration. But quickly, I started to refactor some parts. And after several iterations, it was impossible for me to commit the different parts atomically. So, instead of adding tens of patches, often reworking the same parts, it was easier to merge all my changes in a uniq patch. Here are all changes made on the DNS. First, the DNS initialization has been refactored. The DNS configuration parsing remains untouched, in cfgparse.c. But all checks have been moved in a post-check callback. In the function dns_finalize_config, for each resolvers, the nameservers configuration is tested and the task used to manage DNS resolutions is created. The links between the backend's servers and the resolvers are also created at this step. Here no connection are kept alive. So there is no needs anymore to reopen them after HAProxy fork. Connections used to send DNS queries will be opened on demand. Then, the way DNS requesters are linked to a DNS resolution has been reworked. The resolution used by a requester is now referenced into the dns_requester structure and the resolution pointers in server and dns_srvrq structures have been removed. wait and curr list of requesters, for a DNS resolution, have been replaced by a uniq list. And Finally, the way a requester is removed from a DNS resolution has been simplified. Now everything is done in dns_unlink_resolution. srv_set_fqdn function has been simplified. Now, there is only 1 way to set the server's FQDN, independently it is done by the CLI or when a SRV record is resolved. The static DNS resolutions pool has been replaced by a dynamoc pool. The part has been modified by Baptiste Assmann. The way the DNS resolutions are triggered by the task or by a health-check has been totally refactored. Now, all timeouts are respected. Especially hold.valid. 
The default frequency to wake up a resolvers is now configurable using "timeout resolve" parameter. Now, as documented, as long as invalid repsonses are received, we really wait all name servers responses before retrying. As far as possible, resources allocated during DNS configuration parsing are releases when HAProxy is shutdown. Beside all these changes, the code has been cleaned to ease code review and the doc has been updated.
2017-09-27 05:00:59 -04:00
exp = tick_add(resolution->last_valid, resolvers->hold.nx);
if (!tick_is_expired(exp, now_ms))
goto out; // not yet expired
MAJOR: dns: Refactor the DNS code This is a huge patch with many changes, all about the DNS. Initially, the idea was to update the DNS part to ease the threads support integration. But quickly, I started to refactor some parts. And after several iterations, it was impossible for me to commit the different parts atomically. So, instead of adding tens of patches, often reworking the same parts, it was easier to merge all my changes in a uniq patch. Here are all changes made on the DNS. First, the DNS initialization has been refactored. The DNS configuration parsing remains untouched, in cfgparse.c. But all checks have been moved in a post-check callback. In the function dns_finalize_config, for each resolvers, the nameservers configuration is tested and the task used to manage DNS resolutions is created. The links between the backend's servers and the resolvers are also created at this step. Here no connection are kept alive. So there is no needs anymore to reopen them after HAProxy fork. Connections used to send DNS queries will be opened on demand. Then, the way DNS requesters are linked to a DNS resolution has been reworked. The resolution used by a requester is now referenced into the dns_requester structure and the resolution pointers in server and dns_srvrq structures have been removed. wait and curr list of requesters, for a DNS resolution, have been replaced by a uniq list. And Finally, the way a requester is removed from a DNS resolution has been simplified. Now everything is done in dns_unlink_resolution. srv_set_fqdn function has been simplified. Now, there is only 1 way to set the server's FQDN, independently it is done by the CLI or when a SRV record is resolved. The static DNS resolutions pool has been replaced by a dynamoc pool. The part has been modified by Baptiste Assmann. The way the DNS resolutions are triggered by the task or by a health-check has been totally refactored. Now, all timeouts are respected. Especially hold.valid. 
The default frequency to wake up a resolvers is now configurable using "timeout resolve" parameter. Now, as documented, as long as invalid repsonses are received, we really wait all name servers responses before retrying. As far as possible, resources allocated during DNS configuration parsing are releases when HAProxy is shutdown. Beside all these changes, the code has been cleaned to ease code review and the doc has been updated.
2017-09-27 05:00:59 -04:00
srv_set_admin_flag(s, SRV_ADMF_RMAINT, SRV_ADM_STCHGC_DNS_NX);
MAJOR: dns: Refactor the DNS code This is a huge patch with many changes, all about the DNS. Initially, the idea was to update the DNS part to ease the threads support integration. But quickly, I started to refactor some parts. And after several iterations, it was impossible for me to commit the different parts atomically. So, instead of adding tens of patches, often reworking the same parts, it was easier to merge all my changes in a uniq patch. Here are all changes made on the DNS. First, the DNS initialization has been refactored. The DNS configuration parsing remains untouched, in cfgparse.c. But all checks have been moved in a post-check callback. In the function dns_finalize_config, for each resolvers, the nameservers configuration is tested and the task used to manage DNS resolutions is created. The links between the backend's servers and the resolvers are also created at this step. Here no connection are kept alive. So there is no needs anymore to reopen them after HAProxy fork. Connections used to send DNS queries will be opened on demand. Then, the way DNS requesters are linked to a DNS resolution has been reworked. The resolution used by a requester is now referenced into the dns_requester structure and the resolution pointers in server and dns_srvrq structures have been removed. wait and curr list of requesters, for a DNS resolution, have been replaced by a uniq list. And Finally, the way a requester is removed from a DNS resolution has been simplified. Now everything is done in dns_unlink_resolution. srv_set_fqdn function has been simplified. Now, there is only 1 way to set the server's FQDN, independently it is done by the CLI or when a SRV record is resolved. The static DNS resolutions pool has been replaced by a dynamoc pool. The part has been modified by Baptiste Assmann. The way the DNS resolutions are triggered by the task or by a health-check has been totally refactored. Now, all timeouts are respected. Especially hold.valid. 
The default frequency to wake up a resolvers is now configurable using "timeout resolve" parameter. Now, as documented, as long as invalid repsonses are received, we really wait all name servers responses before retrying. As far as possible, resources allocated during DNS configuration parsing are releases when HAProxy is shutdown. Beside all these changes, the code has been cleaned to ease code review and the doc has been updated.
2017-09-27 05:00:59 -04:00
return 0;
case RSLV_STATUS_TIMEOUT:
/* stop server if resolution is TIMEOUT for a long enough period */
MAJOR: dns: Refactor the DNS code This is a huge patch with many changes, all about the DNS. Initially, the idea was to update the DNS part to ease the threads support integration. But quickly, I started to refactor some parts. And after several iterations, it was impossible for me to commit the different parts atomically. So, instead of adding tens of patches, often reworking the same parts, it was easier to merge all my changes in a uniq patch. Here are all changes made on the DNS. First, the DNS initialization has been refactored. The DNS configuration parsing remains untouched, in cfgparse.c. But all checks have been moved in a post-check callback. In the function dns_finalize_config, for each resolvers, the nameservers configuration is tested and the task used to manage DNS resolutions is created. The links between the backend's servers and the resolvers are also created at this step. Here no connection are kept alive. So there is no needs anymore to reopen them after HAProxy fork. Connections used to send DNS queries will be opened on demand. Then, the way DNS requesters are linked to a DNS resolution has been reworked. The resolution used by a requester is now referenced into the dns_requester structure and the resolution pointers in server and dns_srvrq structures have been removed. wait and curr list of requesters, for a DNS resolution, have been replaced by a uniq list. And Finally, the way a requester is removed from a DNS resolution has been simplified. Now everything is done in dns_unlink_resolution. srv_set_fqdn function has been simplified. Now, there is only 1 way to set the server's FQDN, independently it is done by the CLI or when a SRV record is resolved. The static DNS resolutions pool has been replaced by a dynamoc pool. The part has been modified by Baptiste Assmann. The way the DNS resolutions are triggered by the task or by a health-check has been totally refactored. Now, all timeouts are respected. Especially hold.valid. 
The default frequency to wake up a resolvers is now configurable using "timeout resolve" parameter. Now, as documented, as long as invalid repsonses are received, we really wait all name servers responses before retrying. As far as possible, resources allocated during DNS configuration parsing are releases when HAProxy is shutdown. Beside all these changes, the code has been cleaned to ease code review and the doc has been updated.
2017-09-27 05:00:59 -04:00
exp = tick_add(resolution->last_valid, resolvers->hold.timeout);
if (!tick_is_expired(exp, now_ms))
goto out; // not yet expired
MAJOR: dns: Refactor the DNS code This is a huge patch with many changes, all about the DNS. Initially, the idea was to update the DNS part to ease the threads support integration. But quickly, I started to refactor some parts. And after several iterations, it was impossible for me to commit the different parts atomically. So, instead of adding tens of patches, often reworking the same parts, it was easier to merge all my changes in a uniq patch. Here are all changes made on the DNS. First, the DNS initialization has been refactored. The DNS configuration parsing remains untouched, in cfgparse.c. But all checks have been moved in a post-check callback. In the function dns_finalize_config, for each resolvers, the nameservers configuration is tested and the task used to manage DNS resolutions is created. The links between the backend's servers and the resolvers are also created at this step. Here no connection are kept alive. So there is no needs anymore to reopen them after HAProxy fork. Connections used to send DNS queries will be opened on demand. Then, the way DNS requesters are linked to a DNS resolution has been reworked. The resolution used by a requester is now referenced into the dns_requester structure and the resolution pointers in server and dns_srvrq structures have been removed. wait and curr list of requesters, for a DNS resolution, have been replaced by a uniq list. And Finally, the way a requester is removed from a DNS resolution has been simplified. Now everything is done in dns_unlink_resolution. srv_set_fqdn function has been simplified. Now, there is only 1 way to set the server's FQDN, independently it is done by the CLI or when a SRV record is resolved. The static DNS resolutions pool has been replaced by a dynamoc pool. The part has been modified by Baptiste Assmann. The way the DNS resolutions are triggered by the task or by a health-check has been totally refactored. Now, all timeouts are respected. Especially hold.valid. 
The default frequency to wake up a resolvers is now configurable using "timeout resolve" parameter. Now, as documented, as long as invalid repsonses are received, we really wait all name servers responses before retrying. As far as possible, resources allocated during DNS configuration parsing are releases when HAProxy is shutdown. Beside all these changes, the code has been cleaned to ease code review and the doc has been updated.
2017-09-27 05:00:59 -04:00
srv_set_admin_flag(s, SRV_ADMF_RMAINT, SRV_ADM_STCHGC_DNS_TIMEOUT);
MAJOR: dns: Refactor the DNS code This is a huge patch with many changes, all about the DNS. Initially, the idea was to update the DNS part to ease the threads support integration. But quickly, I started to refactor some parts. And after several iterations, it was impossible for me to commit the different parts atomically. So, instead of adding tens of patches, often reworking the same parts, it was easier to merge all my changes in a uniq patch. Here are all changes made on the DNS. First, the DNS initialization has been refactored. The DNS configuration parsing remains untouched, in cfgparse.c. But all checks have been moved in a post-check callback. In the function dns_finalize_config, for each resolvers, the nameservers configuration is tested and the task used to manage DNS resolutions is created. The links between the backend's servers and the resolvers are also created at this step. Here no connection are kept alive. So there is no needs anymore to reopen them after HAProxy fork. Connections used to send DNS queries will be opened on demand. Then, the way DNS requesters are linked to a DNS resolution has been reworked. The resolution used by a requester is now referenced into the dns_requester structure and the resolution pointers in server and dns_srvrq structures have been removed. wait and curr list of requesters, for a DNS resolution, have been replaced by a uniq list. And Finally, the way a requester is removed from a DNS resolution has been simplified. Now everything is done in dns_unlink_resolution. srv_set_fqdn function has been simplified. Now, there is only 1 way to set the server's FQDN, independently it is done by the CLI or when a SRV record is resolved. The static DNS resolutions pool has been replaced by a dynamoc pool. The part has been modified by Baptiste Assmann. The way the DNS resolutions are triggered by the task or by a health-check has been totally refactored. Now, all timeouts are respected. Especially hold.valid. 
The default frequency to wake up a resolvers is now configurable using "timeout resolve" parameter. Now, as documented, as long as invalid repsonses are received, we really wait all name servers responses before retrying. As far as possible, resources allocated during DNS configuration parsing are releases when HAProxy is shutdown. Beside all these changes, the code has been cleaned to ease code review and the doc has been updated.
2017-09-27 05:00:59 -04:00
return 0;
case RSLV_STATUS_REFUSED:
/* stop server if resolution is REFUSED for a long enough period */
MAJOR: dns: Refactor the DNS code This is a huge patch with many changes, all about the DNS. Initially, the idea was to update the DNS part to ease the threads support integration. But quickly, I started to refactor some parts. And after several iterations, it was impossible for me to commit the different parts atomically. So, instead of adding tens of patches, often reworking the same parts, it was easier to merge all my changes in a uniq patch. Here are all changes made on the DNS. First, the DNS initialization has been refactored. The DNS configuration parsing remains untouched, in cfgparse.c. But all checks have been moved in a post-check callback. In the function dns_finalize_config, for each resolvers, the nameservers configuration is tested and the task used to manage DNS resolutions is created. The links between the backend's servers and the resolvers are also created at this step. Here no connection are kept alive. So there is no needs anymore to reopen them after HAProxy fork. Connections used to send DNS queries will be opened on demand. Then, the way DNS requesters are linked to a DNS resolution has been reworked. The resolution used by a requester is now referenced into the dns_requester structure and the resolution pointers in server and dns_srvrq structures have been removed. wait and curr list of requesters, for a DNS resolution, have been replaced by a uniq list. And Finally, the way a requester is removed from a DNS resolution has been simplified. Now everything is done in dns_unlink_resolution. srv_set_fqdn function has been simplified. Now, there is only 1 way to set the server's FQDN, independently it is done by the CLI or when a SRV record is resolved. The static DNS resolutions pool has been replaced by a dynamoc pool. The part has been modified by Baptiste Assmann. The way the DNS resolutions are triggered by the task or by a health-check has been totally refactored. Now, all timeouts are respected. Especially hold.valid. 
The default frequency to wake up a resolvers is now configurable using "timeout resolve" parameter. Now, as documented, as long as invalid repsonses are received, we really wait all name servers responses before retrying. As far as possible, resources allocated during DNS configuration parsing are releases when HAProxy is shutdown. Beside all these changes, the code has been cleaned to ease code review and the doc has been updated.
2017-09-27 05:00:59 -04:00
exp = tick_add(resolution->last_valid, resolvers->hold.refused);
if (!tick_is_expired(exp, now_ms))
goto out; // not yet expired
MAJOR: dns: Refactor the DNS code This is a huge patch with many changes, all about the DNS. Initially, the idea was to update the DNS part to ease the threads support integration. But quickly, I started to refactor some parts. And after several iterations, it was impossible for me to commit the different parts atomically. So, instead of adding tens of patches, often reworking the same parts, it was easier to merge all my changes in a uniq patch. Here are all changes made on the DNS. First, the DNS initialization has been refactored. The DNS configuration parsing remains untouched, in cfgparse.c. But all checks have been moved in a post-check callback. In the function dns_finalize_config, for each resolvers, the nameservers configuration is tested and the task used to manage DNS resolutions is created. The links between the backend's servers and the resolvers are also created at this step. Here no connection are kept alive. So there is no needs anymore to reopen them after HAProxy fork. Connections used to send DNS queries will be opened on demand. Then, the way DNS requesters are linked to a DNS resolution has been reworked. The resolution used by a requester is now referenced into the dns_requester structure and the resolution pointers in server and dns_srvrq structures have been removed. wait and curr list of requesters, for a DNS resolution, have been replaced by a uniq list. And Finally, the way a requester is removed from a DNS resolution has been simplified. Now everything is done in dns_unlink_resolution. srv_set_fqdn function has been simplified. Now, there is only 1 way to set the server's FQDN, independently it is done by the CLI or when a SRV record is resolved. The static DNS resolutions pool has been replaced by a dynamoc pool. The part has been modified by Baptiste Assmann. The way the DNS resolutions are triggered by the task or by a health-check has been totally refactored. Now, all timeouts are respected. Especially hold.valid. 
The default frequency to wake up a resolvers is now configurable using "timeout resolve" parameter. Now, as documented, as long as invalid repsonses are received, we really wait all name servers responses before retrying. As far as possible, resources allocated during DNS configuration parsing are releases when HAProxy is shutdown. Beside all these changes, the code has been cleaned to ease code review and the doc has been updated.
2017-09-27 05:00:59 -04:00
srv_set_admin_flag(s, SRV_ADMF_RMAINT, SRV_ADM_STCHGC_DNS_REFUSED);
MAJOR: dns: Refactor the DNS code This is a huge patch with many changes, all about the DNS. Initially, the idea was to update the DNS part to ease the threads support integration. But quickly, I started to refactor some parts. And after several iterations, it was impossible for me to commit the different parts atomically. So, instead of adding tens of patches, often reworking the same parts, it was easier to merge all my changes in a uniq patch. Here are all changes made on the DNS. First, the DNS initialization has been refactored. The DNS configuration parsing remains untouched, in cfgparse.c. But all checks have been moved in a post-check callback. In the function dns_finalize_config, for each resolvers, the nameservers configuration is tested and the task used to manage DNS resolutions is created. The links between the backend's servers and the resolvers are also created at this step. Here no connection are kept alive. So there is no needs anymore to reopen them after HAProxy fork. Connections used to send DNS queries will be opened on demand. Then, the way DNS requesters are linked to a DNS resolution has been reworked. The resolution used by a requester is now referenced into the dns_requester structure and the resolution pointers in server and dns_srvrq structures have been removed. wait and curr list of requesters, for a DNS resolution, have been replaced by a uniq list. And Finally, the way a requester is removed from a DNS resolution has been simplified. Now everything is done in dns_unlink_resolution. srv_set_fqdn function has been simplified. Now, there is only 1 way to set the server's FQDN, independently it is done by the CLI or when a SRV record is resolved. The static DNS resolutions pool has been replaced by a dynamoc pool. The part has been modified by Baptiste Assmann. The way the DNS resolutions are triggered by the task or by a health-check has been totally refactored. Now, all timeouts are respected. Especially hold.valid. 
The default frequency to wake up a resolvers is now configurable using "timeout resolve" parameter. Now, as documented, as long as invalid repsonses are received, we really wait all name servers responses before retrying. As far as possible, resources allocated during DNS configuration parsing are releases when HAProxy is shutdown. Beside all these changes, the code has been cleaned to ease code review and the doc has been updated.
2017-09-27 05:00:59 -04:00
return 0;
default:
MAJOR: dns: Refactor the DNS code This is a huge patch with many changes, all about the DNS. Initially, the idea was to update the DNS part to ease the threads support integration. But quickly, I started to refactor some parts. And after several iterations, it was impossible for me to commit the different parts atomically. So, instead of adding tens of patches, often reworking the same parts, it was easier to merge all my changes in a uniq patch. Here are all changes made on the DNS. First, the DNS initialization has been refactored. The DNS configuration parsing remains untouched, in cfgparse.c. But all checks have been moved in a post-check callback. In the function dns_finalize_config, for each resolvers, the nameservers configuration is tested and the task used to manage DNS resolutions is created. The links between the backend's servers and the resolvers are also created at this step. Here no connection are kept alive. So there is no needs anymore to reopen them after HAProxy fork. Connections used to send DNS queries will be opened on demand. Then, the way DNS requesters are linked to a DNS resolution has been reworked. The resolution used by a requester is now referenced into the dns_requester structure and the resolution pointers in server and dns_srvrq structures have been removed. wait and curr list of requesters, for a DNS resolution, have been replaced by a uniq list. And Finally, the way a requester is removed from a DNS resolution has been simplified. Now everything is done in dns_unlink_resolution. srv_set_fqdn function has been simplified. Now, there is only 1 way to set the server's FQDN, independently it is done by the CLI or when a SRV record is resolved. The static DNS resolutions pool has been replaced by a dynamoc pool. The part has been modified by Baptiste Assmann. The way the DNS resolutions are triggered by the task or by a health-check has been totally refactored. Now, all timeouts are respected. Especially hold.valid. 
The default frequency to wake up a resolvers is now configurable using "timeout resolve" parameter. Now, as documented, as long as invalid repsonses are received, we really wait all name servers responses before retrying. As far as possible, resources allocated during DNS configuration parsing are releases when HAProxy is shutdown. Beside all these changes, the code has been cleaned to ease code review and the doc has been updated.
2017-09-27 05:00:59 -04:00
/* stop server if resolution failed for a long enough period */
exp = tick_add(resolution->last_valid, resolvers->hold.other);
if (!tick_is_expired(exp, now_ms))
goto out; // not yet expired
MAJOR: dns: Refactor the DNS code This is a huge patch with many changes, all about the DNS. Initially, the idea was to update the DNS part to ease the threads support integration. But quickly, I started to refactor some parts. And after several iterations, it was impossible for me to commit the different parts atomically. So, instead of adding tens of patches, often reworking the same parts, it was easier to merge all my changes in a uniq patch. Here are all changes made on the DNS. First, the DNS initialization has been refactored. The DNS configuration parsing remains untouched, in cfgparse.c. But all checks have been moved in a post-check callback. In the function dns_finalize_config, for each resolvers, the nameservers configuration is tested and the task used to manage DNS resolutions is created. The links between the backend's servers and the resolvers are also created at this step. Here no connection are kept alive. So there is no needs anymore to reopen them after HAProxy fork. Connections used to send DNS queries will be opened on demand. Then, the way DNS requesters are linked to a DNS resolution has been reworked. The resolution used by a requester is now referenced into the dns_requester structure and the resolution pointers in server and dns_srvrq structures have been removed. wait and curr list of requesters, for a DNS resolution, have been replaced by a uniq list. And Finally, the way a requester is removed from a DNS resolution has been simplified. Now everything is done in dns_unlink_resolution. srv_set_fqdn function has been simplified. Now, there is only 1 way to set the server's FQDN, independently it is done by the CLI or when a SRV record is resolved. The static DNS resolutions pool has been replaced by a dynamoc pool. The part has been modified by Baptiste Assmann. The way the DNS resolutions are triggered by the task or by a health-check has been totally refactored. Now, all timeouts are respected. Especially hold.valid. 
The default frequency to wake up a resolvers is now configurable using "timeout resolve" parameter. Now, as documented, as long as invalid repsonses are received, we really wait all name servers responses before retrying. As far as possible, resources allocated during DNS configuration parsing are releases when HAProxy is shutdown. Beside all these changes, the code has been cleaned to ease code review and the doc has been updated.
2017-09-27 05:00:59 -04:00
srv_set_admin_flag(s, SRV_ADMF_RMAINT, SRV_ADM_STCHGC_DNS_UNSPEC);
MAJOR: dns: Refactor the DNS code This is a huge patch with many changes, all about the DNS. Initially, the idea was to update the DNS part to ease the threads support integration. But quickly, I started to refactor some parts. And after several iterations, it was impossible for me to commit the different parts atomically. So, instead of adding tens of patches, often reworking the same parts, it was easier to merge all my changes in a uniq patch. Here are all changes made on the DNS. First, the DNS initialization has been refactored. The DNS configuration parsing remains untouched, in cfgparse.c. But all checks have been moved in a post-check callback. In the function dns_finalize_config, for each resolvers, the nameservers configuration is tested and the task used to manage DNS resolutions is created. The links between the backend's servers and the resolvers are also created at this step. Here no connection are kept alive. So there is no needs anymore to reopen them after HAProxy fork. Connections used to send DNS queries will be opened on demand. Then, the way DNS requesters are linked to a DNS resolution has been reworked. The resolution used by a requester is now referenced into the dns_requester structure and the resolution pointers in server and dns_srvrq structures have been removed. wait and curr list of requesters, for a DNS resolution, have been replaced by a uniq list. And Finally, the way a requester is removed from a DNS resolution has been simplified. Now everything is done in dns_unlink_resolution. srv_set_fqdn function has been simplified. Now, there is only 1 way to set the server's FQDN, independently it is done by the CLI or when a SRV record is resolved. The static DNS resolutions pool has been replaced by a dynamoc pool. The part has been modified by Baptiste Assmann. The way the DNS resolutions are triggered by the task or by a health-check has been totally refactored. Now, all timeouts are respected. Especially hold.valid. 
The default frequency to wake up a resolvers is now configurable using "timeout resolve" parameter. Now, as documented, as long as invalid repsonses are received, we really wait all name servers responses before retrying. As far as possible, resources allocated during DNS configuration parsing are releases when HAProxy is shutdown. Beside all these changes, the code has been cleaned to ease code review and the doc has been updated.
2017-09-27 05:00:59 -04:00
return 0;
}
out:
return 1;
}
/*
* Server Name Resolution valid response callback
* It expects:
* - <nameserver>: the name server which answered the valid response
* - <response>: buffer containing a valid DNS response
* - <response_len>: size of <response>
* It performs the following actions:
* - ignore response if current ip found and server family not met
* - update with first new ip found if family is met and current IP is not found
* returns:
* 0 on error
* 1 when no error or safe ignore
*
* Must be called with server lock held
*/
int snr_resolution_cb(struct resolv_requester *requester, struct dns_counters *counters)
{
MAJOR/REORG: dns: DNS resolution task and requester queues This patch is a major upgrade of the internal run-time DNS resolver in HAProxy and it brings the following 2 main changes: 1. DNS resolution task Up to now, DNS resolution was triggered by the health check task. From now, DNS resolution task is autonomous. It is started by HAProxy right after the scheduler is available and it is woken either when a network IO occurs for one of its nameserver or when a timeout is matched. From now, this means we can enable DNS resolution for a server without enabling health checking. 2. Introduction of a dns_requester structure Up to now, DNS resolution was purposely made for resolving server hostnames. The idea, is to ensure that any HAProxy internal object should be able to trigger a DNS resolution. For this purpose, 2 things has to be done: - clean up the DNS code from the server structure (this was already quite clean actually) and clean up the server's callbacks from manipulating too much DNS resolution - create an agnostic structure which allows linking a DNS resolution and a requester of any type (using obj_type enum) 3. Manage requesters through queues Up to now, there was an uniq relationship between a resolution and it's owner (aka the requester now). It's a shame, because in some cases, multiple objects may share the same hostname and may benefit from a resolution being performed by a third party. This patch introduces the notion of queues, which are basically lists of either currently running resolution or waiting ones. The resolutions are now available as a pool, which belongs to the resolvers. The pool has has a default size of 64 resolutions per resolvers and is allocated at configuration parsing.
2017-05-22 09:17:15 -04:00
struct server *s = NULL;
struct resolv_resolution *resolution = NULL;
void *serverip, *firstip;
short server_sin_family, firstip_sin_family;
int ret;
int has_no_ip = 0;
MAJOR: dns: Refactor the DNS code This is a huge patch with many changes, all about the DNS. Initially, the idea was to update the DNS part to ease the threads support integration. But quickly, I started to refactor some parts. And after several iterations, it was impossible for me to commit the different parts atomically. So, instead of adding tens of patches, often reworking the same parts, it was easier to merge all my changes in a uniq patch. Here are all changes made on the DNS. First, the DNS initialization has been refactored. The DNS configuration parsing remains untouched, in cfgparse.c. But all checks have been moved in a post-check callback. In the function dns_finalize_config, for each resolvers, the nameservers configuration is tested and the task used to manage DNS resolutions is created. The links between the backend's servers and the resolvers are also created at this step. Here no connection are kept alive. So there is no needs anymore to reopen them after HAProxy fork. Connections used to send DNS queries will be opened on demand. Then, the way DNS requesters are linked to a DNS resolution has been reworked. The resolution used by a requester is now referenced into the dns_requester structure and the resolution pointers in server and dns_srvrq structures have been removed. wait and curr list of requesters, for a DNS resolution, have been replaced by a uniq list. And Finally, the way a requester is removed from a DNS resolution has been simplified. Now everything is done in dns_unlink_resolution. srv_set_fqdn function has been simplified. Now, there is only 1 way to set the server's FQDN, independently it is done by the CLI or when a SRV record is resolved. The static DNS resolutions pool has been replaced by a dynamoc pool. The part has been modified by Baptiste Assmann. The way the DNS resolutions are triggered by the task or by a health-check has been totally refactored. Now, all timeouts are respected. Especially hold.valid. 
The default frequency to wake up a resolvers section is now configurable using the "timeout resolve" parameter. Now, as documented, as long as invalid responses are received, we really wait for all name servers' responses before retrying. As far as possible, resources allocated during DNS configuration parsing are released when HAProxy is shut down. Besides all these changes, the code has been cleaned up to ease code review and the doc has been updated.
2017-09-27 05:00:59 -04:00
s = objt_server(requester->owner);
MAJOR/REORG: dns: DNS resolution task and requester queues This patch is a major upgrade of the internal run-time DNS resolver in HAProxy and it brings the following 2 main changes: 1. DNS resolution task Up to now, DNS resolution was triggered by the health check task. From now, DNS resolution task is autonomous. It is started by HAProxy right after the scheduler is available and it is woken either when a network IO occurs for one of its nameserver or when a timeout is matched. From now, this means we can enable DNS resolution for a server without enabling health checking. 2. Introduction of a dns_requester structure Up to now, DNS resolution was purposely made for resolving server hostnames. The idea, is to ensure that any HAProxy internal object should be able to trigger a DNS resolution. For this purpose, 2 things has to be done: - clean up the DNS code from the server structure (this was already quite clean actually) and clean up the server's callbacks from manipulating too much DNS resolution - create an agnostic structure which allows linking a DNS resolution and a requester of any type (using obj_type enum) 3. Manage requesters through queues Up to now, there was an uniq relationship between a resolution and it's owner (aka the requester now). It's a shame, because in some cases, multiple objects may share the same hostname and may benefit from a resolution being performed by a third party. This patch introduces the notion of queues, which are basically lists of either currently running resolution or waiting ones. The resolutions are now available as a pool, which belongs to the resolvers. The pool has has a default size of 64 resolutions per resolvers and is allocated at configuration parsing.
2017-05-22 09:17:15 -04:00
if (!s)
return 1;
if (s->srvrq) {
/* If DNS resolution is disabled ignore it.
* This is the case if the server was associated to
* a SRV record and this record is now expired.
*/
if (s->flags & SRV_F_NO_RESOLUTION)
return 1;
}
resolution = (s->resolv_requester ? s->resolv_requester->resolution : NULL);
if (!resolution)
return 1;
MAJOR/REORG: dns: DNS resolution task and requester queues This patch is a major upgrade of the internal run-time DNS resolver in HAProxy and it brings the following 2 main changes: 1. DNS resolution task Up to now, DNS resolution was triggered by the health check task. From now, DNS resolution task is autonomous. It is started by HAProxy right after the scheduler is available and it is woken either when a network IO occurs for one of its nameserver or when a timeout is matched. From now, this means we can enable DNS resolution for a server without enabling health checking. 2. Introduction of a dns_requester structure Up to now, DNS resolution was purposely made for resolving server hostnames. The idea, is to ensure that any HAProxy internal object should be able to trigger a DNS resolution. For this purpose, 2 things has to be done: - clean up the DNS code from the server structure (this was already quite clean actually) and clean up the server's callbacks from manipulating too much DNS resolution - create an agnostic structure which allows linking a DNS resolution and a requester of any type (using obj_type enum) 3. Manage requesters through queues Up to now, there was an uniq relationship between a resolution and it's owner (aka the requester now). It's a shame, because in some cases, multiple objects may share the same hostname and may benefit from a resolution being performed by a third party. This patch introduces the notion of queues, which are basically lists of either currently running resolution or waiting ones. The resolutions are now available as a pool, which belongs to the resolvers. The pool has has a default size of 64 resolutions per resolvers and is allocated at configuration parsing.
2017-05-22 09:17:15 -04:00
/* initializing variables */
firstip = NULL; /* pointer to the first valid response found */
/* it will be used as the new IP if a change is required */
firstip_sin_family = AF_UNSPEC;
serverip = NULL; /* current server IP address */
/* initializing server IP pointer */
server_sin_family = s->addr.ss_family;
switch (server_sin_family) {
case AF_INET:
serverip = &((struct sockaddr_in *)&s->addr)->sin_addr.s_addr;
break;
case AF_INET6:
serverip = &((struct sockaddr_in6 *)&s->addr)->sin6_addr.s6_addr;
break;
case AF_UNSPEC:
break;
default:
goto invalid;
}
ret = resolv_get_ip_from_response(&resolution->response, &s->resolv_opts,
serverip, server_sin_family, &firstip,
&firstip_sin_family, s);
switch (ret) {
case RSLV_UPD_NO:
MAJOR/REORG: dns: DNS resolution task and requester queues This patch is a major upgrade of the internal run-time DNS resolver in HAProxy and it brings the following 2 main changes: 1. DNS resolution task Up to now, DNS resolution was triggered by the health check task. From now, DNS resolution task is autonomous. It is started by HAProxy right after the scheduler is available and it is woken either when a network IO occurs for one of its nameserver or when a timeout is matched. From now, this means we can enable DNS resolution for a server without enabling health checking. 2. Introduction of a dns_requester structure Up to now, DNS resolution was purposely made for resolving server hostnames. The idea, is to ensure that any HAProxy internal object should be able to trigger a DNS resolution. For this purpose, 2 things has to be done: - clean up the DNS code from the server structure (this was already quite clean actually) and clean up the server's callbacks from manipulating too much DNS resolution - create an agnostic structure which allows linking a DNS resolution and a requester of any type (using obj_type enum) 3. Manage requesters through queues Up to now, there was an uniq relationship between a resolution and it's owner (aka the requester now). It's a shame, because in some cases, multiple objects may share the same hostname and may benefit from a resolution being performed by a third party. This patch introduces the notion of queues, which are basically lists of either currently running resolution or waiting ones. The resolutions are now available as a pool, which belongs to the resolvers. The pool has has a default size of 64 resolutions per resolvers and is allocated at configuration parsing.
2017-05-22 09:17:15 -04:00
goto update_status;
case RSLV_UPD_SRVIP_NOT_FOUND:
goto save_ip;
case RSLV_UPD_NO_IP_FOUND:
has_no_ip = 1;
MAJOR/REORG: dns: DNS resolution task and requester queues This patch is a major upgrade of the internal run-time DNS resolver in HAProxy and it brings the following 2 main changes: 1. DNS resolution task Up to now, DNS resolution was triggered by the health check task. From now, DNS resolution task is autonomous. It is started by HAProxy right after the scheduler is available and it is woken either when a network IO occurs for one of its nameserver or when a timeout is matched. From now, this means we can enable DNS resolution for a server without enabling health checking. 2. Introduction of a dns_requester structure Up to now, DNS resolution was purposely made for resolving server hostnames. The idea, is to ensure that any HAProxy internal object should be able to trigger a DNS resolution. For this purpose, 2 things has to be done: - clean up the DNS code from the server structure (this was already quite clean actually) and clean up the server's callbacks from manipulating too much DNS resolution - create an agnostic structure which allows linking a DNS resolution and a requester of any type (using obj_type enum) 3. Manage requesters through queues Up to now, there was an uniq relationship between a resolution and it's owner (aka the requester now). It's a shame, because in some cases, multiple objects may share the same hostname and may benefit from a resolution being performed by a third party. This patch introduces the notion of queues, which are basically lists of either currently running resolution or waiting ones. The resolutions are now available as a pool, which belongs to the resolvers. The pool has has a default size of 64 resolutions per resolvers and is allocated at configuration parsing.
2017-05-22 09:17:15 -04:00
goto update_status;
default:
has_no_ip = 1;
goto invalid;
}
save_ip:
if (counters) {
counters->app.resolver.update++;
MAJOR: dns: Refactor the DNS code This is a huge patch with many changes, all about the DNS. Initially, the idea was to update the DNS part to ease the threads support integration. But quickly, I started to refactor some parts. And after several iterations, it was impossible for me to commit the different parts atomically. So, instead of adding tens of patches, often reworking the same parts, it was easier to merge all my changes in a uniq patch. Here are all changes made on the DNS. First, the DNS initialization has been refactored. The DNS configuration parsing remains untouched, in cfgparse.c. But all checks have been moved in a post-check callback. In the function dns_finalize_config, for each resolvers, the nameservers configuration is tested and the task used to manage DNS resolutions is created. The links between the backend's servers and the resolvers are also created at this step. Here no connection are kept alive. So there is no needs anymore to reopen them after HAProxy fork. Connections used to send DNS queries will be opened on demand. Then, the way DNS requesters are linked to a DNS resolution has been reworked. The resolution used by a requester is now referenced into the dns_requester structure and the resolution pointers in server and dns_srvrq structures have been removed. wait and curr list of requesters, for a DNS resolution, have been replaced by a uniq list. And Finally, the way a requester is removed from a DNS resolution has been simplified. Now everything is done in dns_unlink_resolution. srv_set_fqdn function has been simplified. Now, there is only 1 way to set the server's FQDN, independently it is done by the CLI or when a SRV record is resolved. The static DNS resolutions pool has been replaced by a dynamoc pool. The part has been modified by Baptiste Assmann. The way the DNS resolutions are triggered by the task or by a health-check has been totally refactored. Now, all timeouts are respected. Especially hold.valid. 
The default frequency to wake up a resolvers section is now configurable using the "timeout resolve" parameter. Now, as documented, as long as invalid responses are received, we really wait for all name servers' responses before retrying. As far as possible, resources allocated during DNS configuration parsing are released when HAProxy is shut down. Besides all these changes, the code has been cleaned up to ease code review and the doc has been updated.
2017-09-27 05:00:59 -04:00
/* save the first ip we found */
srv_update_addr(s, firstip, firstip_sin_family,
SERVER_INETADDR_UPDATER_DNS_RESOLVER(counters->ns_puid));
MAJOR: dns: Refactor the DNS code This is a huge patch with many changes, all about the DNS. Initially, the idea was to update the DNS part to ease the threads support integration. But quickly, I started to refactor some parts. And after several iterations, it was impossible for me to commit the different parts atomically. So, instead of adding tens of patches, often reworking the same parts, it was easier to merge all my changes in a uniq patch. Here are all changes made on the DNS. First, the DNS initialization has been refactored. The DNS configuration parsing remains untouched, in cfgparse.c. But all checks have been moved in a post-check callback. In the function dns_finalize_config, for each resolvers, the nameservers configuration is tested and the task used to manage DNS resolutions is created. The links between the backend's servers and the resolvers are also created at this step. Here no connection are kept alive. So there is no needs anymore to reopen them after HAProxy fork. Connections used to send DNS queries will be opened on demand. Then, the way DNS requesters are linked to a DNS resolution has been reworked. The resolution used by a requester is now referenced into the dns_requester structure and the resolution pointers in server and dns_srvrq structures have been removed. wait and curr list of requesters, for a DNS resolution, have been replaced by a uniq list. And Finally, the way a requester is removed from a DNS resolution has been simplified. Now everything is done in dns_unlink_resolution. srv_set_fqdn function has been simplified. Now, there is only 1 way to set the server's FQDN, independently it is done by the CLI or when a SRV record is resolved. The static DNS resolutions pool has been replaced by a dynamoc pool. The part has been modified by Baptiste Assmann. The way the DNS resolutions are triggered by the task or by a health-check has been totally refactored. Now, all timeouts are respected. Especially hold.valid. 
The default frequency to wake up a resolvers section is now configurable using the "timeout resolve" parameter. Now, as documented, as long as invalid responses are received, we really wait for all name servers' responses before retrying. As far as possible, resources allocated during DNS configuration parsing are released when HAProxy is shut down. Besides all these changes, the code has been cleaned up to ease code review and the doc has been updated.
2017-09-27 05:00:59 -04:00
}
MAJOR/REORG: dns: DNS resolution task and requester queues This patch is a major upgrade of the internal run-time DNS resolver in HAProxy and it brings the following 2 main changes: 1. DNS resolution task Up to now, DNS resolution was triggered by the health check task. From now, DNS resolution task is autonomous. It is started by HAProxy right after the scheduler is available and it is woken either when a network IO occurs for one of its nameserver or when a timeout is matched. From now, this means we can enable DNS resolution for a server without enabling health checking. 2. Introduction of a dns_requester structure Up to now, DNS resolution was purposely made for resolving server hostnames. The idea, is to ensure that any HAProxy internal object should be able to trigger a DNS resolution. For this purpose, 2 things has to be done: - clean up the DNS code from the server structure (this was already quite clean actually) and clean up the server's callbacks from manipulating too much DNS resolution - create an agnostic structure which allows linking a DNS resolution and a requester of any type (using obj_type enum) 3. Manage requesters through queues Up to now, there was an uniq relationship between a resolution and it's owner (aka the requester now). It's a shame, because in some cases, multiple objects may share the same hostname and may benefit from a resolution being performed by a third party. This patch introduces the notion of queues, which are basically lists of either currently running resolution or waiting ones. The resolutions are now available as a pool, which belongs to the resolvers. The pool has has a default size of 64 resolutions per resolvers and is allocated at configuration parsing.
2017-05-22 09:17:15 -04:00
else
srv_update_addr(s, firstip, firstip_sin_family, SERVER_INETADDR_UPDATER_DNS_CACHE);
MAJOR/REORG: dns: DNS resolution task and requester queues This patch is a major upgrade of the internal run-time DNS resolver in HAProxy and it brings the following 2 main changes: 1. DNS resolution task Up to now, DNS resolution was triggered by the health check task. From now, DNS resolution task is autonomous. It is started by HAProxy right after the scheduler is available and it is woken either when a network IO occurs for one of its nameserver or when a timeout is matched. From now, this means we can enable DNS resolution for a server without enabling health checking. 2. Introduction of a dns_requester structure Up to now, DNS resolution was purposely made for resolving server hostnames. The idea, is to ensure that any HAProxy internal object should be able to trigger a DNS resolution. For this purpose, 2 things has to be done: - clean up the DNS code from the server structure (this was already quite clean actually) and clean up the server's callbacks from manipulating too much DNS resolution - create an agnostic structure which allows linking a DNS resolution and a requester of any type (using obj_type enum) 3. Manage requesters through queues Up to now, there was an uniq relationship between a resolution and it's owner (aka the requester now). It's a shame, because in some cases, multiple objects may share the same hostname and may benefit from a resolution being performed by a third party. This patch introduces the notion of queues, which are basically lists of either currently running resolution or waiting ones. The resolutions are now available as a pool, which belongs to the resolvers. The pool has has a default size of 64 resolutions per resolvers and is allocated at configuration parsing.
2017-05-22 09:17:15 -04:00
update_status:
if (has_no_ip && !snr_set_srv_down(s)) {
struct server_inetaddr srv_addr;
BUG/MEDIUM: server/dns: preserve server's port upon resolution timeout or error @boi4 reported in GH #2578 that since 3.0-dev1 for servers with address learned from A/AAAA records after a DNS flap server would be put out of maintenance with proper address but with invalid port (== 0), making it unusable and causing tcp checks to fail: [NOTICE] (1) : Loading success. [WARNING] (8) : Server mybackend/myserver1 is going DOWN for maintenance (DNS refused status). 0 active and 0 backup servers left. 0 sessions active, 0 requeued, 0 remaining in queue. [ALERT] (8) : backend 'mybackend' has no server available! [WARNING] (8) : mybackend/myserver1: IP changed from '(none)' to '127.0.0.1' by 'myresolver/ns1'. [WARNING] (8) : Server mybackend/myserver1 ('myhost') is UP/READY (resolves again). [WARNING] (8) : Server mybackend/myserver1 administratively READY thanks to valid DNS answer. [WARNING] (8) : Server mybackend/myserver1 is DOWN, reason: Layer4 connection problem, info: "Connection refused", check duration: 0ms. 0 active and 0 backup servers left. 0 sessions active, 0 requeued, 0 remaining in queue. @boi4 also mentioned that this used to work fine before. Willy suggested that this regression may have been introduced by 64c9c8e ("BUG/MINOR: server/dns: use server_set_inetaddr() to unset srv addr from DNS") Turns out he was right! Indeed, in 64c9c8e we systematically memset the whole server_inetaddr struct (which contains both the requested server's addr and port planned for atomic update) instead of only memsetting the addr part of the structure: except when SRV records are involved (SRV records provide both the address and the port unlike A or AAAA records), we must not reset the server's port upon DNS errors because the port may have been provided at config time and we don't want to lose its value. Big thanks to @boi4 for his well-documented issue that really helped us to pinpoint the bug right on time for the dev-13 release. 
No backport needed (unless 64c9c8e gets backported).
2024-05-24 07:55:41 -04:00
/* unset server's addr, keep port */
server_get_inetaddr(s, &srv_addr);
srv_addr.family = AF_UNSPEC;
BUG/MEDIUM: server/dns: preserve server's port upon resolution timeout or error @boi4 reported in GH #2578 that since 3.0-dev1 for servers with address learned from A/AAAA records after a DNS flap server would be put out of maintenance with proper address but with invalid port (== 0), making it unusable and causing tcp checks to fail: [NOTICE] (1) : Loading success. [WARNING] (8) : Server mybackend/myserver1 is going DOWN for maintenance (DNS refused status). 0 active and 0 backup servers left. 0 sessions active, 0 requeued, 0 remaining in queue. [ALERT] (8) : backend 'mybackend' has no server available! [WARNING] (8) : mybackend/myserver1: IP changed from '(none)' to '127.0.0.1' by 'myresolver/ns1'. [WARNING] (8) : Server mybackend/myserver1 ('myhost') is UP/READY (resolves again). [WARNING] (8) : Server mybackend/myserver1 administratively READY thanks to valid DNS answer. [WARNING] (8) : Server mybackend/myserver1 is DOWN, reason: Layer4 connection problem, info: "Connection refused", check duration: 0ms. 0 active and 0 backup servers left. 0 sessions active, 0 requeued, 0 remaining in queue. @boi4 also mentioned that this used to work fine before. Willy suggested that this regression may have been introduced by 64c9c8e ("BUG/MINOR: server/dns: use server_set_inetaddr() to unset srv addr from DNS") Turns out he was right! Indeed, in 64c9c8e we systematically memset the whole server_inetaddr struct (which contains both the requested server's addr and port planned for atomic update) instead of only memsetting the addr part of the structure: except when SRV records are involved (SRV records provide both the address and the port unlike A or AAAA records), we must not reset the server's port upon DNS errors because the port may have been provided at config time and we don't want to lose its value. Big thanks to @boi4 for his well-documented issue that really helped us to pinpoint the bug right on time for the dev-13 release. 
No backport needed (unless 64c9c8e gets backported).
2024-05-24 07:55:41 -04:00
memset(&srv_addr.addr, 0, sizeof(srv_addr.addr));
server_set_inetaddr(s, &srv_addr, SERVER_INETADDR_UPDATER_NONE, NULL);
}
MAJOR/REORG: dns: DNS resolution task and requester queues This patch is a major upgrade of the internal run-time DNS resolver in HAProxy and it brings the following 2 main changes: 1. DNS resolution task Up to now, DNS resolution was triggered by the health check task. From now, DNS resolution task is autonomous. It is started by HAProxy right after the scheduler is available and it is woken either when a network IO occurs for one of its nameserver or when a timeout is matched. From now, this means we can enable DNS resolution for a server without enabling health checking. 2. Introduction of a dns_requester structure Up to now, DNS resolution was purposely made for resolving server hostnames. The idea, is to ensure that any HAProxy internal object should be able to trigger a DNS resolution. For this purpose, 2 things has to be done: - clean up the DNS code from the server structure (this was already quite clean actually) and clean up the server's callbacks from manipulating too much DNS resolution - create an agnostic structure which allows linking a DNS resolution and a requester of any type (using obj_type enum) 3. Manage requesters through queues Up to now, there was an uniq relationship between a resolution and it's owner (aka the requester now). It's a shame, because in some cases, multiple objects may share the same hostname and may benefit from a resolution being performed by a third party. This patch introduces the notion of queues, which are basically lists of either currently running resolution or waiting ones. The resolutions are now available as a pool, which belongs to the resolvers. The pool has has a default size of 64 resolutions per resolvers and is allocated at configuration parsing.
2017-05-22 09:17:15 -04:00
return 1;
invalid:
if (counters) {
counters->app.resolver.invalid++;
MAJOR: dns: Refactor the DNS code This is a huge patch with many changes, all about the DNS. Initially, the idea was to update the DNS part to ease the threads support integration. But quickly, I started to refactor some parts. And after several iterations, it was impossible for me to commit the different parts atomically. So, instead of adding tens of patches, often reworking the same parts, it was easier to merge all my changes in a uniq patch. Here are all changes made on the DNS. First, the DNS initialization has been refactored. The DNS configuration parsing remains untouched, in cfgparse.c. But all checks have been moved in a post-check callback. In the function dns_finalize_config, for each resolvers, the nameservers configuration is tested and the task used to manage DNS resolutions is created. The links between the backend's servers and the resolvers are also created at this step. Here no connection are kept alive. So there is no needs anymore to reopen them after HAProxy fork. Connections used to send DNS queries will be opened on demand. Then, the way DNS requesters are linked to a DNS resolution has been reworked. The resolution used by a requester is now referenced into the dns_requester structure and the resolution pointers in server and dns_srvrq structures have been removed. wait and curr list of requesters, for a DNS resolution, have been replaced by a uniq list. And Finally, the way a requester is removed from a DNS resolution has been simplified. Now everything is done in dns_unlink_resolution. srv_set_fqdn function has been simplified. Now, there is only 1 way to set the server's FQDN, independently it is done by the CLI or when a SRV record is resolved. The static DNS resolutions pool has been replaced by a dynamoc pool. The part has been modified by Baptiste Assmann. The way the DNS resolutions are triggered by the task or by a health-check has been totally refactored. Now, all timeouts are respected. Especially hold.valid. 
The default frequency to wake up a resolvers section is now configurable using the "timeout resolve" parameter. Now, as documented, as long as invalid responses are received, we really wait for all name servers' responses before retrying. As far as possible, resources allocated during DNS configuration parsing are released when HAProxy is shut down. Besides all these changes, the code has been cleaned up to ease code review and the doc has been updated.
2017-09-27 05:00:59 -04:00
goto update_status;
}
if (has_no_ip && !snr_set_srv_down(s)) {
struct server_inetaddr srv_addr;
BUG/MEDIUM: server/dns: preserve server's port upon resolution timeout or error @boi4 reported in GH #2578 that since 3.0-dev1 for servers with address learned from A/AAAA records after a DNS flap server would be put out of maintenance with proper address but with invalid port (== 0), making it unusable and causing tcp checks to fail: [NOTICE] (1) : Loading success. [WARNING] (8) : Server mybackend/myserver1 is going DOWN for maintenance (DNS refused status). 0 active and 0 backup servers left. 0 sessions active, 0 requeued, 0 remaining in queue. [ALERT] (8) : backend 'mybackend' has no server available! [WARNING] (8) : mybackend/myserver1: IP changed from '(none)' to '127.0.0.1' by 'myresolver/ns1'. [WARNING] (8) : Server mybackend/myserver1 ('myhost') is UP/READY (resolves again). [WARNING] (8) : Server mybackend/myserver1 administratively READY thanks to valid DNS answer. [WARNING] (8) : Server mybackend/myserver1 is DOWN, reason: Layer4 connection problem, info: "Connection refused", check duration: 0ms. 0 active and 0 backup servers left. 0 sessions active, 0 requeued, 0 remaining in queue. @boi4 also mentioned that this used to work fine before. Willy suggested that this regression may have been introduced by 64c9c8e ("BUG/MINOR: server/dns: use server_set_inetaddr() to unset srv addr from DNS") Turns out he was right! Indeed, in 64c9c8e we systematically memset the whole server_inetaddr struct (which contains both the requested server's addr and port planned for atomic update) instead of only memsetting the addr part of the structure: except when SRV records are involved (SRV records provide both the address and the port unlike A or AAAA records), we must not reset the server's port upon DNS errors because the port may have been provided at config time and we don't want to lose its value. Big thanks to @boi4 for his well-documented issue that really helped us to pinpoint the bug right on time for the dev-13 release. 
No backport needed (unless 64c9c8e gets backported).
2024-05-24 07:55:41 -04:00
/* unset server's addr, keep port */
server_get_inetaddr(s, &srv_addr);
srv_addr.family = AF_UNSPEC;
BUG/MEDIUM: server/dns: preserve server's port upon resolution timeout or error @boi4 reported in GH #2578 that since 3.0-dev1 for servers with address learned from A/AAAA records after a DNS flap server would be put out of maintenance with proper address but with invalid port (== 0), making it unusable and causing tcp checks to fail: [NOTICE] (1) : Loading success. [WARNING] (8) : Server mybackend/myserver1 is going DOWN for maintenance (DNS refused status). 0 active and 0 backup servers left. 0 sessions active, 0 requeued, 0 remaining in queue. [ALERT] (8) : backend 'mybackend' has no server available! [WARNING] (8) : mybackend/myserver1: IP changed from '(none)' to '127.0.0.1' by 'myresolver/ns1'. [WARNING] (8) : Server mybackend/myserver1 ('myhost') is UP/READY (resolves again). [WARNING] (8) : Server mybackend/myserver1 administratively READY thanks to valid DNS answer. [WARNING] (8) : Server mybackend/myserver1 is DOWN, reason: Layer4 connection problem, info: "Connection refused", check duration: 0ms. 0 active and 0 backup servers left. 0 sessions active, 0 requeued, 0 remaining in queue. @boi4 also mentioned that this used to work fine before. Willy suggested that this regression may have been introduced by 64c9c8e ("BUG/MINOR: server/dns: use server_set_inetaddr() to unset srv addr from DNS") Turns out he was right! Indeed, in 64c9c8e we systematically memset the whole server_inetaddr struct (which contains both the requested server's addr and port planned for atomic update) instead of only memsetting the addr part of the structure: except when SRV records are involved (SRV records provide both the address and the port unlike A or AAAA records), we must not reset the server's port upon DNS errors because the port may have been provided at config time and we don't want to lose its value. Big thanks to @boi4 for his well-documented issue that really helped us to pinpoint the bug right on time for the dev-13 release. 
No backport needed (unless 64c9c8e gets backported).
2024-05-24 07:55:41 -04:00
memset(&srv_addr.addr, 0, sizeof(srv_addr.addr));
server_set_inetaddr(s, &srv_addr, SERVER_INETADDR_UPDATER_NONE, NULL);
}
return 0;
}
/*
 * Error callback for resolutions requested on behalf of an SRV record.
 *
 * <requester> is the resolution requester; its owner is expected to be a
 * resolv_srvrq. <error_code> is not used by this function.
 *
 * The last known valid answer is kept for as long as the "hold" period that
 * matches the resolution's failure status (nx, timeout, refused, or "other"
 * for anything else) has not elapsed since <last_valid>. Once it has elapsed,
 * the requester is detached from the resolution's answer items.
 *
 * Returns:
 *   0 if the answer items can be trashed (also when the owner is not an SRV
 *     request).
 *   1 if the error is safely ignored and the answer items must be kept.
 *
 * NOTE(review): no lock is taken directly in this function; any server locking
 * presumably happens in resolv_detach_from_resolution_answer_items() -- confirm.
 */
int srvrq_resolution_error_cb(struct resolv_requester *requester, int error_code)
{
	struct resolv_srvrq *srvrq;
	struct resolv_resolution *res;
	struct resolvers *resolvers;
	int exp;

	/* only SRV requests are handled here */
	srvrq = objt_resolv_srvrq(requester->owner);
	if (!srvrq)
		return 0;

	resolvers = srvrq->resolvers;
	res = requester->resolution;

	/* compute when the hold period matching the failure status ends */
	switch (res->status) {
	case RSLV_STATUS_NX:
		exp = tick_add(res->last_valid, resolvers->hold.nx);
		break;

	case RSLV_STATUS_TIMEOUT:
		exp = tick_add(res->last_valid, resolvers->hold.timeout);
		break;

	case RSLV_STATUS_REFUSED:
		exp = tick_add(res->last_valid, resolvers->hold.refused);
		break;

	default:
		exp = tick_add(res->last_valid, resolvers->hold.other);
		break;
	}

	/* still within the hold period: keep the previous answer items */
	if (!tick_is_expired(exp, now_ms))
		return 1;

	/* the failure lasted long enough: drop any associated server reference */
	resolv_detach_from_resolution_answer_items(res, requester);

	return 0;
}
/*
* Server Name Resolution error management callback
* returns:
* 0 if we can trash answser items.
* 1 when safely ignored and we must kept answer items
*
* Grabs the server's lock.
*/
int snr_resolution_error_cb(struct resolv_requester *requester, int error_code)
{
MAJOR: dns: Refactor the DNS code This is a huge patch with many changes, all about the DNS. Initially, the idea was to update the DNS part to ease the threads support integration. But quickly, I started to refactor some parts. And after several iterations, it was impossible for me to commit the different parts atomically. So, instead of adding tens of patches, often reworking the same parts, it was easier to merge all my changes in a uniq patch. Here are all changes made on the DNS. First, the DNS initialization has been refactored. The DNS configuration parsing remains untouched, in cfgparse.c. But all checks have been moved in a post-check callback. In the function dns_finalize_config, for each resolvers, the nameservers configuration is tested and the task used to manage DNS resolutions is created. The links between the backend's servers and the resolvers are also created at this step. Here no connection are kept alive. So there is no needs anymore to reopen them after HAProxy fork. Connections used to send DNS queries will be opened on demand. Then, the way DNS requesters are linked to a DNS resolution has been reworked. The resolution used by a requester is now referenced into the dns_requester structure and the resolution pointers in server and dns_srvrq structures have been removed. wait and curr list of requesters, for a DNS resolution, have been replaced by a uniq list. And Finally, the way a requester is removed from a DNS resolution has been simplified. Now everything is done in dns_unlink_resolution. srv_set_fqdn function has been simplified. Now, there is only 1 way to set the server's FQDN, independently it is done by the CLI or when a SRV record is resolved. The static DNS resolutions pool has been replaced by a dynamoc pool. The part has been modified by Baptiste Assmann. The way the DNS resolutions are triggered by the task or by a health-check has been totally refactored. Now, all timeouts are respected. Especially hold.valid. 
The default frequency to wake up a resolvers is now configurable using "timeout resolve" parameter. Now, as documented, as long as invalid repsonses are received, we really wait all name servers responses before retrying. As far as possible, resources allocated during DNS configuration parsing are releases when HAProxy is shutdown. Beside all these changes, the code has been cleaned to ease code review and the doc has been updated.
2017-09-27 05:00:59 -04:00
struct server *s;
MAJOR: dns: Refactor the DNS code This is a huge patch with many changes, all about the DNS. Initially, the idea was to update the DNS part to ease the threads support integration. But quickly, I started to refactor some parts. And after several iterations, it was impossible for me to commit the different parts atomically. So, instead of adding tens of patches, often reworking the same parts, it was easier to merge all my changes in a uniq patch. Here are all changes made on the DNS. First, the DNS initialization has been refactored. The DNS configuration parsing remains untouched, in cfgparse.c. But all checks have been moved in a post-check callback. In the function dns_finalize_config, for each resolvers, the nameservers configuration is tested and the task used to manage DNS resolutions is created. The links between the backend's servers and the resolvers are also created at this step. Here no connection are kept alive. So there is no needs anymore to reopen them after HAProxy fork. Connections used to send DNS queries will be opened on demand. Then, the way DNS requesters are linked to a DNS resolution has been reworked. The resolution used by a requester is now referenced into the dns_requester structure and the resolution pointers in server and dns_srvrq structures have been removed. wait and curr list of requesters, for a DNS resolution, have been replaced by a uniq list. And Finally, the way a requester is removed from a DNS resolution has been simplified. Now everything is done in dns_unlink_resolution. srv_set_fqdn function has been simplified. Now, there is only 1 way to set the server's FQDN, independently it is done by the CLI or when a SRV record is resolved. The static DNS resolutions pool has been replaced by a dynamoc pool. The part has been modified by Baptiste Assmann. The way the DNS resolutions are triggered by the task or by a health-check has been totally refactored. Now, all timeouts are respected. Especially hold.valid. 
The default frequency to wake up a resolvers is now configurable using "timeout resolve" parameter. Now, as documented, as long as invalid repsonses are received, we really wait all name servers responses before retrying. As far as possible, resources allocated during DNS configuration parsing are releases when HAProxy is shutdown. Beside all these changes, the code has been cleaned to ease code review and the doc has been updated.
2017-09-27 05:00:59 -04:00
s = objt_server(requester->owner);
MAJOR/REORG: dns: DNS resolution task and requester queues This patch is a major upgrade of the internal run-time DNS resolver in HAProxy and it brings the following 2 main changes: 1. DNS resolution task Up to now, DNS resolution was triggered by the health check task. From now, DNS resolution task is autonomous. It is started by HAProxy right after the scheduler is available and it is woken either when a network IO occurs for one of its nameserver or when a timeout is matched. From now, this means we can enable DNS resolution for a server without enabling health checking. 2. Introduction of a dns_requester structure Up to now, DNS resolution was purposely made for resolving server hostnames. The idea, is to ensure that any HAProxy internal object should be able to trigger a DNS resolution. For this purpose, 2 things has to be done: - clean up the DNS code from the server structure (this was already quite clean actually) and clean up the server's callbacks from manipulating too much DNS resolution - create an agnostic structure which allows linking a DNS resolution and a requester of any type (using obj_type enum) 3. Manage requesters through queues Up to now, there was an uniq relationship between a resolution and it's owner (aka the requester now). It's a shame, because in some cases, multiple objects may share the same hostname and may benefit from a resolution being performed by a third party. This patch introduces the notion of queues, which are basically lists of either currently running resolution or waiting ones. The resolutions are now available as a pool, which belongs to the resolvers. The pool has has a default size of 64 resolutions per resolvers and is allocated at configuration parsing.
2017-05-22 09:17:15 -04:00
if (!s)
return 0;
HA_SPIN_LOCK(SERVER_LOCK, &s->lock);
if (!snr_set_srv_down(s)) {
struct server_inetaddr srv_addr;
BUG/MEDIUM: server/dns: preserve server's port upon resolution timeout or error @boi4 reported in GH #2578 that since 3.0-dev1 for servers with address learned from A/AAAA records after a DNS flap server would be put out of maintenance with proper address but with invalid port (== 0), making it unusable and causing tcp checks to fail: [NOTICE] (1) : Loading success. [WARNING] (8) : Server mybackend/myserver1 is going DOWN for maintenance (DNS refused status). 0 active and 0 backup servers left. 0 sessions active, 0 requeued, 0 remaining in queue. [ALERT] (8) : backend 'mybackend' has no server available! [WARNING] (8) : mybackend/myserver1: IP changed from '(none)' to '127.0.0.1' by 'myresolver/ns1'. [WARNING] (8) : Server mybackend/myserver1 ('myhost') is UP/READY (resolves again). [WARNING] (8) : Server mybackend/myserver1 administratively READY thanks to valid DNS answer. [WARNING] (8) : Server mybackend/myserver1 is DOWN, reason: Layer4 connection problem, info: "Connection refused", check duration: 0ms. 0 active and 0 backup servers left. 0 sessions active, 0 requeued, 0 remaining in queue. @boi4 also mentioned that this used to work fine before. Willy suggested that this regression may have been introduced by 64c9c8e ("BUG/MINOR: server/dns: use server_set_inetaddr() to unset srv addr from DNS") Turns out he was right! Indeed, in 64c9c8e we systematically memset the whole server_inetaddr struct (which contains both the requested server's addr and port planned for atomic update) instead of only memsetting the addr part of the structure: except when SRV records are involved (SRV records provide both the address and the port unlike A or AAAA records), we must not reset the server's port upon DNS errors because the port may have been provided at config time and we don't want to lose its value. Big thanks to @boi4 for his well-documented issue that really helped us to pinpoint the bug right on time for the dev-13 release. 
No backport needed (unless 64c9c8e gets backported).
2024-05-24 07:55:41 -04:00
/* unset server's addr, keep port */
server_get_inetaddr(s, &srv_addr);
srv_addr.family = AF_UNSPEC;
BUG/MEDIUM: server/dns: preserve server's port upon resolution timeout or error @boi4 reported in GH #2578 that since 3.0-dev1 for servers with address learned from A/AAAA records after a DNS flap server would be put out of maintenance with proper address but with invalid port (== 0), making it unusable and causing tcp checks to fail: [NOTICE] (1) : Loading success. [WARNING] (8) : Server mybackend/myserver1 is going DOWN for maintenance (DNS refused status). 0 active and 0 backup servers left. 0 sessions active, 0 requeued, 0 remaining in queue. [ALERT] (8) : backend 'mybackend' has no server available! [WARNING] (8) : mybackend/myserver1: IP changed from '(none)' to '127.0.0.1' by 'myresolver/ns1'. [WARNING] (8) : Server mybackend/myserver1 ('myhost') is UP/READY (resolves again). [WARNING] (8) : Server mybackend/myserver1 administratively READY thanks to valid DNS answer. [WARNING] (8) : Server mybackend/myserver1 is DOWN, reason: Layer4 connection problem, info: "Connection refused", check duration: 0ms. 0 active and 0 backup servers left. 0 sessions active, 0 requeued, 0 remaining in queue. @boi4 also mentioned that this used to work fine before. Willy suggested that this regression may have been introduced by 64c9c8e ("BUG/MINOR: server/dns: use server_set_inetaddr() to unset srv addr from DNS") Turns out he was right! Indeed, in 64c9c8e we systematically memset the whole server_inetaddr struct (which contains both the requested server's addr and port planned for atomic update) instead of only memsetting the addr part of the structure: except when SRV records are involved (SRV records provide both the address and the port unlike A or AAAA records), we must not reset the server's port upon DNS errors because the port may have been provided at config time and we don't want to lose its value. Big thanks to @boi4 for his well-documented issue that really helped us to pinpoint the bug right on time for the dev-13 release. 
No backport needed (unless 64c9c8e gets backported).
2024-05-24 07:55:41 -04:00
memset(&srv_addr.addr, 0, sizeof(srv_addr.addr));
server_set_inetaddr(s, &srv_addr, SERVER_INETADDR_UPDATER_NONE, NULL);
HA_SPIN_UNLOCK(SERVER_LOCK, &s->lock);
resolv_detach_from_resolution_answer_items(requester->resolution, requester);
return 0;
}
HA_SPIN_UNLOCK(SERVER_LOCK, &s->lock);
return 1;
}
/*
* Function to check if <ip> is already affected to a server in the backend
* which owns <srv> and is up.
* It returns a pointer to the first server found or NULL if <ip> is not yet
* assigned.
*
* Must be called with server lock held
*/
struct server *snr_check_ip_callback(struct server *srv, void *ip, unsigned char *ip_family)
{
struct server *tmpsrv;
struct proxy *be;
if (!srv)
return NULL;
be = srv->proxy;
for (tmpsrv = be->srv; tmpsrv; tmpsrv = tmpsrv->next) {
/* we found the current server is the same, ignore it */
if (srv == tmpsrv)
continue;
/* We want to compare the IP in the record with the IP of the servers in the
* same backend, only if:
* * DNS resolution is enabled on the server
* * the hostname used for the resolution by our server is the same than the
* one used for the server found in the backend
* * the server found in the backend is not our current server
*/
HA_SPIN_LOCK(SERVER_LOCK, &tmpsrv->lock);
MAJOR/REORG: dns: DNS resolution task and requester queues This patch is a major upgrade of the internal run-time DNS resolver in HAProxy and it brings the following 2 main changes: 1. DNS resolution task Up to now, DNS resolution was triggered by the health check task. From now, DNS resolution task is autonomous. It is started by HAProxy right after the scheduler is available and it is woken either when a network IO occurs for one of its nameserver or when a timeout is matched. From now, this means we can enable DNS resolution for a server without enabling health checking. 2. Introduction of a dns_requester structure Up to now, DNS resolution was purposely made for resolving server hostnames. The idea, is to ensure that any HAProxy internal object should be able to trigger a DNS resolution. For this purpose, 2 things has to be done: - clean up the DNS code from the server structure (this was already quite clean actually) and clean up the server's callbacks from manipulating too much DNS resolution - create an agnostic structure which allows linking a DNS resolution and a requester of any type (using obj_type enum) 3. Manage requesters through queues Up to now, there was an uniq relationship between a resolution and it's owner (aka the requester now). It's a shame, because in some cases, multiple objects may share the same hostname and may benefit from a resolution being performed by a third party. This patch introduces the notion of queues, which are basically lists of either currently running resolution or waiting ones. The resolutions are now available as a pool, which belongs to the resolvers. The pool has has a default size of 64 resolutions per resolvers and is allocated at configuration parsing.
2017-05-22 09:17:15 -04:00
if ((tmpsrv->hostname_dn == NULL) ||
(srv->hostname_dn_len != tmpsrv->hostname_dn_len) ||
(memcmp(srv->hostname_dn, tmpsrv->hostname_dn, srv->hostname_dn_len) != 0) ||
(srv->puid == tmpsrv->puid)) {
HA_SPIN_UNLOCK(SERVER_LOCK, &tmpsrv->lock);
continue;
}
/* If the server has been taken down, don't consider it */
if (tmpsrv->next_admin & SRV_ADMF_RMAINT) {
HA_SPIN_UNLOCK(SERVER_LOCK, &tmpsrv->lock);
continue;
}
/* At this point, we have 2 different servers using the same DNS hostname
* for their respective resolution.
*/
if (*ip_family == tmpsrv->addr.ss_family &&
((tmpsrv->addr.ss_family == AF_INET &&
memcmp(ip, &((struct sockaddr_in *)&tmpsrv->addr)->sin_addr, 4) == 0) ||
(tmpsrv->addr.ss_family == AF_INET6 &&
memcmp(ip, &((struct sockaddr_in6 *)&tmpsrv->addr)->sin6_addr, 16) == 0))) {
HA_SPIN_UNLOCK(SERVER_LOCK, &tmpsrv->lock);
return tmpsrv;
}
HA_SPIN_UNLOCK(SERVER_LOCK, &tmpsrv->lock);
}
return NULL;
}
/* Resolves srv->hostname using the libc's resolver and installs the result as
 * the server's address. This is suited for initial address configuration.
 *
 * The lookup is seeded with the address family currently set in srv->addr, so
 * that a configured preferred family is honoured.
 *
 * Returns 0 on success, otherwise a non-zero error code. On error, ERR_WARN is
 * OR'ed into *err_code when <err_code> is not NULL, and the server's address
 * is left untouched.
 */
int srv_set_addr_via_libc(struct server *srv, int *err_code)
{
	struct sockaddr_storage resolved;

	/* start from a fully zeroed address carrying only the preferred family */
	memset(&resolved, 0, sizeof(resolved));
	resolved.ss_family = srv->addr.ss_family;

	/* NOTE(review): the last argument (1) presumably enables name
	 * resolution in str2ip2() -- confirm against its definition.
	 */
	if (str2ip2(srv->hostname, &resolved, 1) != NULL) {
		_srv_set_inetaddr(srv, &resolved);
		return 0;
	}

	if (err_code)
		*err_code |= ERR_WARN;

	return 1;
}
/* Set the server's FQDN (->hostname) from <hostname>.
* Returns -1 if failed, 0 if not.
*
* Must be called with the server lock held.
*/
int srv_set_fqdn(struct server *srv, const char *hostname, int resolv_locked)
{
struct resolv_resolution *resolution;
MAJOR: dns: Refactor the DNS code This is a huge patch with many changes, all about the DNS. Initially, the idea was to update the DNS part to ease the threads support integration. But quickly, I started to refactor some parts. And after several iterations, it was impossible for me to commit the different parts atomically. So, instead of adding tens of patches, often reworking the same parts, it was easier to merge all my changes in a uniq patch. Here are all changes made on the DNS. First, the DNS initialization has been refactored. The DNS configuration parsing remains untouched, in cfgparse.c. But all checks have been moved in a post-check callback. In the function dns_finalize_config, for each resolvers, the nameservers configuration is tested and the task used to manage DNS resolutions is created. The links between the backend's servers and the resolvers are also created at this step. Here no connection are kept alive. So there is no needs anymore to reopen them after HAProxy fork. Connections used to send DNS queries will be opened on demand. Then, the way DNS requesters are linked to a DNS resolution has been reworked. The resolution used by a requester is now referenced into the dns_requester structure and the resolution pointers in server and dns_srvrq structures have been removed. wait and curr list of requesters, for a DNS resolution, have been replaced by a uniq list. And Finally, the way a requester is removed from a DNS resolution has been simplified. Now everything is done in dns_unlink_resolution. srv_set_fqdn function has been simplified. Now, there is only 1 way to set the server's FQDN, independently it is done by the CLI or when a SRV record is resolved. The static DNS resolutions pool has been replaced by a dynamoc pool. The part has been modified by Baptiste Assmann. The way the DNS resolutions are triggered by the task or by a health-check has been totally refactored. Now, all timeouts are respected. Especially hold.valid. 
The default frequency to wake up a resolvers is now configurable using "timeout resolve" parameter. Now, as documented, as long as invalid repsonses are received, we really wait all name servers responses before retrying. As far as possible, resources allocated during DNS configuration parsing are releases when HAProxy is shutdown. Beside all these changes, the code has been cleaned to ease code review and the doc has been updated.
2017-09-27 05:00:59 -04:00
char *hostname_dn;
int hostname_len, hostname_dn_len;
/* Note that the server lock is already held. */
if (!srv->resolvers)
return -1;
if (!resolv_locked)
HA_SPIN_LOCK(DNS_LOCK, &srv->resolvers->lock);
/* run time DNS/SRV resolution was not active for this server
MAJOR/REORG: dns: DNS resolution task and requester queues This patch is a major upgrade of the internal run-time DNS resolver in HAProxy and it brings the following 2 main changes: 1. DNS resolution task Up to now, DNS resolution was triggered by the health check task. From now, DNS resolution task is autonomous. It is started by HAProxy right after the scheduler is available and it is woken either when a network IO occurs for one of its nameserver or when a timeout is matched. From now, this means we can enable DNS resolution for a server without enabling health checking. 2. Introduction of a dns_requester structure Up to now, DNS resolution was purposely made for resolving server hostnames. The idea, is to ensure that any HAProxy internal object should be able to trigger a DNS resolution. For this purpose, 2 things has to be done: - clean up the DNS code from the server structure (this was already quite clean actually) and clean up the server's callbacks from manipulating too much DNS resolution - create an agnostic structure which allows linking a DNS resolution and a requester of any type (using obj_type enum) 3. Manage requesters through queues Up to now, there was an uniq relationship between a resolution and it's owner (aka the requester now). It's a shame, because in some cases, multiple objects may share the same hostname and may benefit from a resolution being performed by a third party. This patch introduces the notion of queues, which are basically lists of either currently running resolution or waiting ones. The resolutions are now available as a pool, which belongs to the resolvers. The pool has has a default size of 64 resolutions per resolvers and is allocated at configuration parsing.
2017-05-22 09:17:15 -04:00
* and we can't enable it at run time for now.
*/
if (!srv->resolv_requester && !srv->srvrq)
goto err;
MAJOR/REORG: dns: DNS resolution task and requester queues This patch is a major upgrade of the internal run-time DNS resolver in HAProxy and it brings the following 2 main changes: 1. DNS resolution task Up to now, DNS resolution was triggered by the health check task. From now, DNS resolution task is autonomous. It is started by HAProxy right after the scheduler is available and it is woken either when a network IO occurs for one of its nameserver or when a timeout is matched. From now, this means we can enable DNS resolution for a server without enabling health checking. 2. Introduction of a dns_requester structure Up to now, DNS resolution was purposely made for resolving server hostnames. The idea, is to ensure that any HAProxy internal object should be able to trigger a DNS resolution. For this purpose, 2 things has to be done: - clean up the DNS code from the server structure (this was already quite clean actually) and clean up the server's callbacks from manipulating too much DNS resolution - create an agnostic structure which allows linking a DNS resolution and a requester of any type (using obj_type enum) 3. Manage requesters through queues Up to now, there was an uniq relationship between a resolution and it's owner (aka the requester now). It's a shame, because in some cases, multiple objects may share the same hostname and may benefit from a resolution being performed by a third party. This patch introduces the notion of queues, which are basically lists of either currently running resolution or waiting ones. The resolutions are now available as a pool, which belongs to the resolvers. The pool has has a default size of 64 resolutions per resolvers and is allocated at configuration parsing.
2017-05-22 09:17:15 -04:00
chunk_reset(&trash);
MAJOR: dns: Refactor the DNS code This is a huge patch with many changes, all about the DNS. Initially, the idea was to update the DNS part to ease the threads support integration. But quickly, I started to refactor some parts. And after several iterations, it was impossible for me to commit the different parts atomically. So, instead of adding tens of patches, often reworking the same parts, it was easier to merge all my changes in a uniq patch. Here are all changes made on the DNS. First, the DNS initialization has been refactored. The DNS configuration parsing remains untouched, in cfgparse.c. But all checks have been moved in a post-check callback. In the function dns_finalize_config, for each resolvers, the nameservers configuration is tested and the task used to manage DNS resolutions is created. The links between the backend's servers and the resolvers are also created at this step. Here no connection are kept alive. So there is no needs anymore to reopen them after HAProxy fork. Connections used to send DNS queries will be opened on demand. Then, the way DNS requesters are linked to a DNS resolution has been reworked. The resolution used by a requester is now referenced into the dns_requester structure and the resolution pointers in server and dns_srvrq structures have been removed. wait and curr list of requesters, for a DNS resolution, have been replaced by a uniq list. And Finally, the way a requester is removed from a DNS resolution has been simplified. Now everything is done in dns_unlink_resolution. srv_set_fqdn function has been simplified. Now, there is only 1 way to set the server's FQDN, independently it is done by the CLI or when a SRV record is resolved. The static DNS resolutions pool has been replaced by a dynamoc pool. The part has been modified by Baptiste Assmann. The way the DNS resolutions are triggered by the task or by a health-check has been totally refactored. Now, all timeouts are respected. Especially hold.valid. 
The default frequency to wake up a resolvers is now configurable using "timeout resolve" parameter. Now, as documented, as long as invalid responses are received, we really wait for all name servers responses before retrying. As far as possible, resources allocated during DNS configuration parsing are released when HAProxy is shut down. Besides all these changes, the code has been cleaned to ease code review and the doc has been updated.
2017-09-27 05:00:59 -04:00
hostname_len = strlen(hostname);
hostname_dn = trash.area;
hostname_dn_len = resolv_str_to_dn_label(hostname, hostname_len,
hostname_dn, trash.size);
MAJOR: dns: Refactor the DNS code This is a huge patch with many changes, all about the DNS. Initially, the idea was to update the DNS part to ease the threads support integration. But quickly, I started to refactor some parts. And after several iterations, it was impossible for me to commit the different parts atomically. So, instead of adding tens of patches, often reworking the same parts, it was easier to merge all my changes in a uniq patch. Here are all changes made on the DNS. First, the DNS initialization has been refactored. The DNS configuration parsing remains untouched, in cfgparse.c. But all checks have been moved in a post-check callback. In the function dns_finalize_config, for each resolvers, the nameservers configuration is tested and the task used to manage DNS resolutions is created. The links between the backend's servers and the resolvers are also created at this step. Here no connection are kept alive. So there is no needs anymore to reopen them after HAProxy fork. Connections used to send DNS queries will be opened on demand. Then, the way DNS requesters are linked to a DNS resolution has been reworked. The resolution used by a requester is now referenced into the dns_requester structure and the resolution pointers in server and dns_srvrq structures have been removed. wait and curr list of requesters, for a DNS resolution, have been replaced by a uniq list. And Finally, the way a requester is removed from a DNS resolution has been simplified. Now everything is done in dns_unlink_resolution. srv_set_fqdn function has been simplified. Now, there is only 1 way to set the server's FQDN, independently it is done by the CLI or when a SRV record is resolved. The static DNS resolutions pool has been replaced by a dynamoc pool. The part has been modified by Baptiste Assmann. The way the DNS resolutions are triggered by the task or by a health-check has been totally refactored. Now, all timeouts are respected. Especially hold.valid. 
The default frequency to wake up a resolvers is now configurable using "timeout resolve" parameter. Now, as documented, as long as invalid responses are received, we really wait for all name servers responses before retrying. As far as possible, resources allocated during DNS configuration parsing are released when HAProxy is shut down. Besides all these changes, the code has been cleaned to ease code review and the doc has been updated.
2017-09-27 05:00:59 -04:00
if (hostname_dn_len == -1)
goto err;
MAJOR/REORG: dns: DNS resolution task and requester queues This patch is a major upgrade of the internal run-time DNS resolver in HAProxy and it brings the following 2 main changes: 1. DNS resolution task Up to now, DNS resolution was triggered by the health check task. From now, DNS resolution task is autonomous. It is started by HAProxy right after the scheduler is available and it is woken either when a network IO occurs for one of its nameserver or when a timeout is matched. From now, this means we can enable DNS resolution for a server without enabling health checking. 2. Introduction of a dns_requester structure Up to now, DNS resolution was purposely made for resolving server hostnames. The idea, is to ensure that any HAProxy internal object should be able to trigger a DNS resolution. For this purpose, 2 things has to be done: - clean up the DNS code from the server structure (this was already quite clean actually) and clean up the server's callbacks from manipulating too much DNS resolution - create an agnostic structure which allows linking a DNS resolution and a requester of any type (using obj_type enum) 3. Manage requesters through queues Up to now, there was an uniq relationship between a resolution and it's owner (aka the requester now). It's a shame, because in some cases, multiple objects may share the same hostname and may benefit from a resolution being performed by a third party. This patch introduces the notion of queues, which are basically lists of either currently running resolution or waiting ones. The resolutions are now available as a pool, which belongs to the resolvers. The pool has has a default size of 64 resolutions per resolvers and is allocated at configuration parsing.
2017-05-22 09:17:15 -04:00
resolution = (srv->resolv_requester ? srv->resolv_requester->resolution : NULL);
MAJOR: dns: Refactor the DNS code This is a huge patch with many changes, all about the DNS. Initially, the idea was to update the DNS part to ease the threads support integration. But quickly, I started to refactor some parts. And after several iterations, it was impossible for me to commit the different parts atomically. So, instead of adding tens of patches, often reworking the same parts, it was easier to merge all my changes in a uniq patch. Here are all changes made on the DNS. First, the DNS initialization has been refactored. The DNS configuration parsing remains untouched, in cfgparse.c. But all checks have been moved in a post-check callback. In the function dns_finalize_config, for each resolvers, the nameservers configuration is tested and the task used to manage DNS resolutions is created. The links between the backend's servers and the resolvers are also created at this step. Here no connection are kept alive. So there is no needs anymore to reopen them after HAProxy fork. Connections used to send DNS queries will be opened on demand. Then, the way DNS requesters are linked to a DNS resolution has been reworked. The resolution used by a requester is now referenced into the dns_requester structure and the resolution pointers in server and dns_srvrq structures have been removed. wait and curr list of requesters, for a DNS resolution, have been replaced by a uniq list. And Finally, the way a requester is removed from a DNS resolution has been simplified. Now everything is done in dns_unlink_resolution. srv_set_fqdn function has been simplified. Now, there is only 1 way to set the server's FQDN, independently it is done by the CLI or when a SRV record is resolved. The static DNS resolutions pool has been replaced by a dynamoc pool. The part has been modified by Baptiste Assmann. The way the DNS resolutions are triggered by the task or by a health-check has been totally refactored. Now, all timeouts are respected. Especially hold.valid. 
The default frequency to wake up a resolvers is now configurable using "timeout resolve" parameter. Now, as documented, as long as invalid responses are received, we really wait for all name servers responses before retrying. As far as possible, resources allocated during DNS configuration parsing are released when HAProxy is shut down. Besides all these changes, the code has been cleaned to ease code review and the doc has been updated.
2017-09-27 05:00:59 -04:00
if (resolution &&
resolution->hostname_dn &&
resolution->hostname_dn_len == hostname_dn_len &&
memcmp(resolution->hostname_dn, hostname_dn, hostname_dn_len) == 0)
goto end;
MAJOR/REORG: dns: DNS resolution task and requester queues This patch is a major upgrade of the internal run-time DNS resolver in HAProxy and it brings the following 2 main changes: 1. DNS resolution task Up to now, DNS resolution was triggered by the health check task. From now, DNS resolution task is autonomous. It is started by HAProxy right after the scheduler is available and it is woken either when a network IO occurs for one of its nameserver or when a timeout is matched. From now, this means we can enable DNS resolution for a server without enabling health checking. 2. Introduction of a dns_requester structure Up to now, DNS resolution was purposely made for resolving server hostnames. The idea, is to ensure that any HAProxy internal object should be able to trigger a DNS resolution. For this purpose, 2 things has to be done: - clean up the DNS code from the server structure (this was already quite clean actually) and clean up the server's callbacks from manipulating too much DNS resolution - create an agnostic structure which allows linking a DNS resolution and a requester of any type (using obj_type enum) 3. Manage requesters through queues Up to now, there was an uniq relationship between a resolution and it's owner (aka the requester now). It's a shame, because in some cases, multiple objects may share the same hostname and may benefit from a resolution being performed by a third party. This patch introduces the notion of queues, which are basically lists of either currently running resolution or waiting ones. The resolutions are now available as a pool, which belongs to the resolvers. The pool has has a default size of 64 resolutions per resolvers and is allocated at configuration parsing.
2017-05-22 09:17:15 -04:00
resolv_unlink_resolution(srv->resolv_requester);
MAJOR/REORG: dns: DNS resolution task and requester queues This patch is a major upgrade of the internal run-time DNS resolver in HAProxy and it brings the following 2 main changes: 1. DNS resolution task Up to now, DNS resolution was triggered by the health check task. From now, DNS resolution task is autonomous. It is started by HAProxy right after the scheduler is available and it is woken either when a network IO occurs for one of its nameserver or when a timeout is matched. From now, this means we can enable DNS resolution for a server without enabling health checking. 2. Introduction of a dns_requester structure Up to now, DNS resolution was purposely made for resolving server hostnames. The idea, is to ensure that any HAProxy internal object should be able to trigger a DNS resolution. For this purpose, 2 things has to be done: - clean up the DNS code from the server structure (this was already quite clean actually) and clean up the server's callbacks from manipulating too much DNS resolution - create an agnostic structure which allows linking a DNS resolution and a requester of any type (using obj_type enum) 3. Manage requesters through queues Up to now, there was an uniq relationship between a resolution and it's owner (aka the requester now). It's a shame, because in some cases, multiple objects may share the same hostname and may benefit from a resolution being performed by a third party. This patch introduces the notion of queues, which are basically lists of either currently running resolution or waiting ones. The resolutions are now available as a pool, which belongs to the resolvers. The pool has has a default size of 64 resolutions per resolvers and is allocated at configuration parsing.
2017-05-22 09:17:15 -04:00
free(srv->hostname);
free(srv->hostname_dn);
MAJOR: dns: Refactor the DNS code This is a huge patch with many changes, all about the DNS. Initially, the idea was to update the DNS part to ease the threads support integration. But quickly, I started to refactor some parts. And after several iterations, it was impossible for me to commit the different parts atomically. So, instead of adding tens of patches, often reworking the same parts, it was easier to merge all my changes in a uniq patch. Here are all changes made on the DNS. First, the DNS initialization has been refactored. The DNS configuration parsing remains untouched, in cfgparse.c. But all checks have been moved in a post-check callback. In the function dns_finalize_config, for each resolvers, the nameservers configuration is tested and the task used to manage DNS resolutions is created. The links between the backend's servers and the resolvers are also created at this step. Here no connection are kept alive. So there is no needs anymore to reopen them after HAProxy fork. Connections used to send DNS queries will be opened on demand. Then, the way DNS requesters are linked to a DNS resolution has been reworked. The resolution used by a requester is now referenced into the dns_requester structure and the resolution pointers in server and dns_srvrq structures have been removed. wait and curr list of requesters, for a DNS resolution, have been replaced by a uniq list. And Finally, the way a requester is removed from a DNS resolution has been simplified. Now everything is done in dns_unlink_resolution. srv_set_fqdn function has been simplified. Now, there is only 1 way to set the server's FQDN, independently it is done by the CLI or when a SRV record is resolved. The static DNS resolutions pool has been replaced by a dynamoc pool. The part has been modified by Baptiste Assmann. The way the DNS resolutions are triggered by the task or by a health-check has been totally refactored. Now, all timeouts are respected. Especially hold.valid. 
The default frequency to wake up a resolvers is now configurable using "timeout resolve" parameter. Now, as documented, as long as invalid responses are received, we really wait for all name servers responses before retrying. As far as possible, resources allocated during DNS configuration parsing are released when HAProxy is shut down. Besides all these changes, the code has been cleaned to ease code review and the doc has been updated.
2017-09-27 05:00:59 -04:00
srv->hostname = strdup(hostname);
srv->hostname_dn = strdup(hostname_dn);
MAJOR/REORG: dns: DNS resolution task and requester queues This patch is a major upgrade of the internal run-time DNS resolver in HAProxy and it brings the following 2 main changes: 1. DNS resolution task Up to now, DNS resolution was triggered by the health check task. From now, DNS resolution task is autonomous. It is started by HAProxy right after the scheduler is available and it is woken either when a network IO occurs for one of its nameserver or when a timeout is matched. From now, this means we can enable DNS resolution for a server without enabling health checking. 2. Introduction of a dns_requester structure Up to now, DNS resolution was purposely made for resolving server hostnames. The idea, is to ensure that any HAProxy internal object should be able to trigger a DNS resolution. For this purpose, 2 things has to be done: - clean up the DNS code from the server structure (this was already quite clean actually) and clean up the server's callbacks from manipulating too much DNS resolution - create an agnostic structure which allows linking a DNS resolution and a requester of any type (using obj_type enum) 3. Manage requesters through queues Up to now, there was an uniq relationship between a resolution and it's owner (aka the requester now). It's a shame, because in some cases, multiple objects may share the same hostname and may benefit from a resolution being performed by a third party. This patch introduces the notion of queues, which are basically lists of either currently running resolution or waiting ones. The resolutions are now available as a pool, which belongs to the resolvers. The pool has has a default size of 64 resolutions per resolvers and is allocated at configuration parsing.
2017-05-22 09:17:15 -04:00
srv->hostname_dn_len = hostname_dn_len;
if (!srv->hostname || !srv->hostname_dn)
goto err;
MAJOR/REORG: dns: DNS resolution task and requester queues This patch is a major upgrade of the internal run-time DNS resolver in HAProxy and it brings the following 2 main changes: 1. DNS resolution task Up to now, DNS resolution was triggered by the health check task. From now, DNS resolution task is autonomous. It is started by HAProxy right after the scheduler is available and it is woken either when a network IO occurs for one of its nameserver or when a timeout is matched. From now, this means we can enable DNS resolution for a server without enabling health checking. 2. Introduction of a dns_requester structure Up to now, DNS resolution was purposely made for resolving server hostnames. The idea, is to ensure that any HAProxy internal object should be able to trigger a DNS resolution. For this purpose, 2 things has to be done: - clean up the DNS code from the server structure (this was already quite clean actually) and clean up the server's callbacks from manipulating too much DNS resolution - create an agnostic structure which allows linking a DNS resolution and a requester of any type (using obj_type enum) 3. Manage requesters through queues Up to now, there was an uniq relationship between a resolution and it's owner (aka the requester now). It's a shame, because in some cases, multiple objects may share the same hostname and may benefit from a resolution being performed by a third party. This patch introduces the notion of queues, which are basically lists of either currently running resolution or waiting ones. The resolutions are now available as a pool, which belongs to the resolvers. The pool has has a default size of 64 resolutions per resolvers and is allocated at configuration parsing.
2017-05-22 09:17:15 -04:00
MEDIUM: dns: use Additional records from SRV responses Most DNS servers provide A/AAAA records in the Additional section of a response, which correspond to the SRV records from the Answer section: ;; QUESTION SECTION: ;_http._tcp.be1.domain.tld. IN SRV ;; ANSWER SECTION: _http._tcp.be1.domain.tld. 3600 IN SRV 5 500 80 A1.domain.tld. _http._tcp.be1.domain.tld. 3600 IN SRV 5 500 80 A8.domain.tld. _http._tcp.be1.domain.tld. 3600 IN SRV 5 500 80 A5.domain.tld. _http._tcp.be1.domain.tld. 3600 IN SRV 5 500 80 A6.domain.tld. _http._tcp.be1.domain.tld. 3600 IN SRV 5 500 80 A4.domain.tld. _http._tcp.be1.domain.tld. 3600 IN SRV 5 500 80 A3.domain.tld. _http._tcp.be1.domain.tld. 3600 IN SRV 5 500 80 A2.domain.tld. _http._tcp.be1.domain.tld. 3600 IN SRV 5 500 80 A7.domain.tld. ;; ADDITIONAL SECTION: A1.domain.tld. 3600 IN A 192.168.0.1 A8.domain.tld. 3600 IN A 192.168.0.8 A5.domain.tld. 3600 IN A 192.168.0.5 A6.domain.tld. 3600 IN A 192.168.0.6 A4.domain.tld. 3600 IN A 192.168.0.4 A3.domain.tld. 3600 IN A 192.168.0.3 A2.domain.tld. 3600 IN A 192.168.0.2 A7.domain.tld. 3600 IN A 192.168.0.7 SRV record support was introduced in HAProxy 1.8 and the first design did not take into account the records from the Additional section. Instead, a new resolution is associated to each server with its relevant FQDN. This behavior generates a lot of DNS requests (1 SRV + 1 per server associated). This patch aims at fixing this by: - when a DNS response is validated, we associate A/AAAA records to relevant SRV ones - set a flag on associated servers to prevent them from running a DNS resolution for said FADN - update server IP address with information found in the Additional section If no relevant record can be found in the Additional section, then HAProxy will failback to running a dedicated resolution for this server, as it used to do. This behavior is the one described in RFC 2782.
2019-06-07 03:40:55 -04:00
if (srv->flags & SRV_F_NO_RESOLUTION)
goto end;
if (resolv_link_resolution(srv, OBJ_TYPE_SERVER, 1) == -1)
goto err;
end:
if (!resolv_locked)
HA_SPIN_UNLOCK(DNS_LOCK, &srv->resolvers->lock);
MAJOR/REORG: dns: DNS resolution task and requester queues This patch is a major upgrade of the internal run-time DNS resolver in HAProxy and it brings the following 2 main changes: 1. DNS resolution task Up to now, DNS resolution was triggered by the health check task. From now, DNS resolution task is autonomous. It is started by HAProxy right after the scheduler is available and it is woken either when a network IO occurs for one of its nameserver or when a timeout is matched. From now, this means we can enable DNS resolution for a server without enabling health checking. 2. Introduction of a dns_requester structure Up to now, DNS resolution was purposely made for resolving server hostnames. The idea, is to ensure that any HAProxy internal object should be able to trigger a DNS resolution. For this purpose, 2 things has to be done: - clean up the DNS code from the server structure (this was already quite clean actually) and clean up the server's callbacks from manipulating too much DNS resolution - create an agnostic structure which allows linking a DNS resolution and a requester of any type (using obj_type enum) 3. Manage requesters through queues Up to now, there was an uniq relationship between a resolution and it's owner (aka the requester now). It's a shame, because in some cases, multiple objects may share the same hostname and may benefit from a resolution being performed by a third party. This patch introduces the notion of queues, which are basically lists of either currently running resolution or waiting ones. The resolutions are now available as a pool, which belongs to the resolvers. The pool has has a default size of 64 resolutions per resolvers and is allocated at configuration parsing.
2017-05-22 09:17:15 -04:00
return 0;
err:
if (!resolv_locked)
HA_SPIN_UNLOCK(DNS_LOCK, &srv->resolvers->lock);
return -1;
}
/* Applies srv->lastaddr (the address restored from the state file) as the
 * server's address. The string is parsed without any name resolution, using
 * the address family already present in srv->addr as the preferred family.
 *
 * Returns 0 once the address has been installed through _srv_set_inetaddr().
 * Returns 1 if the string could not be parsed; in that case ERR_WARN is
 * OR'ed into *err_code when <err_code> is not NULL and the server's address
 * is left untouched.
 */
static int srv_apply_lastaddr(struct server *srv, int *err_code)
{
	struct sockaddr_storage parsed;

	memset(&parsed, 0, sizeof(parsed));

	/* Use the preferred family, if configured */
	parsed.ss_family = srv->addr.ss_family;

	if (str2ip2(srv->lastaddr, &parsed, 0)) {
		_srv_set_inetaddr(srv, &parsed);
		return 0;
	}

	/* parsing failed: only report a warning to the caller */
	if (err_code)
		*err_code |= ERR_WARN;
	return 1;
}
/* Tries the server's init-addr methods one after the other until one of them
 * provides an address:
 *   - SRV_IADDR_LAST : reuse srv->lastaddr when there is one;
 *   - SRV_IADDR_LIBC : resolve srv->hostname via srv_set_addr_via_libc();
 *   - SRV_IADDR_NONE : give up, set the SRV_ADMF_RMAINT admin flag and return;
 *   - SRV_IADDR_IP   : fall back to the configured srv->init_addr.
 * When the server has no method configured, "last,libc" is used, followed by
 * "none" if srv->resolvers_id is set.
 *
 * Returns 0 if no error, otherwise a combination of ERR_* flags. Errors
 * reported by failed methods are accumulated and returned even when a later
 * method succeeds. If the method list is exhausted without success,
 * ERR_ALERT | ERR_FATAL is added to the result.
 */
static int srv_iterate_initaddr(struct server *srv)
{
	unsigned int pending = srv->init_addr_methods;
	char *name = srv->hostname;
	int ret = 0;

	/* If no addr and no hostname set, get the name from the DNS SRV request */
	if (!name && srv->srvrq)
		name = srv->srvrq->name;

	if (!pending) {
		/* nothing configured: default to "last,libc" */
		srv_append_initaddr(&pending, SRV_IADDR_LAST);
		srv_append_initaddr(&pending, SRV_IADDR_LIBC);

		/* dns resolution is configured, add "none" to not fail on startup */
		if (srv->resolvers_id)
			srv_append_initaddr(&pending, SRV_IADDR_NONE);
	}

	/* "-dr" : always append "none" so that server addresses resolution
	 * failures are silently ignored, this is convenient to validate some
	 * configs out of their environment.
	 */
	if (global.tune.options & GTUNE_RESOLVE_DONTFAIL)
		srv_append_initaddr(&pending, SRV_IADDR_NONE);

	while (pending) {
		int err = 0;

		switch (srv_get_next_initaddr(&pending)) {
		case SRV_IADDR_LAST:
			/* silently skipped when no previous address is known */
			if (srv->lastaddr) {
				if (srv_apply_lastaddr(srv, &err) == 0)
					goto resolved;
				ret |= err;
			}
			break;

		case SRV_IADDR_LIBC:
			/* silently skipped when there is no hostname to resolve */
			if (srv->hostname) {
				if (srv_set_addr_via_libc(srv, &err) == 0)
					goto resolved;
				ret |= err;
			}
			break;

		case SRV_IADDR_NONE:
			/* terminal method: no address is set at all */
			srv_set_admin_flag(srv, SRV_ADMF_RMAINT, SRV_ADM_STCHGC_NONE);
			if (ret)
				ha_notice("could not resolve address '%s', disabling server.\n",
					  name);
			return ret;

		case SRV_IADDR_IP:
			_srv_set_inetaddr(srv, &srv->init_addr);
			if (ret)
				ha_notice("could not resolve address '%s', falling back to configured address.\n",
					  name);
			goto resolved;

		default: /* unhandled method */
			break;
		}
	}

	/* every method was consumed and none of them produced an address */
	if (ret)
		ha_alert("could not resolve address '%s'.\n", name);
	else
		ha_alert("no method found to resolve address '%s'.\n", name);

	return ret | ERR_ALERT | ERR_FATAL;

 resolved:
	srv_set_dyncookie(srv);
	srv_set_addr_desc(srv, 1);
	return ret;
}
/*
 * Walks the list of proxies and, for every backend-capable proxy which is
 * neither disabled nor stopped, performs the initial address resolution of
 * each of its servers having a hostname or a DNS SRV request. The resolution
 * relies on information provided by:
 *   - configuration file
 *   - server-state file (states provided by an 'old' haproxy process)
 *
 * The user messages context is set to the server's configuration location
 * while it is being processed, then reset.
 *
 * Returns 0 if no error, otherwise, a combination of ERR_ flags.
 */
int srv_init_addr(void)
{
	struct proxy *px;
	int ret = 0;

	for (px = proxies_list; px; px = px->next) {
		struct server *srv;

		/* servers are in backend only */
		if (!(px->cap & PR_CAP_BE))
			continue;

		if (px->flags & (PR_FL_DISABLED|PR_FL_STOPPED))
			continue;

		for (srv = px->srv; srv; srv = srv->next) {
			set_usermsgs_ctx(srv->conf.file, srv->conf.line, &srv->obj_type);
			if (srv->hostname || srv->srvrq)
				ret |= srv_iterate_initaddr(srv);
			reset_usermsgs_ctx();
		}
	}

	return ret;
}
/*
* Must be called with the server lock held.
*/
const char *srv_update_fqdn(struct server *server, const char *fqdn, const char *updater, int resolv_locked)
{
struct buffer *msg;
msg = get_trash_chunk();
chunk_reset(msg);
CLEANUP: Compare the return value of `XXXcmp()` functions with zero According to coding-style.txt it is recommended to use: `strcmp(a, b) == 0` instead of `!strcmp(a, b)` So let's do this. The change was performed by running the following (very long) coccinelle patch on src/: @@ statement S; expression E; expression F; @@ if ( ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 ) ( S | { ... } ) @@ statement S; expression E; expression F; @@ if ( - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 ) ( S | { ... } ) @@ expression E; expression F; expression G; @@ ( G && ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 ) @@ expression E; expression F; expression G; @@ ( G || ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 ) @@ expression E; expression F; expression G; @@ ( ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 && G ) @@ expression E; expression F; expression G; @@ ( ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) != 0 || G ) @@ expression E; expression F; expression G; @@ ( G && - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 ) @@ expression E; expression F; expression G; @@ ( G || - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 ) @@ expression E; expression F; expression G; @@ ( - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 && G ) @@ expression E; expression F; expression G; @@ ( - ! 
( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 || G ) @@ expression E; expression F; expression G; @@ ( - ! ( dns_hostname_cmp | eb_memcmp | memcmp | strcasecmp | strcmp | strncasecmp | strncmp ) - (E, F) + (E, F) == 0 )
2021-01-02 16:31:53 -05:00
if (server->hostname && strcmp(fqdn, server->hostname) == 0) {
chunk_appendf(msg, "no need to change the FDQN");
goto out;
}
if (strlen(fqdn) > DNS_MAX_NAME_SIZE || invalid_domainchar(fqdn)) {
chunk_appendf(msg, "invalid fqdn '%s'", fqdn);
goto out;
}
chunk_appendf(msg, "%s/%s changed its FQDN from %s to %s",
server->proxy->id, server->id, server->hostname, fqdn);
if (srv_set_fqdn(server, fqdn, resolv_locked) < 0) {
chunk_reset(msg);
chunk_appendf(msg, "could not update %s/%s FQDN",
server->proxy->id, server->id);
goto out;
}
BUG/MEDIUM: server: server stuck in maintenance after FQDN change Pierre Bonnat reported that SRV-based server-template recently stopped to work properly. After reviewing the changes, it was found that the regression was caused by a4d04c6 ("BUG/MINOR: server: make sure the HMAINT state is part of MAINT") Indeed, HMAINT is not a regular maintenance flag. It was implemented in b418c122 a4d04c6 ("BUG/MINOR: server: make sure the HMAINT state is part of MAINT"). This flag is only set (and never removed) when the server FQDN is changed from its initial config-time value. This can happen with "set server fqdn" command as well as SRV records updates from the DNS. This flag should ideally belong to server flags.. but it was stored under srv_admin enum because cur_admin is properly exported/imported via server state-file while regular server's flags are not. Due to a4d04c6, when a server FQDN changes, the server is considered in maintenance, and since the HMAINT flag is never removed, the server is stuck in maintenance. To fix the issue, we partially revert a4d04c6. But this latter commit is right on one point: HMAINT flag was way too confusing and mixed-up between regular MAINT flags, thus there's nothing to blame about a4d04c6 as it was error-prone anyway.. To prevent such kind of bugs from happening again, let's rename HMAINT to something more explicit (SRV_ADMF_FQDN_CHANGED) and make it stand out under srv_admin enum so we're not tempted to mix it with regular maintenance flags anymore. Since a4d04c6 was set to be backported in all versions, this patch must be backported there as well.
2024-10-16 04:57:32 -04:00
/* Flag as FQDN changed (e.g.: set from stats socket or resolvers) */
server->next_admin |= SRV_ADMF_FQDN_CHANGED;
out:
if (updater)
chunk_appendf(msg, " by '%s'", updater);
chunk_appendf(msg, "\n");
return msg->area;
}
/* Expects to find a backend and a server in <arg> under the form <backend>/<server>,
* and returns the pointer to the server. Otherwise, display adequate error messages
* on the CLI, sets the CLI's state to CLI_ST_PRINT and returns NULL. This is only
* used for CLI commands requiring a server name.
* Important: the <arg> is modified to remove the '/'.
*/
struct server *cli_find_server(struct appctx *appctx, char *arg)
{
struct proxy *px;
struct server *sv;
struct ist be_name, sv_name = ist(arg);
be_name = istsplit(&sv_name, '/');
if (!istlen(sv_name)) {
cli_err(appctx, "Require 'backend/server'.\n");
return NULL;
}
if (!(px = proxy_be_by_name(ist0(be_name)))) {
cli_err(appctx, "No such backend.\n");
return NULL;
}
if (!(sv = server_find(px, ist0(sv_name)))) {
cli_err(appctx, "No such server.\n");
return NULL;
}
if (px->flags & (PR_FL_DISABLED|PR_FL_STOPPED)) {
cli_err(appctx, "Proxy is disabled.\n");
return NULL;
}
return sv;
}
/* Parses a "set server <backend>/<server> <keyword> [<value>...]" command.
 *
 * <args>[2] designates the server (resolved through cli_find_server(), which
 * splits it in place), <args>[3] is the keyword and the following arguments
 * are its values. Supported keywords: addr, agent, agent-addr, agent-port,
 * agent-send, check-addr, check-port, fqdn, health, ssl, state, weight. Any
 * other keyword produces a usage message.
 *
 * Requires ACCESS_LVL_ADMIN. Errors and warnings are reported on the CLI.
 * Always returns 1.
 *
 * Grabs the server lock around every state change; the "fqdn" keyword also
 * grabs the resolvers' lock first. Every path releases the locks it took
 * before leaving.
 */
static int cli_parse_set_server(char **args, char *payload, struct appctx *appctx, void *private)
{
	struct server *sv;
	const char *warning;

	if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
		return 1;

	sv = cli_find_server(appctx, args[2]);
	if (!sv)
		return 1;

	if (strcmp(args[3], "weight") == 0) {
		HA_SPIN_LOCK(SERVER_LOCK, &sv->lock);
		warning = server_parse_weight_change_request(sv, args[4]);
		HA_SPIN_UNLOCK(SERVER_LOCK, &sv->lock);
		if (warning)
			cli_err(appctx, warning);
	}
	else if (strcmp(args[3], "state") == 0) {
		HA_SPIN_LOCK(SERVER_LOCK, &sv->lock);
		if (strcmp(args[4], "ready") == 0)
			srv_adm_set_ready(sv);
		else if (strcmp(args[4], "drain") == 0)
			srv_adm_set_drain(sv);
		else if (strcmp(args[4], "maint") == 0)
			srv_adm_set_maint(sv);
		else
			cli_err(appctx, "'set server <srv> state' expects 'ready', 'drain' and 'maint'.\n");
		HA_SPIN_UNLOCK(SERVER_LOCK, &sv->lock);
	}
	else if (strcmp(args[3], "health") == 0) {
		HA_SPIN_LOCK(SERVER_LOCK, &sv->lock);
		if (sv->track)
			cli_err(appctx, "cannot change health on a tracking server.\n");
		else if (strcmp(args[4], "up") == 0) {
			/* health counter set to its maximum: rise + fall - 1 */
			sv->check.health = sv->check.rise + sv->check.fall - 1;
			srv_set_running(sv, SRV_OP_STCHGC_CLI);
		}
		else if (strcmp(args[4], "stopping") == 0) {
			sv->check.health = sv->check.rise + sv->check.fall - 1;
			srv_set_stopping(sv, SRV_OP_STCHGC_CLI);
		}
		else if (strcmp(args[4], "down") == 0) {
			sv->check.health = 0;
			srv_set_stopped(sv, SRV_OP_STCHGC_CLI);
		}
		else
			cli_err(appctx, "'set server <srv> health' expects 'up', 'stopping', or 'down'.\n");
		HA_SPIN_UNLOCK(SERVER_LOCK, &sv->lock);
	}
	else if (strcmp(args[3], "agent") == 0) {
		HA_SPIN_LOCK(SERVER_LOCK, &sv->lock);
		if (!(sv->agent.state & CHK_ST_ENABLED))
			cli_err(appctx, "agent checks are not enabled on this server.\n");
		else if (strcmp(args[4], "up") == 0) {
			sv->agent.health = sv->agent.rise + sv->agent.fall - 1;
			srv_set_running(sv, SRV_OP_STCHGC_CLI);
		}
		else if (strcmp(args[4], "down") == 0) {
			sv->agent.health = 0;
			srv_set_stopped(sv, SRV_OP_STCHGC_CLI);
		}
		else
			cli_err(appctx, "'set server <srv> agent' expects 'up' or 'down'.\n");
		HA_SPIN_UNLOCK(SERVER_LOCK, &sv->lock);
	}
	else if (strcmp(args[3], "agent-addr") == 0) {
		char *addr = NULL;
		char *port = NULL;

		if (strlen(args[4]) == 0) {
			cli_err(appctx, "set server <b>/<s> agent-addr requires"
			        " an address and optionally a port.\n");
			goto out;
		}
		addr = args[4];
		if (strcmp(args[5], "port") == 0)
			port = args[6];
		HA_SPIN_LOCK(SERVER_LOCK, &sv->lock);
		warning = srv_update_agent_addr_port(sv, addr, port);
		HA_SPIN_UNLOCK(SERVER_LOCK, &sv->lock);
		if (warning)
			cli_msg(appctx, LOG_WARNING, warning);
	}
	else if (strcmp(args[3], "agent-port") == 0) {
		char *port = NULL;

		if (strlen(args[4]) == 0) {
			cli_err(appctx, "set server <b>/<s> agent-port requires"
			        " a port.\n");
			goto out;
		}
		port = args[4];
		HA_SPIN_LOCK(SERVER_LOCK, &sv->lock);
		warning = srv_update_agent_addr_port(sv, NULL, port);
		HA_SPIN_UNLOCK(SERVER_LOCK, &sv->lock);
		if (warning)
			cli_msg(appctx, LOG_WARNING, warning);
	}
	else if (strcmp(args[3], "agent-send") == 0) {
		HA_SPIN_LOCK(SERVER_LOCK, &sv->lock);
		if (!(sv->agent.state & CHK_ST_ENABLED))
			cli_err(appctx, "agent checks are not enabled on this server.\n");
		else {
			if (!set_srv_agent_send(sv, args[4]))
				cli_err(appctx, "cannot allocate memory for new string.\n");
		}
		HA_SPIN_UNLOCK(SERVER_LOCK, &sv->lock);
	}
	else if (strcmp(args[3], "check-addr") == 0) {
		char *addr = NULL;
		char *port = NULL;

		if (strlen(args[4]) == 0) {
			cli_err(appctx, "set server <b>/<s> check-addr requires"
			        " an address and optionally a port.\n");
			goto out;
		}
		addr = args[4];
		if (strcmp(args[5], "port") == 0)
			port = args[6];
		HA_SPIN_LOCK(SERVER_LOCK, &sv->lock);
		warning = srv_update_check_addr_port(sv, addr, port);
		HA_SPIN_UNLOCK(SERVER_LOCK, &sv->lock);
		if (warning)
			cli_msg(appctx, LOG_WARNING, warning);
	}
	else if (strcmp(args[3], "check-port") == 0) {
		char *port = NULL;

		if (strlen(args[4]) == 0) {
			cli_err(appctx, "set server <b>/<s> check-port requires"
			        " a port.\n");
			goto out;
		}
		port = args[4];
		HA_SPIN_LOCK(SERVER_LOCK, &sv->lock);
		warning = srv_update_check_addr_port(sv, NULL, port);
		HA_SPIN_UNLOCK(SERVER_LOCK, &sv->lock);
		if (warning)
			cli_msg(appctx, LOG_WARNING, warning);
	}
	else if (strcmp(args[3], "addr") == 0) {
		char *addr = NULL;
		char *port = NULL;

		if (strlen(args[4]) == 0) {
			cli_err(appctx, "set server <b>/<s> addr requires an address and optionally a port.\n");
			goto out;
		}
		else {
			addr = args[4];
		}
		if (strcmp(args[5], "port") == 0) {
			port = args[6];
		}
		HA_SPIN_LOCK(SERVER_LOCK, &sv->lock);
		warning = srv_update_addr_port(sv, addr, port, SERVER_INETADDR_UPDATER_CLI);
		if (warning)
			cli_msg(appctx, LOG_WARNING, warning);
		HA_SPIN_UNLOCK(SERVER_LOCK, &sv->lock);
	}
	else if (strcmp(args[3], "fqdn") == 0) {
		if (!*args[4]) {
			cli_err(appctx, "set server <b>/<s> fqdn requires a FQDN.\n");
			goto out;
		}
		if (!sv->resolvers) {
			cli_err(appctx, "set server <b>/<s> fqdn failed because no resolution is configured.\n");
			goto out;
		}
		if (sv->srvrq) {
			cli_err(appctx, "set server <b>/<s> fqdn failed because SRV resolution is configured.\n");
			goto out;
		}
		/* resolvers' lock first, then the server lock; released in
		 * reverse order below.
		 */
		HA_SPIN_LOCK(DNS_LOCK, &sv->resolvers->lock);
		HA_SPIN_LOCK(SERVER_LOCK, &sv->lock);
		/* ensure runtime resolver will process this new fqdn */
		if (sv->flags & SRV_F_NO_RESOLUTION) {
			sv->flags &= ~SRV_F_NO_RESOLUTION;
		}
		/* last argument (1): the resolvers' lock is already held */
		warning = srv_update_fqdn(sv, args[4], "stats socket command", 1);
		HA_SPIN_UNLOCK(SERVER_LOCK, &sv->lock);
		HA_SPIN_UNLOCK(DNS_LOCK, &sv->resolvers->lock);
		if (warning)
			cli_msg(appctx, LOG_WARNING, warning);
	}
	else if (strcmp(args[3], "ssl") == 0) {
#ifdef USE_OPENSSL
		char *err = NULL;

		if (sv->flags & SRV_F_DYNAMIC) {
			cli_err(appctx, "'set server <srv> ssl' not supported on dynamic servers\n");
			goto out;
		}

		if (sv->ssl_ctx.ctx == NULL) {
			cli_err(appctx, "'set server <srv> ssl' cannot be set. "
			        " default-server should define ssl settings\n");
			goto out;
		}

		HA_SPIN_LOCK(SERVER_LOCK, &sv->lock);
		if (strcmp(args[4], "on") == 0) {
			if (srv_set_ssl(sv, 1)) {
				/* the lock must not be kept across the error exit */
				HA_SPIN_UNLOCK(SERVER_LOCK, &sv->lock);
				cli_dynerr(appctx, memprintf(&err, "failed to enable ssl for server %s.\n", args[2]));
				goto out;
			}
		} else if (strcmp(args[4], "off") == 0) {
			if (srv_set_ssl(sv, 0)) {
				/* the lock must not be kept across the error exit */
				HA_SPIN_UNLOCK(SERVER_LOCK, &sv->lock);
				cli_dynerr(appctx, memprintf(&err, "failed to disable ssl for server %s.\n", args[2]));
				goto out;
			}
		} else {
			HA_SPIN_UNLOCK(SERVER_LOCK, &sv->lock);
			cli_err(appctx, "'set server <srv> ssl' expects 'on' or 'off'.\n");
			goto out;
		}
		srv_cleanup_connections(sv);
		HA_SPIN_UNLOCK(SERVER_LOCK, &sv->lock);
		cli_msg(appctx, LOG_NOTICE, "server ssl setting updated.\n");
#else
		cli_msg(appctx, LOG_NOTICE, "server ssl setting not supported.\n");
#endif
	} else {
		cli_err(appctx,
		        "usage: set server <backend>/<server> "
		        "addr | agent | agent-addr | agent-port | agent-send | "
		        "check-addr | check-port | fqdn | health | ssl | "
		        "state | weight\n");
	}
 out:
	return 1;
}
static int cli_parse_get_weight(char **args, char *payload, struct appctx *appctx, void *private)
{
struct proxy *be;
struct server *sv;
struct ist be_name, sv_name = ist(args[2]);
be_name = istsplit(&sv_name, '/');
if (!istlen(sv_name))
return cli_err(appctx, "Require 'backend/server'.\n");
if (!(be = proxy_be_by_name(ist0(be_name))))
return cli_err(appctx, "No such backend.\n");
if (!(sv = server_find(be, ist0(sv_name))))
return cli_err(appctx, "No such server.\n");
/* return server's effective weight at the moment */
snprintf(trash.area, trash.size, "%d (initial %d)\n", sv->uweight,
sv->iweight);
if (applet_putstr(appctx, trash.area) == -1)
return 0;
return 1;
}
/* Parse a "set weight" command.
*
* Grabs the server lock.
*/
static int cli_parse_set_weight(char **args, char *payload, struct appctx *appctx, void *private)
{
struct server *sv;
const char *warning;
if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
return 1;
sv = cli_find_server(appctx, args[2]);
if (!sv)
return 1;
HA_SPIN_LOCK(SERVER_LOCK, &sv->lock);
warning = server_parse_weight_change_request(sv, args[3]);
if (warning)
cli_err(appctx, warning);
HA_SPIN_UNLOCK(SERVER_LOCK, &sv->lock);
return 1;
}
/* parse a "set maxconn server" command. It always returns 1.
*
* Grabs the server lock.
*/
static int cli_parse_set_maxconn_server(char **args, char *payload, struct appctx *appctx, void *private)
{
struct server *sv;
const char *warning;
if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
return 1;
sv = cli_find_server(appctx, args[3]);
if (!sv)
return 1;
HA_SPIN_LOCK(SERVER_LOCK, &sv->lock);
warning = server_parse_maxconn_change_request(sv, args[4]);
if (warning)
cli_err(appctx, warning);
HA_SPIN_UNLOCK(SERVER_LOCK, &sv->lock);
return 1;
}
/* parse a "disable agent" command. It always returns 1.
*
* Grabs the server lock.
*/
static int cli_parse_disable_agent(char **args, char *payload, struct appctx *appctx, void *private)
{
struct server *sv;
if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
return 1;
sv = cli_find_server(appctx, args[2]);
if (!sv)
return 1;
HA_SPIN_LOCK(SERVER_LOCK, &sv->lock);
sv->agent.state &= ~CHK_ST_ENABLED;
HA_SPIN_UNLOCK(SERVER_LOCK, &sv->lock);
return 1;
}
/* parse a "disable health" command. It always returns 1.
*
* Grabs the server lock.
*/
static int cli_parse_disable_health(char **args, char *payload, struct appctx *appctx, void *private)
{
struct server *sv;
if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
return 1;
sv = cli_find_server(appctx, args[2]);
if (!sv)
return 1;
HA_SPIN_LOCK(SERVER_LOCK, &sv->lock);
sv->check.state &= ~CHK_ST_ENABLED;
HA_SPIN_UNLOCK(SERVER_LOCK, &sv->lock);
return 1;
}
/* parse a "disable server" command. It always returns 1.
*
* Grabs the server lock.
*/
static int cli_parse_disable_server(char **args, char *payload, struct appctx *appctx, void *private)
{
struct server *sv;
if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
return 1;
sv = cli_find_server(appctx, args[2]);
if (!sv)
return 1;
HA_SPIN_LOCK(SERVER_LOCK, &sv->lock);
srv_adm_set_maint(sv);
HA_SPIN_UNLOCK(SERVER_LOCK, &sv->lock);
return 1;
}
/* parse a "enable agent" command. It always returns 1.
*
* Grabs the server lock.
*/
static int cli_parse_enable_agent(char **args, char *payload, struct appctx *appctx, void *private)
{
struct server *sv;
if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
return 1;
sv = cli_find_server(appctx, args[2]);
if (!sv)
return 1;
if (!(sv->agent.state & CHK_ST_CONFIGURED))
return cli_err(appctx, "Agent was not configured on this server, cannot enable.\n");
HA_SPIN_LOCK(SERVER_LOCK, &sv->lock);
sv->agent.state |= CHK_ST_ENABLED;
HA_SPIN_UNLOCK(SERVER_LOCK, &sv->lock);
return 1;
}
/* parse a "enable health" command. It always returns 1.
*
* Grabs the server lock.
*/
static int cli_parse_enable_health(char **args, char *payload, struct appctx *appctx, void *private)
{
struct server *sv;
if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
return 1;
sv = cli_find_server(appctx, args[2]);
if (!sv)
return 1;
if (!(sv->check.state & CHK_ST_CONFIGURED))
return cli_err(appctx, "Health check was not configured on this server, cannot enable.\n");
HA_SPIN_LOCK(SERVER_LOCK, &sv->lock);
sv->check.state |= CHK_ST_ENABLED;
HA_SPIN_UNLOCK(SERVER_LOCK, &sv->lock);
return 1;
}
/* parse a "enable server" command. It always returns 1.
*
* Grabs the server lock.
*/
static int cli_parse_enable_server(char **args, char *payload, struct appctx *appctx, void *private)
{
struct server *sv;
if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
return 1;
sv = cli_find_server(appctx, args[2]);
if (!sv)
return 1;
HA_SPIN_LOCK(SERVER_LOCK, &sv->lock);
srv_adm_set_ready(sv);
if (!(sv->flags & SRV_F_COOKIESET)
&& (sv->proxy->ck_opts & PR_CK_DYNAMIC) &&
sv->cookie)
srv_check_for_dup_dyncookie(sv);
HA_SPIN_UNLOCK(SERVER_LOCK, &sv->lock);
return 1;
}
/* Allocates data structure related to load balancing for the server <sv>. It
 * is only required for dynamic servers.
 *
 * The server is attached to the backup or active chash tree of <be> depending
 * on SRV_F_BACKUP, its node counters are initialized from its user weight,
 * and the backend's optional lbprm.server_init() callback is invoked.
 *
 * At the moment, the server lock is not used as this function is only called
 * for a dynamic server not yet registered.
 *
 * Returns 1 on success, 0 on failure of the server_init() callback.
 */
static int srv_alloc_lb(struct server *sv, struct proxy *be)
{
	sv->lb_tree = (sv->flags & SRV_F_BACKUP) ?
	              &be->lbprm.chash.bck : &be->lbprm.chash.act;
	sv->lb_nodes_tot = sv->uweight * BE_WEIGHT_SCALE;
	sv->lb_nodes_now = 0;

	/* the callback is optional; a negative return means failure */
	if (be->lbprm.server_init && be->lbprm.server_init(sv) < 0)
		return 0; // typically out of memory

	return 1;
}
/* updates the server's weight during a warmup stage. Once the final weight is
* reached, the task automatically stops. Note that any server status change
* must have updated server last_change accordingly.
*/
static struct task *server_warmup(struct task *t, void *context, unsigned int state)
{
struct server *s = context;
/* by default, plan on stopping the task */
t->expire = TICK_ETERNITY;
if ((s->next_admin & SRV_ADMF_MAINT) ||
(s->next_state != SRV_ST_STARTING))
return t;
HA_SPIN_LOCK(SERVER_LOCK, &s->lock);
/* recalculate the weights and update the state */
server_recalc_eweight(s, 1);
HA_SPIN_UNLOCK(SERVER_LOCK, &s->lock);
/* probably that we can refill this server with a bit more connections */
BUG/MEDIUM: queues: Do not use pendconn_grab_from_px(). pendconn_grab_from_px() was called when a server was brought back up, to get some streams waiting in the proxy's queue and get them to run on the newly available server. It is very similar to process_srv_queue(), except it only goes through the proxy's queue, which can be a problem, because there is a small race condition that could lead us to add more streams to the server queue just as it's going down. If that happens, the server would just be ignored when back up by new streams, as its queue is not empty, and it would never try to process its queue. The other problem with pendconn_grab_from_px() is that it is very liberal with how it dequeues streams, and it is not very good at enforcing maxconn, it could lead to having 3*maxconn connections. For both those reasons, just get rid of pendconn_grab_from_px(), and just use process_srv_queue(). Both problems are easy to reproduce, especially on a 64 threads machine, set a maxconn to 100, inject in H2 with 1000 concurrent connections containing up to 100 streams each, and after a few seconds/minutes the max number of concurrent output streams will be much higher than maxconn, and eventually the server will stop processing connections. It may be related to github issue #2744. Note that it doesn't totally fix the problem, we can occasionally see a few more connections than maxconn, but the max that have been observed is 4 more connections, we no longer get multiple times maxconn. have more outgoing connections than maxconn, This should be backported up to 2.6.
2024-12-17 09:39:21 -05:00
process_srv_queue(s);
/* get back there in 1 second or 1/20th of the slowstart interval,
* whichever is greater, resulting in small 5% steps.
*/
if (s->next_state == SRV_ST_STARTING)
t->expire = tick_add(now_ms, MS_TO_TICKS(MAX(1000, s->slowstart / 20)));
return t;
}
/* Sets up the warmup task of <srv> when it is configured with a slowstart
 * timer: the task is allocated, bound to server_warmup() with the server as
 * context, and stored in srv->warmup. If the server's next_state is
 * SRV_ST_STARTING, the task is scheduled right away. Servers without
 * slowstart are left untouched.
 *
 * Returns ERR_NONE on success, or ERR_ALERT | ERR_FATAL if the task could not
 * be allocated.
 */
static int init_srv_slowstart(struct server *srv)
{
	struct task *warmup;

	/* nothing to set up without a slowstart period */
	if (!srv->slowstart)
		return ERR_NONE;

	warmup = task_new_anywhere();
	if (warmup == NULL) {
		ha_alert("Cannot activate slowstart for server %s/%s: out of memory.\n", srv->proxy->id, srv->id);
		return ERR_ALERT | ERR_FATAL;
	}

	/* The warmup task is the one called when the server state switches
	 * from down to up.
	 */
	warmup->process = server_warmup;
	warmup->context = srv;
	srv->warmup = warmup;

	/* NOTE(review): the first delay below is derived from the time elapsed
	 * since last_change, whereas server_warmup() uses slowstart / 20;
	 * confirm this is intended.
	 */
	if (srv->next_state == SRV_ST_STARTING) {
		task_schedule(warmup,
		              tick_add(now_ms,
		                       MS_TO_TICKS(MAX(1000, (ns_to_sec(now_ns) - srv->last_change)) / 20)));
	}

	return ERR_NONE;
}
/* Allocates the requeuing tasklet of <srv>, binds it to server_requeue() with
 * the server as context, stores it in srv->requeue_tasklet and wakes it up
 * once.
 *
 * Returns ERR_NONE on success, or ERR_ALERT | ERR_FATAL if the tasklet could
 * not be allocated.
 */
static int init_srv_requeue(struct server *srv)
{
	struct tasklet *requeue = tasklet_new();

	if (requeue == NULL) {
		ha_alert("Cannot allocate a server requeuing tasklet for server %s/%s: out of memory.\n", srv->proxy->id, srv->id);
		return ERR_ALERT | ERR_FATAL;
	}

	requeue->process = server_requeue;
	requeue->context = srv;
	srv->requeue_tasklet = requeue;

	/* Run the tasklet once, in the very unlikely event the server failed
	 * to queue itself when brought up, which could happen if a memory
	 * allocation failed.
	 */
	tasklet_wakeup(requeue);

	return ERR_NONE;
}
/* Allocates and initializes the per-thread (per_thr) and per-thread-group
 * (per_tgrp) arrays of <srv>, sized from global.nbthread and global.nbtgroups
 * respectively. Both allocations are attempted before either result is
 * checked, and nothing is released here on failure.
 *
 * Returns 0 if both arrays have been successfully initialized, -1 if either
 * allocation failed.
 */
static int srv_init_per_thr(struct server *srv)
{
	int thr, grp;

	srv->per_thr = ha_aligned_zalloc(64, global.nbthread * sizeof(*srv->per_thr));
	srv->per_tgrp = ha_aligned_zalloc(64, global.nbtgroups * sizeof(*srv->per_tgrp));
	if (srv->per_thr == NULL || srv->per_tgrp == NULL)
		return -1;

	/* per-thread connection trees and lists */
	for (thr = 0; thr < global.nbthread; thr++) {
		srv->per_thr[thr].idle_conns = NULL;
		srv->per_thr[thr].safe_conns = NULL;
		srv->per_thr[thr].avail_conns = NULL;
		MT_LIST_INIT(&srv->per_thr[thr].sess_conns);
		MT_LIST_INIT(&srv->per_thr[thr].streams);
		LIST_INIT(&srv->per_thr[thr].idle_conn_list);
	}

	/* per-group back-pointer and queue */
	for (grp = 0; grp < global.nbtgroups; grp++) {
		srv->per_tgrp[grp].server = srv;
		queue_init(&srv->per_tgrp[grp].queue, srv->proxy, srv);
	}

	return 0;
}
/* Distinguish between "add server" default usage or one of its sub-commands. */
enum add_srv_mode {
ADD_SRV_MODE_DEF, /* default mode, IO handler should be skipped by parser. */
ADD_SRV_MODE_HELP, /* help mode to list supported keywords */
};
/* Context for "add server" CLI. In help mode, obj1 and obj2 hold the
 * iteration position of cli_io_handler_add_server() so that it can resume
 * where it stopped.
 */
struct add_srv_ctx {
	enum add_srv_mode mode; /* which flavour of "add server" is running */
	void *obj1; /* help mode: current struct srv_kw_list, NULL before the first call */
	void *obj2; /* help mode: current struct srv_kw within that list */
};
/* Handler for "add server" command. Should be reserved to extra sub-commands
* such as "help".
*/
int cli_io_handler_add_server(struct appctx *appctx)
{
struct add_srv_ctx *ctx = appctx->svcctx;
struct srv_kw_list *kwl = ctx->obj1;
struct srv_kw *kw;
switch (ctx->mode) {
case ADD_SRV_MODE_HELP:
if (!kwl) {
/* first invocation */
if (applet_putstr(appctx, "List of keywords supported for dynamic server:\n") < 0)
return cli_err(appctx, "output error");
kwl = LIST_NEXT(&srv_keywords.list, struct srv_kw_list *, list);
ctx->obj1 = kwl;
ctx->obj2 = kwl->kw;
}
while (kwl != &srv_keywords) {
for (kw = ctx->obj2; kw->kw; ++kw) {
if (!kw->dynamic_ok)
continue;
ctx->obj2 = kw;
chunk_reset(&trash);
chunk_printf(&trash, "%s\n", kw->kw);
if (STRESS_RUN1(applet_putchk_stress(appctx, &trash) == -1,
applet_putchk(appctx, &trash) == -1)) {
goto full;
}
}
kwl = LIST_NEXT(&kwl->list, struct srv_kw_list *, list);
ctx->obj1 = kwl;
ctx->obj2 = kwl->kw;
}
break;
case ADD_SRV_MODE_DEF:
/* Add srv parser must return 1 to prevent I/O handler execution in default mode. */
ABORT_NOW();
break;
}
return 1;
full:
return 0;
}
/* Parse a "add server" command.
*
* Returns 1 to skip I/O handler processing, unless a sub-command is executed.
*/
static int cli_parse_add_server(char **args, char *payload, struct appctx *appctx, void *private)
{
struct add_srv_ctx *ctx = applet_reserve_svcctx(appctx, sizeof(*ctx));
struct proxy *be;
struct server *srv;
char *be_name, *sv_name, *errmsg;
int errcode, argc;
int next_id;
const int parse_flags = SRV_PARSE_DYNAMIC|SRV_PARSE_PARSE_ADDR;
usermsgs_clr("CLI");
if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
return 1;
++args;
if (strcmp(args[1], "help") == 0) {
ctx->mode = ADD_SRV_MODE_HELP;
ctx->obj2 = ctx->obj1 = NULL;
return 0;
}
ctx->mode = ADD_SRV_MODE_DEF;
sv_name = be_name = args[1];
/* split backend/server arg */
while (*sv_name && *(++sv_name)) {
if (*sv_name == '/') {
*sv_name = '\0';
++sv_name;
break;
}
}
if (!*sv_name)
return cli_err(appctx, "Require 'backend/server'.\n");
be = proxy_be_by_name(be_name);
if (!be)
return cli_err(appctx, "No such backend.\n");
errmsg = NULL;
if (!be_supports_dynamic_srv(be, &errmsg)) {
cli_dynerr(appctx, memprintf(&errmsg, "Backend does not support dynamic servers : %s.\n", errmsg));
return 1;
}
/* At this point, some operations might not be thread-safe anymore. This
* might be the case for parsing handlers which were designed to run
* only at the starting stage on single-thread mode.
*
* Activate thread isolation to ensure thread-safety.
*/
thread_isolate();
MEDIUM: server: automatically add server to proxy list in new_server() while new_server() takes the parent proxy as argument and even assigns srv->proxy to the parent proxy, it didn't actually inserted the server to the parent proxy server list on success. The result is that sometimes we add the server to the list after new_server() is called, and sometimes we don't. This is really error-prone and because of that hooks such as REGISTER_POST_SERVER_CHECK() which as run for all servers listed in all proxies may not be relied upon for servers which are not actually inserted in their parent proxy server list. Plus it feels very strange to have a server that points to a proxy, but then the proxy doesn't know about it because it cannot find it in its server list. To prevent errors and make proxy->srv list reliable, we move the insertion logic directly under new_server(). This requires to know if we are called during parsing or during runtime to either insert or append the server to the parent proxy list. For that we use PR_FL_CHECKED flag from the parent proxy (if the flag is set, then the proxy was checked so we are past the init phase, thus we assume we are called during runtime) This implies that during startup if new_server() has to be cancelled on error paths we need to call srv_detach() (which is now exposed in server.h) before srv_drop(). The consequence of this commit is that REGISTER_POST_SERVER_CHECK() should not run reliably on all servers created using new_server() (without having to manually loop on global servers_list)
2025-05-09 13:24:55 -04:00
/*
* If a server with the same name is found, reject the new one.
*/
if (server_find(be, sv_name)) {
MEDIUM: server: automatically add server to proxy list in new_server() while new_server() takes the parent proxy as argument and even assigns srv->proxy to the parent proxy, it didn't actually inserted the server to the parent proxy server list on success. The result is that sometimes we add the server to the list after new_server() is called, and sometimes we don't. This is really error-prone and because of that hooks such as REGISTER_POST_SERVER_CHECK() which as run for all servers listed in all proxies may not be relied upon for servers which are not actually inserted in their parent proxy server list. Plus it feels very strange to have a server that points to a proxy, but then the proxy doesn't know about it because it cannot find it in its server list. To prevent errors and make proxy->srv list reliable, we move the insertion logic directly under new_server(). This requires to know if we are called during parsing or during runtime to either insert or append the server to the parent proxy list. For that we use PR_FL_CHECKED flag from the parent proxy (if the flag is set, then the proxy was checked so we are past the init phase, thus we assume we are called during runtime) This implies that during startup if new_server() has to be cancelled on error paths we need to call srv_detach() (which is now exposed in server.h) before srv_drop(). The consequence of this commit is that REGISTER_POST_SERVER_CHECK() should not run reliably on all servers created using new_server() (without having to manually loop on global servers_list)
2025-05-09 13:24:55 -04:00
thread_release();
cli_err(appctx, "Already exists a server with the same name in backend.\n");
return 1;
}
args[1] = sv_name;
errcode = _srv_parse_init(&srv, args, &argc, be, parse_flags);
if (errcode)
goto out;
while (*args[argc]) {
errcode = _srv_parse_kw(srv, args, &argc, be, parse_flags);
if (errcode)
goto out;
}
errcode = _srv_parse_finalize(args, argc, srv, be, parse_flags);
if (errcode)
goto out;
/* A dynamic server does not currently support resolution.
*
* Initialize it explicitly to the "none" method to ensure no
* resolution will ever be executed.
*/
srv->init_addr_methods = SRV_IADDR_NONE;
if (!srv->mux_proto && srv_is_quic(srv)) {
/* Force QUIC as mux-proto on server with quic addresses.
* Incompatibilities with TCP proxy mode will be catch by the
* next code block.
*/
srv->mux_proto = get_mux_proto(ist("quic"));
}
if (srv->mux_proto) {
BUG/MEDIUM: server: "proto" not working for dynamic servers In 304672320e ("MINOR: server: support keyword proto in 'add server' cli") improper use of conn_get_best_mux_entry() function was made: First, server's proxy mode was directly passed as "proto_mode" argument to conn_get_best_mux_entry(), but this is strictly invalid because while there is some relationship between proto modes and proxy modes, they don't use the same storage mechanism and cannot be used interchangeably. Because of this bug, conn_get_best_mux_entry() would not work at all for TCP because PR_MODE_TCP equals 0, where PROTO_MODE_TCP normally equals 1. Then another, less sensitive bug, remains: as its name and description implies, conn_get_best_mux_entry() will try its best to return something to the user, only using keyword (mux_proto) input as an hint to return the most relevant mux within the list of mux that are compatibles with proto_side and proto_mode values. This means that even if mux_proto cannot be found or is not available with current proto_side and proto_mode values, conn_get_best_mux_entry() will most probably fallback to a more generic mux. However in cli_parse_add_server(), we directly check the result of conn_get_best_mux_entry() and consider that it will return NULL if the provided keyword hint for mux_proto cannot be found. This will result in the function not raising errors as expected, because most of the times if the expected proto cannot be found, then we'll silently switch to the fallback one, despite the user providing an explicit proto. To fix that, we store the result of conn_get_best_mux_entry() to compare the returned mux proto name with the one we're expecting to get, as it is originally performed in cfgparse during initial server keyword parsing. This patch depends on - "MINOR: connection: add conn_pr_mode_to_proto_mode() helper func") It must be backported up to 2.6.
2023-10-19 10:15:50 -04:00
int proto_mode = conn_pr_mode_to_proto_mode(be->mode);
const struct mux_proto_list *mux_ent;
mux_ent = conn_get_best_mux_entry(srv->mux_proto->token, PROTO_SIDE_BE, proto_mode);
if (!mux_ent || !isteq(mux_ent->token, srv->mux_proto->token)) {
ha_alert("MUX protocol is not usable for server.\n");
goto out;
}
else {
if ((mux_ent->mux->flags & MX_FL_FRAMED) && !srv_is_quic(srv)) {
ha_alert("MUX protocol is incompatible with stream transport used by server.\n");
goto out;
}
else if (!(mux_ent->mux->flags & MX_FL_FRAMED) && srv_is_quic(srv)) {
ha_alert("MUX protocol is incompatible with framed transport used by server.\n");
goto out;
}
}
}
BUG/MINOR: server/add: ensure minconn/maxconn consistency when adding server When a new server was added through the cli using "server add" command, the maxconn/minconn consistency check historically implemented in check_config_validity() for static servers was missing. As a result, when adding a server with the maxconn parameter without the minconn set, the server was unable to handle any connection because srv_dynamic_maxconn() would always return 0. Consider the following reproducer: | global | stats socket /tmp/ha.sock mode 660 level admin expose-fd listeners | | defaults | timeout client 5s | timeout server 5s | timeout connect 5s | | frontend test | mode http | bind *:8081 | use_backend farm | | listen dummyok | bind localhost:18999 | mode http | http-request return status 200 hdr test "ok" | | backend farm | mode http Start haproxy and perform the following : echo "add server farm/t1 127.0.0.1:18999 maxconn 100" | nc -U /tmp/ha.sock echo "enable server farm/t1" | nc -U /tmp/ha.sock curl localhost:8081 # -> 503 after 5s connect timeout Thanks to ("MINOR: cfgparse/server: move (min/max)conn postparsing logic into dedicated function"), we are now able to perform the consistency check after the new dynamic server has been parsed. This is enough to fix the issue documented here that was reported by Thomas Pedoussaut on the ML. This commit depends on: - ("MINOR: cfgparse/server: move (min/max)conn postparsing logic into dedicated function") It must be backported to 2.6 and 2.7
2023-02-08 05:55:08 -05:00
/* ensure minconn/maxconn consistency */
srv_minmax_conn_apply(srv);
if (srv->use_ssl == 1 || (srv->proxy->options & PR_O_TCPCHK_SSL) ||
srv->check.use_ssl == 1) {
if (xprt_get(XPRT_SSL) && xprt_get(XPRT_SSL)->prepare_srv) {
if (xprt_get(XPRT_SSL)->prepare_srv(srv))
goto out;
}
else if (xprt_get(XPRT_QUIC) && xprt_get(XPRT_QUIC)->prepare_srv) {
if (xprt_get(XPRT_QUIC)->prepare_srv(srv))
goto out;
}
}
/* Define default SNI from host header if needed. */
if (srv->proxy->mode == PR_MODE_HTTP && srv->use_ssl == 1 &&
!srv->sni_expr && !(srv->ssl_ctx.options & SRV_SSL_O_NO_AUTO_SNI)) {
if (srv_configure_auto_sni(srv, &errcode, &errmsg)) {
ha_alert("%s.\n", errmsg);
goto out;
}
}
if (srv->trackit) {
if (srv_apply_track(srv, be))
goto out;
}
/* Init check/agent if configured. The check is manually disabled
* because a dynamic server is started in a disable state. It must be
* manually activated via a "enable health/agent" command.
*/
if (srv->do_check) {
if (init_srv_check(srv))
goto out;
srv->check.state &= ~CHK_ST_ENABLED;
}
if (srv->do_agent) {
if (init_srv_agent_check(srv))
goto out;
srv->agent.state &= ~CHK_ST_ENABLED;
}
errcode = srv_preinit(srv);
if (errcode)
goto out;
if (!srv_alloc_lb(srv, be)) {
ha_alert("Failed to initialize load-balancing data.\n");
goto out;
}
if (!stats_allocate_proxy_counters_internal(&srv->extra_counters,
COUNTERS_SV,
STATS_PX_CAP_SRV,
&srv->per_tgrp->extra_counters_storage,
&srv->per_tgrp[1].extra_counters_storage -
&srv->per_tgrp[0].extra_counters_storage)) {
ha_alert("failed to allocate extra counters for server.\n");
goto out;
}
errcode = srv_postinit(srv);
if (errcode)
goto out;
/* generate the server id if not manually specified */
if (!srv->puid) {
next_id = server_get_next_id(be, 1);
if (!next_id) {
ha_alert("Cannot attach server : no id left in proxy\n");
goto out;
}
srv->puid = next_id;
}
/* insert the server in the backend trees */
server_index_id(be, srv);
cebis_item_insert(&be->conf.used_server_name, conf.name_node, id, srv);
/* addr_key could be NULL if FQDN resolution is postponed (ie: add server from cli) */
if (srv->addr_key)
cebuis_item_insert(&be->used_server_addr, addr_node, addr_key, srv);
/* check if LSB bit (odd bit) is set for reuse_cnt */
if (srv_id_reuse_cnt & 1) {
/* cnt must be increased */
srv_id_reuse_cnt++;
}
/* srv_id_reuse_cnt is always even at this stage, divide by 2 to
* save some space
* (sizeof(srv->rid) is half of sizeof(srv_id_reuse_cnt))
*/
srv->rid = (srv_id_reuse_cnt) ? (srv_id_reuse_cnt / 2) : 0;
/* generate new server's dynamic cookie if enabled on backend */
if (be->ck_opts & PR_CK_DYNAMIC) {
srv_set_dyncookie(srv);
}
/* adding server cannot fail when we reach this:
* publishing EVENT_HDL_SUB_SERVER_ADD
*/
srv_event_hdl_publish(EVENT_HDL_SUB_SERVER_ADD, srv, 1);
thread_release();
/* Start the check task. The server must be fully initialized.
*
* <srvpos> and <nbcheck> parameters are set to 1 as there should be no
* need to randomly spread the task interval for dynamic servers.
*/
if (srv->check.state & CHK_ST_CONFIGURED) {
if (!start_check_task(&srv->check, 0, 1, 1))
ha_alert("System might be unstable, consider to execute a reload\n");
}
if (srv->agent.state & CHK_ST_CONFIGURED) {
if (!start_check_task(&srv->agent, 0, 1, 1))
ha_alert("System might be unstable, consider to execute a reload\n");
}
if (srv->cklen && be->mode != PR_MODE_HTTP)
ha_warning("Ignoring cookie as HTTP mode is disabled.\n");
ha_notice("New server registered.\n");
return cli_umsg(appctx, LOG_INFO);
out:
if (srv) {
if (srv->track)
release_server_track(srv);
if (srv->check.state & CHK_ST_CONFIGURED) {
free_check(&srv->check);
srv_drop(srv);
}
if (srv->agent.state & CHK_ST_CONFIGURED) {
free_check(&srv->agent);
srv_drop(srv);
}
/* remove the server from the proxy linked list */
MEDIUM: server: automatically add server to proxy list in new_server() While new_server() takes the parent proxy as argument and even assigns srv->proxy to the parent proxy, it didn't actually insert the server into the parent proxy server list on success. The result is that sometimes we add the server to the list after new_server() is called, and sometimes we don't. This is really error-prone and because of that, hooks such as REGISTER_POST_SERVER_CHECK() which are run for all servers listed in all proxies may not be relied upon for servers which are not actually inserted in their parent proxy server list. Plus it feels very strange to have a server that points to a proxy, but then the proxy doesn't know about it because it cannot find it in its server list. To prevent errors and make proxy->srv list reliable, we move the insertion logic directly under new_server(). This requires to know if we are called during parsing or during runtime to either insert or append the server to the parent proxy list. For that we use PR_FL_CHECKED flag from the parent proxy (if the flag is set, then the proxy was checked so we are past the init phase, thus we assume we are called during runtime). This implies that during startup if new_server() has to be cancelled on error paths we need to call srv_detach() (which is now exposed in server.h) before srv_drop(). The consequence of this commit is that REGISTER_POST_SERVER_CHECK() should now run reliably on all servers created using new_server() (without having to manually loop on global servers_list)
2025-05-09 13:24:55 -04:00
srv_detach(srv);
}
thread_release();
if (!usermsgs_empty())
cli_umsgerr(appctx);
if (srv)
srv_drop(srv);
return 1;
}
/* Tells whether the server <bename>/<svname> exists and may be deleted right
 * now. A deletable server is in maintenance and has no stream, no queued
 * request, and neither used nor idle connections attached to it. Both <bename>
 * and <svname> must be valid strings.
 *
 * Optional outputs (each one is skipped when its pointer is NULL):
 *   - <pb> receives the backend found (NULL if the lookup failed)
 *   - <ps> receives the server found (NULL if the lookup failed)
 *   - <pm> receives a message describing why deletion is refused, or NULL
 *     when there is nothing to say.
 *
 * Return value:
 *   >0 if the server can be deleted
 *    0 if it cannot be deleted yet (the caller may wait and retry)
 *   <0 if deletion is not possible at all
 */
int srv_check_for_deletion(const char *bename, const char *svname, struct proxy **pb, struct server **ps, const char **pm)
{
	struct proxy *px = NULL;
	struct server *sv = NULL;
	const char *reason = NULL;
	int status = -1; /* unrecoverable unless a later branch says otherwise */

	px = proxy_be_by_name(bename);
	if (!px) {
		reason = "No such backend.";
	}
	else if (!(sv = server_find(px, svname))) {
		reason = "No such server.";
	}
	else if (sv->flags & SRV_F_NON_PURGEABLE) {
		reason = "This server cannot be removed at runtime due to other configuration elements pointing to it.";
	}
	else if (!(sv->cur_admin & SRV_ADMF_MAINT)) {
		/* Maintenance is mandatory: it ensures the server has already
		 * left the lb structures (through lbprm.set_server_status_down).
		 */
		reason = "Only servers in maintenance mode can be deleted.";
	}
	else if (_HA_ATOMIC_LOAD(&sv->curr_used_conns) ||
	         _HA_ATOMIC_LOAD(&sv->queueslength) ||
	         srv_has_streams(sv) ||
	         _HA_ATOMIC_LOAD(&sv->curr_idle_conns) ||
	         _HA_ATOMIC_LOAD(&sv->curr_sess_idle_conns)) {
		/* Transient condition: active, pending or idle connections are
		 * still accounted on the server. Idle conns scheduled for
		 * purging are still counted in the idle counter.
		 */
		status = 0;
		reason = "Server still has connections attached to it, cannot remove it.";
	}
	else {
		/* every check passed */
		status = 1;
	}

	if (pb)
		*pb = px;
	if (ps)
		*ps = sv;
	if (pm)
		*pm = reason;
	return status;
}
/* Parse a "del server" command
* Returns 0 if the server has been successfully initialized, 1 on failure.
*/
static int cli_parse_delete_server(char **args, char *payload, struct appctx *appctx, void *private)
{
struct proxy *be;
struct server *srv;
struct ist be_name, sv_name;
2024-10-23 05:33:34 -04:00
struct watcher *srv_watch;
const char *msg;
MAJOR: server: do not remove idle conns in del server Do not remove anymore idle and purgeable connections directly under the "del server" handler. The main objective of this patch is to reduce the amount of work performed under thread isolation. This should improve "del server" scheduling with other haproxy tasks. Another objective is to be able to properly support dynamic servers with QUIC. Indeed, takeover is not yet implemented for this protocol, hence it is not possible to rely on cleanup of idle connections performed by a single thread under "del server" handler. With this change it is not possible anymore to remove a server if there is still idle connections referencing it. To ensure this cannot be performed, srv_check_for_deletion() has been extended to check server counters for idle and idle private connections. Server deletion should still remain a viable procedure, as first it is mandatory to put the targetted server into maintenance. This step forces the cleanup of its existing idle connections. Thanks to a recent change, all finishing connections are also removed immediately instead of becoming idle. In short, this patch transforms idle connections removal from a synchronous to an asynchronous procedure. However, this should remain a steadfast and quick method achievable in less than a second. This patch is considered major as some users may notice this change when removing a server. In particular with the following CLI commands pipeline: "disable server <X>; shutdown sessions server <X>; del server <X>" Server deletion will now probably fail, as idle connections purge cannot be completed immediately. Thus, it is now highly advise to always use a small delay "wait srv-removable" before "del server" to ensure that idle connections purge is executed prior. Along with this change, documentation for "del server" and related "shutdown sessions server" has been refined, in particular to better highlight under what conditions a server can be removed.
2025-08-01 11:51:16 -04:00
int ret;
if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
return 1;
++args;
/* The proxy servers list is currently not protected by a lock so this
* requires thread isolation. In addition, any place referencing the
* server about to be deleted would be unsafe after our operation, so
* we must be certain to be alone so that no other thread has even
* started to grab a temporary reference to this server.
*/
thread_isolate_full();
sv_name = ist(args[1]);
be_name = istsplit(&sv_name, '/');
if (!istlen(sv_name)) {
cli_err(appctx, "Require 'backend/server'.\n");
goto out;
}
ret = srv_check_for_deletion(ist0(be_name), ist0(sv_name), &be, &srv, &msg);
if (ret <= 0) {
/* failure (recoverable or not) */
cli_err(appctx, msg);
goto out;
}
/* removing cannot fail anymore when we reach this:
* publishing EVENT_HDL_SUB_SERVER_DEL
*/
srv_event_hdl_publish(EVENT_HDL_SUB_SERVER_DEL, srv, 1);
/* remove srv from tracking list */
if (srv->track)
release_server_track(srv);
/* stop the check task if running */
if (srv->check.state & CHK_ST_CONFIGURED)
check_purge(&srv->check);
if (srv->agent.state & CHK_ST_CONFIGURED)
check_purge(&srv->agent);
if (srv->proxy->lbprm.server_deinit)
srv->proxy->lbprm.server_deinit(srv);
2024-10-23 05:33:34 -04:00
while (!MT_LIST_ISEMPTY(&srv->watcher_list)) {
srv_watch = MT_LIST_NEXT(&srv->watcher_list, struct watcher *, el);
BUG_ON(srv->next && srv->next->flags & SRV_F_DELETED);
watcher_next(srv_watch, srv->next);
}
/* detach the server from the proxy linked list
* The proxy servers list is currently not protected by a lock, so this
* requires thread_isolate/release.
*/
MEDIUM: server: automatically add server to proxy list in new_server() while new_server() takes the parent proxy as argument and even assigns srv->proxy to the parent proxy, it didn't actually inserted the server to the parent proxy server list on success. The result is that sometimes we add the server to the list after new_server() is called, and sometimes we don't. This is really error-prone and because of that hooks such as REGISTER_POST_SERVER_CHECK() which as run for all servers listed in all proxies may not be relied upon for servers which are not actually inserted in their parent proxy server list. Plus it feels very strange to have a server that points to a proxy, but then the proxy doesn't know about it because it cannot find it in its server list. To prevent errors and make proxy->srv list reliable, we move the insertion logic directly under new_server(). This requires to know if we are called during parsing or during runtime to either insert or append the server to the parent proxy list. For that we use PR_FL_CHECKED flag from the parent proxy (if the flag is set, then the proxy was checked so we are past the init phase, thus we assume we are called during runtime) This implies that during startup if new_server() has to be cancelled on error paths we need to call srv_detach() (which is now exposed in server.h) before srv_drop(). The consequence of this commit is that REGISTER_POST_SERVER_CHECK() should not run reliably on all servers created using new_server() (without having to manually loop on global servers_list)
2025-05-09 13:24:55 -04:00
srv_detach(srv);
/* Mark the server as being deleted (ie removed from its proxy list)
* but not yet purged from memory. Any module still referencing this
* server must manipulate it with precaution and are expected to
* release its refcount as soon as possible.
*/
srv->flags |= SRV_F_DELETED;
/* Inc proxy refcount until the server is finally freed. */
proxy_take(srv->proxy);
/* remove srv from addr_node tree */
ceb32_item_delete(&be->conf.used_server_id, conf.puid_node, puid, srv);
cebis_item_delete(&be->conf.used_server_name, conf.name_node, id, srv);
cebuis_item_delete(&be->used_server_addr, addr_node, addr_key, srv);
/* remove srv from idle_node tree for idle conn cleanup */
eb32_delete(&srv->idle_node);
/* set LSB bit (odd bit) for reuse_cnt */
srv_id_reuse_cnt |= 1;
thread_release();
ha_notice("Server deleted.\n");
srv_drop(srv);
cli_msg(appctx, LOG_INFO, "Server deleted.\n");
return 0;
out:
thread_release();
return 1;
}
/* register cli keywords
 *
 * Each entry lists, in order: the keyword sequence matched on the CLI
 * (NULL-terminated), the usage line shown to the user, the parsing function
 * and, when present, a fourth member (only "add server" sets a non-NULL one
 * here, cli_io_handler_add_server - presumably its I/O handler, to be
 * confirmed against the struct cli_kw declaration). The table ends with an
 * empty entry.
 */
static struct cli_kw_list cli_kws = {{ },{
{ { "disable", "agent", NULL }, "disable agent : disable agent checks", cli_parse_disable_agent, NULL },
{ { "disable", "health", NULL }, "disable health : disable health checks", cli_parse_disable_health, NULL },
{ { "disable", "server", NULL }, "disable server (DEPRECATED) : disable a server for maintenance (use 'set server' instead)", cli_parse_disable_server, NULL },
{ { "enable", "agent", NULL }, "enable agent : enable agent checks", cli_parse_enable_agent, NULL },
{ { "enable", "health", NULL }, "enable health : enable health checks", cli_parse_enable_health, NULL },
{ { "enable", "server", NULL }, "enable server (DEPRECATED) : enable a disabled server (use 'set server' instead)", cli_parse_enable_server, NULL },
{ { "set", "maxconn", "server", NULL }, "set maxconn server <bk>/<srv> : change a server's maxconn setting", cli_parse_set_maxconn_server, NULL },
{ { "set", "server", NULL }, "set server <bk>/<srv> [opts] : change a server's state, weight, address or ssl", cli_parse_set_server },
{ { "get", "weight", NULL }, "get weight <bk>/<srv> : report a server's current weight", cli_parse_get_weight },
{ { "set", "weight", NULL }, "set weight <bk>/<srv> (DEPRECATED) : change a server's weight (use 'set server' instead)", cli_parse_set_weight },
{ { "add", "server", NULL }, "add server <bk>/<srv> : create a new server", cli_parse_add_server, cli_io_handler_add_server },
{ { "del", "server", NULL }, "del server <bk>/<srv> : remove a dynamically added server", cli_parse_delete_server, NULL },
{{},}
}};
/* hand the table above to cli_register_kw() at the STG_REGISTER init stage */
INITCALL1(STG_REGISTER, cli_register_kw, &cli_kws);
/* Prepare a server <srv> to track check status of another one. <srv>.<trackit>
 * field is used to retrieve the identifier of the tracked server, either with
 * the format "proxy/server" or just "server". <curproxy> must point to the
 * backend owning <srv>; if no proxy is specified in <trackit>, it will be used
 * to find the tracked server.
 *
 * On success, <srv> is linked at the head of the tracked server's trackers
 * list, the tracked server is flagged SRV_F_NON_PURGEABLE and <srv>.<trackit>
 * is released. On failure an alert is emitted and no link is created; note
 * that <srv>.<trackit> may have been split in place on its last '/' by then.
 *
 * Returns 0 if the server track has been activated else non-zero.
 *
 * Not thread-safe.
 */
int srv_apply_track(struct server *srv, struct proxy *curproxy)
{
	struct proxy *px;
	struct server *strack, *loop;
	char *pname, *sname;

	if (!srv->trackit)
		return 1;

	/* Split "proxy/server" in place on the last '/'. Without any '/', the
	 * whole string is the server name and the proxy is implicit.
	 */
	pname = srv->trackit;
	sname = strrchr(pname, '/');

	if (sname) {
		*sname++ = '\0';
	}
	else {
		sname = pname;
		pname = NULL;
	}

	if (pname) {
		px = proxy_be_by_name(pname);
		if (!px) {
			ha_alert("unable to find required proxy '%s' for tracking.\n",
			         pname);
			return 1;
		}
	}
	else {
		px = curproxy;
	}

	strack = server_find_by_name(px, sname);
	if (!strack) {
		ha_alert("unable to find required server '%s' for tracking.\n",
		         sname);
		return 1;
	}

	if (strack->flags & SRV_F_DYNAMIC) {
		ha_alert("unable to use %s/%s for tracking as it is a dynamic server.\n",
		         px->id, strack->id);
		return 1;
	}

	/* the tracked server must itself be checked, or track another server */
	if (!strack->do_check && !strack->do_agent && !strack->track &&
	    !strack->trackit) {
		ha_alert("unable to use %s/%s for "
		         "tracking as it does not have any check nor agent enabled.\n",
		         px->id, strack->id);
		return 1;
	}

	/* Walk the chain of servers tracked by <strack>: reaching <srv> means
	 * that the new link would close a loop.
	 */
	for (loop = strack->track; loop && loop != srv; loop = loop->track)
		;

	if (srv == strack || loop) {
		/* In both cases the chain loops back to <srv> itself, which is
		 * owned by <curproxy> and not necessarily by <px>.
		 */
		ha_alert("unable to track %s/%s as it "
		         "belongs to a tracking chain looping back to %s/%s.\n",
		         px->id, strack->id, curproxy->id, srv->id);
		return 1;
	}

	/* both backends must agree on disable-on-404 for tracking to be consistent */
	if (curproxy != px &&
	    (curproxy->options & PR_O_DISABLE404) != (px->options & PR_O_DISABLE404)) {
		ha_alert("unable to use %s/%s for "
		         "tracking: disable-on-404 option inconsistency.\n",
		         px->id, strack->id);
		return 1;
	}

	/* insert <srv> at the head of <strack>'s trackers list */
	srv->track = strack;
	srv->tracknext = strack->trackers;
	strack->trackers = srv;
	/* a tracked server is now referenced by <srv>, forbid its removal at runtime */
	strack->flags |= SRV_F_NON_PURGEABLE;

	ha_free(&srv->trackit);

	return 0;
}
/* Forwards a state change of server <s> to the load-balancing algorithm of
 * its proxy. When the algorithm provides update_server_eweight(), that single
 * hook is called. Otherwise set_server_status_up() or set_server_status_down()
 * is called depending on srv_willbe_usable(<s>), provided the hook is set.
 */
static void srv_lb_propagate(struct server *s)
{
	struct proxy *owner = s->proxy;

	if (owner->lbprm.update_server_eweight) {
		owner->lbprm.update_server_eweight(s);
		return;
	}

	if (srv_willbe_usable(s)) {
		if (owner->lbprm.set_server_status_up)
			owner->lbprm.set_server_status_up(s);
		return;
	}

	if (owner->lbprm.set_server_status_down)
		owner->lbprm.set_server_status_down(s);
}
MEDIUM: server: split srv_update_status() in two functions Considering that srv_update_status() is now synchronous again since 3ff577e1 ("MAJOR: server: make server state changes synchronous again"), and that we can easily identify if the update is from an operational or administrative context thanks to "MINOR: server: pass adm and op cause to srv_update_status()". And given that administrative and operational updates cannot be cumulated (since srv_update_status() is called synchronously and independently for admin updates and state/operational updates, and the function directly consumes the changes). We split srv_update_status() in 2 distinct parts: Either <type> is 0, meaning the update is an operational update which is handled by directly looking at cur_state and next_state to apply the proper transition. Also, the check to prevent operational state from being applied if MAINT admin flag is set is no longer needed given that the calling functions already ensure this (ie: srv_set_{running,stopping,stopped) Or <type> is 1, meaning the update is an administrative update, where cur_admin and next_admin are evaluated to apply the proper transition and deduct the resulting server state (next_state is updated implicitly). Once this is done, both operations share a common code path in srv_update_status() to update proxy and servers stats if required. Thanks to this change, the function's behavior is much more predictable, it is not an all-in-one function anymore. Either we apply an operational change, else it is an administrative change. That's it, we cannot mix the 2 since both code paths are now properly separated.
2023-04-19 10:19:58 -04:00
/* directly update server state based on an operational change
* (compare current and next state to know which transition to apply)
*
MEDIUM: server: split srv_update_status() in two functions Considering that srv_update_status() is now synchronous again since 3ff577e1 ("MAJOR: server: make server state changes synchronous again"), and that we can easily identify if the update is from an operational or administrative context thanks to "MINOR: server: pass adm and op cause to srv_update_status()". And given that administrative and operational updates cannot be cumulated (since srv_update_status() is called synchronously and independently for admin updates and state/operational updates, and the function directly consumes the changes). We split srv_update_status() in 2 distinct parts: Either <type> is 0, meaning the update is an operational update which is handled by directly looking at cur_state and next_state to apply the proper transition. Also, the check to prevent operational state from being applied if MAINT admin flag is set is no longer needed given that the calling functions already ensure this (ie: srv_set_{running,stopping,stopped) Or <type> is 1, meaning the update is an administrative update, where cur_admin and next_admin are evaluated to apply the proper transition and deduct the resulting server state (next_state is updated implicitly). Once this is done, both operations share a common code path in srv_update_status() to update proxy and servers stats if required. Thanks to this change, the function's behavior is much more predictable, it is not an all-in-one function anymore. Either we apply an operational change, else it is an administrative change. That's it, we cannot mix the 2 since both code paths are now properly separated.
2023-04-19 10:19:58 -04:00
* The function returns the number of requeued sessions (either taken by
* the server or redispatched to others servers) due to the server state
* change.
*/
MEDIUM: server: split srv_update_status() in two functions Considering that srv_update_status() is now synchronous again since 3ff577e1 ("MAJOR: server: make server state changes synchronous again"), and that we can easily identify if the update is from an operational or administrative context thanks to "MINOR: server: pass adm and op cause to srv_update_status()". And given that administrative and operational updates cannot be cumulated (since srv_update_status() is called synchronously and independently for admin updates and state/operational updates, and the function directly consumes the changes). We split srv_update_status() in 2 distinct parts: Either <type> is 0, meaning the update is an operational update which is handled by directly looking at cur_state and next_state to apply the proper transition. Also, the check to prevent operational state from being applied if MAINT admin flag is set is no longer needed given that the calling functions already ensure this (ie: srv_set_{running,stopping,stopped) Or <type> is 1, meaning the update is an administrative update, where cur_admin and next_admin are evaluated to apply the proper transition and deduct the resulting server state (next_state is updated implicitly). Once this is done, both operations share a common code path in srv_update_status() to update proxy and servers stats if required. Thanks to this change, the function's behavior is much more predictable, it is not an all-in-one function anymore. Either we apply an operational change, else it is an administrative change. That's it, we cannot mix the 2 since both code paths are now properly separated.
2023-04-19 10:19:58 -04:00
static int _srv_update_status_op(struct server *s, enum srv_op_st_chg_cause cause)
{
struct buffer *tmptrash = NULL;
MEDIUM: server: split srv_update_status() in two functions Considering that srv_update_status() is now synchronous again since 3ff577e1 ("MAJOR: server: make server state changes synchronous again"), and that we can easily identify if the update is from an operational or administrative context thanks to "MINOR: server: pass adm and op cause to srv_update_status()". And given that administrative and operational updates cannot be cumulated (since srv_update_status() is called synchronously and independently for admin updates and state/operational updates, and the function directly consumes the changes). We split srv_update_status() in 2 distinct parts: Either <type> is 0, meaning the update is an operational update which is handled by directly looking at cur_state and next_state to apply the proper transition. Also, the check to prevent operational state from being applied if MAINT admin flag is set is no longer needed given that the calling functions already ensure this (ie: srv_set_{running,stopping,stopped) Or <type> is 1, meaning the update is an administrative update, where cur_admin and next_admin are evaluated to apply the proper transition and deduct the resulting server state (next_state is updated implicitly). Once this is done, both operations share a common code path in srv_update_status() to update proxy and servers stats if required. Thanks to this change, the function's behavior is much more predictable, it is not an all-in-one function anymore. Either we apply an operational change, else it is an administrative change. That's it, we cannot mix the 2 since both code paths are now properly separated.
2023-04-19 10:19:58 -04:00
int log_level;
int srv_was_stopping = (s->cur_state == SRV_ST_STOPPING) || (s->cur_admin & SRV_ADMF_DRAIN);
int xferred = 0;
MEDIUM: server: split srv_update_status() in two functions Considering that srv_update_status() is now synchronous again since 3ff577e1 ("MAJOR: server: make server state changes synchronous again"), and that we can easily identify if the update is from an operational or administrative context thanks to "MINOR: server: pass adm and op cause to srv_update_status()". And given that administrative and operational updates cannot be cumulated (since srv_update_status() is called synchronously and independently for admin updates and state/operational updates, and the function directly consumes the changes). We split srv_update_status() in 2 distinct parts: Either <type> is 0, meaning the update is an operational update which is handled by directly looking at cur_state and next_state to apply the proper transition. Also, the check to prevent operational state from being applied if MAINT admin flag is set is no longer needed given that the calling functions already ensure this (ie: srv_set_{running,stopping,stopped) Or <type> is 1, meaning the update is an administrative update, where cur_admin and next_admin are evaluated to apply the proper transition and deduct the resulting server state (next_state is updated implicitly). Once this is done, both operations share a common code path in srv_update_status() to update proxy and servers stats if required. Thanks to this change, the function's behavior is much more predictable, it is not an all-in-one function anymore. Either we apply an operational change, else it is an administrative change. That's it, we cannot mix the 2 since both code paths are now properly separated.
2023-04-19 10:19:58 -04:00
if ((s->cur_state != SRV_ST_STOPPED) && (s->next_state == SRV_ST_STOPPED)) {
srv_lb_propagate(s);
MEDIUM: server: split srv_update_status() in two functions Considering that srv_update_status() is now synchronous again since 3ff577e1 ("MAJOR: server: make server state changes synchronous again"), and that we can easily identify if the update is from an operational or administrative context thanks to "MINOR: server: pass adm and op cause to srv_update_status()". And given that administrative and operational updates cannot be cumulated (since srv_update_status() is called synchronously and independently for admin updates and state/operational updates, and the function directly consumes the changes). We split srv_update_status() in 2 distinct parts: Either <type> is 0, meaning the update is an operational update which is handled by directly looking at cur_state and next_state to apply the proper transition. Also, the check to prevent operational state from being applied if MAINT admin flag is set is no longer needed given that the calling functions already ensure this (ie: srv_set_{running,stopping,stopped) Or <type> is 1, meaning the update is an administrative update, where cur_admin and next_admin are evaluated to apply the proper transition and deduct the resulting server state (next_state is updated implicitly). Once this is done, both operations share a common code path in srv_update_status() to update proxy and servers stats if required. Thanks to this change, the function's behavior is much more predictable, it is not an all-in-one function anymore. Either we apply an operational change, else it is an administrative change. That's it, we cannot mix the 2 since both code paths are now properly separated.
2023-04-19 10:19:58 -04:00
if (s->onmarkeddown & HANA_ONMARKEDDOWN_SHUTDOWNSESSIONS)
srv_shutdown_streams(s, SF_ERR_DOWN);
srv_reset_path_parameters(s);
MEDIUM: server: split srv_update_status() in two functions Considering that srv_update_status() is now synchronous again since 3ff577e1 ("MAJOR: server: make server state changes synchronous again"), and that we can easily identify if the update is from an operational or administrative context thanks to "MINOR: server: pass adm and op cause to srv_update_status()". And given that administrative and operational updates cannot be cumulated (since srv_update_status() is called synchronously and independently for admin updates and state/operational updates, and the function directly consumes the changes). We split srv_update_status() in 2 distinct parts: Either <type> is 0, meaning the update is an operational update which is handled by directly looking at cur_state and next_state to apply the proper transition. Also, the check to prevent operational state from being applied if MAINT admin flag is set is no longer needed given that the calling functions already ensure this (ie: srv_set_{running,stopping,stopped) Or <type> is 1, meaning the update is an administrative update, where cur_admin and next_admin are evaluated to apply the proper transition and deduct the resulting server state (next_state is updated implicitly). Once this is done, both operations share a common code path in srv_update_status() to update proxy and servers stats if required. Thanks to this change, the function's behavior is much more predictable, it is not an all-in-one function anymore. Either we apply an operational change, else it is an administrative change. That's it, we cannot mix the 2 since both code paths are now properly separated.
2023-04-19 10:19:58 -04:00
/* we might have streams queued on this server and waiting for
* a connection. Those which are redispatchable will be queued
* to another server or to the proxy itself.
*/
xferred = pendconn_redistribute(s);
MEDIUM: server: split srv_update_status() in two functions Considering that srv_update_status() is now synchronous again since 3ff577e1 ("MAJOR: server: make server state changes synchronous again"), and that we can easily identify if the update is from an operational or administrative context thanks to "MINOR: server: pass adm and op cause to srv_update_status()". And given that administrative and operational updates cannot be cumulated (since srv_update_status() is called synchronously and independently for admin updates and state/operational updates, and the function directly consumes the changes). We split srv_update_status() in 2 distinct parts: Either <type> is 0, meaning the update is an operational update which is handled by directly looking at cur_state and next_state to apply the proper transition. Also, the check to prevent operational state from being applied if MAINT admin flag is set is no longer needed given that the calling functions already ensure this (ie: srv_set_{running,stopping,stopped) Or <type> is 1, meaning the update is an administrative update, where cur_admin and next_admin are evaluated to apply the proper transition and deduct the resulting server state (next_state is updated implicitly). Once this is done, both operations share a common code path in srv_update_status() to update proxy and servers stats if required. Thanks to this change, the function's behavior is much more predictable, it is not an all-in-one function anymore. Either we apply an operational change, else it is an administrative change. That's it, we cannot mix the 2 since both code paths are now properly separated.
2023-04-19 10:19:58 -04:00
tmptrash = alloc_trash_chunk();
if (tmptrash) {
chunk_printf(tmptrash,
"%sServer %s/%s is DOWN", s->flags & SRV_F_BACKUP ? "Backup " : "",
s->proxy->id, s->id);
MEDIUM: server: split srv_update_status() in two functions Considering that srv_update_status() is now synchronous again since 3ff577e1 ("MAJOR: server: make server state changes synchronous again"), and that we can easily identify if the update is from an operational or administrative context thanks to "MINOR: server: pass adm and op cause to srv_update_status()". And given that administrative and operational updates cannot be cumulated (since srv_update_status() is called synchronously and independently for admin updates and state/operational updates, and the function directly consumes the changes). We split srv_update_status() in 2 distinct parts: Either <type> is 0, meaning the update is an operational update which is handled by directly looking at cur_state and next_state to apply the proper transition. Also, the check to prevent operational state from being applied if MAINT admin flag is set is no longer needed given that the calling functions already ensure this (ie: srv_set_{running,stopping,stopped) Or <type> is 1, meaning the update is an administrative update, where cur_admin and next_admin are evaluated to apply the proper transition and deduct the resulting server state (next_state is updated implicitly). Once this is done, both operations share a common code path in srv_update_status() to update proxy and servers stats if required. Thanks to this change, the function's behavior is much more predictable, it is not an all-in-one function anymore. Either we apply an operational change, else it is an administrative change. That's it, we cannot mix the 2 since both code paths are now properly separated.
2023-04-19 10:19:58 -04:00
srv_append_op_chg_cause(tmptrash, s, cause);
srv_append_more(tmptrash, s, xferred, 0);
MEDIUM: server: split srv_update_status() in two functions Considering that srv_update_status() is now synchronous again since 3ff577e1 ("MAJOR: server: make server state changes synchronous again"), and that we can easily identify if the update is from an operational or administrative context thanks to "MINOR: server: pass adm and op cause to srv_update_status()". And given that administrative and operational updates cannot be cumulated (since srv_update_status() is called synchronously and independently for admin updates and state/operational updates, and the function directly consumes the changes). We split srv_update_status() in 2 distinct parts: Either <type> is 0, meaning the update is an operational update which is handled by directly looking at cur_state and next_state to apply the proper transition. Also, the check to prevent operational state from being applied if MAINT admin flag is set is no longer needed given that the calling functions already ensure this (ie: srv_set_{running,stopping,stopped) Or <type> is 1, meaning the update is an administrative update, where cur_admin and next_admin are evaluated to apply the proper transition and deduct the resulting server state (next_state is updated implicitly). Once this is done, both operations share a common code path in srv_update_status() to update proxy and servers stats if required. Thanks to this change, the function's behavior is much more predictable, it is not an all-in-one function anymore. Either we apply an operational change, else it is an administrative change. That's it, we cannot mix the 2 since both code paths are now properly separated.
2023-04-19 10:19:58 -04:00
ha_warning("%s.\n", tmptrash->area);
MEDIUM: server: split srv_update_status() in two functions Considering that srv_update_status() is now synchronous again since 3ff577e1 ("MAJOR: server: make server state changes synchronous again"), and that we can easily identify if the update is from an operational or administrative context thanks to "MINOR: server: pass adm and op cause to srv_update_status()". And given that administrative and operational updates cannot be cumulated (since srv_update_status() is called synchronously and independently for admin updates and state/operational updates, and the function directly consumes the changes). We split srv_update_status() in 2 distinct parts: Either <type> is 0, meaning the update is an operational update which is handled by directly looking at cur_state and next_state to apply the proper transition. Also, the check to prevent operational state from being applied if MAINT admin flag is set is no longer needed given that the calling functions already ensure this (ie: srv_set_{running,stopping,stopped) Or <type> is 1, meaning the update is an administrative update, where cur_admin and next_admin are evaluated to apply the proper transition and deduct the resulting server state (next_state is updated implicitly). Once this is done, both operations share a common code path in srv_update_status() to update proxy and servers stats if required. Thanks to this change, the function's behavior is much more predictable, it is not an all-in-one function anymore. Either we apply an operational change, else it is an administrative change. That's it, we cannot mix the 2 since both code paths are now properly separated.
2023-04-19 10:19:58 -04:00
/* we don't send an alert if the server was previously paused */
log_level = srv_was_stopping ? LOG_NOTICE : LOG_ALERT;
send_log(s->proxy, log_level, "%s.\n",
tmptrash->area);
free_trash_chunk(tmptrash);
}
MEDIUM: server: split srv_update_status() in two functions Considering that srv_update_status() is now synchronous again since 3ff577e1 ("MAJOR: server: make server state changes synchronous again"), and that we can easily identify if the update is from an operational or administrative context thanks to "MINOR: server: pass adm and op cause to srv_update_status()". And given that administrative and operational updates cannot be cumulated (since srv_update_status() is called synchronously and independently for admin updates and state/operational updates, and the function directly consumes the changes). We split srv_update_status() in 2 distinct parts: Either <type> is 0, meaning the update is an operational update which is handled by directly looking at cur_state and next_state to apply the proper transition. Also, the check to prevent operational state from being applied if MAINT admin flag is set is no longer needed given that the calling functions already ensure this (ie: srv_set_{running,stopping,stopped) Or <type> is 1, meaning the update is an administrative update, where cur_admin and next_admin are evaluated to apply the proper transition and deduct the resulting server state (next_state is updated implicitly). Once this is done, both operations share a common code path in srv_update_status() to update proxy and servers stats if required. Thanks to this change, the function's behavior is much more predictable, it is not an all-in-one function anymore. Either we apply an operational change, else it is an administrative change. That's it, we cannot mix the 2 since both code paths are now properly separated.
2023-04-19 10:19:58 -04:00
}
else if ((s->cur_state != SRV_ST_STOPPING) && (s->next_state == SRV_ST_STOPPING)) {
srv_lb_propagate(s);
srv_reset_path_parameters(s);
MEDIUM: server: split srv_update_status() in two functions Considering that srv_update_status() is now synchronous again since 3ff577e1 ("MAJOR: server: make server state changes synchronous again"), and that we can easily identify if the update is from an operational or administrative context thanks to "MINOR: server: pass adm and op cause to srv_update_status()". And given that administrative and operational updates cannot be cumulated (since srv_update_status() is called synchronously and independently for admin updates and state/operational updates, and the function directly consumes the changes). We split srv_update_status() in 2 distinct parts: Either <type> is 0, meaning the update is an operational update which is handled by directly looking at cur_state and next_state to apply the proper transition. Also, the check to prevent operational state from being applied if MAINT admin flag is set is no longer needed given that the calling functions already ensure this (ie: srv_set_{running,stopping,stopped) Or <type> is 1, meaning the update is an administrative update, where cur_admin and next_admin are evaluated to apply the proper transition and deduct the resulting server state (next_state is updated implicitly). Once this is done, both operations share a common code path in srv_update_status() to update proxy and servers stats if required. Thanks to this change, the function's behavior is much more predictable, it is not an all-in-one function anymore. Either we apply an operational change, else it is an administrative change. That's it, we cannot mix the 2 since both code paths are now properly separated.
2023-04-19 10:19:58 -04:00
/* we might have streams queued on this server and waiting for
* a connection. Those which are redispatchable will be queued
* to another server or to the proxy itself.
*/
xferred = pendconn_redistribute(s);
MEDIUM: server: split srv_update_status() in two functions Considering that srv_update_status() is now synchronous again since 3ff577e1 ("MAJOR: server: make server state changes synchronous again"), and that we can easily identify if the update is from an operational or administrative context thanks to "MINOR: server: pass adm and op cause to srv_update_status()". And given that administrative and operational updates cannot be cumulated (since srv_update_status() is called synchronously and independently for admin updates and state/operational updates, and the function directly consumes the changes). We split srv_update_status() in 2 distinct parts: Either <type> is 0, meaning the update is an operational update which is handled by directly looking at cur_state and next_state to apply the proper transition. Also, the check to prevent operational state from being applied if MAINT admin flag is set is no longer needed given that the calling functions already ensure this (ie: srv_set_{running,stopping,stopped) Or <type> is 1, meaning the update is an administrative update, where cur_admin and next_admin are evaluated to apply the proper transition and deduct the resulting server state (next_state is updated implicitly). Once this is done, both operations share a common code path in srv_update_status() to update proxy and servers stats if required. Thanks to this change, the function's behavior is much more predictable, it is not an all-in-one function anymore. Either we apply an operational change, else it is an administrative change. That's it, we cannot mix the 2 since both code paths are now properly separated.
2023-04-19 10:19:58 -04:00
tmptrash = alloc_trash_chunk();
if (tmptrash) {
chunk_printf(tmptrash,
"%sServer %s/%s is stopping", s->flags & SRV_F_BACKUP ? "Backup " : "",
s->proxy->id, s->id);
MEDIUM: server: split srv_update_status() in two functions Considering that srv_update_status() is now synchronous again since 3ff577e1 ("MAJOR: server: make server state changes synchronous again"), and that we can easily identify if the update is from an operational or administrative context thanks to "MINOR: server: pass adm and op cause to srv_update_status()". And given that administrative and operational updates cannot be cumulated (since srv_update_status() is called synchronously and independently for admin updates and state/operational updates, and the function directly consumes the changes). We split srv_update_status() in 2 distinct parts: Either <type> is 0, meaning the update is an operational update which is handled by directly looking at cur_state and next_state to apply the proper transition. Also, the check to prevent operational state from being applied if MAINT admin flag is set is no longer needed given that the calling functions already ensure this (ie: srv_set_{running,stopping,stopped) Or <type> is 1, meaning the update is an administrative update, where cur_admin and next_admin are evaluated to apply the proper transition and deduct the resulting server state (next_state is updated implicitly). Once this is done, both operations share a common code path in srv_update_status() to update proxy and servers stats if required. Thanks to this change, the function's behavior is much more predictable, it is not an all-in-one function anymore. Either we apply an operational change, else it is an administrative change. That's it, we cannot mix the 2 since both code paths are now properly separated.
2023-04-19 10:19:58 -04:00
srv_append_op_chg_cause(tmptrash, s, cause);
srv_append_more(tmptrash, s, xferred, 0);
MEDIUM: server: split srv_update_status() in two functions Considering that srv_update_status() is now synchronous again since 3ff577e1 ("MAJOR: server: make server state changes synchronous again"), and that we can easily identify if the update is from an operational or administrative context thanks to "MINOR: server: pass adm and op cause to srv_update_status()". And given that administrative and operational updates cannot be cumulated (since srv_update_status() is called synchronously and independently for admin updates and state/operational updates, and the function directly consumes the changes). We split srv_update_status() in 2 distinct parts: Either <type> is 0, meaning the update is an operational update which is handled by directly looking at cur_state and next_state to apply the proper transition. Also, the check to prevent operational state from being applied if MAINT admin flag is set is no longer needed given that the calling functions already ensure this (ie: srv_set_{running,stopping,stopped) Or <type> is 1, meaning the update is an administrative update, where cur_admin and next_admin are evaluated to apply the proper transition and deduct the resulting server state (next_state is updated implicitly). Once this is done, both operations share a common code path in srv_update_status() to update proxy and servers stats if required. Thanks to this change, the function's behavior is much more predictable, it is not an all-in-one function anymore. Either we apply an operational change, else it is an administrative change. That's it, we cannot mix the 2 since both code paths are now properly separated.
2023-04-19 10:19:58 -04:00
ha_warning("%s.\n", tmptrash->area);
send_log(s->proxy, LOG_NOTICE, "%s.\n",
tmptrash->area);
free_trash_chunk(tmptrash);
}
MEDIUM: server: split srv_update_status() in two functions Considering that srv_update_status() is now synchronous again since 3ff577e1 ("MAJOR: server: make server state changes synchronous again"), and that we can easily identify if the update is from an operational or administrative context thanks to "MINOR: server: pass adm and op cause to srv_update_status()". And given that administrative and operational updates cannot be cumulated (since srv_update_status() is called synchronously and independently for admin updates and state/operational updates, and the function directly consumes the changes). We split srv_update_status() in 2 distinct parts: Either <type> is 0, meaning the update is an operational update which is handled by directly looking at cur_state and next_state to apply the proper transition. Also, the check to prevent operational state from being applied if MAINT admin flag is set is no longer needed given that the calling functions already ensure this (ie: srv_set_{running,stopping,stopped) Or <type> is 1, meaning the update is an administrative update, where cur_admin and next_admin are evaluated to apply the proper transition and deduct the resulting server state (next_state is updated implicitly). Once this is done, both operations share a common code path in srv_update_status() to update proxy and servers stats if required. Thanks to this change, the function's behavior is much more predictable, it is not an all-in-one function anymore. Either we apply an operational change, else it is an administrative change. That's it, we cannot mix the 2 since both code paths are now properly separated.
2023-04-19 10:19:58 -04:00
}
else if (((s->cur_state != SRV_ST_RUNNING) && (s->next_state == SRV_ST_RUNNING))
|| ((s->cur_state != SRV_ST_STARTING) && (s->next_state == SRV_ST_STARTING))) {
MEDIUM: server: split srv_update_status() in two functions Considering that srv_update_status() is now synchronous again since 3ff577e1 ("MAJOR: server: make server state changes synchronous again"), and that we can easily identify if the update is from an operational or administrative context thanks to "MINOR: server: pass adm and op cause to srv_update_status()". And given that administrative and operational updates cannot be cumulated (since srv_update_status() is called synchronously and independently for admin updates and state/operational updates, and the function directly consumes the changes). We split srv_update_status() in 2 distinct parts: Either <type> is 0, meaning the update is an operational update which is handled by directly looking at cur_state and next_state to apply the proper transition. Also, the check to prevent operational state from being applied if MAINT admin flag is set is no longer needed given that the calling functions already ensure this (ie: srv_set_{running,stopping,stopped) Or <type> is 1, meaning the update is an administrative update, where cur_admin and next_admin are evaluated to apply the proper transition and deduct the resulting server state (next_state is updated implicitly). Once this is done, both operations share a common code path in srv_update_status() to update proxy and servers stats if required. Thanks to this change, the function's behavior is much more predictable, it is not an all-in-one function anymore. Either we apply an operational change, else it is an administrative change. That's it, we cannot mix the 2 since both code paths are now properly separated.
2023-04-19 10:19:58 -04:00
if (s->next_state == SRV_ST_STARTING && s->warmup)
task_schedule(s->warmup, tick_add(now_ms, MS_TO_TICKS(MAX(1000, s->slowstart / 20))));
srv_reset_path_parameters(s);
MEDIUM: server: split srv_update_status() in two functions Considering that srv_update_status() is now synchronous again since 3ff577e1 ("MAJOR: server: make server state changes synchronous again"), and that we can easily identify if the update is from an operational or administrative context thanks to "MINOR: server: pass adm and op cause to srv_update_status()". And given that administrative and operational updates cannot be cumulated (since srv_update_status() is called synchronously and independently for admin updates and state/operational updates, and the function directly consumes the changes). We split srv_update_status() in 2 distinct parts: Either <type> is 0, meaning the update is an operational update which is handled by directly looking at cur_state and next_state to apply the proper transition. Also, the check to prevent operational state from being applied if MAINT admin flag is set is no longer needed given that the calling functions already ensure this (ie: srv_set_{running,stopping,stopped) Or <type> is 1, meaning the update is an administrative update, where cur_admin and next_admin are evaluated to apply the proper transition and deduct the resulting server state (next_state is updated implicitly). Once this is done, both operations share a common code path in srv_update_status() to update proxy and servers stats if required. Thanks to this change, the function's behavior is much more predictable, it is not an all-in-one function anymore. Either we apply an operational change, else it is an administrative change. That's it, we cannot mix the 2 since both code paths are now properly separated.
2023-04-19 10:19:58 -04:00
server_recalc_eweight(s, 0);
/* now propagate the status change to any LB algorithms */
srv_lb_propagate(s);
MEDIUM: server: split srv_update_status() in two functions Considering that srv_update_status() is now synchronous again since 3ff577e1 ("MAJOR: server: make server state changes synchronous again"), and that we can easily identify if the update is from an operational or administrative context thanks to "MINOR: server: pass adm and op cause to srv_update_status()". And given that administrative and operational updates cannot be cumulated (since srv_update_status() is called synchronously and independently for admin updates and state/operational updates, and the function directly consumes the changes). We split srv_update_status() in 2 distinct parts: Either <type> is 0, meaning the update is an operational update which is handled by directly looking at cur_state and next_state to apply the proper transition. Also, the check to prevent operational state from being applied if MAINT admin flag is set is no longer needed given that the calling functions already ensure this (ie: srv_set_{running,stopping,stopped) Or <type> is 1, meaning the update is an administrative update, where cur_admin and next_admin are evaluated to apply the proper transition and deduct the resulting server state (next_state is updated implicitly). Once this is done, both operations share a common code path in srv_update_status() to update proxy and servers stats if required. Thanks to this change, the function's behavior is much more predictable, it is not an all-in-one function anymore. Either we apply an operational change, else it is an administrative change. That's it, we cannot mix the 2 since both code paths are now properly separated.
2023-04-19 10:19:58 -04:00
/* If the server is set with "on-marked-up shutdown-backup-sessions",
* and it's not a backup server and its effective weight is > 0,
* then it can accept new connections, so we shut down all streams
* on all backup servers.
*/
if ((s->onmarkedup & HANA_ONMARKEDUP_SHUTDOWNBACKUPSESSIONS) &&
!(s->flags & SRV_F_BACKUP) && s->next_eweight)
srv_shutdown_backup_streams(s->proxy, SF_ERR_UP);
BUG/MEDIUM: queues: Do not use pendconn_grab_from_px(). pendconn_grab_from_px() was called when a server was brought back up, to get some streams waiting in the proxy's queue and get them to run on the newly available server. It is very similar to process_srv_queue(), except it only goes through the proxy's queue, which can be a problem, because there is a small race condition that could lead us to add more streams to the server queue just as it's going down. If that happens, the server would just be ignored when back up by new streams, as its queue is not empty, and it would never try to process its queue. The other problem with pendconn_grab_from_px() is that it is very liberal with how it dequeues streams, and it is not very good at enforcing maxconn, it could lead to having 3*maxconn connections. For both those reasons, just get rid of pendconn_grab_from_px(), and just use process_srv_queue(). Both problems are easy to reproduce, especially on a 64 threads machine, set a maxconn to 100, inject in H2 with 1000 concurrent connections containing up to 100 streams each, and after a few seconds/minutes the max number of concurrent output streams will be much higher than maxconn, and eventually the server will stop processing connections. It may be related to github issue #2744. Note that it doesn't totally fix the problem: we can occasionally see a few more connections than maxconn, but the max that has been observed is 4 more connections, and we no longer get multiple times maxconn. This should be backported up to 2.6.
2024-12-17 09:39:21 -05:00
/* check if we can handle some connections queued.
* We will take as many as we can handle.
MEDIUM: server: split srv_update_status() in two functions Considering that srv_update_status() is now synchronous again since 3ff577e1 ("MAJOR: server: make server state changes synchronous again"), and that we can easily identify if the update is from an operational or administrative context thanks to "MINOR: server: pass adm and op cause to srv_update_status()". And given that administrative and operational updates cannot be cumulated (since srv_update_status() is called synchronously and independently for admin updates and state/operational updates, and the function directly consumes the changes). We split srv_update_status() in 2 distinct parts: Either <type> is 0, meaning the update is an operational update which is handled by directly looking at cur_state and next_state to apply the proper transition. Also, the check to prevent operational state from being applied if MAINT admin flag is set is no longer needed given that the calling functions already ensure this (ie: srv_set_{running,stopping,stopped) Or <type> is 1, meaning the update is an administrative update, where cur_admin and next_admin are evaluated to apply the proper transition and deduct the resulting server state (next_state is updated implicitly). Once this is done, both operations share a common code path in srv_update_status() to update proxy and servers stats if required. Thanks to this change, the function's behavior is much more predictable, it is not an all-in-one function anymore. Either we apply an operational change, else it is an administrative change. That's it, we cannot mix the 2 since both code paths are now properly separated.
2023-04-19 10:19:58 -04:00
*/
xferred = process_srv_queue(s);
MEDIUM: server: split srv_update_status() in two functions Considering that srv_update_status() is now synchronous again since 3ff577e1 ("MAJOR: server: make server state changes synchronous again"), and that we can easily identify if the update is from an operational or administrative context thanks to "MINOR: server: pass adm and op cause to srv_update_status()". And given that administrative and operational updates cannot be cumulated (since srv_update_status() is called synchronously and independently for admin updates and state/operational updates, and the function directly consumes the changes). We split srv_update_status() in 2 distinct parts: Either <type> is 0, meaning the update is an operational update which is handled by directly looking at cur_state and next_state to apply the proper transition. Also, the check to prevent operational state from being applied if MAINT admin flag is set is no longer needed given that the calling functions already ensure this (ie: srv_set_{running,stopping,stopped) Or <type> is 1, meaning the update is an administrative update, where cur_admin and next_admin are evaluated to apply the proper transition and deduct the resulting server state (next_state is updated implicitly). Once this is done, both operations share a common code path in srv_update_status() to update proxy and servers stats if required. Thanks to this change, the function's behavior is much more predictable, it is not an all-in-one function anymore. Either we apply an operational change, else it is an administrative change. That's it, we cannot mix the 2 since both code paths are now properly separated.
2023-04-19 10:19:58 -04:00
tmptrash = alloc_trash_chunk();
if (tmptrash) {
chunk_printf(tmptrash,
"%sServer %s/%s is UP", s->flags & SRV_F_BACKUP ? "Backup " : "",
s->proxy->id, s->id);
MEDIUM: server: split srv_update_status() in two functions Considering that srv_update_status() is now synchronous again since 3ff577e1 ("MAJOR: server: make server state changes synchronous again"), and that we can easily identify if the update is from an operational or administrative context thanks to "MINOR: server: pass adm and op cause to srv_update_status()". And given that administrative and operational updates cannot be cumulated (since srv_update_status() is called synchronously and independently for admin updates and state/operational updates, and the function directly consumes the changes). We split srv_update_status() in 2 distinct parts: Either <type> is 0, meaning the update is an operational update which is handled by directly looking at cur_state and next_state to apply the proper transition. Also, the check to prevent operational state from being applied if MAINT admin flag is set is no longer needed given that the calling functions already ensure this (ie: srv_set_{running,stopping,stopped) Or <type> is 1, meaning the update is an administrative update, where cur_admin and next_admin are evaluated to apply the proper transition and deduct the resulting server state (next_state is updated implicitly). Once this is done, both operations share a common code path in srv_update_status() to update proxy and servers stats if required. Thanks to this change, the function's behavior is much more predictable, it is not an all-in-one function anymore. Either we apply an operational change, else it is an administrative change. That's it, we cannot mix the 2 since both code paths are now properly separated.
2023-04-19 10:19:58 -04:00
srv_append_op_chg_cause(tmptrash, s, cause);
srv_append_more(tmptrash, s, xferred, 0);
MEDIUM: server: split srv_update_status() in two functions Considering that srv_update_status() is now synchronous again since 3ff577e1 ("MAJOR: server: make server state changes synchronous again"), and that we can easily identify if the update is from an operational or administrative context thanks to "MINOR: server: pass adm and op cause to srv_update_status()". And given that administrative and operational updates cannot be cumulated (since srv_update_status() is called synchronously and independently for admin updates and state/operational updates, and the function directly consumes the changes). We split srv_update_status() in 2 distinct parts: Either <type> is 0, meaning the update is an operational update which is handled by directly looking at cur_state and next_state to apply the proper transition. Also, the check to prevent operational state from being applied if MAINT admin flag is set is no longer needed given that the calling functions already ensure this (ie: srv_set_{running,stopping,stopped) Or <type> is 1, meaning the update is an administrative update, where cur_admin and next_admin are evaluated to apply the proper transition and deduct the resulting server state (next_state is updated implicitly). Once this is done, both operations share a common code path in srv_update_status() to update proxy and servers stats if required. Thanks to this change, the function's behavior is much more predictable, it is not an all-in-one function anymore. Either we apply an operational change, else it is an administrative change. That's it, we cannot mix the 2 since both code paths are now properly separated.
2023-04-19 10:19:58 -04:00
ha_warning("%s.\n", tmptrash->area);
send_log(s->proxy, LOG_NOTICE, "%s.\n",
tmptrash->area);
free_trash_chunk(tmptrash);
}
}
MEDIUM: server: split srv_update_status() in two functions Considering that srv_update_status() is now synchronous again since 3ff577e1 ("MAJOR: server: make server state changes synchronous again"), and that we can easily identify if the update is from an operational or administrative context thanks to "MINOR: server: pass adm and op cause to srv_update_status()". And given that administrative and operational updates cannot be cumulated (since srv_update_status() is called synchronously and independently for admin updates and state/operational updates, and the function directly consumes the changes). We split srv_update_status() in 2 distinct parts: Either <type> is 0, meaning the update is an operational update which is handled by directly looking at cur_state and next_state to apply the proper transition. Also, the check to prevent operational state from being applied if MAINT admin flag is set is no longer needed given that the calling functions already ensure this (ie: srv_set_{running,stopping,stopped) Or <type> is 1, meaning the update is an administrative update, where cur_admin and next_admin are evaluated to apply the proper transition and deduct the resulting server state (next_state is updated implicitly). Once this is done, both operations share a common code path in srv_update_status() to update proxy and servers stats if required. Thanks to this change, the function's behavior is much more predictable, it is not an all-in-one function anymore. Either we apply an operational change, else it is an administrative change. That's it, we cannot mix the 2 since both code paths are now properly separated.
2023-04-19 10:19:58 -04:00
else if (s->cur_eweight != s->next_eweight) {
/* now propagate the status change to any LB algorithms */
srv_lb_propagate(s);
}
return xferred;
}
MEDIUM: server: split srv_update_status() in two functions Considering that srv_update_status() is now synchronous again since 3ff577e1 ("MAJOR: server: make server state changes synchronous again"), and that we can easily identify if the update is from an operational or administrative context thanks to "MINOR: server: pass adm and op cause to srv_update_status()". And given that administrative and operational updates cannot be cumulated (since srv_update_status() is called synchronously and independently for admin updates and state/operational updates, and the function directly consumes the changes). We split srv_update_status() in 2 distinct parts: Either <type> is 0, meaning the update is an operational update which is handled by directly looking at cur_state and next_state to apply the proper transition. Also, the check to prevent operational state from being applied if MAINT admin flag is set is no longer needed given that the calling functions already ensure this (ie: srv_set_{running,stopping,stopped) Or <type> is 1, meaning the update is an administrative update, where cur_admin and next_admin are evaluated to apply the proper transition and deduct the resulting server state (next_state is updated implicitly). Once this is done, both operations share a common code path in srv_update_status() to update proxy and servers stats if required. Thanks to this change, the function's behavior is much more predictable, it is not an all-in-one function anymore. Either we apply an operational change, else it is an administrative change. That's it, we cannot mix the 2 since both code paths are now properly separated.
2023-04-19 10:19:58 -04:00
/* Deduce and apply the server state resulting from an administrative change.
 * The transition is derived by comparing the current admin flags
 * (s->cur_admin) with the requested ones (s->next_admin); s->next_state is
 * updated as a side effect whenever the transition requires it.
 *
 * <s> is the server being updated.
 * <cause> is the administrative cause; in this function it is only forwarded
 * to srv_append_adm_chg_cause() while building the log messages.
 *
 * The function returns the number of requeued sessions (either taken by
 * the server or redispatched to other servers) due to the server state
 * change.
 */
static int _srv_update_status_adm(struct server *s, enum srv_adm_st_chg_cause cause)
{
struct buffer *tmptrash = NULL;
/* snapshot taken before any change below; it is only used to pick the log
 * level (NOTICE vs ALERT) when a running server goes down for maintenance.
 */
int srv_was_stopping = (s->cur_state == SRV_ST_STOPPING) || (s->cur_admin & SRV_ADMF_DRAIN);
int xferred = 0;
/* Maintenance must also disable health checks */
if (!(s->cur_admin & SRV_ADMF_MAINT) && (s->next_admin & SRV_ADMF_MAINT)) {
/* entering maintenance: no MAINT flag was set and at least one is now requested */
if (s->check.state & CHK_ST_ENABLED) {
s->check.state |= CHK_ST_PAUSED;
s->check.health = 0;
}
if (s->cur_state == SRV_ST_STOPPED) { /* server was already down */
tmptrash = alloc_trash_chunk();
if (tmptrash) {
chunk_printf(tmptrash,
"%sServer %s/%s was DOWN and now enters maintenance",
s->flags & SRV_F_BACKUP ? "Backup " : "", s->proxy->id, s->id);
MEDIUM: server: split srv_update_status() in two functions Considering that srv_update_status() is now synchronous again since 3ff577e1 ("MAJOR: server: make server state changes synchronous again"), and that we can easily identify if the update is from an operational or administrative context thanks to "MINOR: server: pass adm and op cause to srv_update_status()". And given that administrative and operational updates cannot be cumulated (since srv_update_status() is called synchronously and independently for admin updates and state/operational updates, and the function directly consumes the changes). We split srv_update_status() in 2 distinct parts: Either <type> is 0, meaning the update is an operational update which is handled by directly looking at cur_state and next_state to apply the proper transition. Also, the check to prevent operational state from being applied if MAINT admin flag is set is no longer needed given that the calling functions already ensure this (ie: srv_set_{running,stopping,stopped) Or <type> is 1, meaning the update is an administrative update, where cur_admin and next_admin are evaluated to apply the proper transition and deduct the resulting server state (next_state is updated implicitly). Once this is done, both operations share a common code path in srv_update_status() to update proxy and servers stats if required. Thanks to this change, the function's behavior is much more predictable, it is not an all-in-one function anymore. Either we apply an operational change, else it is an administrative change. That's it, we cannot mix the 2 since both code paths are now properly separated.
2023-04-19 10:19:58 -04:00
srv_append_adm_chg_cause(tmptrash, s, cause);
/* nothing was requeued on this path, hence the -1 passed as count
 * (presumably makes srv_append_more() skip the requeue report - TODO confirm)
 */
srv_append_more(tmptrash, s, -1, (s->next_admin & SRV_ADMF_FMAINT));
/* stay silent while the process is still starting */
if (!(global.mode & MODE_STARTING)) {
ha_warning("%s.\n", tmptrash->area);
send_log(s->proxy, LOG_NOTICE, "%s.\n",
tmptrash->area);
}
free_trash_chunk(tmptrash);
}
/* force connection cleanup on the given server */
srv_cleanup_connections(s);
}
else { /* server was still running */
s->check.health = 0; /* failure */
s->next_state = SRV_ST_STOPPED;
srv_lb_propagate(s);
srv_reset_path_parameters(s);
/* streams attached to the server are only shut down when the
 * HANA_ONMARKEDDOWN_SHUTDOWNSESSIONS action is set on it
 */
if (s->onmarkeddown & HANA_ONMARKEDDOWN_SHUTDOWNSESSIONS)
srv_shutdown_streams(s, SF_ERR_DOWN);
BUG/MEDIUM: connections: force connections cleanup on server changes I've been trying to understand a change of behaviour between v2.2dev5 and v2.2dev6. Indeed our probe is regularly testing to add and remove servers on a given backend such as: # echo "show servers state be_foo" | sudo socat stdio /var/lib/haproxy/stats 113 be_foo 1 srv0 10.236.139.34 2 0 1 1 263 15 3 4 6 0 0 0 - 31255 - 113 be_foo 2 srv1 0.0.0.0 0 1 256 256 0 15 3 0 14 0 0 0 - 0 - -> curl on the corresponding frontend: reply from server:31255 # echo "set server be_foo/srv1 addr 10.236.139.34 port 31257" | sudo socat stdio /var/lib/haproxy/stats IP changed from '0.0.0.0' to '10.236.139.34', port changed from '0' to '31257' by 'stats socket command' # echo "set server be_foo/srv1 weight 256" | sudo socat stdio /var/lib/haproxy/stats # echo "set server be_foo/srv1 check-port 8500" | sudo socat stdio /var/lib/haproxy/stats health check port updated. # echo "set server be_foo/srv1 state ready" | sudo socat stdio /var/lib/haproxy/stats # echo "show servers state be_foo" | sudo socat stdio /var/lib/haproxy/stats 113 be_foo 1 srv0 10.236.139.34 2 0 1 1 105 15 3 4 6 0 0 0 - 31255 - 113 be_foo 2 srv1 10.236.139.34 2 0 256 256 2319 15 3 2 6 0 0 0 - 31257 - -> curl on the corresponding frontend: reply for server:31257 (notice the difference of weight) # echo "set server be_foo/srv1 state maint" | sudo socat stdio /var/lib/haproxy/stats # echo "set server be_foo/srv1 addr 0.0.0.0 port 0" | sudo socat stdio /var/lib/haproxy/stats IP changed from '10.236.139.34' to '0.0.0.0', port changed from '31257' to '0' by 'stats socket command' # echo "show servers state be_foo" | sudo socat stdio /var/lib/haproxy/stats 113 be_foo 1 srv0 10.236.139.34 2 0 1 1 263 15 3 4 6 0 0 0 - 31255 - 113 be_foo 2 srv1 0.0.0.0 0 1 256 256 0 15 3 0 14 0 0 0 - 0 - -> curl on the corresponding frontend: reply from server:31255 # echo "set server be_foo/srv1 addr 10.236.139.34 port 31256" | sudo socat stdio /var/lib/haproxy/stats IP changed 
from '0.0.0.0' to '10.236.139.34', port changed from '0' to '31256' by 'stats socket command' # echo "set server be_foo/srv1 weight 256" | sudo socat stdio /var/lib/haproxy/stats # echo "set server be_foo/srv1 check-port 8500" | sudo socat stdio /var/lib/haproxy/stats health check port updated. # echo "set server be_foo/srv1 state ready" | sudo socat stdio /var/lib/haproxy/stats # echo "show servers state be_foo" | sudo socat stdio /var/lib/haproxy/stats 113 be_foo 1 srv0 10.236.139.34 2 0 1 1 105 15 3 4 6 0 0 0 - 31255 - 113 be_foo 2 srv1 10.236.139.34 2 0 256 256 2319 15 3 2 6 0 0 0 - 31256 - -> curl on the corresponding frontend: reply from server:31257 (!) Here we indeed would expect to get an anver from server:31256. The issue is highly linked to the usage of `pool-purge-delay`, with a value which is higher than the duration of the test, 10s in our case. a git bisect between dev5 and dev6 seems to show commit 079cb9af22da6 ("MEDIUM: connections: Revamp the way idle connections are killed") being the origin of this new behaviour. So if I understand the later correctly, it seems that it was more a matter of chance that we did not saw the issue earlier. My patch proposes to force clean idle connections in the two following cases: - we set a (still running) server to maintenance - we change the ip/port of a server This commit should be backported to 2.1, 2.0, and 1.9. Signed-off-by: William Dauchy <w.dauchy@criteo.com>
2020-05-02 15:52:36 -04:00
/* force connection cleanup on the given server */
srv_cleanup_connections(s);
/* we might have streams queued on this server and waiting for
* a connection. Those which are redispatchable will be queued
* to another server or to the proxy itself.
*/
xferred = pendconn_redistribute(s);
tmptrash = alloc_trash_chunk();
if (tmptrash) {
chunk_printf(tmptrash,
"%sServer %s/%s is going DOWN for maintenance",
s->flags & SRV_F_BACKUP ? "Backup " : "",
s->proxy->id, s->id);
MEDIUM: server: split srv_update_status() in two functions Considering that srv_update_status() is now synchronous again since 3ff577e1 ("MAJOR: server: make server state changes synchronous again"), and that we can easily identify if the update is from an operational or administrative context thanks to "MINOR: server: pass adm and op cause to srv_update_status()". And given that administrative and operational updates cannot be cumulated (since srv_update_status() is called synchronously and independently for admin updates and state/operational updates, and the function directly consumes the changes). We split srv_update_status() in 2 distinct parts: Either <type> is 0, meaning the update is an operational update which is handled by directly looking at cur_state and next_state to apply the proper transition. Also, the check to prevent operational state from being applied if MAINT admin flag is set is no longer needed given that the calling functions already ensure this (ie: srv_set_{running,stopping,stopped) Or <type> is 1, meaning the update is an administrative update, where cur_admin and next_admin are evaluated to apply the proper transition and deduct the resulting server state (next_state is updated implicitly). Once this is done, both operations share a common code path in srv_update_status() to update proxy and servers stats if required. Thanks to this change, the function's behavior is much more predictable, it is not an all-in-one function anymore. Either we apply an operational change, else it is an administrative change. That's it, we cannot mix the 2 since both code paths are now properly separated.
2023-04-19 10:19:58 -04:00
srv_append_adm_chg_cause(tmptrash, s, cause);
srv_append_more(tmptrash, s, xferred, (s->next_admin & SRV_ADMF_FMAINT));
if (!(global.mode & MODE_STARTING)) {
ha_warning("%s.\n", tmptrash->area);
/* a server that was already stopping or draining is logged at
 * NOTICE level, any other one at ALERT level
 */
send_log(s->proxy, srv_was_stopping ? LOG_NOTICE : LOG_ALERT, "%s.\n",
tmptrash->area);
}
free_trash_chunk(tmptrash);
}
}
}
else if ((s->cur_admin & SRV_ADMF_MAINT) && !(s->next_admin & SRV_ADMF_MAINT)) {
/* OK here we're leaving maintenance, we have many things to check,
* because the server might possibly be coming back up depending on
* its state. In practice, leaving maintenance means that we should
* immediately turn to UP (more or less the slowstart) under the
* following conditions :
* - server is neither checked nor tracked
* - server tracks another server which is not checked
* - server tracks another server which is already up
* Which sums up as something simpler :
* "either the tracking server is up or the server's checks are disabled
* or up". Otherwise we only re-enable health checks. There's a special
* case associated to the stopping state which can be inherited. Note
* that the server might still be in drain mode, which is naturally dealt
* with by the lower level functions.
*/
if (s->check.state & CHK_ST_ENABLED) {
/* resume the checks and reset the health counter according to
 * the configured initial state of the server
 */
s->check.state &= ~CHK_ST_PAUSED;
if(s->init_state == SRV_INIT_STATE_FULLY_UP) {
s->check.health = s->check.rise + s->check.fall - 1; /* initially UP, when all checks fail to bring server DOWN */
}
else if(s->init_state == SRV_INIT_STATE_DOWN) {
s->check.health = s->check.rise - 1; /* initially DOWN, when one check is successful bring server UP */
}
else if(s->init_state == SRV_INIT_STATE_FULLY_DOWN) {
s->check.health = 0; /* initially DOWN, when all checks are successful bring server UP */
} else {
s->check.health = s->check.rise; /* initially UP, when one check fails check brings server DOWN */
}
}
/* the server may come back up only if the tracked server (if any) is not
 * stopped, and both the agent and the health check are either disabled or
 * have reached their rise threshold
 */
if ((!s->track || s->track->next_state != SRV_ST_STOPPED) &&
(!(s->agent.state & CHK_ST_ENABLED) || (s->agent.health >= s->agent.rise)) &&
(!(s->check.state & CHK_ST_ENABLED) || (s->check.health >= s->check.rise))) {
if (s->track && s->track->next_state == SRV_ST_STOPPING) {
/* inherit the stopping state from the tracked server */
s->next_state = SRV_ST_STOPPING;
}
else {
s->next_state = SRV_ST_STARTING;
/* with a slowstart the server stays in STARTING state and the
 * warmup task, when there is one, is scheduled in
 * max(1000, slowstart / 20) ms; without slowstart the server
 * goes straight to RUNNING
 */
if (s->slowstart > 0) {
if (s->warmup)
task_schedule(s->warmup, tick_add(now_ms, MS_TO_TICKS(MAX(1000, s->slowstart / 20))));
}
else
s->next_state = SRV_ST_RUNNING;
}
}
tmptrash = alloc_trash_chunk();
if (tmptrash) {
/* the three tests below are independent (no else): one message is
 * formatted per maintenance flag being cleared. Presumably
 * chunk_printf() restarts the chunk each time so that the last
 * matching message is the one reported - TODO confirm.
 */
if (!(s->next_admin & SRV_ADMF_FMAINT) && (s->cur_admin & SRV_ADMF_FMAINT)) {
chunk_printf(tmptrash,
"%sServer %s/%s is %s/%s (leaving forced maintenance)",
s->flags & SRV_F_BACKUP ? "Backup " : "",
s->proxy->id, s->id,
(s->next_state == SRV_ST_STOPPED) ? "DOWN" : "UP",
(s->next_admin & SRV_ADMF_DRAIN) ? "DRAIN" : "READY");
}
if (!(s->next_admin & SRV_ADMF_RMAINT) && (s->cur_admin & SRV_ADMF_RMAINT)) {
chunk_printf(tmptrash,
"%sServer %s/%s ('%s') is %s/%s (resolves again)",
s->flags & SRV_F_BACKUP ? "Backup " : "",
s->proxy->id, s->id, s->hostname,
(s->next_state == SRV_ST_STOPPED) ? "DOWN" : "UP",
(s->next_admin & SRV_ADMF_DRAIN) ? "DRAIN" : "READY");
}
if (!(s->next_admin & SRV_ADMF_IMAINT) && (s->cur_admin & SRV_ADMF_IMAINT)) {
chunk_printf(tmptrash,
"%sServer %s/%s is %s/%s (leaving maintenance)",
s->flags & SRV_F_BACKUP ? "Backup " : "",
s->proxy->id, s->id,
(s->next_state == SRV_ST_STOPPED) ? "DOWN" : "UP",
(s->next_admin & SRV_ADMF_DRAIN) ? "DRAIN" : "READY");
}
/* NOTE(review): unlike the entering-maintenance and entering-drain
 * paths, this report is not skipped when MODE_STARTING is set in
 * global.mode - confirm this is intended.
 */
ha_warning("%s.\n", tmptrash->area);
send_log(s->proxy, LOG_NOTICE, "%s.\n",
tmptrash->area);
free_trash_chunk(tmptrash);
}
server_recalc_eweight(s, 0);
/* now propagate the status change to any LB algorithms */
srv_lb_propagate(s);
/* If the server is set with "on-marked-up shutdown-backup-sessions",
* and it's not a backup server and its effective weight is > 0,
* then it can accept new connections, so we shut down all streams
* on all backup servers.
*/
if ((s->onmarkedup & HANA_ONMARKEDUP_SHUTDOWNBACKUPSESSIONS) &&
!(s->flags & SRV_F_BACKUP) && s->next_eweight)
srv_shutdown_backup_streams(s->proxy, SF_ERR_UP);
BUG/MEDIUM: queues: Do not use pendconn_grab_from_px(). pendconn_grab_from_px() was called when a server was brought back up, to get some streams waiting in the proxy's queue and get them to run on the newly available server. It is very similar to process_srv_queue(), except it only goes through the proxy's queue, which can be a problem, because there is a small race condition that could lead us to add more streams to the server queue just as it's going down. If that happens, the server would just be ignored when back up by new streams, as its queue is not empty, and it would never try to process its queue. The other problem with pendconn_grab_from_px() is that it is very liberal with how it dequeues streams, and it is not very good at enforcing maxconn, it could lead to having 3*maxconn connections. For both those reasons, just get rid of pendconn_grab_from_px(), and just use process_srv_queue(). Both problems are easy to reproduce, especially on a 64 threads machine, set a maxconn to 100, inject in H2 with 1000 concurrent connections containing up to 100 streams each, and after a few seconds/minutes the max number of concurrent output streams will be much higher than maxconn, and eventually the server will stop processing connections. It may be related to github issue #2744. Note that it doesn't totally fix the problem, we can occasionally see a few more connections than maxconn, but the max that have been observed is 4 more connections, we no longer get multiple times maxconn. have more outgoing connections than maxconn, This should be backported up to 2.6.
2024-12-17 09:39:21 -05:00
/* check if we can handle some connections queued.
* We will take as many as we can handle.
*/
xferred = process_srv_queue(s);
}
else if (s->next_admin & SRV_ADMF_MAINT) {
/* remaining in maintenance mode, let's inform precisely about the
* situation.
*/
/* the FMAINT test stands alone, while the RMAINT and IMAINT tests
 * below are chained with an else: up to two messages may be emitted
 */
if (!(s->next_admin & SRV_ADMF_FMAINT) && (s->cur_admin & SRV_ADMF_FMAINT)) {
tmptrash = alloc_trash_chunk();
if (tmptrash) {
chunk_printf(tmptrash,
"%sServer %s/%s is leaving forced maintenance but remains in maintenance",
s->flags & SRV_F_BACKUP ? "Backup " : "",
s->proxy->id, s->id);
if (s->track) /* normally it's mandatory here */
chunk_appendf(tmptrash, " via %s/%s",
s->track->proxy->id, s->track->id);
ha_warning("%s.\n", tmptrash->area);
send_log(s->proxy, LOG_NOTICE, "%s.\n",
tmptrash->area);
free_trash_chunk(tmptrash);
}
}
if (!(s->next_admin & SRV_ADMF_RMAINT) && (s->cur_admin & SRV_ADMF_RMAINT)) {
tmptrash = alloc_trash_chunk();
if (tmptrash) {
chunk_printf(tmptrash,
"%sServer %s/%s ('%s') resolves again but remains in maintenance",
s->flags & SRV_F_BACKUP ? "Backup " : "",
s->proxy->id, s->id, s->hostname);
if (s->track) /* normally it's mandatory here */
chunk_appendf(tmptrash, " via %s/%s",
s->track->proxy->id, s->track->id);
ha_warning("%s.\n", tmptrash->area);
send_log(s->proxy, LOG_NOTICE, "%s.\n",
tmptrash->area);
free_trash_chunk(tmptrash);
}
}
/* NOTE(review): this branch is taken when the IMAINT flag is cleared,
 * yet the message speaks about forced maintenance - confirm the
 * wording is the intended one.
 */
else if (!(s->next_admin & SRV_ADMF_IMAINT) && (s->cur_admin & SRV_ADMF_IMAINT)) {
tmptrash = alloc_trash_chunk();
if (tmptrash) {
chunk_printf(tmptrash,
"%sServer %s/%s remains in forced maintenance",
s->flags & SRV_F_BACKUP ? "Backup " : "",
s->proxy->id, s->id);
ha_warning("%s.\n", tmptrash->area);
send_log(s->proxy, LOG_NOTICE, "%s.\n",
tmptrash->area);
free_trash_chunk(tmptrash);
}
}
/* don't report anything when leaving drain mode and remaining in maintenance */
}
/* drain transitions are only evaluated when no maintenance flag is
 * requested. This is a plain if (not an else of the chain above), so it
 * is also evaluated right after the leaving-maintenance branch.
 */
if (!(s->next_admin & SRV_ADMF_MAINT)) {
if (!(s->cur_admin & SRV_ADMF_DRAIN) && (s->next_admin & SRV_ADMF_DRAIN)) {
/* drain state is applied only if not yet in maint */
srv_lb_propagate(s);
/* we might have streams queued on this server and waiting for
* a connection. Those which are redispatchable will be queued
* to another server or to the proxy itself.
*/
/* this replaces any count stored in xferred by an earlier branch */
xferred = pendconn_redistribute(s);
tmptrash = alloc_trash_chunk();
if (tmptrash) {
chunk_printf(tmptrash, "%sServer %s/%s enters drain state",
s->flags & SRV_F_BACKUP ? "Backup " : "", s->proxy->id, s->id);
MEDIUM: server: split srv_update_status() in two functions Considering that srv_update_status() is now synchronous again since 3ff577e1 ("MAJOR: server: make server state changes synchronous again"), and that we can easily identify if the update is from an operational or administrative context thanks to "MINOR: server: pass adm and op cause to srv_update_status()". And given that administrative and operational updates cannot be cumulated (since srv_update_status() is called synchronously and independently for admin updates and state/operational updates, and the function directly consumes the changes). We split srv_update_status() in 2 distinct parts: Either <type> is 0, meaning the update is an operational update which is handled by directly looking at cur_state and next_state to apply the proper transition. Also, the check to prevent operational state from being applied if MAINT admin flag is set is no longer needed given that the calling functions already ensure this (ie: srv_set_{running,stopping,stopped) Or <type> is 1, meaning the update is an administrative update, where cur_admin and next_admin are evaluated to apply the proper transition and deduct the resulting server state (next_state is updated implicitly). Once this is done, both operations share a common code path in srv_update_status() to update proxy and servers stats if required. Thanks to this change, the function's behavior is much more predictable, it is not an all-in-one function anymore. Either we apply an operational change, else it is an administrative change. That's it, we cannot mix the 2 since both code paths are now properly separated.
2023-04-19 10:19:58 -04:00
srv_append_adm_chg_cause(tmptrash, s, cause);
srv_append_more(tmptrash, s, xferred, (s->next_admin & SRV_ADMF_FDRAIN));
if (!(global.mode & MODE_STARTING)) {
ha_warning("%s.\n", tmptrash->area);
send_log(s->proxy, LOG_NOTICE, "%s.\n",
tmptrash->area);
}
free_trash_chunk(tmptrash);
}
}
else if ((s->cur_admin & SRV_ADMF_DRAIN) && !(s->next_admin & SRV_ADMF_DRAIN)) {
/* OK completely leaving drain mode */
server_recalc_eweight(s, 0);
tmptrash = alloc_trash_chunk();
if (tmptrash) {
/* the wording depends on whether the drain that is left was a
 * forced one (FDRAIN set in the current admin flags) or not
 */
if (s->cur_admin & SRV_ADMF_FDRAIN) {
chunk_printf(tmptrash,
"%sServer %s/%s is %s (leaving forced drain)",
s->flags & SRV_F_BACKUP ? "Backup " : "",
s->proxy->id, s->id,
(s->next_state == SRV_ST_STOPPED) ? "DOWN" : "UP");
}
else {
chunk_printf(tmptrash,
"%sServer %s/%s is %s (leaving drain)",
s->flags & SRV_F_BACKUP ? "Backup " : "",
s->proxy->id, s->id,
(s->next_state == SRV_ST_STOPPED) ? "DOWN" : "UP");
if (s->track) /* normally it's mandatory here */
chunk_appendf(tmptrash, " via %s/%s",
s->track->proxy->id, s->track->id);
}
ha_warning("%s.\n", tmptrash->area);
send_log(s->proxy, LOG_NOTICE, "%s.\n",
tmptrash->area);
free_trash_chunk(tmptrash);
}
/* now propagate the status change to any LB algorithms */
srv_lb_propagate(s);
}
else if ((s->next_admin & SRV_ADMF_DRAIN)) {
/* remaining in drain mode after removing one of its flags */
tmptrash = alloc_trash_chunk();
if (tmptrash) {
/* report which kind of drain is still in effect */
if (!(s->next_admin & SRV_ADMF_FDRAIN)) {
chunk_printf(tmptrash,
"%sServer %s/%s remains in drain mode",
s->flags & SRV_F_BACKUP ? "Backup " : "",
s->proxy->id, s->id);
if (s->track) /* normally it's mandatory here */
chunk_appendf(tmptrash, " via %s/%s",
s->track->proxy->id, s->track->id);
}
else {
chunk_printf(tmptrash,
"%sServer %s/%s remains in forced drain mode",
s->flags & SRV_F_BACKUP ? "Backup " : "",
s->proxy->id, s->id);
}
ha_warning("%s.\n", tmptrash->area);
send_log(s->proxy, LOG_NOTICE, "%s.\n",
tmptrash->area);
free_trash_chunk(tmptrash);
}
}
}
MEDIUM: server: split srv_update_status() in two functions Considering that srv_update_status() is now synchronous again since 3ff577e1 ("MAJOR: server: make server state changes synchronous again"), and that we can easily identify if the update is from an operational or administrative context thanks to "MINOR: server: pass adm and op cause to srv_update_status()". And given that administrative and operational updates cannot be cumulated (since srv_update_status() is called synchronously and independently for admin updates and state/operational updates, and the function directly consumes the changes). We split srv_update_status() in 2 distinct parts: Either <type> is 0, meaning the update is an operational update which is handled by directly looking at cur_state and next_state to apply the proper transition. Also, the check to prevent operational state from being applied if MAINT admin flag is set is no longer needed given that the calling functions already ensure this (ie: srv_set_{running,stopping,stopped) Or <type> is 1, meaning the update is an administrative update, where cur_admin and next_admin are evaluated to apply the proper transition and deduct the resulting server state (next_state is updated implicitly). Once this is done, both operations share a common code path in srv_update_status() to update proxy and servers stats if required. Thanks to this change, the function's behavior is much more predictable, it is not an all-in-one function anymore. Either we apply an operational change, else it is an administrative change. That's it, we cannot mix the 2 since both code paths are now properly separated.
2023-04-19 10:19:58 -04:00
/* number of streams requeued by the last transition that set it, 0 if none */
return xferred;
}
/*
* This function applies server's status changes.
*
* Must be called with the server lock held. This may also be called at init
* time as the result of parsing the state file, in which case no lock will be
* held, and the server's warmup task can be null.
* <type> should be 0 for operational and 1 for administrative
* <cause> must be srv_op_st_chg_cause enum for operational and
* srv_adm_st_chg_cause enum for administrative
*/
static void srv_update_status(struct server *s, int type, int cause)
{
int prev_srv_count = s->proxy->srv_bck + s->proxy->srv_act;
enum srv_state srv_prev_state = s->cur_state;
union {
struct event_hdl_cb_data_server_state state;
struct event_hdl_cb_data_server_admin admin;
struct event_hdl_cb_data_server common;
} cb_data;
int requeued;
/* prepare common server event data */
_srv_event_hdl_prepare(&cb_data.common, s, 0);
MEDIUM: server: split srv_update_status() in two functions Considering that srv_update_status() is now synchronous again since 3ff577e1 ("MAJOR: server: make server state changes synchronous again"), and that we can easily identify if the update is from an operational or administrative context thanks to "MINOR: server: pass adm and op cause to srv_update_status()". And given that administrative and operational updates cannot be cumulated (since srv_update_status() is called synchronously and independently for admin updates and state/operational updates, and the function directly consumes the changes). We split srv_update_status() in 2 distinct parts: Either <type> is 0, meaning the update is an operational update which is handled by directly looking at cur_state and next_state to apply the proper transition. Also, the check to prevent operational state from being applied if MAINT admin flag is set is no longer needed given that the calling functions already ensure this (ie: srv_set_{running,stopping,stopped) Or <type> is 1, meaning the update is an administrative update, where cur_admin and next_admin are evaluated to apply the proper transition and deduct the resulting server state (next_state is updated implicitly). Once this is done, both operations share a common code path in srv_update_status() to update proxy and servers stats if required. Thanks to this change, the function's behavior is much more predictable, it is not an all-in-one function anymore. Either we apply an operational change, else it is an administrative change. That's it, we cannot mix the 2 since both code paths are now properly separated.
2023-04-19 10:19:58 -04:00
if (type) {
cb_data.admin.safe.cause = cause;
cb_data.admin.safe.old_admin = s->cur_admin;
cb_data.admin.safe.new_admin = s->next_admin;
requeued = _srv_update_status_adm(s, cause);
cb_data.admin.safe.requeued = requeued;
/* publish admin change */
_srv_event_hdl_publish(EVENT_HDL_SUB_SERVER_ADMIN, cb_data.admin, s);
}
MEDIUM: server: split srv_update_status() in two functions Considering that srv_update_status() is now synchronous again since 3ff577e1 ("MAJOR: server: make server state changes synchronous again"), and that we can easily identify if the update is from an operational or administrative context thanks to "MINOR: server: pass adm and op cause to srv_update_status()". And given that administrative and operational updates cannot be cumulated (since srv_update_status() is called synchronously and independently for admin updates and state/operational updates, and the function directly consumes the changes). We split srv_update_status() in 2 distinct parts: Either <type> is 0, meaning the update is an operational update which is handled by directly looking at cur_state and next_state to apply the proper transition. Also, the check to prevent operational state from being applied if MAINT admin flag is set is no longer needed given that the calling functions already ensure this (ie: srv_set_{running,stopping,stopped) Or <type> is 1, meaning the update is an administrative update, where cur_admin and next_admin are evaluated to apply the proper transition and deduct the resulting server state (next_state is updated implicitly). Once this is done, both operations share a common code path in srv_update_status() to update proxy and servers stats if required. Thanks to this change, the function's behavior is much more predictable, it is not an all-in-one function anymore. Either we apply an operational change, else it is an administrative change. That's it, we cannot mix the 2 since both code paths are now properly separated.
2023-04-19 10:19:58 -04:00
else
requeued = _srv_update_status_op(s, cause);
/* explicitly commit state changes (even if it was already applied implicitly
* by some lb state change function), so we don't miss anything
*/
srv_lb_commit_status(s);
/* check if server stats must be updated due the the server state change */
if (srv_prev_state != s->cur_state) {
if (srv_prev_state == SRV_ST_STOPPED) {
/* server was down and no longer is */
if (s->last_change < ns_to_sec(now_ns)) // ignore negative times
s->down_time += ns_to_sec(now_ns) - s->last_change;
_srv_event_hdl_publish(EVENT_HDL_SUB_SERVER_UP, cb_data.common, s);
}
else if (s->cur_state == SRV_ST_STOPPED) {
/* server was up and is currently down */
if (s->counters.shared.tg)
HA_ATOMIC_INC(&s->counters.shared.tg[tgid - 1]->down_trans);
_srv_event_hdl_publish(EVENT_HDL_SUB_SERVER_DOWN, cb_data.common, s);
}
/*
* If the server is no longer running, let's not pretend
* it can handle requests.
*/
if (s->cur_state != SRV_ST_RUNNING) {
struct server *srv = s;
HA_ATOMIC_CAS(&s->proxy->ready_srv, &srv, NULL);
}
s->last_change = ns_to_sec(now_ns);
if (s->counters.shared.tg)
HA_ATOMIC_STORE(&s->counters.shared.tg[tgid - 1]->last_state_change, s->last_change);
/* publish the state change */
_srv_event_hdl_prepare_state(&cb_data.state,
s, type, cause, srv_prev_state, requeued);
_srv_event_hdl_publish(EVENT_HDL_SUB_SERVER_STATE, cb_data.state, s);
}
/* check if backend stats must be updated due to the server state change */
if (prev_srv_count && s->proxy->srv_bck == 0 && s->proxy->srv_act == 0)
set_backend_down(s->proxy); /* backend going down */
else if (!prev_srv_count && (s->proxy->srv_bck || s->proxy->srv_act)) {
unsigned long last_change = s->proxy->last_change;
/* backend was down and is back up again:
* no helper function, updating last_change and backend downtime stats
*/
if (last_change < ns_to_sec(now_ns)) // ignore negative times
s->proxy->down_time += ns_to_sec(now_ns) - last_change;
s->proxy->last_change = ns_to_sec(now_ns);
if (s->proxy->be_counters.shared.tg)
HA_ATOMIC_STORE(&s->proxy->be_counters.shared.tg[tgid - 1]->last_state_change, s->proxy->last_change);
}
}
/* Drains the 'toremove_conns' list of idle_conns[tid]: each connection popped
 * from it is handed to its mux's destroy() method along with the connection's
 * context. <context> and <state> are not used. Always returns <task>.
 */
struct task *srv_cleanup_toremove_conns(struct task *task, void *context, unsigned int state)
{
	for (;;) {
		struct connection *victim;

		victim = MT_LIST_POP(&idle_conns[tid].toremove_conns,
		                     struct connection *, toremove_list);
		if (victim == NULL)
			break; /* list fully drained */

		victim->mux->destroy(victim->ctx);
	}

	return task;
}
/* Transfers up to <toremove_nb> connections from the ->idle_conn_list of
 * server <srv> for thread <thr> to that thread's 'toremove_conns' list in
 * idle_conns. Passing -1 as <toremove_nb> removes the limit so that the whole
 * list is transferred.
 *
 * The number of transferred connections is returned.
 *
 * The caller must hold idle_conns_lock.
 */
static int srv_migrate_conns_to_remove(struct server *srv, int thr, int toremove_nb)
{
	int moved;

	for (moved = 0; !LIST_ISEMPTY(&srv->per_thr[thr].idle_conn_list); moved++) {
		struct connection *victim;

		/* stop once the requested quota is reached, unless unlimited */
		if (toremove_nb != -1 && moved >= toremove_nb)
			break;

		/* always pick the head; presumably conn_delete_from_tree() also
		 * unlinks it from idle_conn_list so that the loop progresses
		 * (to be confirmed against its definition).
		 */
		victim = LIST_ELEM(srv->per_thr[thr].idle_conn_list.n, struct connection *, idle_list);
		conn_delete_from_tree(victim, thr);
		MT_LIST_APPEND(&idle_conns[thr].toremove_conns, &victim->toremove_list);
	}

	return moved;
}
BUG/MEDIUM: connections: force connections cleanup on server changes I've been trying to understand a change of behaviour between v2.2dev5 and v2.2dev6. Indeed our probe is regularly testing to add and remove servers on a given backend such as: # echo "show servers state be_foo" | sudo socat stdio /var/lib/haproxy/stats 113 be_foo 1 srv0 10.236.139.34 2 0 1 1 263 15 3 4 6 0 0 0 - 31255 - 113 be_foo 2 srv1 0.0.0.0 0 1 256 256 0 15 3 0 14 0 0 0 - 0 - -> curl on the corresponding frontend: reply from server:31255 # echo "set server be_foo/srv1 addr 10.236.139.34 port 31257" | sudo socat stdio /var/lib/haproxy/stats IP changed from '0.0.0.0' to '10.236.139.34', port changed from '0' to '31257' by 'stats socket command' # echo "set server be_foo/srv1 weight 256" | sudo socat stdio /var/lib/haproxy/stats # echo "set server be_foo/srv1 check-port 8500" | sudo socat stdio /var/lib/haproxy/stats health check port updated. # echo "set server be_foo/srv1 state ready" | sudo socat stdio /var/lib/haproxy/stats # echo "show servers state be_foo" | sudo socat stdio /var/lib/haproxy/stats 113 be_foo 1 srv0 10.236.139.34 2 0 1 1 105 15 3 4 6 0 0 0 - 31255 - 113 be_foo 2 srv1 10.236.139.34 2 0 256 256 2319 15 3 2 6 0 0 0 - 31257 - -> curl on the corresponding frontend: reply for server:31257 (notice the difference of weight) # echo "set server be_foo/srv1 state maint" | sudo socat stdio /var/lib/haproxy/stats # echo "set server be_foo/srv1 addr 0.0.0.0 port 0" | sudo socat stdio /var/lib/haproxy/stats IP changed from '10.236.139.34' to '0.0.0.0', port changed from '31257' to '0' by 'stats socket command' # echo "show servers state be_foo" | sudo socat stdio /var/lib/haproxy/stats 113 be_foo 1 srv0 10.236.139.34 2 0 1 1 263 15 3 4 6 0 0 0 - 31255 - 113 be_foo 2 srv1 0.0.0.0 0 1 256 256 0 15 3 0 14 0 0 0 - 0 - -> curl on the corresponding frontend: reply from server:31255 # echo "set server be_foo/srv1 addr 10.236.139.34 port 31256" | sudo socat stdio /var/lib/haproxy/stats IP changed 
from '0.0.0.0' to '10.236.139.34', port changed from '0' to '31256' by 'stats socket command' # echo "set server be_foo/srv1 weight 256" | sudo socat stdio /var/lib/haproxy/stats # echo "set server be_foo/srv1 check-port 8500" | sudo socat stdio /var/lib/haproxy/stats health check port updated. # echo "set server be_foo/srv1 state ready" | sudo socat stdio /var/lib/haproxy/stats # echo "show servers state be_foo" | sudo socat stdio /var/lib/haproxy/stats 113 be_foo 1 srv0 10.236.139.34 2 0 1 1 105 15 3 4 6 0 0 0 - 31255 - 113 be_foo 2 srv1 10.236.139.34 2 0 256 256 2319 15 3 2 6 0 0 0 - 31256 - -> curl on the corresponding frontend: reply from server:31257 (!) Here we indeed would expect to get an answer from server:31256. The issue is highly linked to the usage of `pool-purge-delay`, with a value which is higher than the duration of the test, 10s in our case. A git bisect between dev5 and dev6 seems to show commit 079cb9af22da6 ("MEDIUM: connections: Revamp the way idle connections are killed") being the origin of this new behaviour. So if I understand the latter correctly, it seems that it was more a matter of chance that we did not see the issue earlier. My patch proposes to force clean idle connections in the two following cases: - we set a (still running) server to maintenance - we change the ip/port of a server This commit should be backported to 2.1, 2.0, and 1.9. Signed-off-by: William Dauchy <w.dauchy@criteo.com>
2020-05-02 15:52:36 -04:00
/* cleanup connections for a given server
* might be useful when going on forced maintenance or live changing ip/port
*/
static void srv_cleanup_connections(struct server *srv)
BUG/MEDIUM: connections: force connections cleanup on server changes I've been trying to understand a change of behaviour between v2.2dev5 and v2.2dev6. Indeed our probe is regularly testing to add and remove servers on a given backend such as: # echo "show servers state be_foo" | sudo socat stdio /var/lib/haproxy/stats 113 be_foo 1 srv0 10.236.139.34 2 0 1 1 263 15 3 4 6 0 0 0 - 31255 - 113 be_foo 2 srv1 0.0.0.0 0 1 256 256 0 15 3 0 14 0 0 0 - 0 - -> curl on the corresponding frontend: reply from server:31255 # echo "set server be_foo/srv1 addr 10.236.139.34 port 31257" | sudo socat stdio /var/lib/haproxy/stats IP changed from '0.0.0.0' to '10.236.139.34', port changed from '0' to '31257' by 'stats socket command' # echo "set server be_foo/srv1 weight 256" | sudo socat stdio /var/lib/haproxy/stats # echo "set server be_foo/srv1 check-port 8500" | sudo socat stdio /var/lib/haproxy/stats health check port updated. # echo "set server be_foo/srv1 state ready" | sudo socat stdio /var/lib/haproxy/stats # echo "show servers state be_foo" | sudo socat stdio /var/lib/haproxy/stats 113 be_foo 1 srv0 10.236.139.34 2 0 1 1 105 15 3 4 6 0 0 0 - 31255 - 113 be_foo 2 srv1 10.236.139.34 2 0 256 256 2319 15 3 2 6 0 0 0 - 31257 - -> curl on the corresponding frontend: reply for server:31257 (notice the difference of weight) # echo "set server be_foo/srv1 state maint" | sudo socat stdio /var/lib/haproxy/stats # echo "set server be_foo/srv1 addr 0.0.0.0 port 0" | sudo socat stdio /var/lib/haproxy/stats IP changed from '10.236.139.34' to '0.0.0.0', port changed from '31257' to '0' by 'stats socket command' # echo "show servers state be_foo" | sudo socat stdio /var/lib/haproxy/stats 113 be_foo 1 srv0 10.236.139.34 2 0 1 1 263 15 3 4 6 0 0 0 - 31255 - 113 be_foo 2 srv1 0.0.0.0 0 1 256 256 0 15 3 0 14 0 0 0 - 0 - -> curl on the corresponding frontend: reply from server:31255 # echo "set server be_foo/srv1 addr 10.236.139.34 port 31256" | sudo socat stdio /var/lib/haproxy/stats IP changed 
from '0.0.0.0' to '10.236.139.34', port changed from '0' to '31256' by 'stats socket command' # echo "set server be_foo/srv1 weight 256" | sudo socat stdio /var/lib/haproxy/stats # echo "set server be_foo/srv1 check-port 8500" | sudo socat stdio /var/lib/haproxy/stats health check port updated. # echo "set server be_foo/srv1 state ready" | sudo socat stdio /var/lib/haproxy/stats # echo "show servers state be_foo" | sudo socat stdio /var/lib/haproxy/stats 113 be_foo 1 srv0 10.236.139.34 2 0 1 1 105 15 3 4 6 0 0 0 - 31255 - 113 be_foo 2 srv1 10.236.139.34 2 0 256 256 2319 15 3 2 6 0 0 0 - 31256 - -> curl on the corresponding frontend: reply from server:31257 (!) Here we indeed would expect to get an anver from server:31256. The issue is highly linked to the usage of `pool-purge-delay`, with a value which is higher than the duration of the test, 10s in our case. a git bisect between dev5 and dev6 seems to show commit 079cb9af22da6 ("MEDIUM: connections: Revamp the way idle connections are killed") being the origin of this new behaviour. So if I understand the later correctly, it seems that it was more a matter of chance that we did not saw the issue earlier. My patch proposes to force clean idle connections in the two following cases: - we set a (still running) server to maintenance - we change the ip/port of a server This commit should be backported to 2.1, 2.0, and 1.9. Signed-off-by: William Dauchy <w.dauchy@criteo.com>
2020-05-02 15:52:36 -04:00
{
struct sess_priv_conns *sess_conns;
BUG/MEDIUM: connections: force connections cleanup on server changes I've been trying to understand a change of behaviour between v2.2dev5 and v2.2dev6. Indeed our probe is regularly testing to add and remove servers on a given backend such as: # echo "show servers state be_foo" | sudo socat stdio /var/lib/haproxy/stats 113 be_foo 1 srv0 10.236.139.34 2 0 1 1 263 15 3 4 6 0 0 0 - 31255 - 113 be_foo 2 srv1 0.0.0.0 0 1 256 256 0 15 3 0 14 0 0 0 - 0 - -> curl on the corresponding frontend: reply from server:31255 # echo "set server be_foo/srv1 addr 10.236.139.34 port 31257" | sudo socat stdio /var/lib/haproxy/stats IP changed from '0.0.0.0' to '10.236.139.34', port changed from '0' to '31257' by 'stats socket command' # echo "set server be_foo/srv1 weight 256" | sudo socat stdio /var/lib/haproxy/stats # echo "set server be_foo/srv1 check-port 8500" | sudo socat stdio /var/lib/haproxy/stats health check port updated. # echo "set server be_foo/srv1 state ready" | sudo socat stdio /var/lib/haproxy/stats # echo "show servers state be_foo" | sudo socat stdio /var/lib/haproxy/stats 113 be_foo 1 srv0 10.236.139.34 2 0 1 1 105 15 3 4 6 0 0 0 - 31255 - 113 be_foo 2 srv1 10.236.139.34 2 0 256 256 2319 15 3 2 6 0 0 0 - 31257 - -> curl on the corresponding frontend: reply for server:31257 (notice the difference of weight) # echo "set server be_foo/srv1 state maint" | sudo socat stdio /var/lib/haproxy/stats # echo "set server be_foo/srv1 addr 0.0.0.0 port 0" | sudo socat stdio /var/lib/haproxy/stats IP changed from '10.236.139.34' to '0.0.0.0', port changed from '31257' to '0' by 'stats socket command' # echo "show servers state be_foo" | sudo socat stdio /var/lib/haproxy/stats 113 be_foo 1 srv0 10.236.139.34 2 0 1 1 263 15 3 4 6 0 0 0 - 31255 - 113 be_foo 2 srv1 0.0.0.0 0 1 256 256 0 15 3 0 14 0 0 0 - 0 - -> curl on the corresponding frontend: reply from server:31255 # echo "set server be_foo/srv1 addr 10.236.139.34 port 31256" | sudo socat stdio /var/lib/haproxy/stats IP changed 
from '0.0.0.0' to '10.236.139.34', port changed from '0' to '31256' by 'stats socket command' # echo "set server be_foo/srv1 weight 256" | sudo socat stdio /var/lib/haproxy/stats # echo "set server be_foo/srv1 check-port 8500" | sudo socat stdio /var/lib/haproxy/stats health check port updated. # echo "set server be_foo/srv1 state ready" | sudo socat stdio /var/lib/haproxy/stats # echo "show servers state be_foo" | sudo socat stdio /var/lib/haproxy/stats 113 be_foo 1 srv0 10.236.139.34 2 0 1 1 105 15 3 4 6 0 0 0 - 31255 - 113 be_foo 2 srv1 10.236.139.34 2 0 256 256 2319 15 3 2 6 0 0 0 - 31256 - -> curl on the corresponding frontend: reply from server:31257 (!) Here we indeed would expect to get an anver from server:31256. The issue is highly linked to the usage of `pool-purge-delay`, with a value which is higher than the duration of the test, 10s in our case. a git bisect between dev5 and dev6 seems to show commit 079cb9af22da6 ("MEDIUM: connections: Revamp the way idle connections are killed") being the origin of this new behaviour. So if I understand the later correctly, it seems that it was more a matter of chance that we did not saw the issue earlier. My patch proposes to force clean idle connections in the two following cases: - we set a (still running) server to maintenance - we change the ip/port of a server This commit should be backported to 2.1, 2.0, and 1.9. Signed-off-by: William Dauchy <w.dauchy@criteo.com>
2020-05-02 15:52:36 -04:00
int did_remove;
int i;
/* nothing to do if pool-max-conn is null */
if (!srv->max_idle_conns)
return;
/* check all threads starting with ours */
for (i = tid;;) {
BUG/MEDIUM: connections: force connections cleanup on server changes I've been trying to understand a change of behaviour between v2.2dev5 and v2.2dev6. Indeed our probe is regularly testing to add and remove servers on a given backend such as: # echo "show servers state be_foo" | sudo socat stdio /var/lib/haproxy/stats 113 be_foo 1 srv0 10.236.139.34 2 0 1 1 263 15 3 4 6 0 0 0 - 31255 - 113 be_foo 2 srv1 0.0.0.0 0 1 256 256 0 15 3 0 14 0 0 0 - 0 - -> curl on the corresponding frontend: reply from server:31255 # echo "set server be_foo/srv1 addr 10.236.139.34 port 31257" | sudo socat stdio /var/lib/haproxy/stats IP changed from '0.0.0.0' to '10.236.139.34', port changed from '0' to '31257' by 'stats socket command' # echo "set server be_foo/srv1 weight 256" | sudo socat stdio /var/lib/haproxy/stats # echo "set server be_foo/srv1 check-port 8500" | sudo socat stdio /var/lib/haproxy/stats health check port updated. # echo "set server be_foo/srv1 state ready" | sudo socat stdio /var/lib/haproxy/stats # echo "show servers state be_foo" | sudo socat stdio /var/lib/haproxy/stats 113 be_foo 1 srv0 10.236.139.34 2 0 1 1 105 15 3 4 6 0 0 0 - 31255 - 113 be_foo 2 srv1 10.236.139.34 2 0 256 256 2319 15 3 2 6 0 0 0 - 31257 - -> curl on the corresponding frontend: reply for server:31257 (notice the difference of weight) # echo "set server be_foo/srv1 state maint" | sudo socat stdio /var/lib/haproxy/stats # echo "set server be_foo/srv1 addr 0.0.0.0 port 0" | sudo socat stdio /var/lib/haproxy/stats IP changed from '10.236.139.34' to '0.0.0.0', port changed from '31257' to '0' by 'stats socket command' # echo "show servers state be_foo" | sudo socat stdio /var/lib/haproxy/stats 113 be_foo 1 srv0 10.236.139.34 2 0 1 1 263 15 3 4 6 0 0 0 - 31255 - 113 be_foo 2 srv1 0.0.0.0 0 1 256 256 0 15 3 0 14 0 0 0 - 0 - -> curl on the corresponding frontend: reply from server:31255 # echo "set server be_foo/srv1 addr 10.236.139.34 port 31256" | sudo socat stdio /var/lib/haproxy/stats IP changed 
from '0.0.0.0' to '10.236.139.34', port changed from '0' to '31256' by 'stats socket command' # echo "set server be_foo/srv1 weight 256" | sudo socat stdio /var/lib/haproxy/stats # echo "set server be_foo/srv1 check-port 8500" | sudo socat stdio /var/lib/haproxy/stats health check port updated. # echo "set server be_foo/srv1 state ready" | sudo socat stdio /var/lib/haproxy/stats # echo "show servers state be_foo" | sudo socat stdio /var/lib/haproxy/stats 113 be_foo 1 srv0 10.236.139.34 2 0 1 1 105 15 3 4 6 0 0 0 - 31255 - 113 be_foo 2 srv1 10.236.139.34 2 0 256 256 2319 15 3 2 6 0 0 0 - 31256 - -> curl on the corresponding frontend: reply from server:31257 (!) Here we indeed would expect to get an anver from server:31256. The issue is highly linked to the usage of `pool-purge-delay`, with a value which is higher than the duration of the test, 10s in our case. a git bisect between dev5 and dev6 seems to show commit 079cb9af22da6 ("MEDIUM: connections: Revamp the way idle connections are killed") being the origin of this new behaviour. So if I understand the later correctly, it seems that it was more a matter of chance that we did not saw the issue earlier. My patch proposes to force clean idle connections in the two following cases: - we set a (still running) server to maintenance - we change the ip/port of a server This commit should be backported to 2.1, 2.0, and 1.9. Signed-off-by: William Dauchy <w.dauchy@criteo.com>
2020-05-02 15:52:36 -04:00
did_remove = 0;
HA_SPIN_LOCK(IDLE_CONNS_LOCK, &idle_conns[i].idle_conns_lock);
/* idle connections */
if (srv_migrate_conns_to_remove(srv, i, -1) > 0)
BUG/MEDIUM: connections: force connections cleanup on server changes I've been trying to understand a change of behaviour between v2.2dev5 and v2.2dev6. Indeed our probe is regularly testing to add and remove servers on a given backend such as: # echo "show servers state be_foo" | sudo socat stdio /var/lib/haproxy/stats 113 be_foo 1 srv0 10.236.139.34 2 0 1 1 263 15 3 4 6 0 0 0 - 31255 - 113 be_foo 2 srv1 0.0.0.0 0 1 256 256 0 15 3 0 14 0 0 0 - 0 - -> curl on the corresponding frontend: reply from server:31255 # echo "set server be_foo/srv1 addr 10.236.139.34 port 31257" | sudo socat stdio /var/lib/haproxy/stats IP changed from '0.0.0.0' to '10.236.139.34', port changed from '0' to '31257' by 'stats socket command' # echo "set server be_foo/srv1 weight 256" | sudo socat stdio /var/lib/haproxy/stats # echo "set server be_foo/srv1 check-port 8500" | sudo socat stdio /var/lib/haproxy/stats health check port updated. # echo "set server be_foo/srv1 state ready" | sudo socat stdio /var/lib/haproxy/stats # echo "show servers state be_foo" | sudo socat stdio /var/lib/haproxy/stats 113 be_foo 1 srv0 10.236.139.34 2 0 1 1 105 15 3 4 6 0 0 0 - 31255 - 113 be_foo 2 srv1 10.236.139.34 2 0 256 256 2319 15 3 2 6 0 0 0 - 31257 - -> curl on the corresponding frontend: reply for server:31257 (notice the difference of weight) # echo "set server be_foo/srv1 state maint" | sudo socat stdio /var/lib/haproxy/stats # echo "set server be_foo/srv1 addr 0.0.0.0 port 0" | sudo socat stdio /var/lib/haproxy/stats IP changed from '10.236.139.34' to '0.0.0.0', port changed from '31257' to '0' by 'stats socket command' # echo "show servers state be_foo" | sudo socat stdio /var/lib/haproxy/stats 113 be_foo 1 srv0 10.236.139.34 2 0 1 1 263 15 3 4 6 0 0 0 - 31255 - 113 be_foo 2 srv1 0.0.0.0 0 1 256 256 0 15 3 0 14 0 0 0 - 0 - -> curl on the corresponding frontend: reply from server:31255 # echo "set server be_foo/srv1 addr 10.236.139.34 port 31256" | sudo socat stdio /var/lib/haproxy/stats IP changed 
from '0.0.0.0' to '10.236.139.34', port changed from '0' to '31256' by 'stats socket command' # echo "set server be_foo/srv1 weight 256" | sudo socat stdio /var/lib/haproxy/stats # echo "set server be_foo/srv1 check-port 8500" | sudo socat stdio /var/lib/haproxy/stats health check port updated. # echo "set server be_foo/srv1 state ready" | sudo socat stdio /var/lib/haproxy/stats # echo "show servers state be_foo" | sudo socat stdio /var/lib/haproxy/stats 113 be_foo 1 srv0 10.236.139.34 2 0 1 1 105 15 3 4 6 0 0 0 - 31255 - 113 be_foo 2 srv1 10.236.139.34 2 0 256 256 2319 15 3 2 6 0 0 0 - 31256 - -> curl on the corresponding frontend: reply from server:31257 (!) Here we indeed would expect to get an anver from server:31256. The issue is highly linked to the usage of `pool-purge-delay`, with a value which is higher than the duration of the test, 10s in our case. a git bisect between dev5 and dev6 seems to show commit 079cb9af22da6 ("MEDIUM: connections: Revamp the way idle connections are killed") being the origin of this new behaviour. So if I understand the later correctly, it seems that it was more a matter of chance that we did not saw the issue earlier. My patch proposes to force clean idle connections in the two following cases: - we set a (still running) server to maintenance - we change the ip/port of a server This commit should be backported to 2.1, 2.0, and 1.9. Signed-off-by: William Dauchy <w.dauchy@criteo.com>
2020-05-02 15:52:36 -04:00
did_remove = 1;
/* session attached connections */
while ((sess_conns = MT_LIST_POP(&srv->per_thr[i].sess_conns, struct sess_priv_conns *, srv_el))) {
if (sess_conns_cleanup_all_idle(sess_conns)) {
did_remove = 1;
if (LIST_ISEMPTY(&sess_conns->conn_list)) {
LIST_DELETE(&sess_conns->sess_el);
pool_free(pool_head_sess_priv_conns, sess_conns);
}
}
}
HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[i].idle_conns_lock);
BUG/MEDIUM: connections: force connections cleanup on server changes I've been trying to understand a change of behaviour between v2.2dev5 and v2.2dev6. Indeed our probe is regularly testing to add and remove servers on a given backend such as: # echo "show servers state be_foo" | sudo socat stdio /var/lib/haproxy/stats 113 be_foo 1 srv0 10.236.139.34 2 0 1 1 263 15 3 4 6 0 0 0 - 31255 - 113 be_foo 2 srv1 0.0.0.0 0 1 256 256 0 15 3 0 14 0 0 0 - 0 - -> curl on the corresponding frontend: reply from server:31255 # echo "set server be_foo/srv1 addr 10.236.139.34 port 31257" | sudo socat stdio /var/lib/haproxy/stats IP changed from '0.0.0.0' to '10.236.139.34', port changed from '0' to '31257' by 'stats socket command' # echo "set server be_foo/srv1 weight 256" | sudo socat stdio /var/lib/haproxy/stats # echo "set server be_foo/srv1 check-port 8500" | sudo socat stdio /var/lib/haproxy/stats health check port updated. # echo "set server be_foo/srv1 state ready" | sudo socat stdio /var/lib/haproxy/stats # echo "show servers state be_foo" | sudo socat stdio /var/lib/haproxy/stats 113 be_foo 1 srv0 10.236.139.34 2 0 1 1 105 15 3 4 6 0 0 0 - 31255 - 113 be_foo 2 srv1 10.236.139.34 2 0 256 256 2319 15 3 2 6 0 0 0 - 31257 - -> curl on the corresponding frontend: reply for server:31257 (notice the difference of weight) # echo "set server be_foo/srv1 state maint" | sudo socat stdio /var/lib/haproxy/stats # echo "set server be_foo/srv1 addr 0.0.0.0 port 0" | sudo socat stdio /var/lib/haproxy/stats IP changed from '10.236.139.34' to '0.0.0.0', port changed from '31257' to '0' by 'stats socket command' # echo "show servers state be_foo" | sudo socat stdio /var/lib/haproxy/stats 113 be_foo 1 srv0 10.236.139.34 2 0 1 1 263 15 3 4 6 0 0 0 - 31255 - 113 be_foo 2 srv1 0.0.0.0 0 1 256 256 0 15 3 0 14 0 0 0 - 0 - -> curl on the corresponding frontend: reply from server:31255 # echo "set server be_foo/srv1 addr 10.236.139.34 port 31256" | sudo socat stdio /var/lib/haproxy/stats IP changed 
from '0.0.0.0' to '10.236.139.34', port changed from '0' to '31256' by 'stats socket command' # echo "set server be_foo/srv1 weight 256" | sudo socat stdio /var/lib/haproxy/stats # echo "set server be_foo/srv1 check-port 8500" | sudo socat stdio /var/lib/haproxy/stats health check port updated. # echo "set server be_foo/srv1 state ready" | sudo socat stdio /var/lib/haproxy/stats # echo "show servers state be_foo" | sudo socat stdio /var/lib/haproxy/stats 113 be_foo 1 srv0 10.236.139.34 2 0 1 1 105 15 3 4 6 0 0 0 - 31255 - 113 be_foo 2 srv1 10.236.139.34 2 0 256 256 2319 15 3 2 6 0 0 0 - 31256 - -> curl on the corresponding frontend: reply from server:31257 (!) Here we indeed would expect to get an anver from server:31256. The issue is highly linked to the usage of `pool-purge-delay`, with a value which is higher than the duration of the test, 10s in our case. a git bisect between dev5 and dev6 seems to show commit 079cb9af22da6 ("MEDIUM: connections: Revamp the way idle connections are killed") being the origin of this new behaviour. So if I understand the later correctly, it seems that it was more a matter of chance that we did not saw the issue earlier. My patch proposes to force clean idle connections in the two following cases: - we set a (still running) server to maintenance - we change the ip/port of a server This commit should be backported to 2.1, 2.0, and 1.9. Signed-off-by: William Dauchy <w.dauchy@criteo.com>
2020-05-02 15:52:36 -04:00
if (did_remove)
task_wakeup(idle_conns[i].cleanup_task, TASK_WOKEN_OTHER);
if ((i = ((i + 1 == global.nbthread) ? 0 : i + 1)) == tid)
break;
BUG/MEDIUM: connections: force connections cleanup on server changes I've been trying to understand a change of behaviour between v2.2dev5 and v2.2dev6. Indeed our probe is regularly testing to add and remove servers on a given backend such as: # echo "show servers state be_foo" | sudo socat stdio /var/lib/haproxy/stats 113 be_foo 1 srv0 10.236.139.34 2 0 1 1 263 15 3 4 6 0 0 0 - 31255 - 113 be_foo 2 srv1 0.0.0.0 0 1 256 256 0 15 3 0 14 0 0 0 - 0 - -> curl on the corresponding frontend: reply from server:31255 # echo "set server be_foo/srv1 addr 10.236.139.34 port 31257" | sudo socat stdio /var/lib/haproxy/stats IP changed from '0.0.0.0' to '10.236.139.34', port changed from '0' to '31257' by 'stats socket command' # echo "set server be_foo/srv1 weight 256" | sudo socat stdio /var/lib/haproxy/stats # echo "set server be_foo/srv1 check-port 8500" | sudo socat stdio /var/lib/haproxy/stats health check port updated. # echo "set server be_foo/srv1 state ready" | sudo socat stdio /var/lib/haproxy/stats # echo "show servers state be_foo" | sudo socat stdio /var/lib/haproxy/stats 113 be_foo 1 srv0 10.236.139.34 2 0 1 1 105 15 3 4 6 0 0 0 - 31255 - 113 be_foo 2 srv1 10.236.139.34 2 0 256 256 2319 15 3 2 6 0 0 0 - 31257 - -> curl on the corresponding frontend: reply for server:31257 (notice the difference of weight) # echo "set server be_foo/srv1 state maint" | sudo socat stdio /var/lib/haproxy/stats # echo "set server be_foo/srv1 addr 0.0.0.0 port 0" | sudo socat stdio /var/lib/haproxy/stats IP changed from '10.236.139.34' to '0.0.0.0', port changed from '31257' to '0' by 'stats socket command' # echo "show servers state be_foo" | sudo socat stdio /var/lib/haproxy/stats 113 be_foo 1 srv0 10.236.139.34 2 0 1 1 263 15 3 4 6 0 0 0 - 31255 - 113 be_foo 2 srv1 0.0.0.0 0 1 256 256 0 15 3 0 14 0 0 0 - 0 - -> curl on the corresponding frontend: reply from server:31255 # echo "set server be_foo/srv1 addr 10.236.139.34 port 31256" | sudo socat stdio /var/lib/haproxy/stats IP changed 
from '0.0.0.0' to '10.236.139.34', port changed from '0' to '31256' by 'stats socket command' # echo "set server be_foo/srv1 weight 256" | sudo socat stdio /var/lib/haproxy/stats # echo "set server be_foo/srv1 check-port 8500" | sudo socat stdio /var/lib/haproxy/stats health check port updated. # echo "set server be_foo/srv1 state ready" | sudo socat stdio /var/lib/haproxy/stats # echo "show servers state be_foo" | sudo socat stdio /var/lib/haproxy/stats 113 be_foo 1 srv0 10.236.139.34 2 0 1 1 105 15 3 4 6 0 0 0 - 31255 - 113 be_foo 2 srv1 10.236.139.34 2 0 256 256 2319 15 3 2 6 0 0 0 - 31256 - -> curl on the corresponding frontend: reply from server:31257 (!) Here we indeed would expect to get an anver from server:31256. The issue is highly linked to the usage of `pool-purge-delay`, with a value which is higher than the duration of the test, 10s in our case. a git bisect between dev5 and dev6 seems to show commit 079cb9af22da6 ("MEDIUM: connections: Revamp the way idle connections are killed") being the origin of this new behaviour. So if I understand the later correctly, it seems that it was more a matter of chance that we did not saw the issue earlier. My patch proposes to force clean idle connections in the two following cases: - we set a (still running) server to maintenance - we change the ip/port of a server This commit should be backported to 2.1, 2.0, and 1.9. Signed-off-by: William Dauchy <w.dauchy@criteo.com>
2020-05-02 15:52:36 -04:00
}
}
/* removes an idle conn after updating the server idle conns counters */
void srv_release_conn(struct server *srv, struct connection *conn)
{
MEDIUM: session: account on server idle conns attached to session This patch adds a new member <curr_sess_idle_conns> on the server. It serves as a counter of idle connections attached on a session instead of regular idle/safe trees. This is used only for private connections. The objective is to provide a method to detect if there is idle connections still referencing a server. This will be particularly useful to ensure that a server is removable. Currently, this is not yet necessary as idle connections are directly freed via "del server" handler under thread isolation. However, this procedure will be replaced by an asynchronous mechanism outside of thread isolation. Careful: connections attached to a session but not idle will not be accounted by this counter. These connections can still be detected via srv_has_streams() so "del server" will be safe. This counter is maintain during the whole lifetime of a private connection. This is mandatory to guarantee "del server" safety and is conform with other idle server counters. What this means it that decrement is performed only when the connection transitions from idle to in use, or just prior to its deletion. For the first case, this is covered by session_get_conn(). The second case is trickier. It cannot be done via session_unown_conn() as a private connection may still live a little longer after its removal from session, most notably when scheduled for idle purging. Thus, conn_free() has been adjusted to handle the final decrement. Now, conn_backend_deinit() is also called for private connections if CO_FL_SESS_IDLE flag is present. This results in a call to srv_release_conn() which is responsible to decrement server idle counters.
2025-08-08 09:56:47 -04:00
if (conn->flags & CO_FL_SESS_IDLE) {
_HA_ATOMIC_DEC(&srv->curr_sess_idle_conns);
conn->flags &= ~CO_FL_SESS_IDLE;
}
else if (conn->flags & CO_FL_LIST_MASK) {
/* The connection is currently in the server's idle list, so tell it
* there's one less connection available in that list.
*/
_HA_ATOMIC_DEC(&srv->curr_idle_conns);
_HA_ATOMIC_DEC(conn->flags & CO_FL_SAFE_LIST ? &srv->curr_safe_nb : &srv->curr_idle_nb);
_HA_ATOMIC_DEC(&srv->curr_idle_thr[tid]);
}
else {
/* The connection is not private and not in any server's idle
* list, so decrement the current number of used connections
*/
_HA_ATOMIC_DEC(&srv->curr_used_conns);
}
/* Remove the connection from any tree (safe, idle or available) */
if (ceb_intree(&conn->hash_node.node)) {
HA_SPIN_LOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
conn_delete_from_tree(conn, tid);
conn->flags &= ~CO_FL_LIST_MASK;
HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
}
}
/* Looks up in <tree> a connection whose hash key equals <hash>.
 * Returns the matching connection, or NULL when there is none.
 */
struct connection *srv_lookup_conn(struct ceb_root **tree, uint64_t hash)
{
	struct connection *found;

	found = ceb64_item_lookup(tree, hash_node.node, hash_node.key, hash, struct connection);
	return found;
}
/* Starting from <conn>, returns the next connection of <tree> stored under
 * the same hash as <conn>, or NULL when there is no other one.
 */
struct connection *srv_lookup_conn_next(struct ceb_root **tree, struct connection *conn)
{
	struct connection *next_dup;

	next_dup = ceb64_item_next_dup(tree, hash_node.node, hash_node.key, conn);
	return next_dup;
}
/* Low-level insertion of <conn> into the current thread's connection trees
 * of <srv>: the safe tree when <is_safe> is non-zero, the idle tree
 * otherwise. The connection is also queued in the thread's idle_conn_list.
 *
 * This is only the bare tree/list insertion, meant for internal use or for
 * putting back a connection that was briefly taken out to protect it from a
 * takeover. Regular callers should go through srv_add_to_idle_list(), which
 * implements the complete logic.
 *
 * Must be called with idle_conns_lock.
 */
static inline void _srv_add_idle(struct server *srv, struct connection *conn, int is_safe)
{
	struct ceb_root **tree;

	/* pick the destination tree for the current thread */
	if (is_safe)
		tree = &srv->per_thr[tid].safe_conns;
	else
		tree = &srv->per_thr[tid].idle_conns;

	/* index the connection by its hash first... */
	ceb64_item_insert(tree, hash_node.node, hash_node.key, conn);
	BUG_ON_STRESS(!mt_list_isempty(&conn->toremove_list));

	/* ...then queue it at the tail of the list sorted by connection usage. */
	LIST_APPEND(&srv->per_thr[tid].idle_conn_list, &conn->idle_list);
}
/* Locked variant of _srv_add_idle(): adds <conn> in <srv> idle trees for the
 * current thread. Set <is_safe> if connection is deemed safe for reuse.
 *
 * This function is a simple wrapper for tree insert. It should only be used
 * for internal usage or when removing briefly the connection to avoid takeover
 * on it before reinserting it with this function. In other context, prefer to
 * use the full feature srv_add_to_idle_list(). This function takes the idle
 * conns lock for the current thread (thus the owner must not already have it).
 */
void srv_add_idle(struct server *srv, struct connection *conn, int is_safe)
{
/* the insertion itself is delegated to _srv_add_idle(), which requires the
 * current thread's idle conns lock to be held.
 */
HA_SPIN_LOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
_srv_add_idle(srv, conn, is_safe);
HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
}
/* This adds an idle connection to the server's list if the connection is
* reusable, not held by any owner anymore, but still has available streams.
*/
int srv_add_to_idle_list(struct server *srv, struct connection *conn, int is_safe)
{
/* we try to keep the connection in the server's idle list
* if we don't have too many FD in use, and if the number of
* idle+current conns is lower than what was observed before
* last purge, or if we already don't have idle conns for the
* current thread and we don't exceed last count by global.nbthread.
*/
if (!(conn->flags & CO_FL_PRIVATE) &&
srv && srv->pool_purge_delay > 0 &&
!(srv->cur_admin & SRV_ADMF_MAINT) &&
((srv->proxy->options & PR_O_REUSE_MASK) != PR_O_REUSE_NEVR) &&
ha_used_fds < global.tune.pool_high_count &&
(srv->max_idle_conns == -1 || srv->max_idle_conns > srv->curr_idle_conns) &&
((ceb_isempty(&srv->per_thr[tid].safe_conns) &&
(is_safe || ceb_isempty(&srv->per_thr[tid].idle_conns))) ||
(ha_used_fds < global.tune.pool_low_count &&
(srv->curr_used_conns + srv->curr_idle_conns <=
MAX(srv->curr_used_conns, srv->est_need_conns) + srv->low_idle_conns ||
(conn->flags & CO_FL_REVERSED)))) &&
!conn->mux->used_streams(conn) && conn->mux->avail_streams(conn)) {
int retadd;
retadd = _HA_ATOMIC_ADD_FETCH(&srv->curr_idle_conns, 1);
if (retadd > srv->max_idle_conns) {
_HA_ATOMIC_DEC(&srv->curr_idle_conns);
return 0;
}
_HA_ATOMIC_DEC(&srv->curr_used_conns);
HA_SPIN_LOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
conn_delete_from_tree(conn, tid);
if (is_safe) {
conn->flags = (conn->flags & ~CO_FL_LIST_MASK) | CO_FL_SAFE_LIST;
_srv_add_idle(srv, conn, 1);
_HA_ATOMIC_INC(&srv->curr_safe_nb);
} else {
conn->flags = (conn->flags & ~CO_FL_LIST_MASK) | CO_FL_IDLE_LIST;
_srv_add_idle(srv, conn, 0);
_HA_ATOMIC_INC(&srv->curr_idle_nb);
}
BUG_ON_STRESS(!mt_list_isempty(&conn->toremove_list));
HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
_HA_ATOMIC_INC(&srv->curr_idle_thr[tid]);
if (HA_ATOMIC_LOAD(&srv->idle_node.node.leaf_p) == NULL) {
HA_SPIN_LOCK(OTHER_LOCK, &idle_conn_srv_lock);
if (_HA_ATOMIC_LOAD(&srv->idle_node.node.leaf_p) == NULL) {
srv->idle_node.key = tick_add(srv->pool_purge_delay,
now_ms);
eb32_insert(&idle_conn_srv, &srv->idle_node);
if (!task_in_wq(idle_conn_task) && !
task_in_rq(idle_conn_task)) {
task_schedule(idle_conn_task,
srv->idle_node.key);
}
BUG_ON_STRESS(!mt_list_isempty(&conn->toremove_list));
}
HA_SPIN_UNLOCK(OTHER_LOCK, &idle_conn_srv_lock);
}
return 1;
}
return 0;
}
/* Stores <conn> in the available connections tree of <srv> for the current
 * thread. This is reserved for backend connections currently in use which
 * still have usable streams left.
 */
void srv_add_to_avail_list(struct server *srv, struct connection *conn)
{
	struct ceb_root **avail;

	/* a connection used as an avail conn cannot sit in the idle list. */
	BUG_ON(LIST_INLIST(&conn->idle_list));
	BUG_ON_STRESS(!mt_list_isempty(&conn->toremove_list));

	avail = &srv->per_thr[tid].avail_conns;
	ceb64_item_insert(avail, hash_node.node, hash_node.key, conn);
}
struct task *srv_cleanup_idle_conns(struct task *task, void *context, unsigned int state)
{
struct server *srv;
struct eb32_node *eb;
int i;
unsigned int next_wakeup;
next_wakeup = TICK_ETERNITY;
HA_SPIN_LOCK(OTHER_LOCK, &idle_conn_srv_lock);
while (1) {
int exceed_conns;
int to_kill;
int curr_idle;
eb = eb32_lookup_ge(&idle_conn_srv, now_ms - TIMER_LOOK_BACK);
if (!eb) {
/* we might have reached the end of the tree, typically because
* <now_ms> is in the first half and we're first scanning the last
* half. Let's loop back to the beginning of the tree now.
*/
eb = eb32_first(&idle_conn_srv);
if (likely(!eb))
break;
}
if (tick_is_lt(now_ms, eb->key)) {
/* timer not expired yet, revisit it later */
next_wakeup = eb->key;
break;
}
srv = eb32_entry(eb, struct server, idle_node);
/* Calculate how many idle connections we want to kill :
* we want to remove half the difference between the total
* of established connections (used or idle) and the max
* number of used connections.
*/
curr_idle = srv->curr_idle_conns;
if (curr_idle == 0)
goto remove;
MEDIUM: server: improve estimate of the need for idle connections Starting with commit 079cb9a ("MEDIUM: connections: Revamp the way idle connections are killed") we started to improve the way to compute the need for idle connections. But the condition to keep a connection idle or drop it when releasing it was not updated. This often results in storms of close when certain thresholds are met, and long series of takeover() when there aren't enough connections left for a thread on a server. This patch tries to improve the situation this way: - it keeps an estimate of the number of connections needed for a server. This estimate is a copy of the max over previous purge period, or is a max of what is seen over current period; it differs from max_used_conns in that this one is a counter that's reset on each purge period ; - when releasing, if the number of current idle+used connections is lower than this last estimate, then we'll keep the connection; - when releasing, if the current thread's idle conns head is empty, and we don't exceed the estimate by the number of threads, then we'll keep the connection. - when cleaning up connections, we consider the max of the last two periods to avoid killing too many idle conns when facing bursty traffic. Thanks to this we can better converge towards a situation where, provided there are enough FDs, each active server keeps at least one idle connection per thread all the time, with a total number close to what was needed over the previous measurement period (as defined by pool-purge-delay). On tests with large numbers of concurrent connections (30k) and many servers (200), this has quite smoothed the CPU usage pattern, increased the reuse rate and roughly halved the takeover rate.
2020-06-29 09:56:35 -04:00
exceed_conns = srv->curr_used_conns + curr_idle - MAX(srv->max_used_conns, srv->est_need_conns);
exceed_conns = to_kill = exceed_conns / 2 + (exceed_conns & 1);
MEDIUM: server: improve estimate of the need for idle connections Starting with commit 079cb9a ("MEDIUM: connections: Revamp the way idle connections are killed") we started to improve the way to compute the need for idle connections. But the condition to keep a connection idle or drop it when releasing it was not updated. This often results in storms of close when certain thresholds are met, and long series of takeover() when there aren't enough connections left for a thread on a server. This patch tries to improve the situation this way: - it keeps an estimate of the number of connections needed for a server. This estimate is a copy of the max over previous purge period, or is a max of what is seen over current period; it differs from max_used_conns in that this one is a counter that's reset on each purge period ; - when releasing, if the number of current idle+used connections is lower than this last estimate, then we'll keep the connection; - when releasing, if the current thread's idle conns head is empty, and we don't exceed the estimate by the number of threads, then we'll keep the connection. - when cleaning up connections, we consider the max of the last two periods to avoid killing too many idle conns when facing bursty traffic. Thanks to this we can better converge towards a situation where, provided there are enough FDs, each active server keeps at least one idle connection per thread all the time, with a total number close to what was needed over the previous measurement period (as defined by pool-purge-delay). On tests with large numbers of concurrent connections (30k) and many servers (200), this has quite smoothed the CPU usage pattern, increased the reuse rate and roughly halved the takeover rate.
2020-06-29 09:56:35 -04:00
srv->est_need_conns = (srv->est_need_conns + srv->max_used_conns) / 2;
MEDIUM: server: improve estimate of the need for idle connections Starting with commit 079cb9a ("MEDIUM: connections: Revamp the way idle connections are killed") we started to improve the way to compute the need for idle connections. But the condition to keep a connection idle or drop it when releasing it was not updated. This often results in storms of close when certain thresholds are met, and long series of takeover() when there aren't enough connections left for a thread on a server. This patch tries to improve the situation this way: - it keeps an estimate of the number of connections needed for a server. This estimate is a copy of the max over previous purge period, or is a max of what is seen over current period; it differs from max_used_conns in that this one is a counter that's reset on each purge period ; - when releasing, if the number of current idle+used connections is lower than this last estimate, then we'll keep the connection; - when releasing, if the current thread's idle conns head is empty, and we don't exceed the estimate by the number of threads, then we'll keep the connection. - when cleaning up connections, we consider the max of the last two periods to avoid killing too many idle conns when facing bursty traffic. Thanks to this we can better converge towards a situation where, provided there are enough FDs, each active server keeps at least one idle connection per thread all the time, with a total number close to what was needed over the previous measurement period (as defined by pool-purge-delay). On tests with large numbers of concurrent connections (30k) and many servers (200), this has quite smoothed the CPU usage pattern, increased the reuse rate and roughly halved the takeover rate.
2020-06-29 09:56:35 -04:00
if (srv->est_need_conns < srv->max_used_conns)
srv->est_need_conns = srv->max_used_conns;
HA_ATOMIC_STORE(&srv->max_used_conns, srv->curr_used_conns);
if (exceed_conns <= 0)
goto remove;
/* check all threads starting with ours */
for (i = tid;;) {
int max_conn;
int removed;
max_conn = (exceed_conns * srv->curr_idle_thr[i]) /
curr_idle + 1;
HA_SPIN_LOCK(IDLE_CONNS_LOCK, &idle_conns[i].idle_conns_lock);
removed = srv_migrate_conns_to_remove(srv, i, max_conn);
HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[i].idle_conns_lock);
if (removed)
task_wakeup(idle_conns[i].cleanup_task, TASK_WOKEN_OTHER);
if ((i = ((i + 1 == global.nbthread) ? 0 : i + 1)) == tid)
break;
}
remove:
eb32_delete(&srv->idle_node);
if (srv->curr_idle_conns) {
/* There are still more idle connections, add the
* server back in the tree.
*/
srv->idle_node.key = tick_add(srv->pool_purge_delay, now_ms);
eb32_insert(&idle_conn_srv, &srv->idle_node);
next_wakeup = tick_first(next_wakeup, srv->idle_node.key);
}
}
HA_SPIN_UNLOCK(OTHER_LOCK, &idle_conn_srv_lock);
task->expire = next_wakeup;
return task;
}
/* Closes all connections still stored in <srv>'s per-thread trees (idle,
 * safe and available). This function is designed to be run on process
 * shutdown, where it guarantees a proper socket shutdown to avoid TIME_WAIT
 * state. For a quick operation, only ctrl is closed, xprt stack is bypassed.
 *
 * This function is not thread-safe so it must only be called via a global
 * deinit function.
 */
static void srv_close_idle_conns(struct server *srv)
{
	int thr;

	for (thr = 0; thr < global.nbthread; ++thr) {
		struct ceb_root **trees[] = {
			&srv->per_thr[thr].idle_conns,
			&srv->per_thr[thr].safe_conns,
			&srv->per_thr[thr].avail_conns,
		};
		size_t t;

		for (t = 0; t < sizeof(trees) / sizeof(trees[0]); t++) {
			struct connection *conn;

			/* drain the tree, always taking its first entry */
			for (;;) {
				conn = ceb64_item_first(trees[t], hash_node.node,
				                        hash_node.key, struct connection);
				if (!conn)
					break;

				if (conn->ctrl->ctrl_close)
					conn->ctrl->ctrl_close(conn);
				conn_delete_from_tree(conn, thr);
			}
		}
	}
}
REGISTER_SERVER_DEINIT(srv_close_idle_conns);
/* Config parser for the global keyword "tune.idle-pool.shared". The single
 * argument must be "on" or "off", which respectively sets or clears
 * GTUNE_IDLE_POOL_SHARED in the global tune options. Returns 0 on success,
 * or -1 on error, with <err> filled when the value is not recognized.
 */
static int cfg_parse_idle_pool_shared(char **args, int section_type, struct proxy *curpx,
                                      const struct proxy *defpx, const char *file, int line,
                                      char **err)
{
	int enable;

	if (too_many_args(1, args, err, NULL))
		return -1;

	if (strcmp(args[1], "on") == 0) {
		enable = 1;
	}
	else if (strcmp(args[1], "off") == 0) {
		enable = 0;
	}
	else {
		memprintf(err, "'%s' expects either 'on' or 'off' but got '%s'.", args[0], args[1]);
		return -1;
	}

	if (enable)
		global.tune.options |= GTUNE_IDLE_POOL_SHARED;
	else
		global.tune.options &= ~GTUNE_IDLE_POOL_SHARED;

	return 0;
}
/* Config parser for global "tune.pool-{low,high}-fd-ratio". The single
 * argument must be a decimal integer between 0 and 100, which is stored in
 * global.tune.pool_high_ratio for "tune.pool-high-fd-ratio" and in
 * global.tune.pool_low_ratio otherwise. Returns 0 on success, or -1 on
 * error, with <err> filled when the value is not valid.
 *
 * The value is parsed with strtol() and must be made only of a number: a
 * missing value, a non-numeric one or one followed by trailing characters
 * is rejected instead of being silently turned into a number.
 */
static int cfg_parse_pool_fd_ratio(char **args, int section_type, struct proxy *curpx,
                                   const struct proxy *defpx, const char *file, int line,
                                   char **err)
{
	char *end;
	long ratio;

	if (too_many_args(1, args, err, NULL))
		return -1;

	/* an out-of-range input makes strtol() return LONG_MIN/LONG_MAX,
	 * which the range check below rejects as well.
	 */
	ratio = strtol(args[1], &end, 10);
	if (end == args[1] || *end != '\0' || ratio < 0 || ratio > 100) {
		memprintf(err, "'%s' expects an integer argument between 0 and 100.", args[0]);
		return -1;
	}

	if (strcmp(args[0], "tune.pool-high-fd-ratio") == 0)
		global.tune.pool_high_ratio = (int)ratio;
	else
		global.tune.pool_low_ratio = (int)ratio;

	return 0;
}
/* Config keyword parsers: keywords of the global section handled by this
 * file, each one associated with its parsing function. Both pool fd-ratio
 * keywords share cfg_parse_pool_fd_ratio(), which tells them apart from the
 * keyword name it receives in args[0]. The last all-zero entry is
 * presumably the list terminator.
 */
static struct cfg_kw_list cfg_kws = {ILH, {
{ CFG_GLOBAL, "tune.idle-pool.shared", cfg_parse_idle_pool_shared },
{ CFG_GLOBAL, "tune.pool-high-fd-ratio", cfg_parse_pool_fd_ratio },
{ CFG_GLOBAL, "tune.pool-low-fd-ratio", cfg_parse_pool_fd_ratio },
{ 0, NULL, NULL }
}};
/* have cfg_register_keywords() called on the list above at the STG_REGISTER
 * init stage.
 */
INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws);
/*
* Local variables:
* c-indent-level: 8
* c-basic-offset: 8
* End:
*/